首页 > 其他分享 >lucene入门实例三 (index索引)

lucene入门实例三 (index索引)

时间:2023-04-20 16:06:44浏览次数:30  
标签:index 索引 assertEquals writer lucene Field import new


copy《lucene in action》的一个索引的例子:

 

 

package com.s.lucene.LIA.index;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class IndexingTest extends TestCase {
	protected String[] ids = { "1", "2" };
	protected String[] unindexed = { "Netherlands", "Italy" };
	protected String[] unstored = { "Amsterdam has lots of bridges",
			"Venice has lots of canals" };

	protected String[] text = { "Amsterdam", "Venice" };

	private Directory directory;

	protected void setUp() throws Exception {
		directory = new RAMDirectory();

		IndexWriter writer = getWriter();
		//Field.Store.YES 可以retrieved找到值
		//Field.Index.ANALYZED 保存为index,可以被检索到
		for (int i = 0; i < ids.length; i++) {
			Document doc = new Document();
			doc.add(new Field("id", ids[i], Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			doc.add(new Field("country", unindexed[i], Field.Store.YES,
					Field.Index.NO));
			doc.add(new Field("contents", unstored[i], Field.Store.NO,
					Field.Index.ANALYZED));
			doc.add(new Field("city", text[i],
					Field.Store.YES,
					Field.Index.ANALYZED));
			writer.addDocument(doc);
		}

		writer.close();
	}

	private IndexWriter getWriter() throws Exception {
		return new IndexWriter(directory, new WhitespaceAnalyzer(),
				IndexWriter.MaxFieldLength.UNLIMITED);
	}

	protected int getHitCount(String fieldName, String searchString)
			throws IOException {
		IndexSearcher search = new IndexSearcher(directory);
		Term t = new Term(fieldName, searchString);
		Query query = new TermQuery(t);
		int hitcount = search.search(query, 1).totalHits;
		search.close();
		return hitcount;
	}

	public void testIndexWriter() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(ids.length, writer.numDocs());
		writer.close();
	}

	public void testIndexReader() throws IOException {
		IndexReader reader = IndexReader.open(directory);
		assertEquals(ids.length, reader.maxDoc());
		assertEquals(ids.length, reader.numDocs());
		reader.close();
	}

	public void testDeleteBeforeOptimize() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(2, writer.numDocs());
		writer.deleteDocuments(new Term("id", "1"));
		writer.commit();
		assertTrue(writer.hasDeletions());
		assertEquals(2, writer.maxDoc());
		assertEquals(1, writer.numDocs());
		writer.close();
	}

	public void testDeleteAfterOptimize() throws Exception {
		IndexWriter writer = getWriter();
		assertEquals(2, writer.numDocs());
		writer.deleteDocuments(new Term("id", "1"));
		writer.optimize();
		writer.commit();
		assertFalse(writer.hasDeletions());
		assertEquals(1, writer.maxDoc());
		assertEquals(1, writer.numDocs());
		writer.close();
	}

	public void testUpdate() throws Exception {
		assertEquals(1, getHitCount("city", "Amsterdam"));
		IndexWriter writer = getWriter();
		Document doc = new Document();
		doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
		doc.add(new Field("country", "Netherlands", Field.Store.YES,
				Field.Index.NO));
		doc.add(new Field("contents", "Den Haag has a lot of museums",
				Field.Store.NO, Field.Index.ANALYZED));
		doc.add(new Field("city", "Den Haag", Field.Store.YES,
				Field.Index.ANALYZED));
		writer.updateDocument(new Term("id", "1"), doc);
		writer.close();
		assertEquals(0, getHitCount("city", "Amsterdam"));
		assertEquals(1, getHitCount("city", "Den Haag"));//有空格,无法找到TODO
	}

}

标签:index,索引,assertEquals,writer,lucene,Field,import,new
From: https://blog.51cto.com/u_2465818/6209754

相关文章

  • lucene入门实例二(检索)
    copy一个luceneinaction的例子 packagecom.s.lucene;importjava.io.File;importjava.io.IOException;importorg.apache.lucene.analysis.standard.StandardAnalyzer;importorg.apache.lucene.document.Document;importorg.apache.lucene.queryParser.ParseExcepti......
  • lucene入门实例一(写索引)
    copy一个luceneinaction的入门实例代码:   importjava.io.File;importjava.io.FileFilter;importjava.io.FileReader;importjava.io.IOException;importorg.apache.lucene.analysis.standard.StandardAnalyzer;importorg.apache.lucene.document.Document;impor......
  • jdbc 报错 - 索引中丢失 IN 或 OUT 参数:
    jdbc报错-索引中丢失 IN或OUT参数:通常产生这种异常,是因为语句参数类型不一致所导致,如preparedStatement中的参数本应该是int/integer类型,但是设置参数是setString(1,String.valueof(xxx));或是现在流行的hibernate和ibatis的参数类型配置有问题,Integer配置为varchar2了。......
  • 5 04 | 深入浅出索引(上)
    提到数据库索引,我想你并不陌生,在日常工作中会经常接触到。比如某一个SQL查询比较慢,分析完原因之后,你可能就会说“给某个字段加个索引吧”之类的解决方案。但到底什么是索引,索引又是如何工作的呢?今天就让我们一起来聊聊这个话题吧。数据库索引的内容比较多,我分成了上下两篇文章。......
  • 深度学习--PyTorch定义Tensor以及索引和切片
    深度学习--PyTorch定义Tensor一、创建Tensor1.1未初始化的方法​ 这些方法只是开辟了空间,所附的初始值(非常大,非常小,0),后面还需要我们进行数据的存入。torch.empty():返回一个没有初始化的Tensor,默认是FloatTensor类型。#torch.empty(d1,d2,d3)函数输入的是shapetorch.empty......
  • 6 05 | 深入浅出索引(下)
    在上一篇文章中,我和你介绍了InnoDB索引的数据结构模型,今天我们再继续聊聊跟MySQL索引有关的概念。在开始这篇文章之前,我们先来看一下这个问题:在下面这个表T中,如果我执行select*fromTwherekbetween3and5,需要执行几次树的搜索操作,会扫描多少行?下面是这个表的初始化语句......
  • linq的妙用 分组 交换索引
    //////Splitsacollectionofobjectsintonpageswithan(forexample,ifIhavealistof45shoesandsay'shoes.Split(5)'Iwillnowhave4pagesof10shoesand1pageof5shoes.//////Thetypeofobjectthecollectionshouldcontain.......
  • 搜索引擎基础语法
     搜索语法大全1.intitle搜索范围限定在网页标题上面网页标题通常是对网页内容提纲挈领式的归纳。把查询内容范围限定在网页标题中,有时能获得意想不到的结果语法结构:内容+空格intitle:你要查找的信息(此信息会被限定在网页标题内)例如:web学习intitle:安全注意:intitle:和后......
  • 提高mysql千万级大数据SQL查询优化30条经验(Mysql索引优化注意)
    对查询进行优化,应尽量避免全表扫描,首先应考虑在where及orderby涉及的列上建立索引。应尽量避免在where子句中对字段进行null值判断,否则将导致引擎放弃使用索引而进行全表扫描,如:selectidfromtwherenumisnull可以在num上设置默认值0,确保表中num列没有null值,然......
  • Pycharm这个更新索引是个什么操作,为什么每次启动,都会进行?
    大家好,我是皮皮。一、前言前几天在Python最强王者交流群【吴超建】问了一个Pycharm操作的问题,这里拿出来给大家分享下。每次打开Pycharm的时候,都会有个更新的操作,遇到项目比较多的时候,挨个去索引更新,确实挺费时的。下图是他自己的部分项目,看上去还是挺多的:二、实现过程这里【6G】提......