Lucene创建索引入门案例
最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。
项目jar包:
//索引关键类
package com.lucene.index;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.lucene.vo.User;
/**
 * Minimal Lucene 3.6 demo: builds an on-disk index from a small set of
 * hard-coded sample users and runs keyword searches against the analyzed
 * {@code content} field using {@link IKAnalyzer}.
 *
 * <p>Typical usage: construct an instance, call {@link #index()} once to
 * (re)build the index, then call {@link #search(String)} as often as needed.
 */
public class searchIndex {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "H:/lucene/index";

    /** Maximum number of hits fetched by a single search. */
    private static final int MAX_HITS = 10;

    // Sample data: element i of every array describes the same user.
    private final String[] ids = { "1", "2", "3", "4", "5", "6" };
    private final String[] emails = { "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]" };
    private final String[] contents = {
            "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中",
            "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中",
            "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一",
            "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
            "文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询",
            "I like movie and swim" };
    private final String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };

    // File-system directory holding the index (FSDirectory, i.e. on disk, not in memory).
    // Stays null if the directory could not be opened in the constructor.
    private Directory directory = null;

    // IK analyzer, shared by indexing and query parsing so both tokenize text the same way.
    IKAnalyzer analyzer = null;

    /**
     * Creates a demo instance whose index lives at the default location
     * ({@code H:/lucene/index}).
     */
    public searchIndex() {
        this(DEFAULT_INDEX_PATH);
    }

    /**
     * Creates a demo instance whose index lives at the given location.
     *
     * <p>If the directory cannot be opened the {@link IOException} is printed
     * and the instance is left without a directory; {@link #index()} and
     * {@link #search(String)} cannot work on such an instance.
     *
     * @param indexPath file-system path of the index directory
     */
    public searchIndex(String indexPath) {
        // Build the analyzer first so it is initialised even when opening the directory fails.
        analyzer = new IKAnalyzer(true);
        try {
            directory = FSDirectory.open(new File(indexPath));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from the sample data: removes every existing
     * document, then adds one document per sample user.
     *
     * <p>I/O failures are printed rather than thrown. On failure the writer is
     * rolled back, so the partial batch is discarded and the index write lock
     * is released instead of staying held by an abandoned writer.
     */
    public void index() {
        IndexWriter writer = null;
        boolean committed = false;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
            // Start from an empty index so repeated runs do not accumulate duplicates.
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Stored as well as analyzed: search() reads this field back from each hit,
                // and an unstored field would always come back as null.
                doc.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                writer.addDocument(doc);
            }
            // close() commits the pending changes and releases the write lock.
            writer.close();
            committed = true;
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException (IOException subclasses).
            e.printStackTrace();
        } finally {
            if (writer != null && !committed) {
                try {
                    // Discard uncommitted changes and close the writer, releasing the lock.
                    writer.rollback();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code content} field for the given keyword expression and
     * returns at most 10 matching users.
     *
     * <p>Progress banners and timing are written to standard output. I/O and
     * query-syntax failures are printed rather than thrown; in that case the
     * users collected so far (possibly none) are returned.
     *
     * @param keyword query text, parsed with Lucene's {@link QueryParser} syntax
     * @return the matching users; never {@code null}
     */
    public List<User> search(String keyword) {
        long startTime = System.currentTimeMillis();
        System.out.println("*****************检索开始**********************");
        List<User> userList = new ArrayList<User>();
        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            // Parse with the same analyzer that was used at index time.
            QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            Query query = queryParser.parse(keyword);
            TopDocs rs = searcher.search(query, null, MAX_HITS);
            long endTime = System.currentTimeMillis();
            System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");
            for (int i = 0; i < rs.scoreDocs.length; i++) {
                // scoreDocs[i].doc is the index-internal document number of the hit.
                userList.add(toUser(searcher.doc(rs.scoreDocs[i].doc)));
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException (an IOException subclass).
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Always release the reader, including when parsing or searching failed.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        System.out.println("*****************检索结束**********************");
        return userList;
    }

    /** Copies the stored fields of one hit document into a new {@link User}. */
    private User toUser(Document hit) {
        User user = new User();
        user.setId(Long.parseLong(hit.get("id")));
        user.setName(hit.get("name"));
        // NOTE(review): index() adds no "sex" or "dosometing" field, so these two are always null.
        user.setSex(hit.get("sex"));
        user.setDosomething(hit.get("dosometing"));
        user.setEmail(hit.get("email"));
        user.setContent(hit.get("content"));
        return user;
    }
}