Lucene学习

近来项目需要使用Lucene,工作之余上网学习了下相关内容,做个笔记

1.创建索引

步骤:创建IndexWriter

Java代码

IndexWriterwriter=newIndexWriter(

newNIOFSDirectory(newFile(path)),newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

IndexWriterwriter=newIndexWriter(

newNIOFSDirectory(newFile(path)),newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

创建Document

创建Field包含field名和field值

将Field通过Document的add方法添加到Document中

Java代码

Documentdoc=newDocument();

doc.add(newField("text","Itisatextarea",Store.YES,

Index.ANALYZED_NO_NORMS));

doc.add(newField("info","ItisaInfomationarea",Store.YES,

Index.ANALYZED_NO_NORMS));

Documentdoc=newDocument();

doc.add(newField("text","Itisatextarea",Store.YES,

Index.ANALYZED_NO_NORMS));

doc.add(newField("info","ItisaInfomationarea",Store.YES,

Index.ANALYZED_NO_NORMS));

将Document通过IndexWriter的addDocument方法添加到IndexWriter中

关闭IndexWriter

Java代码

// Add the document to the index, then close the writer to release it.
writer.addDocument(doc);
writer.close();

// Add the document to the index, then close the writer to release it.
writer.addDocument(doc);
writer.close();

2.从索引中根据关键字查询

创建IndexSearcher

Java代码

IndexSearchersearcher=newIndexSearcher(newNIOFSDirectory(newFile(

path)));

IndexSearchersearcher=newIndexSearcher(newNIOFSDirectory(newFile(

path)));

创建Query

Java代码

Queryquery=newQueryParser(Version.LUCENE_30,field,

newStandardAnalyzer(Version.LUCENE_30)).parse(keyword);

Queryquery=newQueryParser(Version.LUCENE_30,field,

newStandardAnalyzer(Version.LUCENE_30)).parse(keyword);

通过IndexSearcher的search方法查找关键字,使用TopDocs封装结果集

Java代码

TopDocsdocs=searcher.search(query,10);

TopDocsdocs=searcher.search(query,10);

全部代码:(包含了将内存索引合并到硬盘索引中的操作)

Java代码

importjava.io.File;

importorg.apache.lucene.analysis.standard.StandardAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.document.Field.Index;

importorg.apache.lucene.document.Field.Store;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.index.IndexWriter.MaxFieldLength;

importorg.apache.lucene.queryParser.QueryParser;

importorg.apache.lucene.search.IndexSearcher;

importorg.apache.lucene.search.Query;

importorg.apache.lucene.search.ScoreDoc;

importorg.apache.lucene.search.TopDocs;

importorg.apache.lucene.store.NIOFSDirectory;

importorg.apache.lucene.store.RAMDirectory;

importorg.apache.lucene.util.Version;

publicclassTestLucen{

publicstaticfinalStringpath="E:\\workspaces\\lucene\\index";

publicstaticvoidmain(String[]args)throwsException{

writeIndex();

readIndex("text","area");

}

publicstaticvoidwriteIndex()throwsException{

//硬盘索引

IndexWriterwriter=newIndexWriter(

newNIOFSDirectory(newFile(path)),newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

//Ram索引

RAMDirectoryram=newRAMDirectory();

IndexWriterramwriter=newIndexWriter(ram,newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

Documentdoc=newDocument();

Documentdoc1=newDocument();

doc.add(newField("text","Itisatextarea",Store.YES,

Index.ANALYZED_NO_NORMS));

doc.add(newField("info","ItisaInfomationarea",Store.YES,

Index.ANALYZED_NO_NORMS));

writer.addDocument(doc);

doc1.add(newField("text","itisanotherarea",Store.YES,

Index.ANALYZED));

ramwriter.addDocument(doc1);

ramwriter.optimize();

ramwriter.close();

//将Ram索引合并到硬盘索引上,必须先调用ram的close方法

writer.addIndexes(IndexReader.open(ram));

writer.optimize();

writer.close();

}

publicstaticvoidreadIndex(Stringfield,Stringkeyword)throwsException{

IndexSearchersearcher=newIndexSearcher(newNIOFSDirectory(newFile(

path)));

Queryquery=newQueryParser(Version.LUCENE_30,field,

newStandardAnalyzer(Version.LUCENE_30)).parse(keyword);

TopDocsdocs=searcher.search(query,10);

System.out.println("查找到"+docs.totalHits+"个\n对应的text为:");

ScoreDoc[]doc=docs.scoreDocs;

for(ScoreDocd:doc){

Documentdocu=searcher.doc(d.doc);

System.out.println(docu.get(field));

}

}

}

importjava.io.File;

importorg.apache.lucene.analysis.standard.StandardAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.document.Field.Index;

importorg.apache.lucene.document.Field.Store;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.index.IndexWriter.MaxFieldLength;

importorg.apache.lucene.queryParser.QueryParser;

importorg.apache.lucene.search.IndexSearcher;

importorg.apache.lucene.search.Query;

importorg.apache.lucene.search.ScoreDoc;

importorg.apache.lucene.search.TopDocs;

importorg.apache.lucene.store.NIOFSDirectory;

importorg.apache.lucene.store.RAMDirectory;

importorg.apache.lucene.util.Version;

publicclassTestLucen{

publicstaticfinalStringpath="E:\\workspaces\\lucene\\index";

publicstaticvoidmain(String[]args)throwsException{

writeIndex();

readIndex("text","area");

}

publicstaticvoidwriteIndex()throwsException{

//硬盘索引

IndexWriterwriter=newIndexWriter(

newNIOFSDirectory(newFile(path)),newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

//Ram索引

RAMDirectoryram=newRAMDirectory();

IndexWriterramwriter=newIndexWriter(ram,newStandardAnalyzer(

Version.LUCENE_30),MaxFieldLength.LIMITED);

Documentdoc=newDocument();

Documentdoc1=newDocument();

doc.add(newField("text","Itisatextarea",Store.YES,

Index.ANALYZED_NO_NORMS));

doc.add(newField("info","ItisaInfomationarea",Store.YES,

Index.ANALYZED_NO_NORMS));

writer.addDocument(doc);

doc1.add(newField("text","itisanotherarea",Store.YES,

Index.ANALYZED));

ramwriter.addDocument(doc1);

ramwriter.optimize();

ramwriter.close();

//将Ram索引合并到硬盘索引上,必须先调用ram的close方法

writer.addIndexes(IndexReader.open(ram));

writer.optimize();

writer.close();

}

publicstaticvoidreadIndex(Stringfield,Stringkeyword)throwsException{

IndexSearchersearcher=newIndexSearcher(newNIOFSDirectory(newFile(

path)));

Queryquery=newQueryParser(Version.LUCENE_30,field,

newStandardAnalyzer(Version.LUCENE_30)).parse(keyword);

TopDocsdocs=searcher.search(query,10);

System.out.println("查找到"+docs.totalHits+"个\n对应的text为:");

ScoreDoc[]doc=docs.scoreDocs;

for(ScoreDocd:doc){

Documentdocu=searcher.doc(d.doc);

System.out.println(docu.get(field));

}

}

}

执行结果:

查找到2个

对应的text为:

It is a text area

it is another area

相关推荐