lucene(1)Reading related books

Lucene (1) Reading related books

1. Introduction to Lucene

1.1. Field Type

Keyword: not analyzed, but indexed and stored in the index files. For example: URL, file directory, date, user name, user number, mobile phone number.

UnIndexed: not indexed and not analyzed, but stored in the index files. You only need this kind of field for values that are shown together with your content. For example: URL, primary key of the database.

UnStored: indexed and analyzed, but not stored in the index files. For example: the content of an HTML page or a text document.

Text: analyzed and indexed. If the field value is a String, it will be stored; if it is a Reader, it will not be stored.

1.2. Update

It seems there is no update method for index files; you can only delete the old document and add the new one.

1.3. Document and Field boost

Boost on document

// Index one e-mail and boost the whole document depending on the sender's domain.
doc.add(Field.Keyword("senderEmail", senderEmail));
doc.add(Field.Text("senderName", senderName));
doc.add(Field.Text("subject", subject));
doc.add(Field.UnStored("body", body));

if (getSenderDomain().endsWithIgnoreCase(COMPANY_DOMAIN)) {
    // setBoost takes a float, so the literal needs the F suffix.
    doc.setBoost(1.5F);
} else if (getSenderDomain().endsWithIgnoreCase(BAD_DOMAIN)) {
    doc.setBoost(0.1F);
}

writer.addDocument(doc);

Boost on field

FieldsenderNameField=Field.Text(“senderName”,senderName);

FieldsubjectField=Field.Text(“subject”,subject);

subjectField.setBoost(1.2);

2. Learn from my old projects easyview and easySearch

2.1. Build easyview

Clone and build the memcached package and upload it to my local Nexus repository.

> git clone https://github.com/gwhalin/Memcached-Java-Client.git

> ant

Sometimes, my local Nexus will show this kind of message:

:8081/nexus/content/groups/public was cached in the local repository, resolution

will not be reattempted until the update interval of repo has elapsed or

updates are forced -> [Help 1]

Delete the .cache directory in the repository, delete the paoding directory in the repository, and make Maven download the POM again.

2.2. Build easySearch

3. Study the latest Lucene and integrate it with easyview and easysearch

ANALYZED vs. ANALYZED_NO_NORMS

Norms are created for quick scoring of documents at query time. These norms are usually all loaded into memory so that when you run a query analyzer over an index it can quickly score the search results.

No norms means that index-time field and document boosting and field length normalization are disabled. The benefit is less memory usage, as norms take up one byte of RAM per indexed field for every document in the index during searching.

Most of the classes are the same as in easyview. Some changes are in LuceneServiceImpl.java:

packagecom.sillycat.easyhunter.plugin.lucene;

importjava.io.File;

importjava.io.IOException;

importjava.util.ArrayList;

importjava.util.Iterator;

importjava.util.List;

importorg.apache.commons.logging.Log;

importorg.apache.commons.logging.LogFactory;

importorg.apache.lucene.analysis.Analyzer;

importorg.apache.lucene.analysis.cjk.CJKAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.index.CorruptIndexException;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.index.IndexWriterConfig;

importorg.apache.lucene.index.IndexWriterConfig.OpenMode;

importorg.apache.lucene.index.Term;

importorg.apache.lucene.queryParser.MultiFieldQueryParser;

importorg.apache.lucene.queryParser.ParseException;

importorg.apache.lucene.queryParser.QueryParser;

importorg.apache.lucene.search.IndexSearcher;

importorg.apache.lucene.search.Query;

importorg.apache.lucene.search.ScoreDoc;

importorg.apache.lucene.search.TopDocs;

importorg.apache.lucene.store.Directory;

importorg.apache.lucene.store.FSDirectory;

importorg.apache.lucene.util.Version;

importcom.sillycat.easyhunter.common.StringUtil;

publicclassLuceneServiceImplimplementsLuceneService{

protectedfinalLoglog=LogFactory.getLog(getClass());

privateAnalyzeranalyzer=newCJKAnalyzer(Version.LUCENE_36);

//defaultindexfilepath

privatestaticfinalStringINDEX_PATH="D:\\lucene\\index";

privateStringindexPath;

publicList<Document>search(String[]keys,Stringsearch,booleanisMore){

IndexSearchersearcher=null;

IndexReaderreader=null;

ScoreDoc[]hits=null;

Directorydir=null;

List<Document>documents=null;

Queryquery=null;

try{

dir=FSDirectory.open(newFile(this.getIndexPath()));

reader=IndexReader.open(dir);

searcher=newIndexSearcher(reader);

MultiFieldQueryParserqueryParser=newMultiFieldQueryParser(

Version.LUCENE_36,keys,analyzer);

queryParser.setDefaultOperator(QueryParser.Operator.OR);

query=queryParser.parse(search);

}catch(IOExceptione){

e.printStackTrace();

}catch(ParseExceptione){

e.printStackTrace();

}

TopDocsresults=null;

intnumTotalHits=0;

//5pagesfirst

try{

results=searcher.search(query,5*10);

hits=results.scoreDocs;

numTotalHits=results.totalHits;

if(isMore&&numTotalHits>0){

//totalpages

hits=searcher.search(query,numTotalHits).scoreDocs;

}

}catch(IOExceptione){

e.printStackTrace();

}

if(hits!=null&&hits.length>0){

documents=newArrayList<Document>(hits.length);

}

for(inti=0;i<hits.length;i++){

try{

Documentdoc=searcher.doc(hits[i].doc);

documents.add(doc);

}catch(CorruptIndexExceptione){

e.printStackTrace();

}catch(IOExceptione){

e.printStackTrace();

}

}

try{

searcher.close();

reader.close();

}catch(IOExceptione){

e.printStackTrace();

}

returndocuments;

}

/**

*搜索

*

*@paramkey

*要搜索的KEY,比如找context字段context

*@paramsearch

*要搜索的内容,比如找context中出现了我爱你

*@parammemory

*true内存的索引,false配置的路径的索引

*/

publicList<Document>search(Stringkey,Stringsearch,booleanisMore){

IndexSearchersearcher=null;

IndexReaderreader=null;

ScoreDoc[]hits=null;

Directorydir=null;

List<Document>documents=null;

Queryquery=null;

try{

dir=FSDirectory.open(newFile(this.getIndexPath()));

reader=IndexReader.open(dir);

searcher=newIndexSearcher(reader);

QueryParserparser=newQueryParser(Version.LUCENE_36,key,

analyzer);

query=parser.parse(search);

}catch(IOExceptione){

e.printStackTrace();

}catch(ParseExceptione){

e.printStackTrace();

}

TopDocsresults=null;

intnumTotalHits=0;

//5pagesfirst

try{

results=searcher.search(query,5*10);

hits=results.scoreDocs;

numTotalHits=results.totalHits;

if(isMore&&numTotalHits>0){

//totalpages

hits=searcher.search(query,numTotalHits).scoreDocs;

}

}catch(IOExceptione){

e.printStackTrace();

}

if(hits!=null&&hits.length>0){

documents=newArrayList<Document>(hits.length);

}

for(inti=0;i<hits.length;i++){

try{

Documentdoc=searcher.doc(hits[i].doc);

documents.add(doc);

}catch(CorruptIndexExceptione){

e.printStackTrace();

}catch(IOExceptione){

e.printStackTrace();

}

}

try{

searcher.close();

reader.close();

}catch(IOExceptione){

e.printStackTrace();

}

returndocuments;

}

/**

*建立索引

*

*@paramlist

*要建立索引的list

*@parammemory

*true内存中建立索引,false配置的路径上存放索引

*/

publicvoidbuildIndex(List<LuceneObject>list,booleanisCreat){

Directorydir=null;

IndexWriterwriter=null;

try{

dir=FSDirectory.open(newFile(this.getIndexPath()));

IndexWriterConfigiwc=newIndexWriterConfig(Version.LUCENE_36,

analyzer);

if(isCreat){

//Createanewindexinthedirectory,removingany

//previouslyindexeddocuments:

iwc.setOpenMode(OpenMode.CREATE);

}else{

//Addnewdocumentstoanexistingindex:

iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

}

writer=newIndexWriter(dir,iwc);

}catch(IOExceptione){

e.printStackTrace();

}

Iterator<LuceneObject>iterator=list.iterator();

Documentdoc=null;

LuceneObjectbo=null;

try{

while(iterator.hasNext()){

bo=(LuceneObject)iterator.next();

doc=bo.buildindex();

if(writer.getConfig().getOpenMode()==OpenMode.CREATE){

writer.addDocument(doc);

}else{

Termterm=newTerm("id",doc.get("id"));

writer.updateDocument(term,doc);

}

}

}catch(CorruptIndexExceptione){

e.printStackTrace();

}catch(IOExceptione){

e.printStackTrace();

}finally{

try{

writer.close();

}catch(CorruptIndexExceptione){

e.printStackTrace();

}catch(IOExceptione){

e.printStackTrace();

}

}

}

publicStringgetIndexPath(){

if(StringUtil.isBlank(indexPath)){

indexPath=INDEX_PATH;

}

returnindexPath;

}

publicvoidsetIndexPath(StringindexPath){

this.indexPath=indexPath;

}

}

The test case is as follows:

packagecom.sillycat.easyhunter.model;

importstaticorg.junit.Assert.assertEquals;

importjava.util.ArrayList;

importjava.util.Date;

importjava.util.List;

importjunit.framework.Assert;

importorg.apache.lucene.document.Document;

importorg.junit.Test;

importorg.junit.runner.RunWith;

importorg.springframework.beans.factory.annotation.Autowired;

importorg.springframework.beans.factory.annotation.Qualifier;

importorg.springframework.test.context.ContextConfiguration;

importorg.springframework.test.context.junit4.SpringJUnit4ClassRunner;

importcom.sillycat.easyhunter.plugin.lucene.LuceneObject;

importcom.sillycat.easyhunter.plugin.lucene.LuceneService;

@RunWith(SpringJUnit4ClassRunner.class)

@ContextConfiguration(locations={"file:src/test/resources/test-context.xml"})

publicclassArticleLuceneServiceTest{

@Autowired

@Qualifier("articleLuceneService")

privateLuceneServicearticleLuceneService;

@Test

publicvoiddumy(){

Assert.assertTrue(true);

}

@Test

publicvoidsearch()throwsException{

List<LuceneObject>list=newArrayList<LuceneObject>();

Articlea1=newArticle();

a1.setAuthor("罗华");

a1.setContent("罗华用中文写的一篇文章,发布在网站上。");

a1.setGmtCreate(newDate());

a1.setId("1");

a1.setTitle("中文的技术BLOG");

Articlea2=newArticle();

a2.setAuthor("康怡怡");

a2.setContent("罗华用中文语言写的一篇文章,发布在网页上。");

a2.setGmtCreate(newDate());

a2.setId("2");

a2.setTitle("英文的BLOG");

a2.setWebsiteURL("http://hi.baidu.com/luohuazju");

list.add(a1);

list.add(a2);

articleLuceneService.buildIndex(list,true);

List<Document>results=articleLuceneService.search("content","网页",

true);

Assert.assertNotNull(results);

assertEquals(1,results.size());

Documentdoc=results.get(0);

assertEquals("康怡怡",doc.get("author"));

assertEquals("2",doc.get("id"));

assertEquals("英文的BLOG",doc.get("title"));

results=articleLuceneService.search("content","中文",true);

Assert.assertNotNull(results);

assertEquals(2,results.size());

results=articleLuceneService.search(newString[]{"title","content","author"},"技术",true);

Assert.assertNotNull(results);

assertEquals(1,results.size());

results=articleLuceneService.search(newString[]{"title","content","author"},"康怡怡",true);

Assert.assertNotNull(results);

assertEquals(1,results.size());

}

}

references:

http://www.slideshare.net/wangscu/jessica-2

http://lucene.apache.org/

http://lucene.apache.org/core/

http://lucene.apache.org/core/3_6_0/index.html

http://lucene.apache.org/core/3_6_0/gettingstarted.html

相关推荐