lucene(1)Reading related books
lucene(1) Reading related books
1. Introduction to Lucene
1.1. Field Type
Keyword: not analyzed, but indexed and stored in the index files. For example: URL, file directory, date, user name, user number, mobile phone number.
UnIndexed: not indexed and not analyzed, but stored in the index files. You only need this field when you show it together with your content. For example: URL, primary key of the database.
UnStored: indexed and analyzed, but not stored in the index files. For example: the content of an HTML page or of a text document.
Text: analyzed and indexed. If the field value is a String, it will be stored; if it is a Reader, it will not be stored.
1.2. Update
It seems there is no update method for index files; you can only delete and then add.
1.3. Document and Field boost
Boost on document:
// Add the mail fields to the document using the field types described above.
doc.add(Field.Keyword("senderEmail", senderEmail));
doc.add(Field.Text("senderName", senderName));
doc.add(Field.Text("subject", subject));
doc.add(Field.UnStored("body", body));
// Raise the document boost when the sender domain ends with COMPANY_DOMAIN
// and lower it when it ends with BAD_DOMAIN; otherwise the boost is untouched.
// NOTE(review): endsWithIgnoreCase is not a java.lang.String method --
// presumably getSenderDomain() returns a helper type; confirm.
if (getSenderDomain().endsWithIgnoreCase(COMPANY_DOMAIN)) {
    // setBoost takes a float, so the literal needs the 'f' suffix.
    doc.setBoost(1.5f);
} else if (getSenderDomain().endsWithIgnoreCase(BAD_DOMAIN)) {
    doc.setBoost(0.1f);
}
writer.addDocument(doc);
Boost on field:
FieldsenderNameField=Field.Text(“senderName”,senderName);
FieldsubjectField=Field.Text(“subject”,subject);
subjectField.setBoost(1.2);
2. Learn from my old projects easyview and easySearch
2.1. Build easyview
Clone and build the memcached package and upload it to my local Nexus repository:
> git clone https://github.com/gwhalin/Memcached-Java-Client.git
> ant
Sometimes, my local Nexus will show this kind of message:
:8081/nexus/content/groups/public was cached in the local repository, resolution
will not be reattempted until the update interval of repo has elapsed or
updates are forced -> [Help 1]
Delete the .cache directory in the repository, delete the paoding directory in the repository, and make Maven download the POM again.
2.2. Build easySearch
3. Study the latest Lucene and integrate it with easyview and easysearch
ANALYZED vs. ANALYZED_NO_NORMS
Norms are created for quick scoring of documents at query time. These norms are usually all loaded into memory so that when you run a query analyzer over an index it can quickly score the search results.
No norms means that index-time field and document boosting and field length normalization are disabled. The benefit is less memory usage, as norms take up one byte of RAM per indexed field for every document in the index, during searching.
Most of the classes are the same as in easyview. Some changes are in LuceneServiceImpl.java:
packagecom.sillycat.easyhunter.plugin.lucene;
importjava.io.File;
importjava.io.IOException;
importjava.util.ArrayList;
importjava.util.Iterator;
importjava.util.List;
importorg.apache.commons.logging.Log;
importorg.apache.commons.logging.LogFactory;
importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.cjk.CJKAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.index.CorruptIndexException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.IndexWriterConfig;
importorg.apache.lucene.index.IndexWriterConfig.OpenMode;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.queryParser.MultiFieldQueryParser;
importorg.apache.lucene.queryParser.ParseException;
importorg.apache.lucene.queryParser.QueryParser;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.TopDocs;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;
importorg.apache.lucene.util.Version;
importcom.sillycat.easyhunter.common.StringUtil;
publicclassLuceneServiceImplimplementsLuceneService{
protectedfinalLoglog=LogFactory.getLog(getClass());
privateAnalyzeranalyzer=newCJKAnalyzer(Version.LUCENE_36);
//defaultindexfilepath
privatestaticfinalStringINDEX_PATH="D:\\lucene\\index";
privateStringindexPath;
publicList<Document>search(String[]keys,Stringsearch,booleanisMore){
IndexSearchersearcher=null;
IndexReaderreader=null;
ScoreDoc[]hits=null;
Directorydir=null;
List<Document>documents=null;
Queryquery=null;
try{
dir=FSDirectory.open(newFile(this.getIndexPath()));
reader=IndexReader.open(dir);
searcher=newIndexSearcher(reader);
MultiFieldQueryParserqueryParser=newMultiFieldQueryParser(
Version.LUCENE_36,keys,analyzer);
queryParser.setDefaultOperator(QueryParser.Operator.OR);
query=queryParser.parse(search);
}catch(IOExceptione){
e.printStackTrace();
}catch(ParseExceptione){
e.printStackTrace();
}
TopDocsresults=null;
intnumTotalHits=0;
//5pagesfirst
try{
results=searcher.search(query,5*10);
hits=results.scoreDocs;
numTotalHits=results.totalHits;
if(isMore&&numTotalHits>0){
//totalpages
hits=searcher.search(query,numTotalHits).scoreDocs;
}
}catch(IOExceptione){
e.printStackTrace();
}
if(hits!=null&&hits.length>0){
documents=newArrayList<Document>(hits.length);
}
for(inti=0;i<hits.length;i++){
try{
Documentdoc=searcher.doc(hits[i].doc);
documents.add(doc);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
try{
searcher.close();
reader.close();
}catch(IOExceptione){
e.printStackTrace();
}
returndocuments;
}
/**
*搜索
*
*@paramkey
*要搜索的KEY,比如找context字段context
*@paramsearch
*要搜索的内容,比如找context中出现了我爱你
*@parammemory
*true内存的索引,false配置的路径的索引
*/
publicList<Document>search(Stringkey,Stringsearch,booleanisMore){
IndexSearchersearcher=null;
IndexReaderreader=null;
ScoreDoc[]hits=null;
Directorydir=null;
List<Document>documents=null;
Queryquery=null;
try{
dir=FSDirectory.open(newFile(this.getIndexPath()));
reader=IndexReader.open(dir);
searcher=newIndexSearcher(reader);
QueryParserparser=newQueryParser(Version.LUCENE_36,key,
analyzer);
query=parser.parse(search);
}catch(IOExceptione){
e.printStackTrace();
}catch(ParseExceptione){
e.printStackTrace();
}
TopDocsresults=null;
intnumTotalHits=0;
//5pagesfirst
try{
results=searcher.search(query,5*10);
hits=results.scoreDocs;
numTotalHits=results.totalHits;
if(isMore&&numTotalHits>0){
//totalpages
hits=searcher.search(query,numTotalHits).scoreDocs;
}
}catch(IOExceptione){
e.printStackTrace();
}
if(hits!=null&&hits.length>0){
documents=newArrayList<Document>(hits.length);
}
for(inti=0;i<hits.length;i++){
try{
Documentdoc=searcher.doc(hits[i].doc);
documents.add(doc);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
try{
searcher.close();
reader.close();
}catch(IOExceptione){
e.printStackTrace();
}
returndocuments;
}
/**
*建立索引
*
*@paramlist
*要建立索引的list
*@parammemory
*true内存中建立索引,false配置的路径上存放索引
*/
publicvoidbuildIndex(List<LuceneObject>list,booleanisCreat){
Directorydir=null;
IndexWriterwriter=null;
try{
dir=FSDirectory.open(newFile(this.getIndexPath()));
IndexWriterConfigiwc=newIndexWriterConfig(Version.LUCENE_36,
analyzer);
if(isCreat){
//Createanewindexinthedirectory,removingany
//previouslyindexeddocuments:
iwc.setOpenMode(OpenMode.CREATE);
}else{
//Addnewdocumentstoanexistingindex:
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
writer=newIndexWriter(dir,iwc);
}catch(IOExceptione){
e.printStackTrace();
}
Iterator<LuceneObject>iterator=list.iterator();
Documentdoc=null;
LuceneObjectbo=null;
try{
while(iterator.hasNext()){
bo=(LuceneObject)iterator.next();
doc=bo.buildindex();
if(writer.getConfig().getOpenMode()==OpenMode.CREATE){
writer.addDocument(doc);
}else{
Termterm=newTerm("id",doc.get("id"));
writer.updateDocument(term,doc);
}
}
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicStringgetIndexPath(){
if(StringUtil.isBlank(indexPath)){
indexPath=INDEX_PATH;
}
returnindexPath;
}
publicvoidsetIndexPath(StringindexPath){
this.indexPath=indexPath;
}
}
The test case is as follows:
packagecom.sillycat.easyhunter.model;
importstaticorg.junit.Assert.assertEquals;
importjava.util.ArrayList;
importjava.util.Date;
importjava.util.List;
importjunit.framework.Assert;
importorg.apache.lucene.document.Document;
importorg.junit.Test;
importorg.junit.runner.RunWith;
importorg.springframework.beans.factory.annotation.Autowired;
importorg.springframework.beans.factory.annotation.Qualifier;
importorg.springframework.test.context.ContextConfiguration;
importorg.springframework.test.context.junit4.SpringJUnit4ClassRunner;
importcom.sillycat.easyhunter.plugin.lucene.LuceneObject;
importcom.sillycat.easyhunter.plugin.lucene.LuceneService;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations={"file:src/test/resources/test-context.xml"})
publicclassArticleLuceneServiceTest{
@Autowired
@Qualifier("articleLuceneService")
privateLuceneServicearticleLuceneService;
@Test
publicvoiddumy(){
Assert.assertTrue(true);
}
@Test
publicvoidsearch()throwsException{
List<LuceneObject>list=newArrayList<LuceneObject>();
Articlea1=newArticle();
a1.setAuthor("罗华");
a1.setContent("罗华用中文写的一篇文章,发布在网站上。");
a1.setGmtCreate(newDate());
a1.setId("1");
a1.setTitle("中文的技术BLOG");
Articlea2=newArticle();
a2.setAuthor("康怡怡");
a2.setContent("罗华用中文语言写的一篇文章,发布在网页上。");
a2.setGmtCreate(newDate());
a2.setId("2");
a2.setTitle("英文的BLOG");
a2.setWebsiteURL("http://hi.baidu.com/luohuazju");
list.add(a1);
list.add(a2);
articleLuceneService.buildIndex(list,true);
List<Document>results=articleLuceneService.search("content","网页",
true);
Assert.assertNotNull(results);
assertEquals(1,results.size());
Documentdoc=results.get(0);
assertEquals("康怡怡",doc.get("author"));
assertEquals("2",doc.get("id"));
assertEquals("英文的BLOG",doc.get("title"));
results=articleLuceneService.search("content","中文",true);
Assert.assertNotNull(results);
assertEquals(2,results.size());
results=articleLuceneService.search(newString[]{"title","content","author"},"技术",true);
Assert.assertNotNull(results);
assertEquals(1,results.size());
results=articleLuceneService.search(newString[]{"title","content","author"},"康怡怡",true);
Assert.assertNotNull(results);
assertEquals(1,results.size());
}
}
references:
http://www.slideshare.net/wangscu/jessica-2
http://lucene.apache.org/
http://lucene.apache.org/core/
http://lucene.apache.org/core/3_6_0/index.html
http://lucene.apache.org/core/3_6_0/gettingstarted.html