// Lucene 2.4 example (Lucene 2.4的实例)
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

import com.ole.factory.BeanFactory;
public class TestLucene {
//索引目录
publicstaticfinalStringINDEX_DIR=System.getProperty("user.dir")+
"/index_dir";
publicstaticfinalStringLUCENE_DATA=System.getProperty("user.dir")+
"/lucene_data";
privateString[]columnNameArr={"id","ordercode","incompanyname","outcompanyname",
"buydate","saledate","buygoodsnum","salegoodsnum","buyprice","saleprice",
"trucknum","empcode1","empcode2","losskg","goodscode","orderemp",
"orderdate","orderstate","batchno_out","batchno_in","ys","ss","ysye",
"yf","sf","yfye","carry_in","carry_out"};
/**
*获取数据库数据
*/
@SuppressWarnings("unchecked")
publicList<Map<String,Object>>queryOrderIO(){
BeanFactorybeanFac=BeanFactory.getInstance();
IOrderIOServiceorderService=(IOrderIOService)beanFac.
getApplicationnContext().getBean("orderIOService");
return(List<Map<String,Object>>)orderService.queryOrderIO();
}
/**
*创建索引
*/
publicvoidcreateIndex(){
synchronized(INDEX_DIR){
List<Map<String,Object>>resultList=queryOrderIO();
Datestart=newDate();
Analyzeranalyzer=newStandardAnalyzer();
try{
IndexWriterwriter=newIndexWriter(INDEX_DIR,analyzer,
true,MaxFieldLength.UNLIMITED);
/***********************/
for(Map<String,Object>rowItem:resultList){
Documentdoc=newDocument();
//Set<String>columns=rowItem.keySet();
for(StringcolumnItem:columnNameArr){
Fieldfieldvalue=newField(columnItem,
rowItem.get(columnItem)!=null?rowItem.get(columnItem).toString():"",
Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.WITH_POSITIONS_OFFSETS);//Field.TermVector.NO暂不需要分词
doc.add(field_value);
}
writer.addDocument(doc);
}
writer.optimize();
writer.close();
Dateend=newDate();
longindexTime=end.getTime()-start.getTime();
System.out.println("索引完成所需时间:(ms)");
System.out.println(indexTime);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
System.out.println("创建索引完成!");
}
}
/**
*跟据关键字查询
*/
publicvoidsearchIndex(StringqueryString,StringcolumnName){
try{
IndexSearcherisearcher=newIndexSearcher(INDEX_DIR);
//BooleanClause.Occur[]clauses={BooleanClause.Occur.SHOULD};
//TopDocCollectorcollector=newTopDocCollector(10000);
//QueryParserparser=newQueryParser(INDEX_DIR,newStandardAnalyzer());
//Queryquery=parser.parse(queryString);
Queryquery=newQueryParser(columnName,newStandardAnalyzer()).parse(queryString);
//MultiFieldQueryParser.parse(queryString,
//columnName,newStandardAnalyzer());
//isearcher.search(query,collector);
ScoreDoc[]hits=isearcher.search(query,isearcher.maxDoc()).scoreDocs;
System.out.println("hits.length="+hits.length);
for(inti=0;i<hits.length;i++){
Documentdoc=isearcher.doc(hits[i].doc);
for(Stringcolumn:columnNameArr){
System.out.println(column+"="+doc.get(column));
}
System.out.println("=========================");
}
isearcher.close();
}catch(Exceptione){
e.printStackTrace();
}
}
///**
//*庖丁解牛分析器
//*/
//publicsynchronizedAnalyzergetAnalyzer(){
//returnnewPaodingAnalyzer();
// }publicstaticvoidmain(String[]args){
TestLucenetestLucene=newTestLucene();
//testLucene.createIndex();
testLucene.searchIndex("2009-08-01","buydate");
//testLucene.searchIndex("ordercode","S200908040062",
//newStandardAnalyzer());
}
}
/*
*索引域:Field是Document对象的基本组成单位,每个Field存储了实际的所有文本数据,这些文本数据在内部调用了分析器Aanlyzer的索引项结果
*Field内的检索查询最终以索引项为单位的.比索引更小的单位无法检索到
*中文的索引项一中文分词的结果为检索单元,英文的索引项是以单词为检索单元,检索单元为最小的检索单位
*1.publicField(Stringname,byte[]value,Storestore)
*2.publicField(Stringname,byte[]value,intoffset,intlength,Storestore)
*3.publicField(Stringname,Stringvalue,Storestore,Indexindex)
*4.publicField(Stringname,Stringvalue,Storestore,Indexindex,TermVectortermVector)
*5.publicField(Stringname,Readerreader)
*6.publicField(Stringname,Readerreader,TermVectortermVector)
*7.publicField(Stringname,TokenStreamtokenStream)
*8.publicField(Stringname,TokenStreamtokenStream,TermVectortermVector)
*第1,2个函数用于二进制数据索引;3,4用于直接给定字符串的索引,5,6用于文件内容的索引,即Reader数据流(常用)
*name-域名为固定的参数,用于指定添加域的标识,限定检索的范围或提取属性值
*value-
*Store-控制域数据的存储,表示数据本身是否存储(注意:并不是指索引是否存储)
*1.Store.NO只保存索引,不包含数据本身信息,减少空间采用
*2.Store.YES保存索引并保存数据原始信息
*3.Store.COMPRESS存储并压缩原始信息
*Index-控制索引格式的参数,表示数据是否需要索引,即当前域是否需要用于检索
*1.Index.NO不索引,只作为辅助信息
*2.Index.ANALYZED 2.4版本替换6
*3.Index.NOT_ANALYZED 2.4版本替换7
*4.Index.ANALYZED_NO_NORMS
*5.Index.NOT_ANALYZED_NO_NORMS
*6.Index.TOKENIZED使用分词器并建立索引
*7.Index.UN_TOKENIZED不分词建立索引(某些内容的整体作为索引)
*8.Index.NO_NORMS禁用分析器处理
*TermVector-域内信息是否需要分词,在中文处理中分词是索引的基础
*TermVector保存Token.getPositionIncrement()和Token.startOffset()以及Token.endOffset()信息
*1.Field.TermVector.NO:不保存termvectors
*2.Field.TermVector.YES:保存termvectors
*3.Field.TermVector.WITH_POSITIONS:保存termvectors.(保存值和token位置信息)
*4.Field.TermVector.WITH_OFFSETS:保存termvectors.(保存值和Token的offset)
*5.Field.TermVector.WITH_POSITIONS_OFFSETS:保存termvectors.(保存值和token位置信息和Token的offset)
*/