// Lucene 2.4 示例 (Lucene 2.4 example)

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

import com.ole.factory.BeanFactory;

public class TestLucene {

//索引目录

publicstaticfinalStringINDEX_DIR=System.getProperty("user.dir")+

"/index_dir";

publicstaticfinalStringLUCENE_DATA=System.getProperty("user.dir")+

"/lucene_data";

privateString[]columnNameArr={"id","ordercode","incompanyname","outcompanyname",

"buydate","saledate","buygoodsnum","salegoodsnum","buyprice","saleprice",

"trucknum","empcode1","empcode2","losskg","goodscode","orderemp",

"orderdate","orderstate","batchno_out","batchno_in","ys","ss","ysye",

"yf","sf","yfye","carry_in","carry_out"};

/**

*获取数据库数据

*/

@SuppressWarnings("unchecked")

publicList<Map<String,Object>>queryOrderIO(){

BeanFactorybeanFac=BeanFactory.getInstance();

IOrderIOServiceorderService=(IOrderIOService)beanFac.

getApplicationnContext().getBean("orderIOService");

return(List<Map<String,Object>>)orderService.queryOrderIO();

}

/**

*创建索引

*/

publicvoidcreateIndex(){

synchronized(INDEX_DIR){

List<Map<String,Object>>resultList=queryOrderIO();

Datestart=newDate();

Analyzeranalyzer=newStandardAnalyzer();

try{

IndexWriterwriter=newIndexWriter(INDEX_DIR,analyzer,

true,MaxFieldLength.UNLIMITED);

/***********************/

for(Map<String,Object>rowItem:resultList){

Documentdoc=newDocument();

//Set<String>columns=rowItem.keySet();

for(StringcolumnItem:columnNameArr){

Fieldfieldvalue=newField(columnItem,

rowItem.get(columnItem)!=null?rowItem.get(columnItem).toString():"",

Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.WITH_POSITIONS_OFFSETS);//Field.TermVector.NO暂不需要分词

doc.add(field_value);

}

writer.addDocument(doc);

}

writer.optimize();

writer.close();

Dateend=newDate();

longindexTime=end.getTime()-start.getTime();

System.out.println("索引完成所需时间:(ms)");

System.out.println(indexTime);

}catch(CorruptIndexExceptione){

e.printStackTrace();

}catch(LockObtainFailedExceptione){

e.printStackTrace();

}catch(IOExceptione){

e.printStackTrace();

}

System.out.println("创建索引完成!");

}

}

/**

*跟据关键字查询

*/

publicvoidsearchIndex(StringqueryString,StringcolumnName){

try{

IndexSearcherisearcher=newIndexSearcher(INDEX_DIR);

//BooleanClause.Occur[]clauses={BooleanClause.Occur.SHOULD};

//TopDocCollectorcollector=newTopDocCollector(10000);

//QueryParserparser=newQueryParser(INDEX_DIR,newStandardAnalyzer());

//Queryquery=parser.parse(queryString);

Queryquery=newQueryParser(columnName,newStandardAnalyzer()).parse(queryString);

//MultiFieldQueryParser.parse(queryString,

//columnName,newStandardAnalyzer());

//isearcher.search(query,collector);

ScoreDoc[]hits=isearcher.search(query,isearcher.maxDoc()).scoreDocs;

System.out.println("hits.length="+hits.length);

for(inti=0;i<hits.length;i++){

Documentdoc=isearcher.doc(hits[i].doc);

for(Stringcolumn:columnNameArr){

System.out.println(column+"="+doc.get(column));

}

System.out.println("=========================");

}

isearcher.close();

}catch(Exceptione){

e.printStackTrace();

}

}

///**

//*庖丁解牛分析器

//*/

//publicsynchronizedAnalyzergetAnalyzer(){

//returnnewPaodingAnalyzer();

// }

publicstaticvoidmain(String[]args){

TestLucenetestLucene=newTestLucene();

//testLucene.createIndex();

testLucene.searchIndex("2009-08-01","buydate");

//testLucene.searchIndex("ordercode","S200908040062",

//newStandardAnalyzer());

}

}

/*
 * 索引域: Field 是 Document 对象的基本组成单位, 每个 Field 存储了实际的所有文本数据,
 * 这些文本数据在内部调用了分析器 Analyzer 生成索引项结果
 * Field 内的检索查询最终以索引项为单位, 比索引项更小的单位无法检索到
 * 中文的索引项以中文分词的结果为检索单元, 英文的索引项以单词为检索单元, 检索单元为最小的检索单位
 * 1. public Field(String name, byte[] value, Store store)
 * 2. public Field(String name, byte[] value, int offset, int length, Store store)
 * 3. public Field(String name, String value, Store store, Index index)
 * 4. public Field(String name, String value, Store store, Index index, TermVector termVector)
 * 5. public Field(String name, Reader reader)
 * 6. public Field(String name, Reader reader, TermVector termVector)
 * 7. public Field(String name, TokenStream tokenStream)
 * 8. public Field(String name, TokenStream tokenStream, TermVector termVector)
 * 第1,2个构造函数用于二进制数据索引; 3,4用于直接给定字符串的索引; 5,6用于文件内容的索引, 即 Reader 数据流(常用)
 * name - 域名为固定的参数, 用于指定添加域的标识, 限定检索的范围或提取属性值
 * value - 域的数据内容
 * Store - 控制域数据的存储, 表示数据本身是否存储(注意: 并不是指索引是否存储)
 * 1. Store.NO 只保存索引, 不包含数据本身信息, 减少空间占用
 * 2. Store.YES 保存索引并保存数据原始信息
 * 3. Store.COMPRESS 存储并压缩原始信息
 * Index - 控制索引格式的参数, 表示数据是否需要索引, 即当前域是否需要用于检索
 * 1. Index.NO 不索引, 只作为辅助信息
 * 2. Index.ANALYZED 2.4版本中替换6
 * 3. Index.NOT_ANALYZED 2.4版本中替换7
 * 4. Index.ANALYZED_NO_NORMS
 * 5. Index.NOT_ANALYZED_NO_NORMS
 * 6. Index.TOKENIZED 使用分词器并建立索引 (已过时)
 * 7. Index.UN_TOKENIZED 不分词建立索引(某些内容的整体作为索引) (已过时)
 * 8. Index.NO_NORMS 禁用 norms 处理 (已过时)
 * TermVector - 域内信息是否需要保存词向量, 在中文处理中分词是索引的基础
 * TermVector 保存 Token.getPositionIncrement() 和 Token.startOffset() 以及 Token.endOffset() 信息
 * 1. Field.TermVector.NO: 不保存 term vectors
 * 2. Field.TermVector.YES: 保存 term vectors
 * 3. Field.TermVector.WITH_POSITIONS: 保存 term vectors (保存值和 token 位置信息)
 * 4. Field.TermVector.WITH_OFFSETS: 保存 term vectors (保存值和 Token 的 offset)
 * 5. Field.TermVector.WITH_POSITIONS_OFFSETS: 保存 term vectors (保存值、token 位置信息和 Token 的 offset)
 */