Lucene 3.0 Basics, Part 1 (Creating an Index)
This is my first contact with Lucene, so I'm taking it one step at a time. To implement full-text search, the first step is to create an index. Here is the code:
package cn.com.alei.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Indexes the .txt files under a directory.
 * @author alei
 * @version created 2012-4-14, 03:19:09 PM
 */
public class Indexer {

    public static void main(String[] args) throws CorruptIndexException,
            LockObtainFailedException, IOException {
        // Directory containing the files to be indexed
        File path = new File("D:\\framework\\luceneDir");
        // Directory where the index files will be stored
        File fileindex = new File("D:\\framework\\luceneIndex");
        // Create the Directory object backed by the file system
        Directory directory = new SimpleFSDirectory(fileindex);
        // The Analyzer is responsible for processing the input text: filtering,
        // tokenization, and so on. StandardAnalyzer is an Analyzer
        // implementation that ships with Lucene.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        /*
         * Create the IndexWriter. The first argument is the Directory and the
         * second is the analyzer. The third indicates whether to create a new
         * index (false means open an existing one for modification). The fourth
         * caps how many terms are indexed per field: for example,
         * new MaxFieldLength(2) indexes only the first two terms; normally
         * IndexWriter.MaxFieldLength.UNLIMITED is used.
         */
        IndexWriter indexwriter = new IndexWriter(directory, analyzer, true,
                IndexWriter.MaxFieldLength.UNLIMITED);

        File[] files = path.listFiles();
        long startTime = new Date().getTime();
        // Add a Document to the index for each .txt file
        for (int i = 0; i < files.length; i++) {
            if (files[i].isFile() && files[i].getName().endsWith(".txt")) {
                System.out.println("File: " + files[i].getCanonicalPath() + " is being indexed...");
                String fileContent = readFile(files[i], "GB2312");
                System.out.println(fileContent);
                Document document = new Document();
                // Tokenize the content for searching, but do not store it in the index
                Field fieldcontent = new Field("content", fileContent, Store.NO, Index.ANALYZED);
                // Store the path so it can be retrieved from search hits,
                // and index it as a single, untokenized term
                Field fieldpath = new Field("path", files[i].getCanonicalPath(),
                        Field.Store.YES, Field.Index.NOT_ANALYZED);
                document.add(fieldcontent);
                document.add(fieldpath);
                indexwriter.addDocument(document);
            }
        }
        // Optimize the index (merge segments), then close the writer
        indexwriter.optimize();
        indexwriter.close();
        long endTime = new Date().getTime();
        System.out.println("Took " + (endTime - startTime) + " ms to add the documents to the index!");
    }

    public static String readFile(File file, String charset) throws IOException {
        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), charset));
        StringBuilder sb = new StringBuilder();
        String str;
        while ((str = bufferedReader.readLine()) != null) {
            sb.append(str);
        }
        bufferedReader.close();
        return sb.toString();
    }
}
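Once the index has been built, a quick search is the easiest way to verify it. The sketch below is not part of the original Indexer: the class name SearcherDemo and the query term "hello" are placeholders of my own, and it assumes the same index directory and analyzer used above, with Lucene 3.0's QueryParser and IndexSearcher APIs.

package cn.com.alei.lucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

public class SearcherDemo {
    public static void main(String[] args) throws Exception {
        // Open the index directory that Indexer wrote to
        Directory directory = new SimpleFSDirectory(new File("D:\\framework\\luceneIndex"));
        IndexSearcher searcher = new IndexSearcher(directory, true); // read-only
        // Parse the query against the "content" field with the same analyzer
        // that was used at indexing time
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser parser = new QueryParser(Version.LUCENE_30, "content", analyzer);
        Query query = parser.parse("hello"); // placeholder search term
        TopDocs topDocs = searcher.search(query, 10); // top 10 hits
        System.out.println("Total hits: " + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // Only stored fields come back from searcher.doc()
            System.out.println(doc.get("path"));
        }
        searcher.close();
    }
}

Note that searcher.doc() returns only stored fields. That is why the "path" field is indexed with Store.YES, while "content", which only needs to be searchable, can stay Store.NO to keep the index small.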