基于Lucene多索引进行索引和搜索
Lucene支持创建多个索引目录,同时存储多个索引。我们可能担心的问题是:在索引的过程中,数据被分散地存储到多个索引目录中,搜索时是否还能得到全局的相关度得分?其实Lucene的ParallelMultiSearcher和MultiSearcher支持全局得分的计算,也就是说,虽然索引分布在多个索引目录中,在搜索的时候仍会将全部的索引数据聚合在一起进行查询匹配和得分计算。
索引目录处理
下面我们将索引随机地分布到以a~z命名的26个目录中,并实现一个索引和搜索的程序,来验证一下Lucene得分的计算。
首先,实现一个用来构建索引目录以及处理搜索的工具类,代码如下所示:
- package org.shirdrn.lucene;
- import java.io.File;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Random;
- import java.util.concurrent.locks.Lock;
- import java.util.concurrent.locks.ReentrantLock;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.IndexWriter.MaxFieldLength;
- import org.apache.lucene.search.DefaultSimilarity;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Searchable;
- import org.apache.lucene.search.Similarity;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.shirdrn.lucene.MultipleIndexing.IndexWriterObj;
- /**
- * Indexing accross multiple Lucene indexes.
- *
- * @author shirdrn
- * @date 2011-12-12
- */
- public class IndexHelper {
- private static WriterHelper writerHelper = null;
- private static SearcherHelper searcherHelper = null;
- public static WriterHelper newWriterHelper(String root, IndexWriterConfig indexConfig) {
- return WriterHelper.newInstance(root, indexConfig);
- }
- public static SearcherHelper newSearcherHelper(String root, IndexWriterConfig indexConfig) {
- return SearcherHelper.newInstance(root, indexConfig);
- }
- protected static class WriterHelper {
- private String alphabet = "abcdefghijklmnopqrstuvwxyz";
- private Lock locker = new ReentrantLock();
- private String indexRootDir = null;
- private IndexWriterConfig indexConfig;
- private Map<Character, IndexWriterObj> indexWriters = new HashMap<Character, IndexWriterObj>();
- private static Random random = new Random();
- private WriterHelper() {
- }
- private synchronized static WriterHelper newInstance(String root, IndexWriterConfig indexConfig) {
- if(writerHelper==null) {
- writerHelper = new WriterHelper();
- writerHelper.indexRootDir = root;
- writerHelper.indexConfig = indexConfig;
- }
- return writerHelper;
- }
- public IndexWriterObj selectIndexWriter() {
- int pos = random.nextInt(alphabet.length());
- char ch = alphabet.charAt(pos);
- String dir = new String(new char[] {ch});
- locker.lock();
- try {
- File path = new File(indexRootDir, dir);
- if(!path.exists()) {
- path.mkdir();
- }
- if(!indexWriters.containsKey(ch)) {
- IndexWriter indexWriter = new IndexWriter(FSDirectory.open(path), indexConfig.getAnalyzer(), MaxFieldLength.UNLIMITED);
- indexWriters.put(ch, new IndexWriterObj(indexWriter, dir));
- }
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (LockObtainFailedException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- locker.unlock();
- }
- return indexWriters.get(ch);
- }
- @SuppressWarnings("deprecation")
- public void closeAll(boolean autoOptimize) {
- Iterator<Map.Entry<Character, IndexWriterObj>> iter = indexWriters.entrySet().iterator();
- while(iter.hasNext()) {
- Map.Entry<Character, IndexWriterObj> entry = iter.next();
- try {
- if(autoOptimize) {
- entry.getValue().indexWriter.optimize();
- }
- entry.getValue().indexWriter.close();
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
- protected static class SearcherHelper {
- private List<IndexSearcher> searchers = new ArrayList<IndexSearcher>();
- private Similarity similarity = new DefaultSimilarity();
- private SearcherHelper() {
- }
- private synchronized static SearcherHelper newInstance(String root, IndexWriterConfig indexConfig) {
- if(searcherHelper==null) {
- searcherHelper = new SearcherHelper();
- if(indexConfig.getSimilarity()!=null) {
- searcherHelper.similarity = indexConfig.getSimilarity();
- }
- File indexRoot = new File(root);
- File[] files = indexRoot.listFiles();
- for(File f : files) {
- IndexSearcher searcher = null;
- try {
- searcher = new IndexSearcher(FSDirectory.open(f));
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- if(searcher!=null) {
- searcher.setSimilarity(searcherHelper.similarity);
- searcherHelper.searchers.add(searcher);
- }
- }
- }
- return searcherHelper;
- }
- public void closeAll() {
- Iterator<IndexSearcher> iter = searchers.iterator();
- while(iter.hasNext()) {
- try {
- iter.next().close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- public Searchable[] getSearchers() {
- Searchable[] a = new Searchable[searchers.size()];
- return searchers.toArray(a);
- }
- }
- }
在搜索的时候,通过IndexHelper.SearcherHelper工具来获取多个Searchable实例的数组,调用getSearchers()即可以获取到,提供给MultiSearcher构建搜索。
相关推荐
renjinlong 2020-09-03
Jacry 2020-07-04
IceStreamLab 2020-06-26
mengyue 2020-06-09
PasserbyX 2020-05-16
mameng 2020-05-12
心丨悦 2020-05-06
编码之路 2020-05-03
mengyue 2020-05-02
qiuzhuoxian 2020-02-23
编码之路 2020-02-20
lionelf 2020-02-03
TyCoding 2020-02-01
heniancheng 2020-01-31
某某某 2020-01-30
PinkBean 2020-01-29
某某某 2020-01-12
编码之路 2020-01-01
itmale 2020-01-01