Lucene多字段搜索
最近在学习Lucene的过程中遇到了需要多域搜索并排序的问题,在网上找了找,资料不是很多,现在都列出来,有需要的可以自己认真看看。内容都是从其他网站粘贴过来的,所以比较乱,感谢原创的作者们!
使用MultiFieldQueryParser类即可。
示例代码:
package com.lucene.search;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Multi-field search example: runs one query string per field against an
 * existing index and prints every hit.
 */
public class Searcher {

    /**
     * Checks that the hard-coded index directory exists, then searches it.
     *
     * @param args unused
     * @throws Exception if the index directory is missing or the search fails
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("C:\\target\\index\\book");
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IOException("Index directory does not exist or is not a directory: " + indexDir);
        }
        search(indexDir);
    }

    /**
     * Searches the "name" field for "中文版" and the "isbn" field for "8*",
     * combining both clauses with SHOULD, and prints each hit to stdout.
     *
     * @param indexDir directory that holds the index to search
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(File indexDir) throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(fsDir);
            try {
                // queries[i] is parsed against fields[i] and joined using clauses[i].
                String[] queries = { "中文版", "8*" };
                String[] fields = { "name", "isbn" };
                BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
                Query query = MultiFieldQueryParser.parse(queries, fields, clauses, new StandardAnalyzer());

                Hits hits = searcher.search(query);
                System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + hits.length() + "条");
                for (int i = 0; i < hits.length(); i++) {
                    int docId = hits.id(i);
                    // Fetch the stored document once per hit instead of once per field.
                    Document doc = hits.doc(i);
                    String docName = doc.get("name");
                    String docIsbn = doc.get("isbn");
                    String docPblDt = doc.get("pbl_dt");
                    System.out.println(docId + ":" + docName + " ISBN:" + docIsbn + " PBLDT:" + docPblDt);
                }
            } finally {
                // Release the index reader even when parsing or searching fails.
                searcher.close();
            }
        } finally {
            fsDir.close();
        }
    }
}
package com.lucene.search;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Multi-field search example: runs one query string per field against an
 * existing index and prints every hit.
 */
public class Searcher {

    /**
     * Checks that the hard-coded index directory exists, then searches it.
     *
     * @param args unused
     * @throws Exception if the index directory is missing or the search fails
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("C:\\target\\index\\book");
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IOException("Index directory does not exist or is not a directory: " + indexDir);
        }
        search(indexDir);
    }

    /**
     * Searches the "name" field for "中文版" and the "isbn" field for "8*",
     * combining both clauses with SHOULD, and prints each hit to stdout.
     *
     * @param indexDir directory that holds the index to search
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(File indexDir) throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(fsDir);
            try {
                // queries[i] is parsed against fields[i] and joined using clauses[i].
                String[] queries = { "中文版", "8*" };
                String[] fields = { "name", "isbn" };
                BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
                Query query = MultiFieldQueryParser.parse(queries, fields, clauses, new StandardAnalyzer());

                Hits hits = searcher.search(query);
                System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + hits.length() + "条");
                for (int i = 0; i < hits.length(); i++) {
                    int docId = hits.id(i);
                    // Fetch the stored document once per hit instead of once per field.
                    Document doc = hits.doc(i);
                    String docName = doc.get("name");
                    String docIsbn = doc.get("isbn");
                    String docPblDt = doc.get("pbl_dt");
                    System.out.println(docId + ":" + docName + " ISBN:" + docIsbn + " PBLDT:" + docPblDt);
                }
            } finally {
                // Release the index reader even when parsing or searching fails.
                searcher.close();
            }
        } finally {
            fsDir.close();
        }
    }
}
注意:BooleanClause.Occur[]数组,它表示多个条件之间的关系:
BooleanClause.Occur.MUST表示and,
BooleanClause.Occur.MUST_NOT表示not,
BooleanClause.Occur.SHOULD表示or.
---------------------------------------------------------------------------------------------------------
多个关键字之间的关系是“或”,所以直接使用多域搜索对象查询出来的结果就是这样。
更灵活的控制方式为:
// Build the OR-combination by hand: one QueryParser per field, with each
// parsed query added to a BooleanQuery as a SHOULD clause.
// `分词器` is the article's placeholder for the Analyzer instance to use.
BooleanQuery booleanQuery = new BooleanQuery();

QueryParser titleParser = new QueryParser("title", 分词器);
Query titleQuery = titleParser.parse("中国人民共和国");
booleanQuery.add(titleQuery, BooleanClause.Occur.SHOULD);

QueryParser contentParser = new QueryParser("content", 分词器);
Query contentQuery = contentParser.parse("中国人民共和国");
booleanQuery.add(contentQuery, BooleanClause.Occur.SHOULD);
--------------------------------------------------------------------------------------------------
package com.lucene.search; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiSearcher; import org.apache.lucene.search.Query; public class Multisearcher { private static String INDEX_STORE_PATH1 = "C:\\multi\\1"; private static String INDEX_STORE_PATH2 = "C:\\multi\\2"; public static void main(String[] args) throws Exception { Multisearcher.multisearcher(); } public static void multisearcher() throws Exception { IndexWriter writer = new IndexWriter(INDEX_STORE_PATH1, new StandardAnalyzer(), true); writer.setUseCompoundFile(false); Document doc1 = new Document(); Field f1 = new Field("bookname", "钢铁是怎样炼成的", Field.Store.YES, Field.Index.TOKENIZED); Field f11 = new Field("price", "20.5", Field.Store.YES, Field.Index.UN_TOKENIZED); doc1.add(f1); doc1.add(f11); Document doc2 = new Document(); Field f2 = new Field("bookname", "钢铁战士", Field.Store.YES, Field.Index.TOKENIZED); Field f22 = new Field("price", "18.4", Field.Store.YES, Field.Index.UN_TOKENIZED); doc2.add(f2); doc2.add(f22); Document doc3 = new Document(); Field f3 = new Field("bookname", "钢和铁是两种不同的元素", Field.Store.YES, Field.Index.TOKENIZED); Field f33 = new Field("price", "7.6", Field.Store.YES, Field.Index.UN_TOKENIZED); doc3.add(f3); doc3.add(f33); writer.addDocument(doc1); writer.addDocument(doc2); writer.addDocument(doc3); writer.close(); //创建第二个索引器; IndexWriter writer2 = new IndexWriter(INDEX_STORE_PATH2, new StandardAnalyzer(), true); writer2.setUseCompoundFile(false); Document doc4 = new Document(); Field f4 = new Field("bookname", "钢要比铁有更多的元素", Field.Store.YES, Field.Index.TOKENIZED); Field f44 = new Field("price", "22.5", Field.Store.YES, 
Field.Index.UN_TOKENIZED); doc4.add(f4); doc4.add(f44); Document doc5 = new Document(); Field f5 = new Field("bookname", "钢和铁是两种重要的金属", Field.Store.YES, Field.Index.TOKENIZED); Field f55 = new Field("price", "15.9", Field.Store.YES, Field.Index.UN_TOKENIZED); doc5.add(f5); doc5.add(f55); Document doc6 = new Document(); Field f6 = new Field("bookname", "钢铁是两种重要的金属", Field.Store.YES, Field.Index.TOKENIZED); Field f66 = new Field("price", "19.00", Field.Store.YES, Field.Index.UN_TOKENIZED); doc6.add(f6); doc6.add(f66); writer2.addDocument(doc4); writer2.addDocument(doc5); writer2.addDocument(doc6); writer2.close(); String query1 = "钢"; String query2 = "[10 TO 20]";//注意格式:中括号还有关键字TO是大写的 String[] queries = { query1, query2 }; //指定两个域 Field String field1 = "bookname"; String field2 = "price"; String[] fields = { field1, field2 }; //指定查询字句之间的关系 BooleanClause.Occur[] clauses = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST }; //转成多域查询 MultiFieldQuery Query q = MultiFieldQueryParser.parse(queries, fields, clauses, new StandardAnalyzer()); //打印Query的内容 System.out.println(q.toString()); //创建两个IndexSearcher,以实现在多个索引目录进行查询 IndexSearcher searcher1 = new IndexSearcher(INDEX_STORE_PATH1); IndexSearcher searcher2 = new IndexSearcher(INDEX_STORE_PATH2); IndexSearcher[] searchers = { searcher1, searcher2 }; //使用MultiSearcher进行多域搜索 MultiSearcher searcher = new MultiSearcher(searchers); Hits hits = searcher.search(q); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.doc(i)); } } }
------------------------------------------------------------------------------------------------------------------------------------------
默认情况下,IndexSearcher类的search方法返回查询结果时,是按文档的分值排序的,可以使用重载的search方法对结果排序
IndexSearcher.search(Query,Sort);
new Sort()和Sort.RELEVANCE,以及null一样,都采用默认排序(按相关度分值);要自定义排序字段,方法是将字段名传入Sort对象:
Sort sort = new Sort(String field);也可以对多个字段排序
Sort sort = new Sort(String[] fields);
例:
Sort sort = new Sort(new SortField[]{new SortField("title"), new SortField("name")}); Hits hits = searcher.search(query, sort);
多字段查找MultiFieldQueryParser
在多个字段中查找同一个Term,不关心它出现在哪个字段
Query query = MultiFieldQueryParser.parse("word", new String[]{"title", "content"}, analyzer);//在title和content中找word
多字段时默认是OR关系,要改变它,使用以下方法:
Query query = MultiFieldQueryParser.parse("word", new String[]{"title", "content"}, new int[]{MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD}, analyzer);
其中:
REQUIRED_FIELD表示该条件必须有
PROHIBITED_FIELD表示必须不含
搜索多个索引文件MultiSearcher
1)建立多个索引:使用不同的索引目录,实例化不同的IndexWriter
2)建立多索引搜索器:
Searcher[] searchers = new Searcher[2];
searchers[0] = new IndexSearcher(dir1);//搜索索引目录一
searchers[1] = new IndexSearcher(dir2);//搜索索引目录二
Searcher searcher = new MultiSearcher(searchers);
3)开始查询:Hits hits = searcher.search(query);
---------------------------------------------------------------------------------------------------------------------------------------
// Sketch: AND the main "contents" query together with any number of extra
// mandatory clauses on the "adISELL" field.
// query, query2..queryN, queryString, searcher and hits are assumed to be
// declared by the surrounding code, which is not shown here.
BooleanQuery typeNegativeSearch = new BooleanQuery();

// Analyzer itself is abstract; StandardAnalyzer is used here as in the other
// examples - substitute whichever analyzer the index was built with.
QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
query = parser.parse(queryString);

QueryParser parser2 = new QueryParser("adISELL", new StandardAnalyzer());
query2 = parser2.parse("\"2\"");
QueryParser parser3 = new QueryParser("adISELL", new StandardAnalyzer());
query3 = parser3.parse("\"2\"");
QueryParser parser4 = new QueryParser("adISELL", new StandardAnalyzer());
query4 = parser4.parse("\"2\"");
// ... repeat the same pattern for parser5/query5 up to parserN/queryN ...

typeNegativeSearch.add(query, Occur.MUST);
typeNegativeSearch.add(query2, Occur.MUST);
typeNegativeSearch.add(query3, Occur.MUST);
typeNegativeSearch.add(query4, Occur.MUST);
// ... and likewise add query5 up to queryN with Occur.MUST ...

hits = searcher.search(typeNegativeSearch);
1,几种Span查询(SpanQuery)
SpanTermQuery:检索效果完全同TermQuery,但内部会记录一些位置信息,供SpanQuery的其它API使用,是其它属于SpanQuery的Query的基础。
SpanFirstQuery:查找方式为从Field的内容起始位置开始,在一个固定的宽度内查找所指定的
词条。
SpanNearQuery:功能类似PhraseQuery。SpanNearQuery查找所匹配的不一定是短语,还有可能是把另一个SpanQuery的查询结果作为整体考虑,进行嵌套查询。
SpanOrQuery:把所有SpanQuery查询结果综合起来,作为检索结果。
SpanNotQuery:从第一个SpanQuery查询结果中,去掉第二个SpanQuery查询结果,作为检索结
果。
2,多条件索引关系
BooleanClause用于表示布尔查询子句关系的类,包括:BooleanClause.Occur.MUST,
BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD。有以下6种组合:
1.MUST和MUST:取得两个查询子句的交集。
2.MUST和MUST_NOT:表示查询结果中不能包含MUST_NOT所对应的查询子句的检索结果。
3.MUST_NOT和MUST_NOT:无意义,检索无结果。
4.SHOULD与MUST、SHOULD与MUST_NOT:SHOULD与MUST连用时,无意义,结果为MUST子句的检索
结果。与MUST_NOT连用时,功能同MUST。
5.SHOULD与SHOULD:表示“或”关系,最终检索结果为所有检索子句的并集。