Lucene 获取聚类(分类统计)的数量
1. 先定义一个 BaseCollector
public abstract class BaseCollector extends TopDocsCollector<BaseScoreDoc> { BaseScoreDoc pqTop; int docBase = 0; Scorer scorer; private Comparable cpb = Comparable.DFAULT_COMPARABLE; protected BaseCollector(int numHits,Comparable cpb) { super(new HitQueue(numHits, true,cpb)); if(cpb != null){ this.cpb = cpb; } pqTop = pq.top(); } protected BaseCollector(int numHits) { super(new HitQueue(numHits, true)); pqTop = pq.top(); } /** * 关键代码,别乱改 */ public void collect(int doc) throws IOException { // This collector cannot handle these scores: float score = scorer.score() ; assert score != Float.NEGATIVE_INFINITY; assert !Float.isNaN(score); BaseScoreDoc csb = new BaseScoreDoc(doc,score); csb.doc = doc; csb.score = score; process(csb); if(csb.f < 0){ return ; } totalHits++; if(cpb.lessThan(csb, pqTop)){ return; } pqTop.f = csb.f; pqTop.sortValue = csb.sortValue; pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); } public abstract void process(BaseScoreDoc csb); @Override protected TopDocs newTopDocs(ScoreDoc[] results, int start) { if (results == null) { return EMPTY_TOPDOCS; } float maxScore = Float.NaN; if (start == 0) { maxScore = results[0].score; } else { for (int i = pq.size(); i > 1; i--) { pq.pop(); } maxScore = pq.pop().score; } return new TopDocs(totalHits, results, maxScore); } @Override public void setNextReader(IndexReader reader, int base) { docBase = base; } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return false; }
2. 编写自己的 Collector,统计两个分类:一个是单位名称分类,一个是地区分类
public class AnimalManagementCollector extends BaseCollector { private Map<String, Integer> unitMap = new HashMap<String, Integer>();//单位名称 private Map<String, Integer> zoneMap = new HashMap<String, Integer>();//地区 public AnimalManagementCollector(int numHits) { super(numHits, Comparable.DESC_COMPARABLE); } @Override public void process(BaseScoreDoc csb) { int doc = csb.doc; String unit_cache = InstrumentFields.UNIT_CACHE[doc]; String zone_cache = InstrumentFields.ZONE_CACHE[doc]; if (!(StringUtil.isEmpty(unit_cache))) { if (unitMap.containsKey(unit_cache)) { unitMap.put(unit_cache, unitMap.get(unit_cache) + 1); } else { unitMap.put(unit_cache, 1); } } if (!(StringUtil.isEmpty(zone_cache))) { if (zoneMap.containsKey(zone_cache)) { zoneMap.put(zone_cache, zoneMap.get(zone_cache) + 1); } else { zoneMap.put(zone_cache, 1); } } } public Map<String, Integer> getUnitMap() { return unitMap; } public void setUnitMap(Map<String, Integer> unitMap) { this.unitMap = unitMap; } public Map<String, Integer> getZoneMap() { return zoneMap; } public void setZoneMap(Map<String, Integer> zoneMap) { this.zoneMap = zoneMap; }
3.定制field
public class AnimalManagementFields { public static String[] UNIT_CACHE; //单位名称 public static String[] ZONE_CACHE; //地区 public synchronized void init(IndexReader ir) { readCache(ir); } public static void readCache(IndexReader ir) { int maxDoc = ir.maxDoc(); final String[] tempUnit = new String[maxDoc + 1]; final String[] tempZone = new String[maxDoc + 1]; FieldExtractor.extract(ir, "unit1", new FieldExtractor.FieldWalker() { @Override public void stroll(int doc, String value) { try { tempUnit[doc] = value; } catch (Exception e) { } } }); FieldExtractor.extract(ir, "zone", new FieldExtractor.FieldWalker() { @Override public void stroll(int doc, String value) { try { tempZone[doc] = value; } catch (Exception e) { } } }); UNIT_CACHE = tempUnit; ZONE_CACHE = tempZone; } }
4.在web.xml设置初始化
<!-- Registers com.dayainfo.action.InitServlet under the name "Init"; load-on-startup asks the container to initialise it at deployment time rather than on the first request. -->
<servlet> <servlet-name>Init</servlet-name> <servlet-class>com.dayainfo.action.InitServlet</servlet-class> <load-on-startup>1</load-on-startup> </servlet>
5.在InitServlet中初始化
public class InitServlet extends HttpServlet { private static final long serialVersionUID = 1L; private Logger logger = Logger.getLogger(InitServlet.class); public void init(ServletConfig config) throws ServletException { try { long beginTime1 = System.currentTimeMillis(); AnimalManagementFields animalManagementFields = new AnimalManagementFields(); animalManagementFields.init(SQLCreatReader.getReader(SystemConstant.ANIMAL_MANAGEMENT_LICENCE_INDEX_KEY)); long endTime1 = System.currentTimeMillis(); logger.info("初始化_动物管理许可证_聚类信息耗时:" + StringUtil.millsecondChange(endTime1 - beginTime1, 1) + "秒"); } catch (IOException e) { e.printStackTrace(); } }
6.在搜索中使用
public class AnimalManagementSearchService { private AnimalManagementReturnParam animalManagementReturnParam = new AnimalManagementReturnParam(); private int totalData; private ScoreDoc[] scoreDocs; public void handleInstrumentSearch(AnimalManagementReceiveParam animalManagementReceiveParam) throws IOException { long beginTime = System.currentTimeMillis(); int numHits = animalManagementReceiveParam.getPageSize() * (animalManagementReceiveParam.getCurrentPage()); AnimalManagementCollector animalManagementCollector = new AnimalManagementCollector(numHits); List<AnimalManagementLicenceBean> instrumentBeanListWithPage = luceneSearch(animalManagementCollector, animalManagementReceiveParam); animalManagementReturnParam.setAnimalManagementLicenceList(instrumentBeanListWithPage); animalManagementReturnParam.setUnitMap(animalManagementCollector.getUnitMap()); //单位名称 animalManagementReturnParam.setZoneMap(animalManagementCollector.getZoneMap()); //地区 long endtime = System.currentTimeMillis(); animalManagementReturnParam.setTotalTime(StringUtil.millsecondChange(endtime - beginTime, 1)); } //在索引中搜索数据 public List<AnimalManagementLicenceBean> luceneSearch(AnimalManagementCollector animalManagementCollector, AnimalManagementReceiveParam animalManagementReceiveParam) throws IOException { QueryTerm term = new FuzzyQueryTerm(); DXSearcher dxSearcher = new DXSearcher(SQLCreatReader.getReader(SystemConstant.ANIMAL_MANAGEMENT_LICENCE_INDEX_KEY)); if ("1".equals(animalManagementReceiveParam.getFlag())) { //分类检索 term.addTerm("flag", "1", false); } else { if ((!StringUtil.isEmpty(animalManagementReceiveParam.getUnit()))) { term.addTerm("unit1", animalManagementReceiveParam.getUnit(), false); } if ((!StringUtil.isEmpty(animalManagementReceiveParam.getZone()))) { term.addTerm("zone", animalManagementReceiveParam.getZone(), false); } if (!StringUtil.isEmpty(animalManagementReceiveParam.getField())) { if ("1".equals(animalManagementReceiveParam.getField())) { //全部字段 QueryTerm term1 = new 
FuzzyQueryTerm(); term1.addTerm("lic_number", animalManagementReceiveParam.getSw(), 2); term1.addTerm("unit", animalManagementReceiveParam.getSw(), 2); term.addTerm(term1, 1); } else if ("2".equals(animalManagementReceiveParam.getField())) { //许可证编号 term.addTerm("lic_number", animalManagementReceiveParam.getSw()); } else if ("3".equals(animalManagementReceiveParam.getField())) { //单位名称 term.addTerm("unit", animalManagementReceiveParam.getSw()); } } } dxSearcher.search(term, animalManagementCollector); if (term.getQuery() != null) { System.out.println("搜索字段:" + term.getQuery().toString()); } int begin = animalManagementReceiveParam.getPageSize() * (animalManagementReceiveParam.getCurrentPage() - 1); int end = animalManagementReceiveParam.getPageSize(); scoreDocs = animalManagementCollector.topDocs(begin, end).scoreDocs; totalData = animalManagementCollector.getTotalHits(); animalManagementReturnParam.setTotalData(totalData); List<AnimalManagementLicenceBean> instrumentBeanList = new ArrayList<AnimalManagementLicenceBean>(); FieldHighlighter fieldHighlighter = new FieldHighlighter(animalManagementReceiveParam.getSw()); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; int docID = scoreDoc.doc; Document doc = dxSearcher.doc(docID); AnimalManagementLicenceBean animalManagementLicenceBean = new AnimalManagementLicenceBean(); if (!StringUtil.isEmpty(doc.get("dxid"))) { animalManagementLicenceBean.setDxid(doc.get("dxid")); } if (!StringUtil.isEmpty(doc.get("title"))) { animalManagementLicenceBean.setTitle(fieldHighlighter.getTextFragment(doc.get("title"), false)); } if (!StringUtil.isEmpty(doc.get("type"))) { animalManagementLicenceBean.setType(fieldHighlighter.getTextFragment(doc.get("type"), false)); } if (!StringUtil.isEmpty(doc.get("lic_number"))) { animalManagementLicenceBean.setLic_number(fieldHighlighter.getTextFragment(doc.get("lic_number"), false)); } if (!StringUtil.isEmpty(doc.get("unit"))) { 
animalManagementLicenceBean.setUnit(fieldHighlighter.getTextFragment(doc.get("unit"), false)); } if (!StringUtil.isEmpty(doc.get("unit1"))) { animalManagementLicenceBean.setUnit1(fieldHighlighter.getTextFragment(doc.get("unit1"), false)); } if (!StringUtil.isEmpty(doc.get("enable_range"))) { animalManagementLicenceBean.setEnable_range(fieldHighlighter.getTextFragment(doc.get("enable_range"), false)); } if (!StringUtil.isEmpty(doc.get("zone"))) { animalManagementLicenceBean.setZone(fieldHighlighter.getTextFragment(doc.get("zone"), false)); } if (!StringUtil.isEmpty(doc.get("url"))) { animalManagementLicenceBean.setUrl(fieldHighlighter.getTextFragment(doc.get("url"), false)); } instrumentBeanList.add(animalManagementLicenceBean); } return instrumentBeanList; } public AnimalManagementReturnParam getAnimalManagementReturnParam() { return animalManagementReturnParam; } public void setAnimalManagementReturnParam(AnimalManagementReturnParam animalManagementReturnParam) { this.animalManagementReturnParam = animalManagementReturnParam; } }
相关推荐
singnojava 2014-04-22
mogigo00 2020-11-11
Fredreck 2020-07-19
horizonheart 2020-07-05
swazerz 2020-06-04
路漫 2020-05-30
只能做防骑 2020-05-13
horizonheart 2020-05-09
wonner 2020-05-09
NVEFLY 2020-04-19
从早忙到晚的闲人 2020-04-13
Yiyang 2020-03-03
sayhaha 2020-02-05
路漫 2020-01-23
kingzone 2020-01-01
lixiaotao 2019-12-29
sxyhetao 2019-12-14
wuxiaosi0 2019-12-06
huimor 2019-12-07