Lucene 3.1.0: A Simple Tokenization Example
Environment: Lucene 3.1.0.
For English text, the standard analyzer (StandardAnalyzer) is used for tokenization.
For Chinese text, SmartChineseAnalyzer is used, which comes with Lucene.
The example is run as a JUnit 4.0 test.
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.junit.Test;

public class Analyzertest {

    //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_31);

    //String text = "我是中国人";
    String text = "IndexWriter javadoc a apach2.0.txt";

    @Test
    public void test() {
        try {
            analyzer(analyzer, text);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void analyzer(Analyzer al, String text) throws Exception {
        // Build a token stream over the text; the field name ("content") is only
        // relevant for analyzers that behave differently per field.
        TokenStream tokeStream = al.tokenStream("content", new StringReader(text));
        // TermAttribute is deprecated; the javadoc recommends CharTermAttribute instead.
        tokeStream.addAttribute(CharTermAttribute.class);
        // Iterate over the tokens produced by the analyzer and print each one.
        while (tokeStream.incrementToken()) {
            CharTermAttribute ta = tokeStream.getAttribute(CharTermAttribute.class);
            System.out.println(ta.toString());
            //System.out.println(tokeStream.toString());
        }
    }
}
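Since the sample text mentions IndexWriter, it may help to see where the analyzer fits in the indexing pipeline. The following is a minimal sketch, not part of the original example: it passes the same SmartChineseAnalyzer to an IndexWriter via IndexWriterConfig (the configuration style introduced in Lucene 3.1) and indexes a single document into an in-memory RAMDirectory. The class name IndexDemo and the field name "content" are just placeholders.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class IndexDemo {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_31);
        Directory dir = new RAMDirectory(); // in-memory index, for illustration only

        // In Lucene 3.1 the IndexWriter is configured through IndexWriterConfig,
        // which carries the analyzer used at index time.
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
        IndexWriter writer = new IndexWriter(dir, config);

        Document doc = new Document();
        // ANALYZED fields are tokenized with the analyzer configured above.
        doc.add(new Field("content", "我是中国人", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        writer.close();
    }
}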