Lucene 入门级笔记（二）：索引库的 CRUD API 演示

Anthonybuer

2011-10-24

Lucene 对索引库的增删改查操作的 API 演示

没什么说的，apache 的 API 一向简单、不难理解。所以直接拷代码过去稍微看一下就差不多了。

/**
 * "文章" 实体
 */
public class Article {
	private Integer id;
	private String title;
	private String content;
	public Integer getId() {
		return id;
	}
	public void setId(Integer id) {
		this.id = id;
	}
	public String getTitle() {
		return title;
	}
	public void setTitle(String title) {
		this.title = title;
	}
	public String getContent() {
		return content;
	}
	public void setContent(String content) {
		this.content = content;
	}
}
/**
 * Describes one page of search results.
 *
 * <p>Carries the total number of matching records together with the
 * articles that belong to the requested page.
 */
public class QueryResult {

	/** Total number of records that matched the query. */
	private int totalCount;

	/** Articles retrieved for this page. */
	private List<Article> articles;

	/**
	 * Creates a result page.
	 *
	 * @param totalCount total number of matching records
	 * @param atrticle   articles contained in this page
	 */
	public QueryResult(int totalCount, List<Article> atrticle) {
		this.articles = atrticle;
		this.totalCount = totalCount;
	}

	/** @return total number of matching records */
	public int getTotalCount() {
		return this.totalCount;
	}

	/** @param totalCount total number of matching records */
	public void setTotalCount(int totalCount) {
		this.totalCount = totalCount;
	}

	/** @return the articles contained in this page */
	public List<Article> getAtrticle() {
		return this.articles;
	}

	/** @param atrticle the articles contained in this page */
	public void setAtrticle(List<Article> atrticle) {
		this.articles = atrticle;
	}
}
/**
 * DAO that operates directly on the Lucene index.
 *
 * <p>Every operation opens the index located at {@code ./indexDir/}, performs
 * its work and closes the writer or searcher again. I/O failures are rethrown
 * wrapped in a {@link RuntimeException}.
 */
public class ArticleIndexDao {

	/** Location of the index directory, relative to the working directory. */
	private static final String INDEX_PATH = "./indexDir/";

	/**
	 * Adds an article to the index.
	 *
	 * @param article the article to index
	 * @throws RuntimeException if the index cannot be written or closed
	 */
	public void save(Article article) {
		Document document = Article2DocumentUtil.article2document(article);
		IndexWriter indexWriter = null;
		try {
			indexWriter = openWriter();
			indexWriter.addDocument(document);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} finally {
			closeWriter(indexWriter);
		}
	}

	/**
	 * Deletes an article from the index.
	 *
	 * @param article the article to delete; only its id is used
	 * @throws RuntimeException if the index cannot be written or closed
	 */
	public void delete(Article article) {
		IndexWriter indexWriter = null;
		try {
			indexWriter = openWriter();

			/* Remove the documents whose "id" field equals article.getId(). */
			Term term = new Term("id", article.getId() + "");
			indexWriter.deleteDocuments(term);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} finally {
			closeWriter(indexWriter);
		}
	}

	/**
	 * Replaces the indexed copy of an article.
	 *
	 * @param article the new state of the article; its id selects the
	 *                documents to replace
	 * @throws RuntimeException if the index cannot be written or closed
	 */
	public void update(Article article) {
		IndexWriter indexWriter = null;
		try {
			indexWriter = openWriter();

			/*
			 * Replace the documents whose "id" field equals article.getId().
			 * updateDocument is equivalent to delete(article) followed by
			 * save(article).
			 * NOTE(review): the original author stated that for large data
			 * volumes an explicit "delete then re-create" is more efficient;
			 * this claim is unverified.
			 */
			Term term = new Term("id", article.getId() + "");
			indexWriter.updateDocument(term, Article2DocumentUtil
					.article2document(article));
		} catch (IOException e) {
			throw new RuntimeException(e);
		} finally {
			closeWriter(indexWriter);
		}
	}

	/**
	 * Runs a paged search against the "content" field.
	 *
	 * @param queryStr    the query string, parsed with {@link QueryParser}
	 * @param firstResult zero-based position of the first hit to return
	 * @param maxResults  maximum number of hits to return
	 * @return one page of results together with the total hit count
	 * @throws RuntimeException if the query cannot be parsed or the index
	 *                          cannot be read or closed
	 */
	public QueryResult search(String queryStr, int firstResult, int maxResults) {
		IndexSearcher indexSearcher = null;
		List<Article> atrticles = new ArrayList<Article>();
		try {
			/* Build the IndexSearcher. */
			Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
			indexSearcher = new IndexSearcher(indexDir);

			/* Build the Query. */
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
			QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
			Query query = queryParser.parse(queryStr);

			/*
			 * Search. The top (firstResult + maxResults) hits are requested so
			 * that the requested page is included in the returned window.
			 */
			TopDocs topDocs = indexSearcher.search(query, firstResult + maxResults);
			int totalCount = topDocs.totalHits;
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			/* Never iterate past the end of scoreDocs. */
			int length = Math.min(firstResult + maxResults, scoreDocs.length);

			for (int i = firstResult; i < length; i++) {
				/*
				 * i is only the rank of the hit; the actual Lucene document
				 * number is carried by the ScoreDoc and must be used to load
				 * the stored document.
				 */
				Document document = indexSearcher.doc(scoreDocs[i].doc);
				atrticles.add(Article2DocumentUtil.document2article(document));
			}

			return new QueryResult(totalCount, atrticles);
		} catch (Exception e) {
			throw new RuntimeException(e);
		} finally {
			try {
				if (indexSearcher != null) {
					indexSearcher.close();
				}
			} catch (IOException e) {
				throw new RuntimeException(e);
			}
		}
	}

	/**
	 * Opens an IndexWriter on the index directory.
	 *
	 * <p>Uses the standard analyzer; per the original author's note, Lucene
	 * also ships analyzers for many other languages.
	 */
	private IndexWriter openWriter() throws IOException {
		Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
		return new IndexWriter(indexDir, analyzer, MaxFieldLength.LIMITED);
	}

	/** Closes the writer if it was opened, rethrowing failures unchecked. */
	private void closeWriter(IndexWriter indexWriter) {
		if (indexWriter == null) {
			return;
		}
		try {
			indexWriter.close();
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}
}
**
 * 工具类：Article 对象与 Document 对象的转换
 */
public class Article2DocumentUtil {
	public static Document article2document(Article article) {
		if(article == null) {
			return null;
		}
		
		Document document = new Document();
		document.add(new Field("id", article.getId()+"", Store.YES, Index.ANALYZED));
		document.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
		document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
		
		return document;
	}
	
	public static Article document2article(Document document) {
		if(document == null) {
			return null;
		}
		Article article = new Article();
		article.setId(Integer.parseInt(document.get("id")));
		article.setTitle(document.get("title"));
		article.setContent(document.get("content"));
		
		return article;
	}
}
/**
 * JUnit tests exercising the CRUD operations of {@code ArticleIndexDao}.
 */
public class TestArticleIndexDao {

	/** Builds an article populated with the given id, title and content. */
	private static Article newArticle(int id, String title, String content) {
		Article created = new Article();
		created.setId(id);
		created.setTitle(title);
		created.setContent(content);
		return created;
	}

	/**
	 * Saves a single article.
	 *
	 * @throws Exception on any failure
	 */
	@Test
	public void testSave() throws Exception {
		Article article = newArticle(1, "wjh上天山",
				"据悉，文建华已于昨日抵达天山。高歌一曲HelloWorld");
		new ArticleIndexDao().save(article);
	}

	/**
	 * Saves 35 articles with ids 0..34, each through a fresh DAO instance.
	 *
	 * @throws Exception on any failure
	 */
	@Test
	public void testBatchSave() throws Exception {
		int i = 0;
		while (i < 35) {
			ArticleIndexDao dao = new ArticleIndexDao();
			String title = "wjh上天山第" + i + "集";
			String content = "据悉，wjh已于昨日抵达天山。高歌"+ i +"曲 HelloWorld";
			dao.save(newArticle(i, title, content));
			i++;
		}
	}

	/**
	 * Updates the article with id 1.
	 *
	 * @throws Exception on any failure
	 */
	@Test
	public void testUpdate() throws Exception {
		Article article = newArticle(1, "wjh上天山",
				"wjh已于昨日抵达天山。高歌一曲HelloWorld");
		new ArticleIndexDao().update(article);
	}

	/**
	 * Deletes the article with id 1.
	 *
	 * @throws Exception on any failure
	 */
	@Test
	public void testDel() throws Exception {
		Article article = newArticle(1, "wjh上天山",
				"wjh已于昨日抵达天山。高歌一曲HelloWorld");
		new ArticleIndexDao().delete(article);
	}

	/**
	 * Searches for "HelloWorld" starting at hit 30 with at most 10 hits and
	 * prints the total count followed by each returned article.
	 *
	 * @throws Exception on any failure
	 */
	@Test
	public void testSearch() throws Exception {
		QueryResult page = new ArticleIndexDao().search("HelloWorld", 30, 10);

		System.out.println("共匹配了 " + page.getTotalCount() + " 条记录。");

		List<Article> found = page.getAtrticle();
		for (int idx = 0; idx < found.size(); idx++) {
			Article hit = found.get(idx);
			System.out.println("id:" + hit.getId());
			System.out.println("title:" + hit.getTitle());
			System.out.println("content:" + hit.getContent());
			System.out.println("----------------");
		}
	}
}

索引 api lucene