Lucene如何解析PPT文档
加入以下jar包：poi-3.0.2-FINAL-20080204.jar、poi-contrib-3.0.2-FINAL-20080204.jar、poi-scratchpad-3.0.2-FINAL-20080204.jar
package com.cs; public interface Parsable { public String getTitle() ; public String getContent() ; public String getSummary() ; }
package com.cs;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * {@link Parsable} implementation that extracts the text of a PowerPoint
 * (.ppt) file through the Apache POI HSLF classes.
 *
 * <p>The extracted text is cached in a field after the first successful
 * read, so the file is parsed at most once per successful instance.
 */
public class PPTParser implements Parsable {

    /** Maximum number of characters returned by {@link #getSummary()}. */
    private static final int SUMMARY_LENGTH = 200;

    /** Sample file used by {@link #main(String[])} when no path is given. */
    private static final String DEFAULT_SAMPLE_PATH =
            "E:\\EclipseStudyWorkspace\\LuceneParse\\fileSource\\搜索引擎-基础.ppt";

    private final File file;

    /** Cached result of {@link #getContent()}; {@code null} until a read succeeds. */
    private String content;

    /**
     * @param file the PowerPoint file to parse
     */
    public PPTParser(File file) {
        this.file = file;
    }

    /**
     * Returns the text of every slide: all text runs of each slide followed
     * by the slide title (when the slide has one). Each piece of text is
     * terminated by a newline so that neighbouring runs do not fuse into a
     * single token.
     *
     * @return the extracted text, or {@code null} if the file could not be
     *         opened or read (the stack trace is printed to standard error)
     */
    public String getContent() {
        if (content != null) {
            return content;
        }

        InputStream is = null;
        try {
            is = new FileInputStream(file);
            // SlideShow is the user-level wrapper around the low-level HSLFSlideShow reader.
            SlideShow ss = new SlideShow(new HSLFSlideShow(is));
            Slide[] slides = ss.getSlides();

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < slides.length; i++) {
                TextRun[] runs = slides[i].getTextRuns();
                for (int j = 0; j < runs.length; j++) {
                    sb.append(runs[j].getText()).append('\n');
                }
                // A slide without a title would otherwise contribute the literal text "null".
                // NOTE(review): the title may already be present among the text runs above;
                // confirm whether the duplication is intended before removing it.
                String slideTitle = slides[i].getTitle();
                if (slideTitle != null) {
                    sb.append(slideTitle).append('\n');
                }
            }

            content = sb.toString();
            return content;
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(is);
        }
        return null;
    }

    /**
     * Returns the first {@value #SUMMARY_LENGTH} characters of the content,
     * or the whole content when it is shorter than that.
     *
     * @return the summary, or an empty string when the content could not be
     *         read
     */
    public String getSummary() {
        String text = getContent();
        if (text == null) {
            // getContent() failed; avoid dereferencing the missing content.
            return "";
        }
        if (text.length() > SUMMARY_LENGTH) {
            return text.substring(0, SUMMARY_LENGTH);
        }
        return text;
    }

    /**
     * @return the name of the underlying file
     */
    public String getTitle() {
        return file.getName();
    }

    /** Closes the stream if it was opened, ignoring any failure while closing. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }

    /**
     * Prints the extracted text of a PowerPoint file.
     *
     * @param args optional: the path of the file to parse as the first
     *             argument; the built-in sample path is used when absent
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SAMPLE_PATH;
        PPTParser pptParser = new PPTParser(new File(path));
        System.out.println("ppt content : " + pptParser.getContent());
    }
}
相关推荐
renjinlong 2020-09-03
Jacry 2020-07-04
IceStreamLab 2020-06-26
mengyue 2020-06-09
PasserbyX 2020-05-16
mameng 2020-05-12
心丨悦 2020-05-06
编码之路 2020-05-03
mengyue 2020-05-02
qiuzhuoxian 2020-02-23
编码之路 2020-02-20
lionelf 2020-02-03
TyCoding 2020-02-01
heniancheng 2020-01-31
某某某 2020-01-30
PinkBean 2020-01-29
某某某 2020-01-12
编码之路 2020-01-01
itmale 2020-01-01