Lucene 如何解析 PPT 文档
加入以下 jar 包:poi-3.0.2-FINAL-20080204.jar、poi-contrib-3.0.2-FINAL-20080204.jar、poi-scratchpad-3.0.2-FINAL-20080204.jar
package com.cs;
/**
 * Contract for objects that expose a title, a content string and a summary
 * string. In this file it is implemented by {@code PPTParser}.
 */
public interface Parsable {
/**
 * @return the title; the {@code PPTParser} implementation in this file
 *         returns the name of the underlying file
 */
public String getTitle() ;
/**
 * @return the content text; the {@code PPTParser} implementation in this
 *         file may return {@code null} when the source cannot be read
 */
public String getContent() ;
/**
 * @return the summary text; the {@code PPTParser} implementation in this
 *         file derives it from the first 200 characters of the content
 */
public String getSummary() ;
}package com.cs;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
public class PPTParser implements Parsable {
private File file;
private String content;
public PPTParser(File file) {
this.file = file;
}
public String getContent() {
if (content != null) {
return content;
}
// HSLFSlideShow contains the main functionality for the Powerpoint file
// "reader". It is only a very basic class for now
// SlideShow is a friendly wrapper on top of the more scary
// HSLFSlideShow
InputStream is;
try {
is = new FileInputStream(file);
SlideShow ss = new SlideShow(new HSLFSlideShow(is));
Slide[] slides = ss.getSlides();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < slides.length; i++) {
// This class represents a run of text in a powerpoint document.
// That run could be text on a sheet, or text in a note.
// It is only a very basic class for now
TextRun[] t = slides[i].getTextRuns();
for (int j = 0; j < t.length; j++) {
sb.append(t[j].getText());
}
sb.append(slides[i].getTitle());
}
content = sb.toString();
return content;
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}
public String getSummary() {
String summary;
if (content == null) {
getContent();
}
if (content.length() > 200) {
summary = content.substring(0, 200);
} else {
summary = content;
}
return summary;
}
public String getTitle() {
// TODO Auto-generated method stub
return file.getName();
}
public static void main(String[] args) {
PPTParser pptParser = new PPTParser(new File("E:\\EclipseStudyWorkspace\\LuceneParse\\fileSource\\搜索引擎-基础.ppt")) ;
System.out.println("ppt content : "+pptParser.getContent()) ;
}
} 相关推荐
renjinlong 2020-09-03
Jacry 2020-07-04
IceStreamLab 2020-06-26
mengyue 2020-06-09
PasserbyX 2020-05-16
mameng 2020-05-12
心丨悦 2020-05-06
编码之路 2020-05-03
mengyue 2020-05-02
qiuzhuoxian 2020-02-23
编码之路 2020-02-20
lionelf 2020-02-03
TyCoding 2020-02-01
heniancheng 2020-01-31
某某某 2020-01-30
PinkBean 2020-01-29
某某某 2020-01-12
编码之路 2020-01-01
itmale 2020-01-01