有人知道如何使用 Apache POI 从 Word 文档中提取超链接吗?如果能从单个段落中提取就更好了。
答案 0 :(得分:3)
Word 2007 及更新版本(.docx 格式,使用 XWPF):
//Links extractor
StringBuffer text = null;
try {
FileInputStream fis = new FileInputStream(new File("YOUR_DOCX_FULL_PATH_HERE));
XWPFDocument document = new XWPFDocument(fis);
text = new StringBuffer();
// First up, all our paragraph based text
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
while(i.hasNext()) {
XWPFParagraph paragraph = i.next();
// Do the paragraph text
for(XWPFRun run : paragraph.getRuns()) {
if(run instanceof XWPFHyperlinkRun) {
text.append(run.toString());
bean.setName(run.toString());
XWPFHyperlink link = ((XWPFHyperlinkRun)run).getHyperlink(document);
if(link != null) {
text.append(" <" + link.getURL() + ">");
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}