我想使用 Apache Tika 提取 PST 文件中作为附件附加到另一封邮件上的邮件的内容。解析 PST 时,附件邮件的元数据中 X-TIKA:content 键对应的值不包含邮件正文。如何提取附件邮件的正文内容?以下是我的代码:
/**
 * Parses a PST file with a {@link RecursiveParserWrapper} and walks the metadata
 * objects the wrapper collected, picking out the sender ("Message-From") and the
 * extracted content ("X-TIKA:content") of each one.
 *
 * <p>The content is produced with {@code HANDLER_TYPE.XML}, so it is run through
 * {@link HtmlParser} once more to reduce it to plain text.
 *
 * @param file the PST file to parse
 * @throws IOException if the file cannot be opened or read
 * @throws TikaException if Tika fails while parsing
 * @throws org.xml.sax.SAXException if a content handler reports an error
 */
public void parsePSTfile(File file) throws IOException, TikaException, org.xml.sax.SAXException
{
    Parser p = new AutoDetectParser();
    RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p,
            new BasicContentHandlerFactory(
                    BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
    ParseContext context = new ParseContext();

    // Close the stream even when parsing throws; the original never closed it.
    try (InputStream stream = TikaInputStream.get(file)) {
        wrapper.parse(stream, new DefaultHandler(), new Metadata(), context);
    }

    for (Metadata metadata : wrapper.getMetadata()) {
        for (String name : metadata.names()) {
            boolean isSender = name.equalsIgnoreCase("Message-From");
            boolean isContent = "X-TIKA:content".equalsIgnoreCase(name);
            if (!isSender && !isContent) {
                continue; // not a key this method cares about
            }
            for (String value : metadata.getValues(name)) {
                // The two checks are independent branches. Previously the unbraced
                // "Message-From" test swallowed the content test as its body, so the
                // content branch could never execute.
                if (isSender) {
                    //set value in required object
                } else {
                    String bodyText = extractBodyText(value);
                    //set value of bodyText in required object
                }
            }
        }
    }
}

/**
 * Converts the markup stored under "X-TIKA:content" into plain text.
 *
 * @param markup the XML/XHTML content string taken from the metadata
 * @return the text collected by the body content handler
 */
private String extractBodyText(String markup) throws IOException, TikaException, org.xml.sax.SAXException
{
    // -1 disables the handler's write limit, matching the unlimited limit given to
    // the wrapper's handler factory, so long message bodies are not cut off.
    ContentHandler textHandler = new BodyContentHandler(-1);
    try (InputStream inputStream = new ByteArrayInputStream(
            markup.getBytes(java.nio.charset.StandardCharsets.UTF_8))) {
        new HtmlParser().parse(inputStream, textHandler, new Metadata(), new ParseContext());
    }
    return textHandler.toString();
}