我正在尝试将一个包含表格的PDF文件转换为DOCX格式。但转换之后,输出中只剩下纯文本,表格结构丢失了。如何在不改变格式的情况下,将包含表格的整个PDF转换为DOCX?我使用的是Java,以下是代码片段。
/**
 * Converts a PDF file into a DOCX file by extracting the text of each page
 * and writing it into its own paragraph, with a page break between pages.
 *
 * <p>Only the plain text returned by {@code SimpleTextExtractionStrategy} is
 * copied. No table, font or layout information is read from the PDF, so
 * tables in the source appear as plain lines of text in the output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the PDF cannot be read or the DOCX cannot be written
 */
public static void main(String[] args) throws IOException {
    System.out.println("Document converted started");
    XWPFDocument doc = new XWPFDocument();
    String pdf = "C:\\Users\\30216\\Desktop\\wordtopdf\\sample_full.pdf";
    PdfReader reader = new PdfReader(pdf);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        int pageCount = reader.getNumberOfPages();
        for (int i = 1; i <= pageCount; i++) {
            TextExtractionStrategy strategy = parser.processContent(i,
                    new SimpleTextExtractionStrategy());
            String text = strategy.getResultantText();
            XWPFParagraph p = doc.createParagraph();
            XWPFRun run = p.createRun();

            // Newline characters inside a run's text are not rendered as line
            // breaks, so emit each extracted line separately with an explicit
            // break in between.
            String[] lines = text.split("\\r?\\n");
            for (int j = 0; j < lines.length; j++) {
                if (j > 0) {
                    run.addBreak();
                }
                run.setText(lines[j]);
            }

            // Break only between pages; a break after the last page would
            // leave an empty trailing page in the document.
            if (i < pageCount) {
                run.addBreak(BreakType.PAGE);
            }
        }

        // try-with-resources closes the output file even if write() fails.
        try (FileOutputStream out = new FileOutputStream(
                "C:\\Users\\30216\\Desktop\\wordtopdf\\pdftoword.docx")) {
            doc.write(out);
        }
    } finally {
        // Release the PDF even when extraction or writing throws.
        reader.close();
    }
    System.out.println("Document converted successfully");
}
}