我想从 .docx 或 .txt 文件中提取全部内容，然后创建一个内容完全相同的新 PDF 文档。问题在于，我希望在提取内容的同时保留原有的字体样式（例如粗体或斜体）……以下是我目前为止所写的代码：
/**
 * Entry point: asks the user to pick a {@code .txt} or {@code .docx} file and
 * hands it to {@link Controller#createPDF(File)}.
 */
public class Main {

    /**
     * Shows an open-file dialog repeatedly until the user either selects a
     * file with a supported extension or dismisses the dialog.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        JFileChooser fileChooser = new JFileChooser();
        File file;
        do {
            int returnedValue = fileChooser.showOpenDialog(null);
            if (returnedValue != JFileChooser.APPROVE_OPTION) {
                // Dialog was cancelled or failed: there is no valid selection,
                // so stop here instead of passing null (or a previously
                // rejected file) on to the controller.
                return;
            }
            file = fileChooser.getSelectedFile();
        } while (!hasSupportedExtension(file));

        Controller controller = new Controller();
        controller.createPDF(file);
    }

    /**
     * Tells whether the file's absolute path ends in {@code .txt} or
     * {@code .docx} (case-sensitive comparison).
     *
     * @param file the file chosen by the user
     * @return {@code true} if the extension is one of the accepted ones
     */
    private static boolean hasSupportedExtension(File file) {
        String path = file.getAbsolutePath();
        return path.endsWith(".txt") || path.endsWith(".docx");
    }
}
/**
 * Reads a file as plain text and writes that text into a new PDF document.
 *
 * <p>NOTE(review): the file is read through a character reader, so only plain
 * text input is meaningful. A {@code .docx} file is a ZIP container, so this
 * code will emit its raw bytes rather than the document text; extracting the
 * real text and its bold/italic styling requires a dedicated document parser,
 * which this class does not use.
 */
public class Controller {

    /**
     * Reads {@code file}, echoes its text to standard output and writes it to
     * a PDF. Failures while building the PDF are reported via
     * {@code printStackTrace()} and not rethrown.
     *
     * @param file the input file; when {@code null} nothing is done
     */
    public void createPDF(File file) {
        if (file == null) {
            // No file to convert (for example, the chooser was cancelled).
            System.err.println("No input file selected; nothing to convert.");
            return;
        }
        String text = readFileContent(file);
        System.out.println(text);
        try {
            newPdf(text, file);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (COSVisitorException e) {
            e.printStackTrace();
        } catch (FontFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code text} to {@code doc.pdf} in the working directory, one
     * source line per PDF text line, using 14pt Courier in black. A new page
     * is started when the current one runs out of vertical space.
     *
     * <p>Long lines are not wrapped. The {@code file} parameter is currently
     * unused; the output name is fixed.
     *
     * @param text the text to render; line breaks separate PDF text lines
     * @param file the source file (unused)
     * @throws IOException          if writing the content stream or saving fails
     * @throws COSVisitorException  if the document cannot be saved
     * @throws FontFormatException  declared for callers; not raised by this body
     */
    private void newPdf(String text, File file) throws IOException, COSVisitorException, FontFormatException {
        PDFont font = PDType1Font.COURIER;
        float fontSize = 14;
        float leading = fontSize * 1.2f; // vertical distance between baselines
        float startX = 20;
        float startY = 750;
        float bottomMargin = 20;

        // Limit -1 keeps trailing empty lines; an empty text yields one empty line.
        String[] lines = text.split("\r\n|\r|\n", -1);
        int index = 0;

        PDDocument doc = new PDDocument();
        // try/finally (not try-with-resources) so the code does not depend on
        // these PDFBox classes implementing AutoCloseable.
        try {
            // do/while guarantees at least one page even for empty input.
            do {
                PDPage page = new PDPage();
                doc.addPage(page);
                PDPageContentStream contentStream = new PDPageContentStream(doc, page);
                try {
                    contentStream.setFont(font, fontSize);
                    contentStream.beginText();
                    contentStream.setNonStrokingColor(Color.black);
                    contentStream.moveTextPositionByAmount(startX, startY);
                    float y = startY;
                    // y starts above the margin, so every page consumes at
                    // least one line and the outer loop always terminates.
                    while (index < lines.length && y >= bottomMargin) {
                        contentStream.drawString(lines[index]);
                        contentStream.moveTextPositionByAmount(0, -leading);
                        y -= leading;
                        index++;
                    }
                    contentStream.endText();
                } finally {
                    contentStream.close();
                }
            } while (index < lines.length);
            doc.save("doc.pdf");
        } finally {
            doc.close();
        }
    }

    /**
     * Reads the whole file as text using the platform default charset.
     * Lines are joined with {@code '\n'} so the original line structure is kept.
     * I/O errors are reported via {@code printStackTrace()}; whatever was read
     * before the error is still returned.
     *
     * @param file the file to read
     * @return the text read so far (possibly empty)
     */
    private String readFileContent(File file) {
        StringBuilder finalText = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new FileReader(file.getAbsolutePath()));
            try {
                String line;
                boolean first = true;
                while ((line = br.readLine()) != null) {
                    // readLine() strips the terminator, so put a separator back.
                    if (!first) {
                        finalText.append('\n');
                    }
                    finalText.append(line);
                    first = false;
                }
            } finally {
                // Closing the BufferedReader also closes the wrapped FileReader.
                br.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return finalText.toString();
    }
}