我需要更改PDF文档中的现有文本。我正在尝试使用Apache PDFBox删除并替换PDF文件中的某些文本,但它无法正常工作。
// Replaces text inside the content streams of every page of D://out.pdf and
// saves the result to D://out1.pdf (PDFBox 1.8-style API).
//
// How it works: each page's content stream is parsed into a flat token list in
// which operands come first and the operator follows them. Text is drawn by the
// text-showing operators "Tj" (single string operand) and "TJ" (array operand
// mixing string fragments and kerning numbers). The operator name is therefore
// what must be matched -- the text to replace lives in the operand just before it.
//
// NOTE(review): this only finds the search text when it sits inside a single
// string operand. Producers frequently split a line across several TJ fragments,
// and fonts with custom/subset encodings do not store readable ISO-8859-1 bytes;
// in those cases no match will be found -- confirm against the actual PDF.

// Text to look for and its replacement. replaceFirst() treats searchText as a
// regular expression and replaces only the first occurrence per string operand.
final String searchText = "replace this line";
final String replacement = "";

doc = PDDocument.load("D://out.pdf"); //Input PDF File Name
List<?> pages = doc.getDocumentCatalog().getAllPages();
for (int i = 0; i < pages.size(); i++)
{
    PDPage page = (PDPage) pages.get(i);
    PDStream contents = page.getContents();
    if (contents == null)
    {
        // Page has no content stream: nothing to parse or rewrite.
        continue;
    }

    PDFStreamParser parser = new PDFStreamParser(contents.getStream());
    parser.parse();
    List<Object> tokens = parser.getTokens();

    // Start at 1: an operator at index 0 has no preceding operand.
    for (int j = 1; j < tokens.size(); j++)
    {
        Object next = tokens.get(j);
        if (!(next instanceof PDFOperator))
        {
            continue;
        }

        String operation = ((PDFOperator) next).getOperation();
        Object operand = tokens.get(j - 1);

        if ("Tj".equals(operation) && operand instanceof COSString)
        {
            // Tj: the operand is the string that is shown.
            COSString text = (COSString) operand;
            String updated = text.getString().replaceFirst(searchText, replacement);
            text.reset();
            text.append(updated.getBytes("ISO-8859-1"));
        }
        else if ("TJ".equals(operation) && operand instanceof org.apache.pdfbox.cos.COSArray)
        {
            // TJ: the operand is an array; only its string elements carry text,
            // the numeric elements are glyph position adjustments and are left alone.
            org.apache.pdfbox.cos.COSArray fragments = (org.apache.pdfbox.cos.COSArray) operand;
            for (int k = 0; k < fragments.size(); k++)
            {
                Object fragment = fragments.getObject(k);
                if (fragment instanceof COSString)
                {
                    COSString text = (COSString) fragment;
                    String updated = text.getString().replaceFirst(searchText, replacement);
                    text.reset();
                    text.append(updated.getBytes("ISO-8859-1"));
                }
            }
        }
    }

    // Serialise the (possibly modified) tokens into a fresh stream and make it
    // the page's content stream.
    PDStream updatedStream = new PDStream(doc);
    OutputStream out = updatedStream.createOutputStream();
    try
    {
        ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
        tokenWriter.writeTokens(tokens);
    }
    finally
    {
        // Close so buffered bytes are flushed into the stream before saving.
        out.close();
    }
    page.setContents(updatedStream);
}
doc.save("D://out1.pdf");
// doc is declared outside this fragment; its owner should call doc.close() once
// it is no longer needed.