使用pdfbox替换PDF文件的文本

时间:2017-04-03 14:42:18

标签: java pdfbox

我有 4 个 PDF 文件,它们都来自同一个 .doc 文件:我用 4 种方法(Foxit Reader、Nitro、Web 服务和 Word)把这个 .doc 文件转换成了 PDF。

然后我使用 PDFBox 搜索并替换其中的一些单词。问题是,出于某种原因,它只对 Foxit Reader 和 Word 生成的文件有效,对另外两个文件(Nitro 和 Web 服务生成的)则不起作用。有人知道原因吗?

这是我使用的代码:

public static  void replace (String s){
PDDocument doc = null;
int occurrences = 0;
try {
    doc = PDDocument.load(s); //Input PDF File Name
    System.out.println("+e"+doc);
    List pages = doc.getDocumentCatalog().getAllPages();
    for (int i = 0; i < pages.size(); i++) {
        PDPage page = (PDPage) pages.get(i);
        //System.out.println("ddd");
        PDStream contents = page.getContents();
        PDFStreamParser parser = new PDFStreamParser(contents.getStream());
        parser.parse();
        List tokens = parser.getTokens();
        for (int j = 0; j < tokens.size(); j++) {
            //System.out.println("jjjj");
            Object next = tokens.get(j);
            if (next instanceof PDFOperator) {
                PDFOperator op = (PDFOperator) next;
                // Tj and TJ are the two operators that display strings in a PDF
                if (op.getOperation().equals("Tj")) {
                    // Tj takes one operator and that is the string
                    // to display so lets update that operator
                    COSString previous = (COSString) tokens.get(j - 1);
                    String string = previous.getString();
                    if (string.contains("#signature#")) {
                        string = string.replace("#signature#", "sam");
                        occurrences++;
                    }
                    //Word you want to change. Currently this code changes word "Good" to "Bad"
                    previous.reset();
                    previous.append(string.getBytes("ISO-8859-1"));
                } else if (op.getOperation().equals("TJ")) {
                    COSArray previous = (COSArray) tokens.get(j - 1);
                    COSString temp = new COSString();

                    String tempString = "";
                    for (int t = 0; t < previous.size(); t++) {

                        if (previous.get(t) instanceof COSString) {
                            tempString += ((COSString) previous.get(t)).getString();

                        }
                    }

                    temp.append(tempString.getBytes("ISO-8859-1"));
                    tempString = "";
                    tempString = temp.getString();
                    if (tempString.contains("#signature#")) {
                        tempString = tempString.replace("#signature#", "sam");
                        occurrences++;
                    }
                    previous.clear();

                    String[] stringArray = tempString.split(" ");

                    for (String string : stringArray) {
                        COSString cosString = new COSString();
                        string = string + " ";
                        cosString.append(string.getBytes("ISO-8859-1"));
                        previous.add(cosString);
                    }

                }
            }
        }
        // now that the tokens are updated we will replace the page content stream.
        PDStream updatedStream = new PDStream(doc);
        OutputStream out = updatedStream.createOutputStream();
        ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
        tokenWriter.writeTokens(tokens);
        page.setContents(updatedStream);
    }
    System.out.println("number of matches found: " + occurrences);
    doc.save(s+"_convert.pdf"); //Output file name
} catch (Exception ex) {
     System.out.println("eee+"+ex.getMessage());
} finally {
    if (doc != null) {
        try {
            doc.close();
        } catch (IOException ex) {
            ex.getStackTrace();
        }
    }
}

0 个答案:

没有答案