Question

我无法更新PDF中的少数几个字段。与这里和这里提到的问题不同，即使我单击该字段，我的数据也不会出现——它根本不存在。我尝试了所有语言（阿拉伯语、英语等）。另外，由于我没有权限在工作计算机上安装Acrobat Reader，因此我使用Internet Explorer和Chrome打开PDF。我不知道问题是否与此有关。

# -*- coding: UTF-8 -*-
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject

def set_need_appearances_writer(writer: PdfFileWriter):
    """Set ``/NeedAppearances`` to true on the writer's ``/AcroForm`` entry.

    If the writer's root object has no ``/AcroForm`` key, one is inserted
    first as an indirect reference. Any exception raised along the way is
    printed instead of propagated, so the call is best-effort.

    The original comment points to sections 12.7.2 and 7.7.2 for background
    (presumably of the PDF specification; the link was lost -- TODO confirm).

    Args:
        writer: The writer whose root object is modified in place.

    Returns:
        The same ``writer`` instance, whether or not the update succeeded.
    """
    acroform_key = "/AcroForm"
    try:
        root = writer._root_object
        if acroform_key not in root:
            # NOTE(review): the reference uses len(writer._objects) as its
            # object number, but no new object is added to the writer here --
            # confirm that it resolves to the intended dictionary.
            placeholder = IndirectObject(len(writer._objects), 0, writer)
            writer._root_object.update({NameObject(acroform_key): placeholder})

        flag_name = NameObject("/NeedAppearances")
        writer._root_object[acroform_key][flag_name] = BooleanObject(True)
    except Exception as e:
        # Deliberately non-fatal: report the problem and hand the writer back.
        print('set_need_appearances_writer() catch : ', repr(e))

    return writer

def update_form_values(infile, outfile, newvals=None):
    """Copy every page of ``infile`` to ``outfile`` with form fields filled in.

    ``/NeedAppearances`` is set on the reader's ``/AcroForm`` (when present)
    and on the writer before the pages are copied.

    Args:
        infile: Path of the PDF form to read.
        outfile: Path the filled PDF is written to.
        newvals: Mapping of field name to the value to set. When omitted or
            empty, every field reported by the reader is instead filled with
            a ``#<index> <name>=<current value>`` label, which makes it easy
            to see which field is which.

    Returns:
        None. The result is written to ``outfile``.

    Errors raised while updating the fields of a page are printed and the
    page is still copied, so one bad page does not abort the whole document.
    """
    # The input must stay open until the output has been written, because the
    # writer pulls page data from the reader's stream; the ``with`` block
    # also guarantees the handle is closed afterwards.
    with open(infile, 'rb') as source:
        pdf = PdfFileReader(source, strict=False)

        # Ask viewers to regenerate field appearances, on the reader side...
        if "/AcroForm" in pdf.trailer["/Root"]:
            pdf.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        # ...and on the writer side.
        writer = PdfFileWriter()
        set_need_appearances_writer(writer)
        if "/AcroForm" in writer._root_object:
            writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        # Work out the values once; they do not depend on the page.
        if newvals:
            values = newvals
        else:
            # No values supplied: label each field with its own name.
            # getFields() returns None when the document has no form fields.
            try:
                fields = pdf.getFields() or {}
            except Exception as e:
                print(repr(e))
                fields = {}
            values = {}
            for idx, (name, field) in enumerate(fields.items()):
                current = field.get('/V', '')
                values[name] = f'#{idx} {name}={current}'

        for page_number in range(pdf.getNumPages()):
            page = pdf.getPage(page_number)
            try:
                writer.updatePageFormFieldValues(page, values)
            except Exception as e:
                # Best effort: e.g. a page without annotations cannot be
                # updated, but it must still end up in the output.
                print(repr(e))
            writer.addPage(page)

        with open(outfile, 'wb') as out:
            writer.write(out)

if __name__ == '__main__':
    # Fill the form in "complete_doc.pdf" with the values in data_dict and
    # write the result to the same name prefixed with "out-".
    # NOTE(review): data_dict is not defined in the visible part of this
    # file -- confirm where it is supposed to come from.
    pdf_file_name = "complete_doc.pdf"
    output_file_name = 'out-' + pdf_file_name
    update_form_values(pdf_file_name, output_file_name, newvals=data_dict)

少数字段未在PDF中更新

0 个答案: