当我从任何源PDF中打印PDF时,文件大小会下降并删除表单中显示的文本框。简而言之,它会使文件变平。 这是我想要实现的行为。
以下代码使用另一个PDF作为源(我想要展平的那个)来创建PDF,它也会写入文本框形式。
我可以在没有文本框的情况下获得PDF,将其展平吗?就像Adobe以PDF格式打印PDF一样。
我的其他代码看起来像这样减去一些东西:
import os
import StringIO
from pyPdf import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
directory = os.path.join(os.getcwd(), "source") # dir we are interested in
fif = [f for f in os.listdir(directory) if f[-3:] == 'pdf'] # get the PDFs
for i in fif:
packet = StringIO.StringIO()
can = canvas.Canvas(packet, pagesize=letter)
can.rotate(-90)
can.save()
packet.seek(0)
new_pdf = PdfFileReader(packet)
fname = os.path.join('source', i)
existing_pdf = PdfFileReader(file(fname, "rb"))
output = PdfFileWriter()
nump = existing_pdf.getNumPages()
page = existing_pdf.getPage(0)
for l in range(nump):
output.addPage(existing_pdf.getPage(l))
page.mergePage(new_pdf.getPage(0))
outputStream = file("out-"+i, "wb")
output.write(outputStream)
outputStream.close()
print fName + " written as", i
总结:我有一个pdf,我添加了一个文本框,覆盖信息并添加新信息,然后我从该pdf打印pdf。文本框不再可编辑或可移动。我想自动完成该过程,但我尝试的所有内容仍然允许该文本框可编辑。
答案 0 :(得分:1)
将pdf转换为图像的一种简单方法,而不是将这些图像转换为pdf的方法。
您需要pdf2image和PIL
像这样
from pdf2image import convert_from_path
from PIL import Image
images = convert_from_path('temp.pdf')
im1 = images[0]
pdf1_filename = "flattened.pdf"
im1.save(pdf1_filename, "PDF" ,resolution=100.0, save_all=True, append_images=images)
答案 1 :(得分:0)
使用pdfrw(How to Populate Fillable PDF's with Python)将输入内容的表格弄平时,我发现我不得不使用generate_fdf(pdftk flatten loses fillable field data)添加额外的步骤。
os.system('pdftk '+outtemp+' generate_fdf output '+outfdf)
os.system('pdftk '+outtemp+' fill_form '+outfdf+' output '+outpdf)
之所以使用此解决方案,是因为我能够在Mac上使用ghostscript的pdf2ps和ps2pdf来平整文件,但是当我在Amazon Linux实例上运行该文件时,分辨率较低。我不知道为什么会这样,所以转到了pdftk解决方案。
答案 2 :(得分:0)
对于Adobe Docs,可以将“可编辑表单”字段的位位置更改为1,以使该字段为“只读”。我在这里提供了完整的解决方案,但它使用的是Django:
https://stackoverflow.com/a/55301804/8382028
Adobe Docs(第552页):
https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/pdf_reference_archives/PDFReference.pdf
使用PyPDF2填充字段,然后遍历注释以更改位的位置:
from io import BytesIO
import PyPDF2
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject
# open the pdf
input_stream = open("YourPDF.pdf", "rb")
pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
if "/AcroForm" in pdf_reader.trailer["/Root"]:
pdf_reader.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
pdf_writer = PyPDF2.PdfFileWriter()
set_need_appearances_writer(pdf_writer)
if "/AcroForm" in pdf_writer._root_object:
# Acro form is form field, set needs appearances to fix printing issues
pdf_writer._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
data_dict = dict() # this is a dict of your form values
pdf_writer.addPage(pdf_reader.getPage(0))
page = pdf_writer.getPage(0)
# update form fields
pdf_writer.updatePageFormFieldValues(page, data_dict)
for j in range(0, len(page['/Annots'])):
writer_annot = page['/Annots'][j].getObject()
for field in data_dict:
if writer_annot.get('/T') == field:
writer_annot.update({
NameObject("/Ff"): NumberObject(1) # make ReadOnly
})
output_stream = BytesIO()
pdf_writer.write(output_stream)
# output_stream is your flattened PDF
def set_need_appearances_writer(writer):
# basically used to ensured there are not
# overlapping form fields, which makes printing hard
try:
catalog = writer._root_object
# get the AcroForm tree and add "/NeedAppearances attribute
if "/AcroForm" not in catalog:
writer._root_object.update({
NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
need_appearances = NameObject("/NeedAppearances")
writer._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
except Exception as e:
print('set_need_appearances_writer() catch : ', repr(e))
return writer
答案 3 :(得分:0)
一种适用于Windows的解决方案,它可以转换许多pdf页面并也可以平化chackbox的值。由于某些原因,@ ViaTech代码在我的电脑上不起作用(Windows7 python 3.8)
遵循@ViaTech指示,并广泛使用this post中的@hchillon代码
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, TextStringObject, NumberObject
def set_need_appearances_writer(writer):
try:
catalog = writer._root_object
# get the AcroForm tree and add "/NeedAppearances attribute
if "/AcroForm" not in catalog:
writer._root_object.update({
NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
need_appearances = NameObject("/NeedAppearances")
writer._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
return writer
except Exception as e:
print('set_need_appearances_writer() catch : ', repr(e))
return writer
class PdfFileFiller(object):
def __init__(self, infile):
self.pdf = PdfFileReader(open(infile, "rb"), strict=False)
if "/AcroForm" in self.pdf.trailer["/Root"]:
self.pdf.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
# newvals and newchecks have keys have to be filled. '' is not accepted
def update_form_values(self, outfile, newvals=None, newchecks=None):
self.pdf2 = MyPdfFileWriter()
trailer = self.pdf.trailer["/Root"]["/AcroForm"]
self.pdf2._root_object.update({
NameObject('/AcroForm'): trailer})
set_need_appearances_writer(self.pdf2)
if "/AcroForm" in self.pdf2._root_object:
self.pdf2._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
for i in range(self.pdf.getNumPages()):
self.pdf2.addPage(self.pdf.getPage(i))
self.pdf2.updatePageFormFieldValues(self.pdf2.getPage(i), newvals)
for j in range(0, len(self.pdf.getPage(i)['/Annots'])):
writer_annot = self.pdf.getPage(i)['/Annots'][j].getObject()
for field in newvals:
writer_annot.update({NameObject("/Ff"): NumberObject(1)})
self.pdf2.updatePageFormCheckboxValues(self.pdf2.getPage(i), newchecks)
with open(outfile, 'wb') as out:
self.pdf2.write(out)
class MyPdfFileWriter(PdfFileWriter):
def __init__(self):
super().__init__()
def updatePageFormCheckboxValues(self, page, fields):
for j in range(0, len(page['/Annots'])):
writer_annot = page['/Annots'][j].getObject()
for field in fields:
writer_annot.update({NameObject("/Ff"): NumberObject(1)})
origin = ## Put input pdf path here
destination = ## Put output pdf path here, even if the file does not exist yet
newchecks = {} # A dict with all checkbox values that need to be changed
newvals = {'':''} # A dict with all entry values that need to be changed
# newvals dict has to be equal to {'':''} in case that no changes are needed
c = PdfFileFiller(origin)
c.update_form_values(outfile=destination, newvals=newvals, newchecks=newchecks)
print('PDF has been created\n')