# Import the necessary classes from the PyPDF2 library
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_
# Script: strip a text watermark from every page of input.pdf and write the
# result to output.pdf.

# The watermark says SAMPLE on it, so different capitalizations were tried.
# The prefix match below is case-sensitive.
wm_text = 'Sample'
# The goal is to replace the SAMPLE watermark with nothing; a single space
# could also suffice.
replace_with = ''

# Load the PDF. The input file stays open until the output has been written,
# because the writer pulls page data from the reader when it serializes.
with open('input.pdf', "rb") as input_stream:
    source = PdfFileReader(input_stream)
    output = PdfFileWriter()

    # For each page
    for page_number in range(source.getNumPages()):
        # Get the current page and its contents.
        page = source.getPage(page_number)

        # A page without a content stream has nothing to rewrite.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            # Loop over all PDF elements.
            for operands, operator in content.operations:
                # Was told to adjust this portion based on the PDF file.
                if operator != b_("TJ"):
                    continue
                # Skip a TJ with no operand or an empty array so that the
                # [0][0] lookup below cannot raise IndexError.
                if not operands or not operands[0]:
                    continue
                text = operands[0][0]
                if isinstance(text, TextStringObject) and text.startswith(wm_text):
                    # NOTE(review): TJ normally takes an array operand; this
                    # swaps it for a bare string as the original code did.
                    # Confirm that the resulting stream is accepted by viewers.
                    operands[0] = TextStringObject(replace_with)

            # Set the modified content as the content object on the page.
            page[NameObject('/Contents')] = content

        # Add the page to the output.
        output.addPage(page)

    # Write the stream.
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)
答案 0 :(得分:2)
下面是基于问题中的代码改写的函数，可在 Python 3 中使用。
def removeWatermark(wm_text, inputFile, outputFile):
    """Copy a PDF, blanking text that starts with *wm_text*.

    Every page of ``inputFile`` is scanned for ``Tj`` (show text) operations.
    When the operand is a string that starts with ``wm_text`` (case-sensitive
    prefix match), it is replaced with an empty string. All pages are then
    written to ``outputFile``.

    Args:
        wm_text: Prefix of the watermark text to remove.
        inputFile: Path of the PDF to read.
        outputFile: Path of the PDF to write.
    """
    from PyPDF4 import PdfFileReader, PdfFileWriter
    from PyPDF4.pdf import ContentStream
    from PyPDF4.generic import TextStringObject, NameObject
    from PyPDF4.utils import b_

    with open(inputFile, "rb") as f:
        # The original passed "rb" as a second positional argument, which is
        # not a file mode here; the reader only needs the open stream.
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_number in range(source.getNumPages()):
            page = source.getPage(page_number)

            # A page without a content stream has nothing to rewrite.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, source)

                for operands, operator in content.operations:
                    if operator != b_("Tj") or not operands:
                        continue
                    text = operands[0]
                    if isinstance(text, str) and text.startswith(wm_text):
                        operands[0] = TextStringObject('')

                # Install the edited stream as the page's content.
                page[NameObject('/Contents')] = content

            output.addPage(page)

        # Write while the input file is still open: the pages added above
        # still refer to the reader.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
# Example run: bind the three arguments, then call the function defined above.
wm_text, inputFile, outputFile = 'wm_text', r'input.pdf', r"output.pdf"
removeWatermark(wm_text=wm_text, inputFile=inputFile, outputFile=outputFile)