以下部分脚本用于将单个网页打印到本地存储的PDF文件中。现在我想让它将多个网页打印成相应的PDF文件。
然而,当它运行时,它只生成一个文件。什么地方出了错?
from pyPdf import PdfFileWriter, PdfFileReader
import StringIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from xhtml2pdf import pisa
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
import os, sys
LN = {'http://www.google':'Google',
'http://www.smh.com.au/text/':'SMH',
'http://www.ap.org/products-services/text':'AP'}
for url, file_name in LN.iteritems():
tem_pdf = "c:\\tem_pdf.pdf"
app = QApplication(sys.argv)
web = QWebView()
#Read the URL given
web.load(QUrl(url))
printer = QPrinter()
#setting format
printer.setPageSize(QPrinter.A4)
printer.setOrientation(QPrinter.Landscape)#Landscape / Portrait
printer.setOutputFormat(QPrinter.PdfFormat)
#export file
printer.setOutputFileName(tem_pdf)
def convertIt():
web.print_(printer)
QApplication.exit()
QObject.connect(web, SIGNAL("loadFinished(bool)"), convertIt)
app.exec_()
sys.exit
outputPDF = PdfFileWriter()
packet = StringIO.StringIO()
# create a new PDF with Reportlab
can = canvas.Canvas(packet, pagesize=letter)
can.setFont("Helvetica", 9)
can.save()
#move to the beginning of the StringIO buffer
packet.seek(0)
new_pdf = PdfFileReader(packet)
# read the existing PDF
existing_pdf = PdfFileReader(file(tem_pdf, "rb"))
pages = existing_pdf.getNumPages()
output = PdfFileWriter()
# add the "watermark" (which is the new pdf) on the existing page
for x in range(0,pages):
page = existing_pdf.getPage(x)
page.mergePage(new_pdf.getPage(0))
output.addPage(page)
# finally, write "output" to a real file
final_file = 'c:\\' + file_name + '.pdf'
outputStream = file(final_file, "wb")
output.write(outputStream)
outputStream.close()
print final_file, 'is ready.'
答案 0 :(得分:0)
发现使用 pdfkit 满足多个PDF生成需求。但不是上述问题的答案。
可以在http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html
找到更多选项import pdfkit
LN = {'http://www.google.com':'Google',
'http://www.smh.com.au/text/':'SMH',
'http://www.ap.org/products-services/text':'AP'}
for url, file_name in LN.iteritems():
options = {'quiet': '',
}
pdfkit.from_url(url, 'c:\\' + file_name + '.pdf', options=options)