使用Python 2.76将多个网页打印成PDF

时间:2014-07-22 04:14:51

标签: python pdf web

以下部分脚本用于将单个网页打印到本地存储的PDF文件中。现在我想让它将多个网页打印成相应的PDF文件。

然而,当它运行时,它只生成一个文件。什么地方出了错?

from pyPdf import PdfFileWriter, PdfFileReader
import StringIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from xhtml2pdf import pisa
from PyQt4.QtCore import *
from PyQt4.QtGui import * 
from PyQt4.QtWebKit import *
import os, sys


LN = {'http://www.google':'Google',
      'http://www.smh.com.au/text/':'SMH',
      'http://www.ap.org/products-services/text':'AP'}

for url, file_name in LN.iteritems():

    tem_pdf = "c:\\tem_pdf.pdf"

    app = QApplication(sys.argv)
    web = QWebView()
    #Read the URL given
    web.load(QUrl(url))
    printer = QPrinter()
    #setting format
    printer.setPageSize(QPrinter.A4)
    printer.setOrientation(QPrinter.Landscape)#Landscape / Portrait
    printer.setOutputFormat(QPrinter.PdfFormat)
    #export file
    printer.setOutputFileName(tem_pdf)

    def convertIt():
        web.print_(printer)
        QApplication.exit()

    QObject.connect(web, SIGNAL("loadFinished(bool)"), convertIt)
    app.exec_()
    sys.exit

    outputPDF = PdfFileWriter()
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Helvetica", 9)
    can.save()

    #move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # read the existing PDF
    existing_pdf = PdfFileReader(file(tem_pdf, "rb"))
    pages = existing_pdf.getNumPages()
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    for x in range(0,pages):
        page = existing_pdf.getPage(x)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)

    # finally, write "output" to a real file

    final_file = 'c:\\' + file_name + '.pdf'
    outputStream = file(final_file, "wb")
    output.write(outputStream)
    outputStream.close()

    print final_file, 'is ready.'

1 个答案:

答案 0 :(得分:0)

发现使用 pdfkit 满足多个PDF生成需求。但不是上述问题的答案。

可以在http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html

找到更多选项
import pdfkit

LN = {'http://www.google.com':'Google',
      'http://www.smh.com.au/text/':'SMH',
      'http://www.ap.org/products-services/text':'AP'}

for url, file_name in LN.iteritems():

    options = {'quiet': '',
                   }

    pdfkit.from_url(url, 'c:\\' + file_name + '.pdf', options=options)