(Python)更改使用xtopdf生成的PDF文件的页面大小和格式

时间:2017-07-27 13:35:24

标签: python pdf reportlab pdf-writer tablib

我想用Python把xlsx转换为PDF。我使用模块tablib和xtopdf来构建结构良好的表格,效果很好!不幸的是,内容在一个PDF页面中放不下。所以我想把页面大小和方向改为横向A3,但我不知道该怎么做。我的代码:

import random
import tablib
from openpyxl import load_workbook
from xtopdf import PDFWriter
from pyPdf import PdfFileWriter, PdfFileReader


# Open the Excel workbook with openpyxl. data_only=True asks for stored cell
# values rather than formulas; guess_types=True lets openpyxl infer cell types.
workbook = load_workbook('C:/Users/user1/Testexcel.xlsx', guess_types=True, data_only=True)
# Select the sheet to export by its name.
worksheet = workbook.get_sheet_by_name('Testsheet')
# Iterate rows 4-6 across columns A-H; note that only the first four cells of
# each row are consumed by the loop further down.
ws_range = worksheet.iter_rows('A4:H6')

# Helper function to output a string to both screen and PDF.
def print_and_write(pw, strng):
    """Echo ``strng`` to stdout and append it as one line to the PDF.

    pw    -- writer object exposing ``writeLine(text)``; in this script it
             is an xtopdf ``PDFWriter``.
    strng -- the text line to output.

    Returns None.
    """
    # The parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function, whereas the
    # bare ``print strng`` statement is a SyntaxError on Python 3.
    print(strng)
    pw.writeLine(strng)

# Start from an empty tablib Dataset with four fixed column headers.
data = tablib.Dataset()
data.headers = ['col1', 'col2', 'col3', 'col4']
widths = [30, 20, 10, 20]  # Display width of each column, in characters.

# Turn every worksheet row into four centred, fixed-width text cells.
for cells in ws_range:
    # Only the first four cells of the row are used; values are stringified.
    texts = [str(cells[pos].value) for pos in range(4)]
    padded = [text.center(width) for text, width in zip(texts, widths)]
    data.append(padded)

# Configure the PDF writer: output path, font, and page header/footer text.
pw = PDFWriter('C:/Users/user1/Test.pdf')
pw.setFont('Courier', 10)
pw.setHeader('Test')
pw.setFooter('Test')

# Assemble the whole table as text lines first, then emit each line to both
# the screen and the PDF in order.
separator = '-' * sum(widths)
header_line = ''.join(
    title.center(width) for title, width in zip(data.headers, widths)
)

# Layout: rule, column headers, rule, one line per data row, closing rule.
table_lines = [separator, header_line, separator]
table_lines.extend(''.join(record) for record in data)
table_lines.append(separator)

for line in table_lines:
    print_and_write(pw, line)

# Finish the document.
pw.close()

我发现xtopdf的PDFWriter内部使用了reportlab库的canvas对象。Canvas类中声明了一个参数pagesize,默认设置为'A4'。但是,即使我把该值改为'A3',生成的PDF仍然是'A4'大小。

class Canvas(textobject._PDFColorSetter):
    from reportlab.pdfgen import canvas
    c = canvas.Canvas("hello.pdf")
    from reportlab.lib.units import inch
    # move the origin up and to the left
    c.translate(inch,inch)
    # define a large font
    c.setFont("Helvetica", 80)
    # choose some colors
    c.setStrokeColorRGB(0.2,0.5,0.3)
    c.setFillColorRGB(1,0,1)
    # draw a rectangle
    c.rect(inch,inch,6*inch,9*inch, fill=1)
    # make text go straight up
    c.rotate(90)
    # change color
    c.setFillColorRGB(0,0,0.77)
    # say hello (note after rotate the y coord needs to be negative!)
    c.drawString(3*inch, -3*inch, "Hello World")
    c.showPage()
    c.save()
    """

    def __init__(self,filename,
                 pagesize='A3',
                 bottomup = 1,
                 pageCompression=None,
                 encoding = None,
                 invariant = None,
                 verbosity=0):
        """Create a canvas of a given size and set up its initial state.

        You may pass a file-like object to filename as an alternative to
        a string.

        Most of the attributes are private - we will use set/get methods
        as the preferred interface.

        Parameters:
            filename: output file name (or file-like object); stored on the
                canvas and handed to the underlying PDFDocument.
            pagesize: page size, stored as-is in self._pagesize.  As written
                here the default is the string 'A3', and None is replaced by
                'A3' as well.
                NOTE(review): ReportLab page sizes are normally
                (width, height) tuples in points (e.g.
                reportlab.lib.pagesizes.A3); a bare string such as 'A3' is
                presumably not interpreted as a named size - confirm.
            bottomup: stored in self.bottomup; selects whether drawing
                coordinates are bottom-up (see the comment further down).
            pageCompression: passed to the PDFDocument and to
                setPageCompression().
            encoding: encoding name; None falls back to
                rl_config.defaultEncoding.
            invariant: None falls back to rl_config.invariant.
            verbosity: only controls whether a 'saved ...' message is
                printed; 0 disables it.
        """
        # Fill in defaults for arguments that were left as None.
        if pagesize is None: pagesize = 'A3'
        if encoding is None: encoding = rl_config.defaultEncoding
        if invariant is None: invariant = rl_config.invariant
        self._filename = filename
        self._encodingName = encoding
        # The document object that the canvas writes into.
        self._doc = pdfdoc.PDFDocument(encoding,
                                       compression=pageCompression,
                                       invariant=invariant, filename=filename)


        #this only controls whether it prints 'saved ...' - 0 disables
        self._verbosity = verbosity

        #this is called each time a page is output if non-null
        self._onPage = None

        # Per-page settings; pagesize is stored exactly as it was passed in.
        self._pagesize = pagesize
        self._pageRotation = 0
        #self._currentPageHasImages = 0
        self._pageTransition = None
        self._pageDuration = None
        self._destinations = {} # dictionary of destinations for cross indexing.

        self.setPageCompression(pageCompression)
        self._pageNumber = 1   # keep a count
        #self3 = []    #where the current page's marking operators accumulate
        # when we create a form we need to save operations not in the form
        self._codeStack = []
        self._restartAccumulators()  # restart all accumulation state (generalized, arw)
        self._annotationCount = 0

        self._outlines = [] # list for a name tree
        self._psCommandsBeforePage = [] #for postscript tray/font commands
        self._psCommandsAfterPage = [] #for postscript tray/font commands

        #PostScript has the origin at bottom left. It is easy to achieve a top-
        #down coord system by translating to the top of the page and setting y
        #scale to -1, but then text is inverted.  So self.bottomup is used
        #to also set the text matrix accordingly.  You can now choose your
        #drawing coordinates.
        self.bottomup = bottomup
        self.imageCaching = rl_config.defaultImageCaching
        # Runs after bottomup and the page size have been stored above.
        self._make_preamble()
        self.init_graphics_state()
        self.state_stack = []

编辑:我认为我对reportlab模块所做的更改没有被系统采用。我试着删除了reportlab目录,然后在命令行中导入它。具有讽刺意味的是,虽然python已经找不到该模块,但程序仍然可以正常工作。

2 个答案:

答案 0 :(得分:1)

试试这个

from reportlab.pdfgen import canvas
from reportlab.lib.units import mm

# A3 measures 297 mm x 420 mm.  pagesize is (width, height), so putting the
# longer edge first gives the landscape (horizontal) page the question asks for.
c = canvas.Canvas("hello.pdf", pagesize=(420 * mm, 297 * mm))
# or (297 * mm, 420 * mm) if you want it in portrait format
# values for inch: 16.53 * inch, 11.69 * inch (landscape)

# the following would create an empty page
c.showPage()
c.save()

答案 1 :(得分:0)

我刚在Bitbucket上fork了名为xtopdf的项目,并进行了以下更改:

 ##------------------------ PDFWriter.__init__ ----------------------------

-   def __init__(self, pdf_fn):
+   def __init__(self, pdf_fn, pagesize='A4'):
    '''
    Constructor.
    "pdf_fn" arg is the name of the PDF file to be created.
    '''

        self.__pdf_fn = pdf_fn         # file name of PDF file
-       self.__canv = canvas.Canvas(pdf_fn)     # canvas to write on
+       self.__canv = canvas.Canvas(pdf_fn, pagesize)     # canvas to write on
        self.__font_name = None       # font name
        self.__font_size = None       # font size
        self.__header_str = None      # header string (partial)
你可以尝试一下吗?使用pw = PDFWriter('C:/Users/user1/Test.pdf', 'A3')