PyPDF2.utils.PdfReadError:在指定位置找不到外部参照表

时间:2017-07-23 17:21:34

标签: python django pypdf2

创建Django迁移后,我添加了文件字段验证后出现了一个问题。 我的模特是:

class TextItemSourceFile(models.Model):
    FILE_TYPE = 'text_source'
    BASE_DIR = 'text_source'

    EXT_WHITELIST = ('.doc', '.docx', '.odt', '.pdf', '.rtf', '.djvu', '.djv')
    MIME_WHITELIST = (
        'application/CDFV2-unknown',
        # for doc and docx files
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        # for odt files
        'application/vnd.oasis.opendocument.text',
        # for pdf files
        'application/pdf',
        'application/x-pdf',
        # for rtf files
        'text/rtf',
        'application/rtf',
        # for djvu files
        'image/vnd.djvu',
        'image/x-djvu'
    )

    MAX_SIZE = 10 * 1024 * 1024

    uuid = models.UUIDField(
        verbose_name="UUID",
        primary_key=True, default=uuid.uuid4
    )
    author = models.ForeignKey(
        verbose_name="author",
        to='auth.User'
    )
    date_created = models.DateTimeField(
        verbose_name="date created",
        auto_now_add=True
    )
    name = models.CharField(
        verbose_name="original file name",
        max_length=200,
        null=False, blank=False
    )
    file = models.FileField(
        verbose_name="text file",
        upload_to=get_file_path, max_length=200,
        storage=OverwriteStorage(),
        null=False, blank=False,
        validators=[
            FileValidator(
                extension_whitelist=EXT_WHITELIST,
                mime_whitelist=MIME_WHITELIST,
                max_size=MAX_SIZE
            )
        ]
    )
    cover = models.OneToOneField(
        verbose_name="cover image",
        to=TextItemCoverFile,
        null=True, blank=True
    )
    is_used = models.BooleanField(
        verbose_name="is used",
        default=False
    )

    def save(self, *args, **kwargs):
        prev = TextItemSourceFile.objects.filter(uuid=self.uuid).first()

        super().save(*args, **kwargs)

        if attr_changed(self, prev, 'file'):
            file_extension = os.path.splitext(self.file.name)[1].lower()
            if file_extension == '.pdf':
                cover = TextItemCoverFile(
                    author=self.author,
                    is_used=True
                )
                cover.file.save('cover.jpg', make_pdf_preview(self.file.name))
                cover.save()
                self.cover = cover
                self.save(force_update=True)
            else:
                self.cover = None
                self.save(force_update=True)
My validator is:

@deconstructible
class FileValidator(object):

    def __init__(self, extension_whitelist=None, mime_whitelist=None, max_size=None):
        self.extension_whitelist = extension_whitelist
        self.mime_whitelist = mime_whitelist
        self.max_size = max_size

    def __call__(self, value):
        if self.extension_whitelist is not None:
            self.validate_extension(value)

        if self.mime_whitelist is not None:
            self.validate_mime(value)

        if self.max_size is not None:
            self.validate_size(value)

    def validate_extension(self, file_obj):
        extension = os.path.splitext(file_obj.name)[1]
        if extension not in self.extension_whitelist:
            raise ValidationError(ERROR_UNSUPPORTED_FILE_FOUND)

    def validate_mime(self, file_obj):
        mime_type = magic.from_buffer(file_obj.read(2048), mime=True).decode('ascii')
        if mime_type not in self.mime_whitelist:
            raise ValidationError(ERROR_UNSUPPORTED_MIME)

    def validate_size(self, file_obj):
        if file_obj.size > int(self.max_size):
            raise ValidationError(ERROR_FILE_SIZE_EXCEEDED)

/home/env/project/apps/abstract/utils.py
def make_pdf_preview(pdf_file_name):
    """

    :param pdf_file_name: of the PDF file that should be converted
    :return: content file with 1-st page converted to PNG
    """
    file_extension = os.path.splitext(pdf_file_name)[1].lower()
    assert file_extension == '.pdf'

    with storage.open(pdf_file_name, 'rb') as pdf_file:
        reader = PdfFileReader(pdf_file)
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(0))
        stream = io.BytesIO()
        writer.write(stream)
        stream.seek(0)

        with wand_image(file=stream, resolution=200) as img:
            img.format = 'PNG'
            img.compression_quality = 99
            result_pic = io.BytesIO()
            img.save(file=result_pic)
            result_pic.seek(0)
            return ContentFile(result_pic.getvalue())

class TextFileAPITest(TestCase):

    def setUp(self):
        self.c = APIClient()
        self.user, self.userp = create_user_with_profile('user')

    def test_text_file_upload(self):
        self.c.login(username=self.user.username, password='111')

        response = self.c.post(
            '/en/api/files/text_file/',
            data={
                'file': get_temporary_pdf()
            },
            format='multipart'
        )
def get_temporary_pdf(file_path=None):
    file_path = file_path or os.path.join(settings.STATIC_ROOT, 'test/test.pdf')
    with open(file_path, 'rb') as test_pdf:
        pdf_file = SimpleUploadedFile(file_path.split('/')[-1], test_pdf.read(), 'application/pdf')
        pdf_file.seek(0)
        return pdf_file

在我向模型添加验证器之前我已经

PdfReadWarning: Xref table not zero-indexed. ID numbers for objects will be corrected. [pdf.py:1736]

After migrations, I run the test

Traceback (most recent call last):
.............................................................................
line 448, in save
    cover.file.save('cover.jpg', make_pdf_preview(self.file.name))
  File "/home/env/project/apps/abstract/utils.py", line 506, in make_pdf_preview
    reader = PdfFileReader(pdf_file)
  File "/home/env/lib/python3.5/site-packages/PyPDF2/pdf.py", line 1084, in __init__
    self.read(stream)
  File "/home/env/lib/python3.5/site-packages/PyPDF2/pdf.py", line 1901, in read
    raise utils.PdfReadError("Could not find xref table at specified location")
PyPDF2.utils.PdfReadError: Could not find xref table at specified location

That is, in the utility in the line 
reader = PdfFileReader(pdf_file)
the exception is generated

我尝试按照此处所述更改测试文件: PdfFileReader: PdfReadError: Could not find xref table at specified location 但是,这对我没有帮助.... 我没有在互联网上看到过这个例外的更多varinats。 我将不胜感激任何帮助

0 个答案:

没有答案