创建Django迁移后,我添加了文件字段验证后出现了一个问题。 我的模特是:
class TextItemSourceFile(models.Model):
FILE_TYPE = 'text_source'
BASE_DIR = 'text_source'
EXT_WHITELIST = ('.doc', '.docx', '.odt', '.pdf', '.rtf', '.djvu', '.djv')
MIME_WHITELIST = (
'application/CDFV2-unknown',
# for doc and docx files
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
# for odt files
'application/vnd.oasis.opendocument.text',
# for pdf files
'application/pdf',
'application/x-pdf',
# for rtf files
'text/rtf',
'application/rtf',
# for djvu files
'image/vnd.djvu',
'image/x-djvu'
)
MAX_SIZE = 10 * 1024 * 1024
uuid = models.UUIDField(
verbose_name="UUID",
primary_key=True, default=uuid.uuid4
)
author = models.ForeignKey(
verbose_name="author",
to='auth.User'
)
date_created = models.DateTimeField(
verbose_name="date created",
auto_now_add=True
)
name = models.CharField(
verbose_name="original file name",
max_length=200,
null=False, blank=False
)
file = models.FileField(
verbose_name="text file",
upload_to=get_file_path, max_length=200,
storage=OverwriteStorage(),
null=False, blank=False,
validators=[
FileValidator(
extension_whitelist=EXT_WHITELIST,
mime_whitelist=MIME_WHITELIST,
max_size=MAX_SIZE
)
]
)
cover = models.OneToOneField(
verbose_name="cover image",
to=TextItemCoverFile,
null=True, blank=True
)
is_used = models.BooleanField(
verbose_name="is used",
default=False
)
def save(self, *args, **kwargs):
prev = TextItemSourceFile.objects.filter(uuid=self.uuid).first()
super().save(*args, **kwargs)
if attr_changed(self, prev, 'file'):
file_extension = os.path.splitext(self.file.name)[1].lower()
if file_extension == '.pdf':
cover = TextItemCoverFile(
author=self.author,
is_used=True
)
cover.file.save('cover.jpg', make_pdf_preview(self.file.name))
cover.save()
self.cover = cover
self.save(force_update=True)
else:
self.cover = None
self.save(force_update=True)
My validator is:
@deconstructible
class FileValidator(object):
def __init__(self, extension_whitelist=None, mime_whitelist=None, max_size=None):
self.extension_whitelist = extension_whitelist
self.mime_whitelist = mime_whitelist
self.max_size = max_size
def __call__(self, value):
if self.extension_whitelist is not None:
self.validate_extension(value)
if self.mime_whitelist is not None:
self.validate_mime(value)
if self.max_size is not None:
self.validate_size(value)
def validate_extension(self, file_obj):
extension = os.path.splitext(file_obj.name)[1]
if extension not in self.extension_whitelist:
raise ValidationError(ERROR_UNSUPPORTED_FILE_FOUND)
def validate_mime(self, file_obj):
mime_type = magic.from_buffer(file_obj.read(2048), mime=True).decode('ascii')
if mime_type not in self.mime_whitelist:
raise ValidationError(ERROR_UNSUPPORTED_MIME)
def validate_size(self, file_obj):
if file_obj.size > int(self.max_size):
raise ValidationError(ERROR_FILE_SIZE_EXCEEDED)
/home/env/project/apps/abstract/utils.py
def make_pdf_preview(pdf_file_name):
"""
:param pdf_file_name: of the PDF file that should be converted
:return: content file with 1-st page converted to PNG
"""
file_extension = os.path.splitext(pdf_file_name)[1].lower()
assert file_extension == '.pdf'
with storage.open(pdf_file_name, 'rb') as pdf_file:
reader = PdfFileReader(pdf_file)
writer = PdfFileWriter()
writer.addPage(reader.getPage(0))
stream = io.BytesIO()
writer.write(stream)
stream.seek(0)
with wand_image(file=stream, resolution=200) as img:
img.format = 'PNG'
img.compression_quality = 99
result_pic = io.BytesIO()
img.save(file=result_pic)
result_pic.seek(0)
return ContentFile(result_pic.getvalue())
class TextFileAPITest(TestCase):
def setUp(self):
self.c = APIClient()
self.user, self.userp = create_user_with_profile('user')
def test_text_file_upload(self):
self.c.login(username=self.user.username, password='111')
response = self.c.post(
'/en/api/files/text_file/',
data={
'file': get_temporary_pdf()
},
format='multipart'
)
#
def get_temporary_pdf(file_path=None):
file_path = file_path or os.path.join(settings.STATIC_ROOT, 'test/test.pdf')
with open(file_path, 'rb') as test_pdf:
pdf_file = SimpleUploadedFile(file_path.split('/')[-1], test_pdf.read(), 'application/pdf')
pdf_file.seek(0)
return pdf_file
在我向模型添加验证器之前我已经
PdfReadWarning: Xref table not zero-indexed. ID numbers for objects will be corrected. [pdf.py:1736]
After migrations, I run the test
Traceback (most recent call last):
.............................................................................
line 448, in save
cover.file.save('cover.jpg', make_pdf_preview(self.file.name))
File "/home/env/project/apps/abstract/utils.py", line 506, in make_pdf_preview
reader = PdfFileReader(pdf_file)
File "/home/env/lib/python3.5/site-packages/PyPDF2/pdf.py", line 1084, in __init__
self.read(stream)
File "/home/env/lib/python3.5/site-packages/PyPDF2/pdf.py", line 1901, in read
raise utils.PdfReadError("Could not find xref table at specified location")
PyPDF2.utils.PdfReadError: Could not find xref table at specified location
That is, in the utility in the line
reader = PdfFileReader(pdf_file)
the exception is generated
我尝试按照此处所述更改测试文件: PdfFileReader: PdfReadError: Could not find xref table at specified location 但是,这对我没有帮助.... 我没有在互联网上看到过这个例外的更多varinats。 我将不胜感激任何帮助