Feature: auto-clean some invalid pdfs (#7651)

This commit is contained in:
shamoon
2024-09-25 08:57:20 -07:00
committed by GitHub
parent c92c3e224a
commit 5e687d9a93
7 changed files with 100 additions and 4 deletions

View File

@@ -1389,9 +1389,18 @@ class PostDocumentSerializer(serializers.Serializer):
         mime_type = magic.from_buffer(document_data, mime=True)
         if not is_mime_type_supported(mime_type):
-            raise serializers.ValidationError(
-                _("File type %(type)s not supported") % {"type": mime_type},
-            )
+            if (
+                mime_type in settings.CONSUMER_PDF_RECOVERABLE_MIME_TYPES
+                and document.name.endswith(
+                    ".pdf",
+                )
+            ):
+                # If the file is an invalid PDF, we can try to recover it later in the consumer
+                mime_type = "application/pdf"
+            else:
+                raise serializers.ValidationError(
+                    _("File type %(type)s not supported") % {"type": mime_type},
+                )
         return document.name, document_data