Use the original image file for the checksum, not the version that may have had its alpha channel removed (#4781)

This commit is contained in:
Trenton H 2023-12-02 16:18:06 -08:00 committed by GitHub
parent aff56077a8
commit 5b502b1e1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 17 deletions

View File

@ -519,7 +519,11 @@ class Consumer(LoggingMixin):
document.filename = generate_unique_filename(document)
create_source_path_directory(document.source_path)
self._write(document.storage_type, self.path, document.source_path)
self._write(
document.storage_type,
self.original_path,
document.source_path,
)
self._write(
document.storage_type,
@ -711,21 +715,20 @@ class Consumer(LoggingMixin):
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
with open(self.path, "rb") as f:
document = Document.objects.create(
title=(
self._parse_title_placeholders(self.override_title)
if self.override_title is not None
else file_info.title
)[:127],
content=text,
mime_type=mime_type,
checksum=hashlib.md5(f.read()).hexdigest(),
created=create_date,
modified=create_date,
storage_type=storage_type,
original_filename=self.filename,
)
document = Document.objects.create(
title=(
self._parse_title_placeholders(self.override_title)
if self.override_title is not None
else file_info.title
)[:127],
content=text,
mime_type=mime_type,
checksum=hashlib.md5(self.original_path.read_bytes()).hexdigest(),
created=create_date,
modified=create_date,
storage_type=storage_type,
original_filename=self.filename,
)
self.apply_overrides(document)

View File

@ -105,7 +105,7 @@ def get_supported_file_extensions() -> set[str]:
return extensions
def get_parser_class_for_mime_type(mime_type: str) -> Optional["DocumentParser"]:
def get_parser_class_for_mime_type(mime_type: str) -> Optional[type["DocumentParser"]]:
"""
Returns the best parser (by weight) for the given mimetype or
None if no parser exists