mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-14 00:26:21 +00:00
Fixhancement: improve text thumbnail generation for large files (#10483)
This commit is contained in:
@@ -16,7 +16,15 @@ class TextDocumentParser(DocumentParser):
|
||||
logging_name = "paperless.parsing.text"
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
|
||||
text = self.read_file_handle_unicode_errors(document_path)
|
||||
# Avoid reading entire file into memory
|
||||
max_chars = 100_000
|
||||
file_size_limit = 50 * 1024 * 1024
|
||||
|
||||
if document_path.stat().st_size > file_size_limit:
|
||||
text = "[File too large to preview]"
|
||||
else:
|
||||
with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
|
||||
text = f.read(max_chars)
|
||||
|
||||
img = Image.new("RGB", (500, 700), color="white")
|
||||
draw = ImageDraw.Draw(img)
|
||||
@@ -25,7 +33,7 @@ class TextDocumentParser(DocumentParser):
|
||||
size=20,
|
||||
layout_engine=ImageFont.Layout.BASIC,
|
||||
)
|
||||
draw.text((5, 5), text, font=font, fill="black")
|
||||
draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
|
||||
|
||||
out_path = self.tempdir / "thumb.webp"
|
||||
img.save(out_path, format="WEBP")
|
||||
|
Reference in New Issue
Block a user