Feature: Allow user to control PIL image pixel limit (#5997)

2026-02-05 23:32:46 -06:00 · 2024-03-04 16:19:56 -08:00
parent 82cb4591ce
commit 122bd9fd5b
7 changed files with 47 additions and 1 deletions
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
 from documents.plugins.helpers import ProgressStatusOptions
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
+from documents.utils import maybe_override_pixel_limit

 logger = logging.getLogger("paperless.barcodes")

@@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
        self.barcodes: list[Barcode] = []

    def run(self) -> Optional[str]:
+        # Some operations may use PIL, override pixel setting if needed
+        maybe_override_pixel_limit()
+
        # Maybe do the conversion of TIFF to PDF
        self.convert_from_tiff_to_pdf()

--- a/src/documents/converters.py
+++ b/src/documents/converters.py
@@ -6,6 +6,7 @@ from django.conf import settings
 from PIL import Image

 from documents.utils import copy_basic_file_stats
+from documents.utils import maybe_override_pixel_limit


 def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:

    Returns the path of the PDF created.
    """
+    # override pixel setting if needed
+    maybe_override_pixel_limit()
+
    with Image.open(tiff_path) as im:
        has_alpha_layer = im.mode in ("RGBA", "LA")
    if has_alpha_layer:
--- a/src/documents/utils.py
+++ b/src/documents/utils.py
@@ -1,8 +1,12 @@
 import shutil
 from os import utime
 from pathlib import Path
+from typing import Optional
 from typing import Union

+from django.conf import settings
+from PIL import Image
+

 def _coerce_to_path(
    source: Union[Path, str],
@@ -40,3 +44,15 @@ def copy_file_with_basic_stats(

    shutil.copy(source, dest)
    copy_basic_file_stats(source, dest)
+
+
+def maybe_override_pixel_limit() -> None:
+    """
+    Applies settings.MAX_IMAGE_PIXELS to PIL's Image.MAX_IMAGE_PIXELS, if configured
+    """
+    limit: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
+    if limit is not None and limit >= 0:  # unset or negative: leave PIL untouched
+        pixel_count = limit
+        if pixel_count == 0:  # 0 -> None, which Pillow documents as disabling the limit
+            pixel_count = None
+        Image.MAX_IMAGE_PIXELS = pixel_count
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(

 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")

+MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
+    "PAPERLESS_MAX_IMAGE_PIXELS",
+)
+
 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")

--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -12,6 +12,7 @@ from PIL import Image
 from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
+from documents.utils import maybe_override_pixel_limit
 from paperless.config import OcrConfig
 from paperless.models import ArchiveFileChoices
 from paperless.models import CleanChoices
@@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
            ocrmypdf_args["sidecar"] = sidecar_file

        if self.is_image(mime_type):
+            # This may be required, depending on the known information
+            maybe_override_pixel_limit()
+
            dpi = self.get_dpi(input_file)
            a4_dpi = self.calculate_a4_dpi(input_file)

--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

        self.assertRaises(ParseError, f)

-    @override_settings(OCR_IMAGE_DPI=72)
+    @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
    def test_image_no_dpi_default(self):
        parser = RasterisedDocumentParser(None)