Feature: Allow user to control PIL image pixel limit (#5997)

2025-12-14 01:21:14 -06:00 · 2024-03-04 16:19:56 -08:00
parent 35574f3b86
commit b9636a3def
7 changed files with 47 additions and 1 deletions
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -969,6 +969,20 @@ be used with caution!
    Defaults to None, which does not add any additional apps.
 #### [`PAPERLESS_MAX_IMAGE_PIXELS=<number>`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS}
 : Configures the maximum size of an image PIL will allow to load without warning or error.
 : If unset, will default to the value determined by
 [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
    Defaults to None, which does not change the limit.
    !!! warning
        This limit is designed to prevent denial of service from malicious files.
        It should only be raised or disabled in certain circumstances and with great care.
 ## Document Consumption {#consume_config}
 #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
 from documents.plugins.helpers import ProgressStatusOptions
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import maybe_override_pixel_limit
 logger = logging.getLogger("paperless.barcodes")
@@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
        self.barcodes: list[Barcode] = []
    def run(self) -> Optional[str]:
        # Some operations may use PIL, override pixel setting if needed
        maybe_override_pixel_limit()
        # Maybe do the conversion of TIFF to PDF
        self.convert_from_tiff_to_pdf()
--- a/src/documents/converters.py
+++ b/src/documents/converters.py
@@ -6,6 +6,7 @@ from django.conf import settings
 from PIL import Image
 from documents.utils import copy_basic_file_stats
 from documents.utils import maybe_override_pixel_limit
 def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
    Returns the path of the PDF created.
    """
    # override pixel setting if needed
    maybe_override_pixel_limit()
    with Image.open(tiff_path) as im:
        has_alpha_layer = im.mode in ("RGBA", "LA")
    if has_alpha_layer:
--- a/src/documents/utils.py
+++ b/src/documents/utils.py
@@ -1,8 +1,12 @@
 import shutil
 from os import utime
 from pathlib import Path
 from typing import Optional
 from typing import Union
 from django.conf import settings
 from PIL import Image
 def _coerce_to_path(
    source: Union[Path, str],
@@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
    shutil.copy(source, dest)
    copy_basic_file_stats(source, dest)
 def maybe_override_pixel_limit() -> None:
    """
    Apply the configured pixel limit to PIL, when one has been set.

    Reads settings.MAX_IMAGE_PIXELS:
      - None, or a value that is not >= 0: Image.MAX_IMAGE_PIXELS is left
        untouched
      - 0: Image.MAX_IMAGE_PIXELS is set to None
      - any other value: assigned to Image.MAX_IMAGE_PIXELS as-is
    """
    configured: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
    # Nothing usable configured, keep whatever limit PIL currently has
    if configured is None or not configured >= 0:
        return
    # Zero is translated to None, everything else is passed through unchanged
    Image.MAX_IMAGE_PIXELS = None if configured == 0 else configured
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
 # Optional pixel-count limit for PIL, taken from the
 # PAPERLESS_MAX_IMAGE_PIXELS option via __get_optional_int.
 # Read by documents.utils.maybe_override_pixel_limit, which leaves the PIL
 # limit alone when this is None.
 MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
    "PAPERLESS_MAX_IMAGE_PIXELS",
 )
 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -12,6 +12,7 @@ from PIL import Image
 from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from documents.utils import maybe_override_pixel_limit
 from paperless.config import OcrConfig
 from paperless.models import ArchiveFileChoices
 from paperless.models import CleanChoices
@@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
            ocrmypdf_args["sidecar"] = sidecar_file
        if self.is_image(mime_type):
            # This may be required, depending on the known information
            maybe_override_pixel_limit()
            dpi = self.get_dpi(input_file)
            a4_dpi = self.calculate_a4_dpi(input_file)
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        self.assertRaises(ParseError, f)
-    @override_settings(OCR_IMAGE_DPI=72)
+    @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
    def test_image_no_dpi_default(self):
        parser = RasterisedDocumentParser(None)