diff --git a/docs/configuration.md b/docs/configuration.md index c7b710c66..b1e882845 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -969,6 +969,20 @@ be used with caution! Defaults to None, which does not add any additional apps. +#### [`PAPERLESS_MAX_IMAGE_PIXELS=`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS} + +: Configures the maximum size of an image PIL will allow to load without warning or error. + +: If unset, will default to the value determined by +[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). + + Defaults to None, which does not change the limit. + + !!! warning + + This limit is designed to prevent denial of service from malicious files. + It should only be raised or disabled in certain circumstances and with great care. + ## Document Consumption {#consume_config} #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES} diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index e68ba4f8c..e77b35fb3 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError from documents.plugins.helpers import ProgressStatusOptions from documents.utils import copy_basic_file_stats from documents.utils import copy_file_with_basic_stats +from documents.utils import maybe_override_pixel_limit logger = logging.getLogger("paperless.barcodes") @@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin): self.barcodes: list[Barcode] = [] def run(self) -> Optional[str]: + # Some operations may use PIL, override pixel setting if needed + maybe_override_pixel_limit() + # Maybe do the conversion of TIFF to PDF self.convert_from_tiff_to_pdf() diff --git a/src/documents/converters.py b/src/documents/converters.py index e3a7cb786..5c5ba1e07 100644 --- a/src/documents/converters.py +++ b/src/documents/converters.py @@ -6,6 +6,7 @@ from 
def maybe_override_pixel_limit() -> None:
    """
    Applies the configured pixel limit to PIL, if one has been configured.

    Reads settings.MAX_IMAGE_PIXELS and acts on it as follows:
      - None, or a value which is not >= 0: Image.MAX_IMAGE_PIXELS is left
        untouched
      - 0: Image.MAX_IMAGE_PIXELS is set to None (per the Pillow
        documentation, this disables the pixel count check)
      - any other value: assigned to Image.MAX_IMAGE_PIXELS as is

    The assignment is made on the PIL Image module itself, so it stays in
    effect after this function returns.
    """
    configured: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS

    # Nothing configured, or an unusable value: keep whatever PIL has now
    if configured is None or not (configured >= 0):
        return

    # Zero is the "no limit" marker, which PIL expresses as None
    Image.MAX_IMAGE_PIXELS = None if configured == 0 else configured
a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -12,6 +12,7 @@ from PIL import Image from documents.parsers import DocumentParser from documents.parsers import ParseError from documents.parsers import make_thumbnail_from_pdf +from documents.utils import maybe_override_pixel_limit from paperless.config import OcrConfig from paperless.models import ArchiveFileChoices from paperless.models import CleanChoices @@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser): ocrmypdf_args["sidecar"] = sidecar_file if self.is_image(mime_type): + # This may be required, depending on the known information + maybe_override_pixel_limit() + dpi = self.get_dpi(input_file) a4_dpi = self.calculate_a4_dpi(input_file) diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index f64cb69f0..fae64742e 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertRaises(ParseError, f) - @override_settings(OCR_IMAGE_DPI=72) + @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0) def test_image_no_dpi_default(self): parser = RasterisedDocumentParser(None)