Feature: Allow user to control PIL image pixel limit ()

This commit is contained in:
Trenton H 2024-03-04 16:19:56 -08:00 committed by GitHub
parent 35574f3b86
commit b9636a3def
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 47 additions and 1 deletions

@ -969,6 +969,20 @@ be used with caution!
Defaults to None, which does not add any additional apps.
#### [`PAPERLESS_MAX_IMAGE_PIXELS=<number>`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS}
: Configures the maximum size of an image PIL will allow to load without warning or error.
: If unset, will default to the value determined by
[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
Defaults to None, which does not change the limit.
!!! warning
This limit is designed to prevent denial of service from malicious files.
It should only be raised or disabled in certain circumstances and with great care.
## Document Consumption {#consume_config}
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}

@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressStatusOptions
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import maybe_override_pixel_limit
logger = logging.getLogger("paperless.barcodes")
@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
self.barcodes: list[Barcode] = []
def run(self) -> Optional[str]:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
# Maybe do the conversion of TIFF to PDF
self.convert_from_tiff_to_pdf()

@ -6,6 +6,7 @@ from django.conf import settings
from PIL import Image
from documents.utils import copy_basic_file_stats
from documents.utils import maybe_override_pixel_limit
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
Returns the path of the PDF created.
"""
# override pixel setting if needed
maybe_override_pixel_limit()
with Image.open(tiff_path) as im:
has_alpha_layer = im.mode in ("RGBA", "LA")
if has_alpha_layer:

@ -1,8 +1,12 @@
import shutil
from os import utime
from pathlib import Path
from typing import Optional
from typing import Union
from django.conf import settings
from PIL import Image
def _coerce_to_path(
source: Union[Path, str],
@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
shutil.copy(source, dest)
copy_basic_file_stats(source, dest)
def maybe_override_pixel_limit() -> None:
    """
    Apply the configured pixel limit to PIL, when one has been configured.

    Reads settings.MAX_IMAGE_PIXELS and behaves as follows:
      - None or a negative value: Image.MAX_IMAGE_PIXELS is left untouched
      - 0: Image.MAX_IMAGE_PIXELS is set to None
      - any other non-negative value: assigned to Image.MAX_IMAGE_PIXELS as-is
    """
    configured: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS
    # Nothing configured, keep whatever limit PIL currently has
    if configured is None:
        return
    if configured >= 0:
        # Zero is translated to None rather than being used as a literal limit
        Image.MAX_IMAGE_PIXELS = None if configured == 0 else configured

@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
# Optional pixel limit taken from PAPERLESS_MAX_IMAGE_PIXELS.
# NOTE(review): presumably None when the variable is unset, going by the
# Optional[int] annotation - confirm against __get_optional_int
MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
"PAPERLESS_MAX_IMAGE_PIXELS",
)
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp")

@ -12,6 +12,7 @@ from PIL import Image
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from documents.utils import maybe_override_pixel_limit
from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices
from paperless.models import CleanChoices
@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["sidecar"] = sidecar_file
if self.is_image(mime_type):
# This may be required, depending on the known information
maybe_override_pixel_limit()
dpi = self.get_dpi(input_file)
a4_dpi = self.calculate_a4_dpi(input_file)

@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaises(ParseError, f)
@override_settings(OCR_IMAGE_DPI=72)
@override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
def test_image_no_dpi_default(self):
parser = RasterisedDocumentParser(None)