Feature: Allow user to control PIL image pixel limit (#5997)

This commit is contained in:
Trenton H 2024-03-04 16:19:56 -08:00 committed by GitHub
parent 35574f3b86
commit b9636a3def
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 47 additions and 1 deletions

View File

@ -969,6 +969,20 @@ be used with caution!
Defaults to None, which does not add any additional apps. Defaults to None, which does not add any additional apps.
#### [`PAPERLESS_MAX_IMAGE_PIXELS=<number>`](#PAPERLESS_MAX_IMAGE_PIXELS) {#PAPERLESS_MAX_IMAGE_PIXELS}
: Configures the maximum size of an image PIL will allow to load without warning or error.
: If unset, will default to the value determined by
[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
Defaults to None, which does not change the limit.
!!! warning
This limit is designed to prevent denial of service from malicious files.
It should only be raised or disabled in certain circumstances and with great care.
## Document Consumption {#consume_config} ## Document Consumption {#consume_config}
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES} #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}

View File

@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressStatusOptions from documents.plugins.helpers import ProgressStatusOptions
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import maybe_override_pixel_limit
logger = logging.getLogger("paperless.barcodes") logger = logging.getLogger("paperless.barcodes")
@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
self.barcodes: list[Barcode] = [] self.barcodes: list[Barcode] = []
def run(self) -> Optional[str]: def run(self) -> Optional[str]:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
# Maybe do the conversion of TIFF to PDF # Maybe do the conversion of TIFF to PDF
self.convert_from_tiff_to_pdf() self.convert_from_tiff_to_pdf()

View File

@ -6,6 +6,7 @@ from django.conf import settings
from PIL import Image from PIL import Image
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import maybe_override_pixel_limit
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path: def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
Returns the path of the PDF created. Returns the path of the PDF created.
""" """
# override pixel setting if needed
maybe_override_pixel_limit()
with Image.open(tiff_path) as im: with Image.open(tiff_path) as im:
has_alpha_layer = im.mode in ("RGBA", "LA") has_alpha_layer = im.mode in ("RGBA", "LA")
if has_alpha_layer: if has_alpha_layer:

View File

@ -1,8 +1,12 @@
import shutil import shutil
from os import utime from os import utime
from pathlib import Path from pathlib import Path
from typing import Optional
from typing import Union from typing import Union
from django.conf import settings
from PIL import Image
def _coerce_to_path( def _coerce_to_path(
source: Union[Path, str], source: Union[Path, str],
@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
shutil.copy(source, dest) shutil.copy(source, dest)
copy_basic_file_stats(source, dest) copy_basic_file_stats(source, dest)
def maybe_override_pixel_limit() -> None:
    """
    Apply the configured pixel limit to PIL, when one has been set.

    Reads settings.MAX_IMAGE_PIXELS and acts on it as follows:

    - None or a negative value: Image.MAX_IMAGE_PIXELS is left untouched.
    - 0: Image.MAX_IMAGE_PIXELS is set to None.
    - any positive value: Image.MAX_IMAGE_PIXELS is set to that value.
    """
    configured: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS

    # Nothing configured, so keep whatever limit PIL currently has
    if configured is None:
        return

    if configured >= 0:
        # Zero is the sentinel which maps onto PIL's None setting
        Image.MAX_IMAGE_PIXELS = configured if configured != 0 else None

View File

@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS") OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
# Optional pixel-count limit for PIL, configured via PAPERLESS_MAX_IMAGE_PIXELS.
# NOTE(review): presumably __get_optional_int yields None when the option is
# unset - confirm against the helper. documents.utils.maybe_override_pixel_limit
# leaves PIL's limit untouched for None and maps 0 to Image.MAX_IMAGE_PIXELS = None.
MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
"PAPERLESS_MAX_IMAGE_PIXELS",
)
# GNUPG needs a home directory for some reason # GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp") GNUPG_HOME = os.getenv("HOME", "/tmp")

View File

@ -12,6 +12,7 @@ from PIL import Image
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import ParseError from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf from documents.parsers import make_thumbnail_from_pdf
from documents.utils import maybe_override_pixel_limit
from paperless.config import OcrConfig from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices from paperless.models import ArchiveFileChoices
from paperless.models import CleanChoices from paperless.models import CleanChoices
@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["sidecar"] = sidecar_file ocrmypdf_args["sidecar"] = sidecar_file
if self.is_image(mime_type): if self.is_image(mime_type):
# This may be required, depending on the known information
maybe_override_pixel_limit()
dpi = self.get_dpi(input_file) dpi = self.get_dpi(input_file)
a4_dpi = self.calculate_a4_dpi(input_file) a4_dpi = self.calculate_a4_dpi(input_file)

View File

@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaises(ParseError, f) self.assertRaises(ParseError, f)
@override_settings(OCR_IMAGE_DPI=72) @override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
def test_image_no_dpi_default(self): def test_image_no_dpi_default(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)