Feature: Allow user to control PIL image pixel limit (#5997)

This commit is contained in:
Trenton H
2024-03-04 16:19:56 -08:00
committed by GitHub
parent 82cb4591ce
commit 122bd9fd5b
7 changed files with 47 additions and 1 deletions

View File

@@ -20,6 +20,7 @@ from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressStatusOptions
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import maybe_override_pixel_limit
logger = logging.getLogger("paperless.barcodes")
@@ -81,6 +82,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
self.barcodes: list[Barcode] = []
def run(self) -> Optional[str]:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
# Maybe do the conversion of TIFF to PDF
self.convert_from_tiff_to_pdf()

View File

@@ -6,6 +6,7 @@ from django.conf import settings
from PIL import Image
from documents.utils import copy_basic_file_stats
from documents.utils import maybe_override_pixel_limit
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
@@ -17,6 +18,9 @@ def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
Returns the path of the PDF created.
"""
# override pixel setting if needed
maybe_override_pixel_limit()
with Image.open(tiff_path) as im:
has_alpha_layer = im.mode in ("RGBA", "LA")
if has_alpha_layer:

View File

@@ -1,8 +1,12 @@
import shutil
from os import utime
from pathlib import Path
from typing import Optional
from typing import Union
from django.conf import settings
from PIL import Image
def _coerce_to_path(
source: Union[Path, str],
@@ -40,3 +44,15 @@ def copy_file_with_basic_stats(
shutil.copy(source, dest)
copy_basic_file_stats(source, dest)
def maybe_override_pixel_limit() -> None:
    """
    Apply the configured pixel limit to PIL, if one has been configured.

    Reads settings.MAX_IMAGE_PIXELS and:
      - leaves Image.MAX_IMAGE_PIXELS untouched when the setting is None
        or negative
      - sets Image.MAX_IMAGE_PIXELS to None when the setting is 0
      - otherwise assigns the configured value as-is
    """
    configured: Optional[Union[float, int]] = settings.MAX_IMAGE_PIXELS

    # Nothing configured (or an invalid negative value): keep PIL's current limit
    if configured is None or not (configured >= 0):
        return

    # A configured 0 is translated to None before handing it to PIL
    Image.MAX_IMAGE_PIXELS = None if configured == 0 else configured

View File

@@ -970,6 +970,10 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
# Pixel limit setting, looked up under the name PAPERLESS_MAX_IMAGE_PIXELS.
# NOTE(review): presumably None when the variable is unset - confirm against
# __get_optional_int.
MAX_IMAGE_PIXELS: Final[Optional[int]] = __get_optional_int(
"PAPERLESS_MAX_IMAGE_PIXELS",
)
# GNUPG needs a home directory for some reason
GNUPG_HOME = os.getenv("HOME", "/tmp")

View File

@@ -12,6 +12,7 @@ from PIL import Image
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from documents.utils import maybe_override_pixel_limit
from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices
from paperless.models import CleanChoices
@@ -255,6 +256,9 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["sidecar"] = sidecar_file
if self.is_image(mime_type):
# This may be required, depending on the known information
maybe_override_pixel_limit()
dpi = self.get_dpi(input_file)
a4_dpi = self.calculate_a4_dpi(input_file)

View File

@@ -246,7 +246,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaises(ParseError, f)
@override_settings(OCR_IMAGE_DPI=72)
@override_settings(OCR_IMAGE_DPI=72, MAX_IMAGE_PIXELS=0)
def test_image_no_dpi_default(self):
parser = RasterisedDocumentParser(None)