diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index d4ff80f87..863be639b 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities, refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html) documentation. -Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER). +Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER). To configure Flower further, create a `flowerconfig.py` and place it into the `src/paperless` directory. For a Docker installation, you can use volumes to accomplish this: diff --git a/docs/configuration.md b/docs/configuration.md index 5fd14caf1..c7b710c66 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -766,6 +766,8 @@ but could result in missing text content. If unset, will default to the value determined by [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). + Setting this value to 0 will entirely disable the limit. See the below warning. + !!! note Increasing this limit could cause Paperless to consume additional @@ -775,7 +777,7 @@ but could result in missing text content. !!! warning The limit is intended to prevent malicious files from consuming - system resources and causing crashes and other errors. Only increase + system resources and causing crashes and other errors. Only change this value if you are certain your documents are not malicious and you need the text which was not OCRed diff --git a/src/paperless/migrations/0003_alter_applicationconfiguration_max_image_pixels.py b/src/paperless/migrations/0003_alter_applicationconfiguration_max_image_pixels.py new file mode 100644 index 000000000..c27feefb3 --- /dev/null +++ b/src/paperless/migrations/0003_alter_applicationconfiguration_max_image_pixels.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.10 on 2024-03-04 17:30 + +import django.core.validators +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ("paperless", "0002_applicationconfiguration_app_logo_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="applicationconfiguration", + name="max_image_pixels", + field=models.FloatField( + null=True, + validators=[django.core.validators.MinValueValidator(0.0)], + verbose_name="Sets the maximum image size for decompression", + ), + ), + ] diff --git a/src/paperless/models.py b/src/paperless/models.py index 72805dc56..1f6cfbced 100644 --- a/src/paperless/models.py +++ b/src/paperless/models.py @@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel): max_image_pixels = models.FloatField( verbose_name=_("Sets the maximum image size for decompression"), null=True, - validators=[MinValueValidator(1_000_000.0)], + validators=[MinValueValidator(0.0)], ) color_conversion_strategy = models.CharField( diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 09086585e..020922703 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser): f"they will not be used. Error: {e}", ) - if self.settings.max_image_pixel is not None: + if ( + self.settings.max_image_pixel is not None + and self.settings.max_image_pixel >= 0 + ): # Convert pixels to mega-pixels and provide to ocrmypdf max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 - if max_pixels_mpixels > 0: - self.log.debug( - f"Calculated {max_pixels_mpixels} megapixels for OCR", - ) - - ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels - else: - self.log.warning( - "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, " - "this value must be at least 1 megapixel if set", - ) + msg = ( + "OCR pixel limit is disabled!" + if max_pixels_mpixels == 0 + else f"Calculated {max_pixels_mpixels} megapixels for OCR" + ) + self.log.debug(msg) + ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels return ocrmypdf_args