Feature: Allow a user to disable the pixel limit for OCR entirely (#5996)

2026-02-05 23:32:46 -06:00 · 2024-03-04 14:37:36 -08:00
parent 269ab4e987
commit 23398f5ed1
5 changed files with 40 additions and 15 deletions
--- a/src/paperless/migrations/0003_alter_applicationconfiguration_max_image_pixels.py
+++ b/src/paperless/migrations/0003_alter_applicationconfiguration_max_image_pixels.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.2.10 on 2024-03-04 17:30
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("paperless", "0002_applicationconfiguration_app_logo_and_more"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="applicationconfiguration",
+            name="max_image_pixels",
+            field=models.FloatField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(0.0)],
+                verbose_name="Sets the maximum image size for decompression",
+            ),
+        ),
+    ]
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel):
    max_image_pixels = models.FloatField(
        verbose_name=_("Sets the maximum image size for decompression"),
        null=True,
-        validators=[MinValueValidator(1_000_000.0)],
+        validators=[MinValueValidator(0.0)],
    )

    color_conversion_strategy = models.CharField(
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser):
                    f"they will not be used. Error: {e}",
                )

-        if self.settings.max_image_pixel is not None:
+        if (
+            self.settings.max_image_pixel is not None
+            and self.settings.max_image_pixel >= 0
+        ):
            # Convert pixels to mega-pixels and provide to ocrmypdf
            max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
-            if max_pixels_mpixels > 0:
-                self.log.debug(
-                    f"Calculated {max_pixels_mpixels} megapixels for OCR",
-                )
-
-                ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
-            else:
-                self.log.warning(
-                    "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
-                    "this value must be at least 1 megapixel if set",
-                )
+            msg = (
+                "OCR pixel limit is disabled!"
+                if max_pixels_mpixels == 0
+                else f"Calculated {max_pixels_mpixels} megapixels for OCR"
+            )
+            self.log.debug(msg)
+            ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels

        return ocrmypdf_args