Feature: Allow a user to disable the pixel limit for OCR entirely (#5996)

This commit is contained in:
Trenton H
2024-03-04 14:37:36 -08:00
committed by GitHub
parent 269ab4e987
commit 23398f5ed1
5 changed files with 40 additions and 15 deletions

View File

@@ -0,0 +1,24 @@
# Generated by Django 4.2.10 on 2024-03-04 17:30
import django.core.validators
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    # Schema migration for the "paperless" app: it redefines the
    # ``max_image_pixels`` field of the ``applicationconfiguration`` model as
    # a nullable float whose only validator is a minimum of 0.0.

    # Runs after migration 0002 of the same app.
    dependencies = [
        ("paperless", "0002_applicationconfiguration_app_logo_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="applicationconfiguration",
            name="max_image_pixels",
            field=models.FloatField(
                verbose_name="Sets the maximum image size for decompression",
                # NULL is allowed, so the value may be left unset.
                null=True,
                # Lower bound is inclusive: 0.0 itself passes validation.
                validators=[django.core.validators.MinValueValidator(0.0)],
            ),
        ),
    ]

View File

@@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel):
max_image_pixels = models.FloatField(
verbose_name=_("Sets the maximum image size for decompression"),
null=True,
validators=[MinValueValidator(1_000_000.0)],
validators=[MinValueValidator(0.0)],
)
color_conversion_strategy = models.CharField(

View File

@@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser):
f"they will not be used. Error: {e}",
)
if self.settings.max_image_pixel is not None:
if (
self.settings.max_image_pixel is not None
and self.settings.max_image_pixel >= 0
):
# Convert pixels to mega-pixels and provide to ocrmypdf
max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
if max_pixels_mpixels > 0:
self.log.debug(
f"Calculated {max_pixels_mpixels} megapixels for OCR",
)
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
else:
self.log.warning(
"There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
"this value must be at least 1 megapixel if set",
)
msg = (
"OCR pixel limit is disabled!"
if max_pixels_mpixels == 0
else f"Calculated {max_pixels_mpixels} megapixels for OCR"
)
self.log.debug(msg)
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
return ocrmypdf_args