mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Allow a user to disable the pixel limit for OCR entirely (#5996)
This commit is contained in:
parent
6379e7b54f
commit
6779042242
@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities,
|
|||||||
refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html)
|
refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html)
|
||||||
documentation.
|
documentation.
|
||||||
|
|
||||||
Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER).
|
Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER).
|
||||||
To configure Flower further, create a `flowerconfig.py` and
|
To configure Flower further, create a `flowerconfig.py` and
|
||||||
place it into the `src/paperless` directory. For a Docker
|
place it into the `src/paperless` directory. For a Docker
|
||||||
installation, you can use volumes to accomplish this:
|
installation, you can use volumes to accomplish this:
|
||||||
|
@ -766,6 +766,8 @@ but could result in missing text content.
|
|||||||
If unset, will default to the value determined by
|
If unset, will default to the value determined by
|
||||||
[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
|
[Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS).
|
||||||
|
|
||||||
|
Setting this value to 0 disables the limit entirely. See the warning below.
|
||||||
|
|
||||||
!!! note
|
!!! note
|
||||||
|
|
||||||
Increasing this limit could cause Paperless to consume additional
|
Increasing this limit could cause Paperless to consume additional
|
||||||
@ -775,7 +777,7 @@ but could result in missing text content.
|
|||||||
!!! warning
|
!!! warning
|
||||||
|
|
||||||
The limit is intended to prevent malicious files from consuming
|
The limit is intended to prevent malicious files from consuming
|
||||||
system resources and causing crashes and other errors. Only increase
|
system resources and causing crashes and other errors. Only change
|
||||||
this value if you are certain your documents are not malicious and
|
this value if you are certain your documents are not malicious and
|
||||||
you need the text which was not OCRed.
|
you need the text which was not OCRed.
|
||||||
|
|
||||||
|
@ -0,0 +1,24 @@
|
|||||||
|
# Generated by Django 4.2.10 on 2024-03-04 17:30
|
||||||
|
|
||||||
|
import django.core.validators
|
||||||
|
from django.db import migrations
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("paperless", "0002_applicationconfiguration_app_logo_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="applicationconfiguration",
|
||||||
|
name="max_image_pixels",
|
||||||
|
field=models.FloatField(
|
||||||
|
null=True,
|
||||||
|
validators=[django.core.validators.MinValueValidator(0.0)],
|
||||||
|
verbose_name="Sets the maximum image size for decompression",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel):
|
|||||||
max_image_pixels = models.FloatField(
|
max_image_pixels = models.FloatField(
|
||||||
verbose_name=_("Sets the maximum image size for decompression"),
|
verbose_name=_("Sets the maximum image size for decompression"),
|
||||||
null=True,
|
null=True,
|
||||||
validators=[MinValueValidator(1_000_000.0)],
|
validators=[MinValueValidator(0.0)],
|
||||||
)
|
)
|
||||||
|
|
||||||
color_conversion_strategy = models.CharField(
|
color_conversion_strategy = models.CharField(
|
||||||
|
@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
f"they will not be used. Error: {e}",
|
f"they will not be used. Error: {e}",
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.settings.max_image_pixel is not None:
|
if (
|
||||||
|
self.settings.max_image_pixel is not None
|
||||||
|
and self.settings.max_image_pixel >= 0
|
||||||
|
):
|
||||||
# Convert pixels to mega-pixels and provide to ocrmypdf
|
# Convert pixels to mega-pixels and provide to ocrmypdf
|
||||||
max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
|
max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0
|
||||||
if max_pixels_mpixels > 0:
|
msg = (
|
||||||
self.log.debug(
|
"OCR pixel limit is disabled!"
|
||||||
f"Calculated {max_pixels_mpixels} megapixels for OCR",
|
if max_pixels_mpixels == 0
|
||||||
)
|
else f"Calculated {max_pixels_mpixels} megapixels for OCR"
|
||||||
|
)
|
||||||
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
|
self.log.debug(msg)
|
||||||
else:
|
ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
|
||||||
self.log.warning(
|
|
||||||
"There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
|
|
||||||
"this value must be at least 1 megapixel if set",
|
|
||||||
)
|
|
||||||
|
|
||||||
return ocrmypdf_args
|
return ocrmypdf_args
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user