mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Feature: Allow a user to disable the pixel limit for OCR entirely (#5996)
This commit is contained in:
		| @@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities, | ||||
| refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html) | ||||
| documentation. | ||||
|  | ||||
| Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER). | ||||
| Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER). | ||||
| To configure Flower further, create a `flowerconfig.py` and | ||||
| place it into the `src/paperless` directory. For a Docker | ||||
| installation, you can use volumes to accomplish this: | ||||
|   | ||||
| @@ -766,6 +766,8 @@ but could result in missing text content. | ||||
|     If unset, will default to the value determined by | ||||
|     [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). | ||||
|  | ||||
|     Setting this value to 0 will entirely disable the limit. See the warning below. | ||||
|  | ||||
|     !!! note | ||||
|  | ||||
|         Increasing this limit could cause Paperless to consume additional | ||||
| @@ -775,7 +777,7 @@ but could result in missing text content. | ||||
|     !!! warning | ||||
|  | ||||
|         The limit is intended to prevent malicious files from consuming | ||||
|         system resources and causing crashes and other errors. Only increase | ||||
|         system resources and causing crashes and other errors. Only change | ||||
|         this value if you are certain your documents are not malicious and | ||||
|         you need the text which was not OCRed. | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1,24 @@ | ||||
| # Generated by Django 4.2.10 on 2024-03-04 17:30 | ||||
|  | ||||
| import django.core.validators | ||||
| from django.db import migrations | ||||
| from django.db import models | ||||
|  | ||||
|  | ||||
| class Migration(migrations.Migration): | ||||
|  | ||||
|     dependencies = [ | ||||
|         ("paperless", "0002_applicationconfiguration_app_logo_and_more"), | ||||
|     ] | ||||
|  | ||||
|     operations = [ | ||||
|         migrations.AlterField( | ||||
|             model_name="applicationconfiguration", | ||||
|             name="max_image_pixels", | ||||
|             field=models.FloatField( | ||||
|                 null=True, | ||||
|                 validators=[django.core.validators.MinValueValidator(0.0)], | ||||
|                 verbose_name="Sets the maximum image size for decompression", | ||||
|             ), | ||||
|         ), | ||||
|     ] | ||||
| @@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel): | ||||
|     max_image_pixels = models.FloatField( | ||||
|         verbose_name=_("Sets the maximum image size for decompression"), | ||||
|         null=True, | ||||
|         validators=[MinValueValidator(1_000_000.0)], | ||||
|         validators=[MinValueValidator(0.0)], | ||||
|     ) | ||||
|  | ||||
|     color_conversion_strategy = models.CharField( | ||||
|   | ||||
| @@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                     f"they will not be used. Error: {e}", | ||||
|                 ) | ||||
|  | ||||
|         if self.settings.max_image_pixel is not None: | ||||
|         if ( | ||||
|             self.settings.max_image_pixel is not None | ||||
|             and self.settings.max_image_pixel >= 0 | ||||
|         ): | ||||
|             # Convert pixels to mega-pixels and provide to ocrmypdf | ||||
|             max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 | ||||
|             if max_pixels_mpixels > 0: | ||||
|                 self.log.debug( | ||||
|                     f"Calculated {max_pixels_mpixels} megapixels for OCR", | ||||
|             msg = ( | ||||
|                 "OCR pixel limit is disabled!" | ||||
|                 if max_pixels_mpixels == 0 | ||||
|                 else f"Calculated {max_pixels_mpixels} megapixels for OCR" | ||||
|             ) | ||||
|  | ||||
|             self.log.debug(msg) | ||||
|             ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels | ||||
|             else: | ||||
|                 self.log.warning( | ||||
|                     "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, " | ||||
|                     "this value must be at least 1 megapixel if set", | ||||
|                 ) | ||||
|  | ||||
|         return ocrmypdf_args | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H