mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Feature: Allow a user to disable the pixel limit for OCR entirely (#5996)
This commit is contained in:
		| @@ -437,7 +437,7 @@ with Prometheus, as it exports metrics. For details on its capabilities, | |||||||
| refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html) | refer to the [Flower](https://flower.readthedocs.io/en/latest/index.html) | ||||||
| documentation. | documentation. | ||||||
|  |  | ||||||
| Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration/#PAPERLESS_ENABLE_FLOWER). | Flower can be enabled with the setting [PAPERLESS_ENABLE_FLOWER](configuration.md#PAPERLESS_ENABLE_FLOWER). | ||||||
| To configure Flower further, create a `flowerconfig.py` and | To configure Flower further, create a `flowerconfig.py` and | ||||||
| place it into the `src/paperless` directory. For a Docker | place it into the `src/paperless` directory. For a Docker | ||||||
| installation, you can use volumes to accomplish this: | installation, you can use volumes to accomplish this: | ||||||
|   | |||||||
| @@ -766,6 +766,8 @@ but could result in missing text content. | |||||||
|     If unset, will default to the value determined by |     If unset, will default to the value determined by | ||||||
|     [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). |     [Pillow](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS). | ||||||
|  |  | ||||||
|  |     Setting this value to 0 will disable the limit entirely. See the warning below. | ||||||
|  |  | ||||||
|     !!! note |     !!! note | ||||||
|  |  | ||||||
|         Increasing this limit could cause Paperless to consume additional |         Increasing this limit could cause Paperless to consume additional | ||||||
| @@ -775,7 +777,7 @@ but could result in missing text content. | |||||||
|     !!! warning |     !!! warning | ||||||
|  |  | ||||||
|         The limit is intended to prevent malicious files from consuming |         The limit is intended to prevent malicious files from consuming | ||||||
|         system resources and causing crashes and other errors. Only increase |         system resources and causing crashes and other errors. Only change | ||||||
|         this value if you are certain your documents are not malicious and |         this value if you are certain your documents are not malicious and | ||||||
|         you need the text which was not OCRed. |         you need the text which was not OCRed. | ||||||
|  |  | ||||||
|   | |||||||
| @@ -0,0 +1,24 @@ | |||||||
|  | # Generated by Django 4.2.10 on 2024-03-04 17:30 | ||||||
|  |  | ||||||
|  | import django.core.validators | ||||||
|  | from django.db import migrations | ||||||
|  | from django.db import models | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Migration(migrations.Migration): | ||||||
|  |  | ||||||
|  |     dependencies = [ | ||||||
|  |         ("paperless", "0002_applicationconfiguration_app_logo_and_more"), | ||||||
|  |     ] | ||||||
|  |  | ||||||
|  |     operations = [ | ||||||
|  |         migrations.AlterField( | ||||||
|  |             model_name="applicationconfiguration", | ||||||
|  |             name="max_image_pixels", | ||||||
|  |             field=models.FloatField( | ||||||
|  |                 null=True, | ||||||
|  |                 validators=[django.core.validators.MinValueValidator(0.0)], | ||||||
|  |                 verbose_name="Sets the maximum image size for decompression", | ||||||
|  |             ), | ||||||
|  |         ), | ||||||
|  |     ] | ||||||
| @@ -151,7 +151,7 @@ class ApplicationConfiguration(AbstractSingletonModel): | |||||||
|     max_image_pixels = models.FloatField( |     max_image_pixels = models.FloatField( | ||||||
|         verbose_name=_("Sets the maximum image size for decompression"), |         verbose_name=_("Sets the maximum image size for decompression"), | ||||||
|         null=True, |         null=True, | ||||||
|         validators=[MinValueValidator(1_000_000.0)], |         validators=[MinValueValidator(0.0)], | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|     color_conversion_strategy = models.CharField( |     color_conversion_strategy = models.CharField( | ||||||
|   | |||||||
| @@ -293,20 +293,19 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|                     f"they will not be used. Error: {e}", |                     f"they will not be used. Error: {e}", | ||||||
|                 ) |                 ) | ||||||
|  |  | ||||||
|         if self.settings.max_image_pixel is not None: |         if ( | ||||||
|  |             self.settings.max_image_pixel is not None | ||||||
|  |             and self.settings.max_image_pixel >= 0 | ||||||
|  |         ): | ||||||
|             # Convert pixels to mega-pixels and provide to ocrmypdf |             # Convert pixels to mega-pixels and provide to ocrmypdf | ||||||
|             max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 |             max_pixels_mpixels = self.settings.max_image_pixel / 1_000_000.0 | ||||||
|             if max_pixels_mpixels > 0: |             msg = ( | ||||||
|                 self.log.debug( |                 "OCR pixel limit is disabled!" | ||||||
|                     f"Calculated {max_pixels_mpixels} megapixels for OCR", |                 if max_pixels_mpixels == 0 | ||||||
|                 ) |                 else f"Calculated {max_pixels_mpixels} megapixels for OCR" | ||||||
|  |             ) | ||||||
|                 ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels |             self.log.debug(msg) | ||||||
|             else: |             ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels | ||||||
|                 self.log.warning( |  | ||||||
|                     "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, " |  | ||||||
|                     "this value must be at least 1 megapixel if set", |  | ||||||
|                 ) |  | ||||||
|  |  | ||||||
|         return ocrmypdf_args |         return ocrmypdf_args | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H