implement PAPERLESS_OCR_MAX_IMAGE_PIXELS

This commit is contained in:
Henning Häcker 2022-03-19 01:03:45 +01:00 committed by Johann Bauer
parent 9a758fc3dc
commit a8887b211e
3 changed files with 11 additions and 0 deletions

View File

@ -389,6 +389,15 @@ PAPERLESS_OCR_IMAGE_DPI=<num>
Default is none, which will automatically calculate image DPI so that
the produced PDF documents are A4 sized.
PAPERLESS_OCR_MAX_IMAGE_PIXELS=<num>
Paperless will not OCR images that have more pixels than this limit.
This is intended to prevent decompression bombs from overloading paperless.
Increase this limit if you encounter a DecompressionBombError even though
the affected file is not malicious; this can be caused, for example, by
incorrectly recognized metadata.
If you have enough resources, or if you are certain that your uploaded files
are not malicious, you can increase this value as needed.
If unset, Pillow's built-in limit (``PIL.Image.MAX_IMAGE_PIXELS``) applies.
PAPERLESS_OCR_USER_ARGS=<json>
OCRmyPDF offers many more options. Use this parameter to specify any

View File

@ -8,6 +8,7 @@ from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
from PIL import Image
# Environment values are always strings, while Pillow performs integer
# arithmetic/comparison on MAX_IMAGE_PIXELS, so the value must be parsed.
# When the variable is unset or empty, Pillow's own limit is left untouched.
_max_image_pixels = os.environ.get('PAPERLESS_OCR_MAX_IMAGE_PIXELS')
if _max_image_pixels:
    Image.MAX_IMAGE_PIXELS = int(_max_image_pixels)
class NoTextFoundException(Exception):
    """Signal that no text was found.

    NOTE(review): meaning inferred from the class name only; the raise and
    catch sites are outside this view -- confirm against the callers.
    """

View File

@ -6,6 +6,7 @@ from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
# Environment values are always strings, while Pillow performs integer
# arithmetic/comparison on MAX_IMAGE_PIXELS, so the value must be parsed.
# When the variable is unset or empty, Pillow's own limit is left untouched.
_max_image_pixels = os.environ.get('PAPERLESS_OCR_MAX_IMAGE_PIXELS')
if _max_image_pixels:
    Image.MAX_IMAGE_PIXELS = int(_max_image_pixels)
class TextDocumentParser(DocumentParser):
"""