mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
reorganised settings documentation and added OCR_USER_ARGS
This commit is contained in:
@@ -350,6 +350,8 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
|
||||
|
||||
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
|
||||
|
||||
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
|
||||
|
||||
# GNUPG needs a home directory for some reason
|
||||
GNUPG_HOME = os.getenv("HOME", "/tmp")
|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@@ -118,10 +119,22 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
f"no DPI information is present in this image and "
|
||||
f"OCR_IMAGE_DPI is not set.")
|
||||
|
||||
if settings.OCR_USER_ARGS:
|
||||
try:
|
||||
user_args = json.loads(settings.OCR_USER_ARGS)
|
||||
ocr_args = {**ocr_args, **user_args}
|
||||
except Exception as e:
|
||||
self.log(
|
||||
"warning",
|
||||
f"There is an issue with PAPERLESS_OCR_USER_ARGS, so "
|
||||
f"they will not be used: {e}")
|
||||
|
||||
# This forces tesseract to use one core per page.
|
||||
os.environ['OMP_THREAD_LIMIT'] = "1"
|
||||
|
||||
try:
|
||||
self.log("debug",
|
||||
f"Calling OCRmyPDF with {str(ocr_args)}")
|
||||
ocrmypdf.ocr(**ocr_args)
|
||||
# success! announce results
|
||||
self.archive_path = archive_path
|
||||
|
Reference in New Issue
Block a user