Add support for using pre-existing text from PDFs

2026-02-01 23:19:00 -06:00 · 2018-01-30 20:13:35 +00:00
parent 7ad7323cc7
commit 269c32ce6a
7 changed files with 60 additions and 13 deletions
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -210,6 +210,9 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
 # The amount of threads to use for OCR
 OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")

+# OCR all documents? True only for yes/y/1/t/true (any case); default is off.
+OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))
+
 # If this is true, any failed attempts to OCR a PDF will result in the PDF
 # being indexed anyway, with whatever we could get.  If it's False, the file
 # will simply be left in the CONSUMPTION_DIR.