From ce98019b49d96110c7b1da4595dfcfb0c21916f3 Mon Sep 17 00:00:00 2001
From: Matt
Date: Thu, 1 Feb 2018 10:08:57 -0500
Subject: [PATCH] Fixing error sentinel for pdftotext when the PDF has no text (scanned images). It was causing a crash previously.

---
 src/paperless_tesseract/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index c90c9f020..43c898df5 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -235,6 +235,6 @@ def get_text_from_pdf(pdf_file):
         try:
             pdf = pdftotext.PDF(f)
         except pdftotext.Error:
-            return False
+            return ""

     return "\n".join(pdf)