mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Workaround for all PDFminer.six issues.
This commit is contained in:
parent
48e8076e34
commit
814d90745b
@ -121,7 +121,12 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
self.log("debug", f"Extracted text from PDF file {pdf_file}")
|
self.log("debug", f"Extracted text from PDF file {pdf_file}")
|
||||||
return stripped
|
return stripped
|
||||||
except PDFException:
|
except Exception:
|
||||||
|
# TODO catch all for various issues with PDFminer.six.
|
||||||
|
# If PDFminer fails, fall back to OCR.
|
||||||
|
self.log("warn",
|
||||||
|
"Error while getting text from PDF document with "
|
||||||
|
"pdfminer.six", exc_info=True)
|
||||||
# probably not a PDF file.
|
# probably not a PDF file.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user