Adding more test coverage, in particular around Tika and its parser

This commit is contained in:
Trenton H
2023-02-02 12:46:49 -08:00
parent 8154c7b53a
commit bdcba570cb
8 changed files with 164 additions and 30 deletions

View File

@@ -161,7 +161,7 @@ class RasterisedDocumentParser(DocumentParser):
except Exception:
# TODO catch all for various issues with PDFminer.six.
# If PDFminer fails, fall back to OCR.
# If pdftotext fails, fall back to OCR.
self.log(
"warning",
"Error while getting text from PDF document with " "pdfminer.six",