This commit is contained in:
jonaswinkler 2021-03-22 22:46:35 +01:00
parent 27cb243a2f
commit d26c46e034

View File

@@ -119,6 +119,8 @@ class RasterisedDocumentParser(DocumentParser):
try:
text = extract_text(pdf_file)
stripped = strip_excess_whitespace(text)
stripped = stripped.replace("\0", " ")
self.log("debug", f"Extracted text from PDF file {pdf_file}")
return stripped
except PDFException: