mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Changes the error mode to replace instead of ignore, to better highlight where a problem happened
This commit is contained in:
parent
111960c530
commit
3205d52331
@ -329,7 +329,7 @@ class DocumentParser(LoggingMixin):
|
|||||||
text = filepath.read_text(encoding="utf-8")
|
text = filepath.read_text(encoding="utf-8")
|
||||||
except UnicodeDecodeError as e:
|
except UnicodeDecodeError as e:
|
||||||
self.log("warning", f"Unicode error during text reading, continuing: {e}")
|
self.log("warning", f"Unicode error during text reading, continuing: {e}")
|
||||||
text = filepath.read_bytes().decode("utf-8", errors="ignore")
|
text = filepath.read_bytes().decode("utf-8", errors="replace")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
|
@ -48,5 +48,5 @@ class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"text/plain",
|
"text/plain",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(parser.get_text(), "Pantothensure\n")
|
self.assertEqual(parser.get_text(), "Pantothens<EFBFBD>ure\n")
|
||||||
self.assertIsNone(parser.get_archive_path())
|
self.assertIsNone(parser.get_archive_path())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user