Changes the Unicode decoding error mode from "ignore" to "replace", to better highlight where in the text a problem happened

This commit is contained in:
Trenton Holmes 2023-05-13 07:47:21 -07:00 committed by Trenton H
parent 111960c530
commit 3205d52331
2 changed files with 2 additions and 2 deletions

View File

@@ -329,7 +329,7 @@ class DocumentParser(LoggingMixin):
text = filepath.read_text(encoding="utf-8")
except UnicodeDecodeError as e:
self.log("warning", f"Unicode error during text reading, continuing: {e}")
text = filepath.read_bytes().decode("utf-8", errors="ignore")
text = filepath.read_bytes().decode("utf-8", errors="replace")
return text
def extract_metadata(self, document_path, mime_type):

View File

@@ -48,5 +48,5 @@ class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
"text/plain",
)
self.assertEqual(parser.get_text(), "Pantothensure\n")
self.assertEqual(parser.get_text(), "Pantothens<EFBFBD>ure\n")
self.assertIsNone(parser.get_archive_path())