Fix: Test metadata items for Unicode issues (#5707)

Test each key for Unicode issues and reject the ones which would fail inside DRF (Django REST Framework).
This commit is contained in:
Trenton H 2024-02-09 12:08:23 -08:00 committed by GitHub
parent cd3b1a221e
commit 0b1523f4e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 3 deletions

View File

@@ -69,6 +69,11 @@ class MailDocumentParser(DocumentParser):
for key, value in mail.headers.items():
value = ", ".join(i for i in value)
try:
value.encode("utf-8")
except UnicodeEncodeError as e: # pragma: no cover
self.log.debug(f"Skipping header {key}: {e}")
continue
result.append(
{

View File

@@ -55,11 +55,21 @@ class RasterisedDocumentParser(DocumentParser):
value = str(value)
try:
m = namespace_pattern.match(key)
if m is None: # pragma: no cover
continue
namespace = m.group(1)
key_value = m.group(2)
try:
namespace.encode("utf-8")
key_value.encode("utf-8")
except UnicodeEncodeError as e: # pragma: no cover
self.log.debug(f"Skipping metadata key {key}: {e}")
continue
result.append(
{
"namespace": m.group(1),
"prefix": meta.REVERSE_NS[m.group(1)],
"key": m.group(2),
"namespace": namespace,
"prefix": meta.REVERSE_NS[namespace],
"key": key_value,
"value": value,
},
)