Fix: Test metadata items for Unicode issues (#5707)

Test each metadata key for Unicode issues and reject those that would fail inside DRF (Django REST Framework).
This commit is contained in:
Trenton H 2024-02-09 12:08:23 -08:00 committed by GitHub
parent cd3b1a221e
commit 0b1523f4e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 3 deletions

View File

@ -69,6 +69,11 @@ class MailDocumentParser(DocumentParser):
for key, value in mail.headers.items(): for key, value in mail.headers.items():
value = ", ".join(i for i in value) value = ", ".join(i for i in value)
try:
value.encode("utf-8")
except UnicodeEncodeError as e: # pragma: no cover
self.log.debug(f"Skipping header {key}: {e}")
continue
result.append( result.append(
{ {

View File

@ -55,11 +55,21 @@ class RasterisedDocumentParser(DocumentParser):
value = str(value) value = str(value)
try: try:
m = namespace_pattern.match(key) m = namespace_pattern.match(key)
if m is None: # pragma: no cover
continue
namespace = m.group(1)
key_value = m.group(2)
try:
namespace.encode("utf-8")
key_value.encode("utf-8")
except UnicodeEncodeError as e: # pragma: no cover
self.log.debug(f"Skipping metadata key {key}: {e}")
continue
result.append( result.append(
{ {
"namespace": m.group(1), "namespace": namespace,
"prefix": meta.REVERSE_NS[m.group(1)], "prefix": meta.REVERSE_NS[namespace],
"key": m.group(2), "key": key_value,
"value": value, "value": value,
}, },
) )