mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fix: Test metadata items for Unicode issues (#5707)
Test each metadata key for Unicode encoding issues and reject any that would fail inside Django REST Framework (DRF)
This commit is contained in:
parent
cd3b1a221e
commit
0b1523f4e5
@ -69,6 +69,11 @@ class MailDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
for key, value in mail.headers.items():
|
for key, value in mail.headers.items():
|
||||||
value = ", ".join(i for i in value)
|
value = ", ".join(i for i in value)
|
||||||
|
try:
|
||||||
|
value.encode("utf-8")
|
||||||
|
except UnicodeEncodeError as e: # pragma: no cover
|
||||||
|
self.log.debug(f"Skipping header {key}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
result.append(
|
result.append(
|
||||||
{
|
{
|
||||||
|
@ -55,11 +55,21 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
value = str(value)
|
value = str(value)
|
||||||
try:
|
try:
|
||||||
m = namespace_pattern.match(key)
|
m = namespace_pattern.match(key)
|
||||||
|
if m is None: # pragma: no cover
|
||||||
|
continue
|
||||||
|
namespace = m.group(1)
|
||||||
|
key_value = m.group(2)
|
||||||
|
try:
|
||||||
|
namespace.encode("utf-8")
|
||||||
|
key_value.encode("utf-8")
|
||||||
|
except UnicodeEncodeError as e: # pragma: no cover
|
||||||
|
self.log.debug(f"Skipping metadata key {key}: {e}")
|
||||||
|
continue
|
||||||
result.append(
|
result.append(
|
||||||
{
|
{
|
||||||
"namespace": m.group(1),
|
"namespace": namespace,
|
||||||
"prefix": meta.REVERSE_NS[m.group(1)],
|
"prefix": meta.REVERSE_NS[namespace],
|
||||||
"key": m.group(2),
|
"key": key_value,
|
||||||
"value": value,
|
"value": value,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user