mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #1591 from paperless-ngx/fix/1583-tika-str
Fix: Parsing Tika documents fails with AttributeError
This commit is contained in:
commit
8ed401aec1
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
import requests
|
import requests
|
||||||
@@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
tika_server = settings.TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
|
|
||||||
|
# tika does not support a PathLike, only strings
|
||||||
|
# ensure this is a string
|
||||||
|
document_path = str(document_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser):
|
|||||||
for key in parsed["metadata"]
|
for key in parsed["metadata"]
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
def parse(self, document_path: Path, mime_type, file_name=None):
|
||||||
self.log("info", f"Sending {document_path} to Tika server")
|
self.log("info", f"Sending {document_path} to Tika server")
|
||||||
tika_server = settings.TIKA_ENDPOINT
|
tika_server = settings.TIKA_ENDPOINT
|
||||||
|
|
||||||
|
# tika does not support a PathLike, only strings
|
||||||
|
# ensure this is a string
|
||||||
|
document_path = str(document_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = parser.from_file(document_path, tika_server)
|
parsed = parser.from_file(document_path, tika_server)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user