From d4cb84ff76423ed0b9efb0aa465830a94b552115 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Wed, 14 Sep 2022 07:48:12 -0700 Subject: [PATCH] Ensure the tika parse function gets a string, not a PathLike --- src/paperless_tika/parsers.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index e706e3aa5..1cfb1eecb 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import dateutil.parser import requests @@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser): def extract_metadata(self, document_path, mime_type): tika_server = settings.TIKA_ENDPOINT + + # tika does not support a PathLike, only strings + # ensure this is a string + document_path = str(document_path) + try: parsed = parser.from_file(document_path, tika_server) except Exception as e: @@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser): for key in parsed["metadata"] ] - def parse(self, document_path, mime_type, file_name=None): + def parse(self, document_path: Path, mime_type, file_name=None): self.log("info", f"Sending {document_path} to Tika server") tika_server = settings.TIKA_ENDPOINT + # tika does not support a PathLike, only strings + # ensure this is a string + document_path = str(document_path) + try: parsed = parser.from_file(document_path, tika_server) except Exception as err: