diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 13e937daa..cde6543be 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -68,7 +68,8 @@ class TikaDocumentParser(DocumentParser): url = gotenberg_server + "/convert/office" self.log("info", f"Converting {document_path} to PDF as {pdf_path}") - files = {"files": (file_name, open(document_path, "rb"))} + files = {"files": (file_name or os.path.basename(document_path), + open(document_path, "rb"))} headers = {} try: diff --git a/src/paperless_tika/tests/test_tika_parser.py b/src/paperless_tika/tests/test_tika_parser.py new file mode 100644 index 000000000..67563e5fe --- /dev/null +++ b/src/paperless_tika/tests/test_tika_parser.py @@ -0,0 +1,60 @@ +import datetime +import os +from pathlib import Path +from unittest import mock + +from django.test import TestCase +from requests import Response + +from paperless_tika.parsers import TikaDocumentParser + + +class TestTikaParser(TestCase): + + def setUp(self) -> None: + self.parser = TikaDocumentParser(logging_group=None) + + def tearDown(self) -> None: + self.parser.cleanup() + + @mock.patch("paperless_tika.parsers.parser.from_file") + @mock.patch("paperless_tika.parsers.requests.post") + def test_parse(self, post, from_file): + from_file.return_value = { + "content": "the content", + "metadata": { + "Creation-Date": "2020-11-21" + } + } + response = Response() + response._content = b"PDF document" + response.status_code = 200 + post.return_value = response + + file = os.path.join(self.parser.tempdir, "input.odt") + Path(file).touch() + self.parser.parse(file, "application/vnd.oasis.opendocument.text") + + self.assertEqual(self.parser.text, "the content") + self.assertIsNotNone(self.parser.archive_path) + with open(self.parser.archive_path, "rb") as f: + self.assertEqual(f.read(), b"PDF document") + + self.assertEqual(self.parser.date, datetime.datetime(2020, 11, 21)) + + @mock.patch("paperless_tika.parsers.parser.from_file") + def test_metadata(self, from_file): + from_file.return_value = { + "metadata": { + "Creation-Date": "2020-11-21", + "Some-key": "value" + } + } + + file = os.path.join(self.parser.tempdir, "input.odt") + Path(file).touch() + + metadata = self.parser.extract_metadata(file, "application/vnd.oasis.opendocument.text") + + self.assertTrue("Creation-Date" in [m['key'] for m in metadata]) + self.assertTrue("Some-key" in [m['key'] for m in metadata])