Runs the pre-commit hooks over all the Python files

2025-12-14 01:21:14 -06:00 · 2022-03-11 10:55:51 -08:00
parent d3e9799279
commit 1771d18a21
94 changed files with 1638 additions and 991 deletions
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -1,10 +1,11 @@
 import os
-import requests
+
 import dateutil.parser
-
+import requests
 from django.conf import settings
-
-from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
+from documents.parsers import DocumentParser
+from documents.parsers import make_thumbnail_from_pdf
+from documents.parsers import ParseError
 from tika import parser


@@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser):
            self.archive_path = self.convert_to_pdf(document_path, file_name)

        return make_thumbnail_from_pdf(
-            self.archive_path, self.tempdir, self.logging_group
+            self.archive_path,
+            self.tempdir,
+            self.logging_group,
        )

    def extract_metadata(self, document_path, mime_type):
@@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser):
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
-                f"{tika_server}: {err}"
+                f"{tika_server}: {err}",
            )

        self.text = parsed["content"].strip()
@@ -74,22 +77,23 @@ class TikaDocumentParser(DocumentParser):
        url = gotenberg_server + "/forms/libreoffice/convert"

        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
-        files = {
-            "files": (
-                file_name or os.path.basename(document_path),
-                open(document_path, "rb"),
-            )
-        }
-        headers = {}
+        with open(document_path, "rb") as document_handle:
+            files = {
+                "files": (
+                    file_name or os.path.basename(document_path),
+                    document_handle,
+                ),
+            }
+            headers = {}

-        try:
-            response = requests.post(url, files=files, headers=headers)
-            response.raise_for_status()  # ensure we notice bad responses
-        except Exception as err:
-            raise ParseError(f"Error while converting document to PDF: {err}")
+            try:
+                response = requests.post(url, files=files, headers=headers)
+                response.raise_for_status()  # ensure we notice bad responses
+            except Exception as err:
+                raise ParseError(f"Error while converting document to PDF: {err}")

-        file = open(pdf_path, "wb")
-        file.write(response.content)
-        file.close()
+        with open(pdf_path, "wb") as file:
+            file.write(response.content)
+            file.close()

        return pdf_path
--- a/src/paperless_tika/signals.py
+++ b/src/paperless_tika/signals.py
@@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
        "weight": 10,
        "mime_types": {
            "application/msword": ".doc",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # noqa: E501
            "application/vnd.ms-excel": ".xls",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # noqa: E501
            "application/vnd.ms-powerpoint": ".ppt",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # NOQA: E501
-            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # noqa: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # noqa: E501
            "application/vnd.oasis.opendocument.presentation": ".odp",
            "application/vnd.oasis.opendocument.spreadsheet": ".ods",
            "application/vnd.oasis.opendocument.text": ".odt",
--- a/src/paperless_tika/tests/test_tika_parser.py
+++ b/src/paperless_tika/tests/test_tika_parser.py
@@ -4,9 +4,8 @@ from pathlib import Path
 from unittest import mock

 from django.test import TestCase
-from requests import Response
-
 from paperless_tika.parsers import TikaDocumentParser
+from requests import Response


 class TestTikaParser(TestCase):
@@ -42,14 +41,15 @@ class TestTikaParser(TestCase):
    @mock.patch("paperless_tika.parsers.parser.from_file")
    def test_metadata(self, from_file):
        from_file.return_value = {
-            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}
+            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"},
        }

        file = os.path.join(self.parser.tempdir, "input.odt")
        Path(file).touch()

        metadata = self.parser.extract_metadata(
-            file, "application/vnd.oasis.opendocument.text"
+            file,
+            "application/vnd.oasis.opendocument.text",
        )

        self.assertTrue("Creation-Date" in [m["key"] for m in metadata])