Merge branch 'dev' into feature-permissions

2026-01-28 22:59:03 -06:00 · 2023-01-01 17:51:41 -08:00
parent d71d388c08 cf82cb35c9
commit a08467342c
49 changed files with 2587 additions and 1172 deletions
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -797,6 +797,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
    @mock.patch("documents.views.consume_file.delay")
    def test_upload(self, m):

+        m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -820,6 +822,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_empty_metadata(self, m):

+        m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -843,6 +847,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_invalid_form(self, m):

+        m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -857,6 +863,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_invalid_file(self, m):

+        m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
            "rb",
@@ -870,6 +878,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_title(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -888,6 +899,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_correspondent(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        c = Correspondent.objects.create(name="test-corres")
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -907,6 +921,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_correspondent(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -921,6 +938,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_document_type(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        dt = DocumentType.objects.create(name="invoice")
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -940,6 +960,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_document_type(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            "rb",
@@ -954,6 +977,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_tags(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        t1 = Tag.objects.create(name="tag1")
        t2 = Tag.objects.create(name="tag2")
        with open(
@@ -974,6 +1000,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_tags(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        t1 = Tag.objects.create(name="tag1")
        t2 = Tag.objects.create(name="tag2")
        with open(
@@ -990,6 +1019,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_created(self, async_task):
+
+        async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
        created = datetime.datetime(
            2022,
            5,
@@ -3040,6 +3072,59 @@ class TestTasks(APITestCase):
        self.assertEqual(returned_task2["status"], celery.states.PENDING)
        self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)

+    def test_get_single_task_status(self):
+        """
+        GIVEN:
+            - Query parameter for a valid task ID
+        WHEN:
+            - API call is made to get task status
+        THEN:
+            - Single task data is returned
+        """
+
+        id1 = str(uuid.uuid4())
+        task1 = PaperlessTask.objects.create(
+            task_id=id1,
+            task_file_name="task_one.pdf",
+        )
+
+        _ = PaperlessTask.objects.create(  # second task; the filter must omit it
+            task_id=str(uuid.uuid4()),
+            task_file_name="task_two.pdf",
+        )
+
+        response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(len(response.data), 1)
+        returned_task1 = response.data[0]
+
+        self.assertEqual(returned_task1["task_id"], task1.task_id)
+
+    def test_get_single_task_status_not_valid(self):
+        """
+        GIVEN:
+            - Query parameter for a non-existent task ID
+        WHEN:
+            - API call is made to get task status
+        THEN:
+            - No task data is returned
+        """
+        _ = PaperlessTask.objects.create(
+            task_id=str(uuid.uuid4()),
+            task_file_name="task_one.pdf",
+        )
+
+        _ = PaperlessTask.objects.create(
+            task_id=str(uuid.uuid4()),
+            task_file_name="task_two.pdf",
+        )
+
+        response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(len(response.data), 0)
+
    def test_acknowledge_tasks(self):
        """
        GIVEN:
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -660,7 +660,7 @@ class PostDocumentView(GenericAPIView):

        task_id = str(uuid.uuid4())

-        consume_file.delay(
+        async_task = consume_file.delay(
            temp_filename,
            override_filename=doc_name,
            override_title=title,
@@ -672,7 +672,7 @@ class PostDocumentView(GenericAPIView):
            override_owner_id=owner_id,
        )

-        return Response("OK")
+        return Response(async_task.id)


 class SelectionDataView(GenericAPIView):
@@ -929,13 +929,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
    permission_classes = (IsAuthenticated,)
    serializer_class = TasksViewSerializer

-    queryset = (
-        PaperlessTask.objects.filter(
-            acknowledged=False,
+    def get_queryset(self):
+        queryset = (  # default listing: unacknowledged tasks, newest first
+            PaperlessTask.objects.filter(
+                acknowledged=False,
+            )
+            .order_by("date_created")
+            .reverse()
        )
-        .order_by("date_created")
-        .reverse()
-    )
+        task_id = self.request.query_params.get("task_id")  # optional filter
+        if task_id is not None:  # explicit lookup: acknowledged tasks match too
+            queryset = PaperlessTask.objects.filter(task_id=task_id)
+        return queryset


 class AcknowledgeTasksView(GenericAPIView):
--- a/src/locale/pt_PT/LC_MESSAGES/django.po
+++ b/src/locale/pt_PT/LC_MESSAGES/django.po
@@ -3,7 +3,7 @@ msgstr ""
 "Project-Id-Version: paperless-ngx\n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2022-11-09 21:50+0000\n"
-"PO-Revision-Date: 2022-11-09 23:11\n"
+"PO-Revision-Date: 2022-12-30 15:36\n"
 "Last-Translator: \n"
 "Language-Team: Portuguese\n"
 "Language: pt_PT\n"
@@ -100,7 +100,7 @@ msgstr "tipos de documento"

 #: documents/models.py:93
 msgid "path"
-msgstr ""
+msgstr "caminho"

 #: documents/models.py:99 documents/models.py:127
 msgid "storage path"
@@ -396,7 +396,7 @@ msgstr "regras de filtragem"

 #: documents/models.py:536
 msgid "Task ID"
-msgstr ""
+msgstr "ID da tarefa"

 #: documents/models.py:537
 msgid "Celery ID for the Task that was run"
@@ -412,7 +412,7 @@ msgstr ""

 #: documents/models.py:549 documents/models.py:556
 msgid "Task Name"
-msgstr ""
+msgstr "Nome da Tarefa"

 #: documents/models.py:550
 msgid "Name of the file which the Task was run for"
@@ -626,7 +626,7 @@ msgstr ""

 #: paperless/settings.py:395
 msgid "Serbian"
-msgstr ""
+msgstr "Sérvio"

 #: paperless/settings.py:396
 msgid "Swedish"
@@ -634,11 +634,11 @@ msgstr "Sueco"

 #: paperless/settings.py:397
 msgid "Turkish"
-msgstr ""
+msgstr "Turco"

 #: paperless/settings.py:398
 msgid "Chinese Simplified"
-msgstr ""
+msgstr "Chinês Simplificado"

 #: paperless/urls.py:161
 msgid "Paperless-ngx administration"
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1,7 +1,7 @@
 from typing import Final
 from typing import Tuple

-__version__: Final[Tuple[int, int, int]] = (1, 11, 0)
+__version__: Final[Tuple[int, int, int]] = (1, 11, 3)
 # Version string like X.Y.Z
 __full_version_str__: Final[str] = ".".join(map(str, __version__))
 # Version string like X.Y
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -8,6 +8,8 @@ import requests
 from bleach import clean
 from bleach import linkify
 from django.conf import settings
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
 from documents.parsers import DocumentParser
 from documents.parsers import make_thumbnail_from_pdf
 from documents.parsers import ParseError
@@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):

        self.text += f"\n\n{strip_text(mail.text)}"

-        self.date = mail.date
+        if is_naive(mail.date):
+            self.date = make_aware(mail.date)
+        else:
+            self.date = mail.date
+
        self.archive_path = self.generate_pdf(document_path)

    def tika_parse(self, html: str):
--- a/src/paperless_mail/serialisers.py
+++ b/src/paperless_mail/serialisers.py
@@ -86,6 +86,7 @@ class MailRuleSerializer(serializers.ModelSerializer):
            "assign_document_type",
            "order",
            "attachment_type",
+            "consumption_scope",
        ]

    def update(self, instance, validated_data):
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -1,6 +1,8 @@
 import json
 import os
 import re
+from pathlib import Path
+from typing import Optional

 from django.conf import settings
 from documents.parsers import DocumentParser
@@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
            self.log("warning", f"Error while calculating DPI for image {image}: {e}")
            return None

-    def extract_text(self, sidecar_file, pdf_file):
+    def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
        # When re-doing OCR, the sidecar contains ONLY the new text, not
        # the whole text, so do not utilize it in that case
        if (
@@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):

            self.log("debug", f"Detected language {lang}")

-            if lang in {
-                "ar",  # Arabic
-                "he",  # Hebrew,
-                "fa",  # Persian
-            }:
+            if (
+                lang
+                in {
+                    "ar",  # Arabic
+                    "he",  # Hebrew,
+                    "fa",  # Persian
+                }
+                and pdf_file.name != "archive-fallback.pdf"
+            ):
                raise RtlLanguageException()
            return stripped
        except RtlLanguageException:
@@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):

        return ocrmypdf_args

-    def parse(self, document_path, mime_type, file_name=None):
+    def parse(self, document_path: Path, mime_type, file_name=None):
        # This forces tesseract to use one core per page.
        os.environ["OMP_THREAD_LIMIT"] = "1"

@@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
        import ocrmypdf
        from ocrmypdf import InputFileError, EncryptedPdfError

-        archive_path = os.path.join(self.tempdir, "archive.pdf")
-        sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
+        archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
+        sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))

        args = self.construct_ocrmypdf_parameters(
            document_path,
@@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
                f"Attempting force OCR to get the text.",
            )

-            archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf")
-            sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt")
+            archive_path_fallback = Path(
+                os.path.join(self.tempdir, "archive-fallback.pdf"),
+            )
+            sidecar_file_fallback = Path(
+                os.path.join(self.tempdir, "sidecar-fallback.txt"),
+            )

            # Attempt to run OCR with safe settings.