From 70d9a6fd36a7d541b552fd4e2727f237bb00724c Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Mon, 2 Dec 2024 19:11:40 -0800
Subject: [PATCH] Enhancement: auto-link duplicate document for failed tasks
 (#8415)

---
 src/documents/serialisers.py          | 13 ++++++++++---
 src/documents/tests/test_api_tasks.py | 25 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index d431d0673..8b1605af9 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1605,13 +1605,20 @@ class TasksViewSerializer(OwnedObjectSerializer):
         return "file"
 
     related_document = serializers.SerializerMethodField()
-    related_doc_re = re.compile(r"New document id (\d+) created")
+    created_doc_re = re.compile(r"New document id (\d+) created")
+    duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
 
     def get_related_document(self, obj):
         result = None
-        if obj.status is not None and obj.status == states.SUCCESS:
+        pattern = None  # not named `re`: that would shadow the regex module
+        match obj.status:
+            case states.SUCCESS:
+                pattern = self.created_doc_re  # id of the newly created document
+            case states.FAILURE:
+                pattern = self.duplicate_doc_re  # id of the existing duplicate
+        if pattern is not None:
             try:
-                result = self.related_doc_re.search(obj.result).group(1)
+                result = pattern.search(obj.result).group(1)
             except Exception:
                 pass
 
diff --git a/src/documents/tests/test_api_tasks.py b/src/documents/tests/test_api_tasks.py
index dd5425278..922067930 100644
--- a/src/documents/tests/test_api_tasks.py
+++ b/src/documents/tests/test_api_tasks.py
@@ -284,3 +284,28 @@ class TestTasks(DirectoriesMixin, APITestCase):
         returned_data = response.data[0]
 
         self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
+
+    def test_task_result_failed_duplicate_includes_related_doc(self):
+        """
+        GIVEN:
+            - A celery task that failed because the file duplicates document #1234
+        WHEN:
+            - API call is made to get tasks
+        THEN:
+            - The task's related_document is the duplicated document's id
+        """
+        PaperlessTask.objects.create(
+            task_id=str(uuid.uuid4()),
+            task_file_name="task_one.pdf",
+            status=celery.states.FAILURE,
+            result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
+        )
+
+        response = self.client.get(self.ENDPOINT)
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(len(response.data), 1)
+
+        returned_data = response.data[0]
+
+        self.assertEqual(returned_data["related_document"], "1234")