Enhancement: auto-link duplicate document for failed tasks (#8415)

This commit is contained in:
shamoon 2024-12-02 19:11:40 -08:00 committed by GitHub
parent 0406fca59b
commit 70d9a6fd36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 3 deletions

View File

@ -1605,13 +1605,20 @@ class TasksViewSerializer(OwnedObjectSerializer):
return "file"
related_document = serializers.SerializerMethodField()
related_doc_re = re.compile(r"New document id (\d+) created")
created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
def get_related_document(self, obj):
result = None
if obj.status is not None and obj.status == states.SUCCESS:
re = None
match obj.status:
case states.SUCCESS:
re = self.created_doc_re
case states.FAILURE:
re = self.duplicate_doc_re
if re is not None:
try:
result = self.related_doc_re.search(obj.result).group(1)
result = re.search(obj.result).group(1)
except Exception:
pass

View File

@ -284,3 +284,28 @@ class TestTasks(DirectoriesMixin, APITestCase):
returned_data = response.data[0]
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
def test_task_result_failed_duplicate_includes_related_doc(self):
    """
    GIVEN:
        - A single stored task with FAILURE status whose result text
          reports a duplicate of an existing document (#1234)
    WHEN:
        - The tasks endpoint is requested
    THEN:
        - The one returned task exposes "1234" as its related document
    """
    # Persist the failed task; the result text carries the duplicate's id.
    failed_task_fields = {
        "task_id": str(uuid.uuid4()),
        "task_file_name": "task_one.pdf",
        "status": celery.states.FAILURE,
        "result": "Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
    }
    PaperlessTask.objects.create(**failed_task_fields)

    api_response = self.client.get(self.ENDPOINT)

    self.assertEqual(api_response.status_code, status.HTTP_200_OK)
    tasks = api_response.data
    self.assertEqual(len(tasks), 1)
    # The id is compared as a string, exactly as the endpoint returns it.
    self.assertEqual(tasks[0]["related_document"], "1234")