Add an optional flag to the PostDocument endpoint to skip auto tagging

2025-12-18 01:41:14 -06:00 · 2025-02-13 20:34:16 +01:00
parent a9ef7ff58e
commit 9f19ac12dd
7 changed files with 50 additions and 0 deletions
--- a/docs/api.md
+++ b/docs/api.md
@@ -192,6 +192,8 @@ The endpoint supports the following optional form fields:
 -   `tags`: Similar to correspondent. Specify this multiple times to
    have multiple tags added to the document.
 -   `archive_serial_number`: An optional archive serial number to set.
-   `skip_auto_tagging`: Optional boolean (defaults to `false`). When set to
    `true`, the classifier will not attempt to determine and add tags to
    the document.
 -   `custom_fields`: An array of custom field ids to assign (with an empty
    value) to the document.
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -577,6 +577,7 @@ class ConsumerPlugin(
                    original_file=self.unmodified_original
                    if self.unmodified_original
                    else self.working_copy,
                    skip_auto_tagging=self.metadata.skip_auto_tagging,
                )
                # After everything is in the database, copy the files into
--- a/src/documents/data_models.py
+++ b/src/documents/data_models.py
@@ -30,6 +30,7 @@ class DocumentMetadataOverrides:
    change_users: list[int] | None = None
    change_groups: list[int] | None = None
    custom_field_ids: list[int] | None = None
    skip_auto_tagging: bool | None = None
    def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
        """
@@ -49,6 +50,8 @@ class DocumentMetadataOverrides:
            self.storage_path_id = other.storage_path_id
        if other.owner_id is not None:
            self.owner_id = other.owner_id
        if other.skip_auto_tagging is not None:
            self.skip_auto_tagging = other.skip_auto_tagging
        # merge
        if self.tag_ids is None:
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1536,6 +1536,13 @@ class PostDocumentSerializer(serializers.Serializer):
        required=False,
    )
    # Optional, write-only upload flag; when the client omits it the
    # validated value falls back to False.
    skip_auto_tagging = serializers.BooleanField(
        label="Skip auto tagging",
        default=False,
        write_only=True,
        required=False,
    )
    def validate_document(self, document):
        document_data = document.file.read()
        mime_type = magic.from_buffer(document_data, mime=True)
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -206,8 +206,12 @@ def set_tags(
    base_url=None,
    stdout=None,
    style_func=None,
    skip_auto_tagging=False,
    **kwargs,
 ):
    if skip_auto_tagging:
        return
    if replace:
        Document.tags.through.objects.filter(document=document).exclude(
            Q(tag__is_inbox_tag=True),
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -854,6 +854,37 @@ class TestConsumer(
        self._assert_first_last_send_progress()
    @mock.patch("documents.consumer.load_classifier")
    def testClassifyDocumentWithSkippedTags(self, m):
        """
        Consume a file with skip_auto_tagging enabled and one explicit tag.

        The mocked classifier predicts a correspondent, a document type and
        a second tag. The correspondent and document type must be applied,
        the explicit tag must be kept, and the predicted tag must be absent.
        """
        auto_correspondent = Correspondent.objects.create(
            matching_algorithm=Correspondent.MATCH_AUTO,
            name="test",
        )
        auto_doc_type = DocumentType.objects.create(
            matching_algorithm=DocumentType.MATCH_AUTO,
            name="test",
        )
        # Created in order: "t1" is passed explicitly, "t2" is only predicted.
        explicit_tag, predicted_tag = (
            Tag.objects.create(name=tag_name, matching_algorithm=Tag.MATCH_AUTO)
            for tag_name in ("t1", "t2")
        )

        # Stand-in classifier returned by the patched load_classifier.
        classifier = MagicMock()
        classifier.predict_correspondent.return_value = auto_correspondent.pk
        classifier.predict_document_type.return_value = auto_doc_type.pk
        classifier.predict_tags.return_value = [predicted_tag.pk]
        m.return_value = classifier

        metadata = DocumentMetadataOverrides(
            skip_auto_tagging=True,
            tag_ids=[explicit_tag.pk],
        )
        with self.get_consumer(self.get_test_file(), metadata) as consumer:
            consumer.run()
            document = Document.objects.first()

        self.assertEqual(document.correspondent, auto_correspondent)
        self.assertEqual(document.document_type, auto_doc_type)

        assigned_tags = document.tags.all()
        self.assertIn(explicit_tag, assigned_tags)
        self.assertNotIn(predicted_tag, assigned_tags)
        self._assert_first_last_send_progress()
    @override_settings(CONSUMER_DELETE_DUPLICATES=True)
    def test_delete_duplicate(self):
        dst = self.get_test_file()
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1385,6 +1385,7 @@ class PostDocumentView(GenericAPIView):
        created = serializer.validated_data.get("created")
        archive_serial_number = serializer.validated_data.get("archive_serial_number")
        custom_field_ids = serializer.validated_data.get("custom_fields")
        skip_auto_tagging = serializer.validated_data.get("skip_auto_tagging")
        t = int(mktime(datetime.now().timetuple()))
@@ -1413,6 +1414,7 @@ class PostDocumentView(GenericAPIView):
            asn=archive_serial_number,
            owner_id=request.user.id,
            custom_field_ids=custom_field_ids,
            skip_auto_tagging=skip_auto_tagging,
        )
        async_task = consume_file.delay(