Treat CONSUMER_DELETE_DUPLICATES as a hard no

This commit is contained in:
shamoon
2026-01-18 12:27:31 -08:00
parent b5413525c4
commit ef661ae101
3 changed files with 51 additions and 7 deletions

View File

@@ -1146,8 +1146,9 @@ via the consumption directory, you can disable the consumer to save resources.
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
: When the consumer detects a duplicate document, it will not touch
the original document. This default behavior can be changed here.
: As of version 3.0 Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
this setting is enabled. When enabled, Paperless will check if a document with the same hash already
exists in the system and delete the duplicate file from the consumption directory without consuming it.
Defaults to false.

View File

@@ -785,16 +785,53 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum),
)
if existing_doc.exists():
existing_doc = existing_doc.order_by("-created")
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
if existing_doc.filter(deleted_at__isnull=False).exists():
if duplicates_in_trash.exists():
log_msg += " Note: at least one existing document is in the trash."
self.log.warning(log_msg)
if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
try:
Path(self.input_doc.original_file).unlink()
except FileNotFoundError:
pass
except Exception as exc: # pragma: no cover
self.log.warning(
f"Could not delete duplicate file {self.input_doc.original_file}: {exc}",
)
failure_msg = (
f"Not consuming {self.filename}: "
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
def pre_check_directories(self):
"""
Ensure all required directories exist before attempting to use them

View File

@@ -719,12 +719,18 @@ class TestConsumer(
dst = self.get_test_file()
self.assertIsFile(dst)
with self.get_consumer(dst) as consumer:
consumer.run()
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
self.assertEqual(Document.objects.count(), 1)
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self):