diff --git a/paperless.conf.example b/paperless.conf.example index 48df40ab2..1c62256ab 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -143,6 +143,10 @@ PAPERLESS_EMAIL_SECRET="" #### Software Tweaks #### ############################################################################### +# When the consumer detects a duplicate document, it skips it and leaves the +# duplicate file in place. Set this to "true" to delete the duplicate instead. +#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false" + # After a document is consumed, Paperless can trigger an arbitrary script if # you like. This script will be passed a number of arguments for you to work # with. The default is blank, which means nothing will be executed. For more diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f61d11136..75e6f6120 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -84,6 +84,8 @@ class Consumer: "warning", "Skipping {} as it appears to be a duplicate".format(doc) ) + if settings.CONSUMER_DELETE_DUPLICATES: + self._cleanup_doc(doc) return False self.log("info", "Consuming {}".format(doc)) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 2c96350dc..06dfdcd84 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -258,6 +258,8 @@ Q_CLUSTER = { # Paperless Specific Settings # ############################################################################### +CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") + # The default language that tesseract will attempt to use when parsing # documents. It should be a 3-letter language code consistent with ISO 639. OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")