Added a setting to delete duplicate documents

This commit is contained in:
Jonas Winkler 2020-11-10 01:47:58 +01:00
parent 54f04650d1
commit 83f82f3caf
3 changed files with 8 additions and 0 deletions

View File

@ -143,6 +143,10 @@ PAPERLESS_EMAIL_SECRET=""
#### Software Tweaks ####
###############################################################################
# When the consumer detects a duplicate document, it skips the duplicate and
# leaves that file untouched. Set this to "true" to have the consumer delete
# the duplicate file instead.
#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false"
# After a document is consumed, Paperless can trigger an arbitrary script if
# you like. This script will be passed a number of arguments for you to work
# with. The default is blank, which means nothing will be executed. For more

View File

@ -84,6 +84,8 @@ class Consumer:
"warning",
"Skipping {} as it appears to be a duplicate".format(doc)
)
if settings.CONSUMER_DELETE_DUPLICATES:
self._cleanup_doc(doc)
return False
self.log("info", "Consuming {}".format(doc))

View File

@ -258,6 +258,8 @@ Q_CLUSTER = {
# Paperless Specific Settings #
###############################################################################
# Whether the consumer should clean up an incoming file that it skips because
# it is a duplicate. Read from the PAPERLESS_CONSUMER_DELETE_DUPLICATES
# environment variable.
# NOTE(review): the value used when the variable is unset is decided by
# __get_boolean, which is not visible here -- presumably false; confirm.
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")