Added a setting to delete duplicate documents

This commit is contained in:
Jonas Winkler
2020-11-10 01:47:58 +01:00
parent 1ddbf416d4
commit 3048342de7
3 changed files with 8 additions and 0 deletions

View File

@@ -84,6 +84,8 @@ class Consumer:
"warning",
"Skipping {} as it appears to be a duplicate".format(doc)
)
if settings.CONSUMER_DELETE_DUPLICATES:
self._cleanup_doc(doc)
return False
self.log("info", "Consuming {}".format(doc))

View File

@@ -258,6 +258,8 @@ Q_CLUSTER = {
# Paperless Specific Settings #
###############################################################################
# When truthy, the consumer calls its cleanup routine (_cleanup_doc) on a
# document it skips as a duplicate, instead of only logging a warning.
# NOTE(review): _cleanup_doc presumably deletes the file from the consumption
# directory -- confirm against the Consumer class.
# The value is obtained via __get_boolean (defined outside this excerpt) using
# the key PAPERLESS_CONSUMER_DELETE_DUPLICATES; presumably an environment
# variable, with the fallback when unset decided by that helper -- verify.
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")