Make ignores configurable

Adds config file setting PAPERLESS_CONSUMER_IGNORE_PATTERNS.
This commit is contained in:
Daniel Albers 2021-08-08 21:29:36 +02:00
parent 566b3ed53d
commit 3ebe6d5aef
No known key found for this signature in database
GPG Key ID: A84EEFE67A2A182A
4 changed files with 29 additions and 10 deletions

View File

@ -57,6 +57,7 @@
#PAPERLESS_CONSUMER_POLLING=10 #PAPERLESS_CONSUMER_POLLING=10
#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
#PAPERLESS_CONSUMER_RECURSIVE=false #PAPERLESS_CONSUMER_RECURSIVE=false
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*"]
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
#PAPERLESS_OPTIMIZE_THUMBNAILS=true #PAPERLESS_OPTIMIZE_THUMBNAILS=true
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh

View File

@ -1,6 +1,6 @@
import logging import logging
import os import os
from pathlib import Path from pathlib import Path, PurePath
from threading import Thread from threading import Thread
from time import sleep from time import sleep
@ -36,15 +36,11 @@ def _tags_from_path(filepath):
return tag_ids return tag_ids
def _is_ignored(filepath): def _is_ignored(filepath: str) -> bool:
# https://github.com/jonaswinkler/paperless-ng/discussions/1037 filepath_relative = PurePath(filepath).relative_to(
basename = os.path.basename(filepath) settings.CONSUMPTION_DIR)
if basename == ".DS_STORE": return any(
return True filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
if basename.startswith("._"):
return True
return False
def _consume(filepath): def _consume(filepath):

View File

@ -222,6 +222,22 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
fnames = [os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list] fnames = [os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"]) self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
def test_is_ignored(self):
test_paths = [
(os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
(os.path.join(self.dirs.consumption_dir, "foo","bar.pdf"), False),
(os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"), True),
(os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
]
for file_path, expected_ignored in test_paths:
self.assertEqual(
expected_ignored,
document_consumer._is_ignored(file_path),
f'_is_ignored("{file_path}") != {expected_ignored}')
@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20) @override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
class TestConsumerPolling(TestConsumer): class TestConsumerPolling(TestConsumer):

View File

@ -458,6 +458,12 @@ CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE") CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
# Ignore glob patterns, relative to PAPERLESS_CONSUMPTION_DIR
CONSUMER_IGNORE_PATTERNS = list(
json.loads(
os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*"]')))
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS") CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true") OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")