diff --git a/paperless.conf.example b/paperless.conf.example
index b1b63879d..a62d92e05 100644
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -57,6 +57,7 @@
 #PAPERLESS_CONSUMER_POLLING=10
 #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
 #PAPERLESS_CONSUMER_RECURSIVE=false
+#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE", ".DS_STORE/*", "._*", ".stfolder/*"]
 #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 #PAPERLESS_OPTIMIZE_THUMBNAILS=true
 #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py
index 9f0ce79c3..eb8c57c84 100644
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from pathlib import Path
+from pathlib import Path, PurePath
 from threading import Thread
 from time import sleep
 
@@ -36,15 +36,19 @@ def _tags_from_path(filepath):
     return tag_ids
 
 
-def _is_ignored(filepath):
-    # https://github.com/jonaswinkler/paperless-ng/discussions/1037
-    basename = os.path.basename(filepath)
-    if basename == ".DS_STORE":
-        return True
-    if basename.startswith("._"):
-        return True
-
-    return False
+def _is_ignored(filepath: str) -> bool:
+    """Return True if filepath matches any glob in CONSUMER_IGNORE_PATTERNS.
+
+    The path is first made relative to settings.CONSUMPTION_DIR, so this
+    raises ValueError for a path outside that directory. For relative
+    patterns, PurePath.match() compares against the trailing components
+    of the path: ".DS_STORE/*" also matches "foo/.DS_STORE/bar.pdf",
+    while "._*" only looks at the final component.
+    """
+    filepath_relative = PurePath(filepath).relative_to(
+        settings.CONSUMPTION_DIR)
+    return any(
+        filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
 
 
 def _consume(filepath):
diff --git a/src/documents/tests/test_management_consumer.py b/src/documents/tests/test_management_consumer.py
index ec5a8dc0b..377e8fc54 100644
--- a/src/documents/tests/test_management_consumer.py
+++ b/src/documents/tests/test_management_consumer.py
@@ -222,6 +222,23 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
         fnames = [os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list]
         self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
 
+    def test_is_ignored(self):
+        test_paths = [
+            (os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
+            (os.path.join(self.dirs.consumption_dir, "foo", "bar.pdf"), False),
+            (os.path.join(self.dirs.consumption_dir, ".DS_STORE"), True),
+            (os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
+            (os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"), True),
+            (os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
+            (os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
+            (os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
+        ]
+        for file_path, expected_ignored in test_paths:
+            self.assertEqual(
+                expected_ignored,
+                document_consumer._is_ignored(file_path),
+                f'_is_ignored("{file_path}") != {expected_ignored}')
+
 
 @override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
 class TestConsumerPolling(TestConsumer):
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index 254c7a6ce..5f03a406e 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -458,6 +458,14 @@ CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES
 
 CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
 
+# Ignore glob patterns, relative to PAPERLESS_CONSUMPTION_DIR.
+# ".DS_STORE" covers a file of that name (as the previous hard-coded check
+# did); ".DS_STORE/*" only covers files inside a directory of that name.
+CONSUMER_IGNORE_PATTERNS = list(
+    json.loads(
+        os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS",
+                  '[".DS_STORE", ".DS_STORE/*", "._*", ".stfolder/*"]')))
+
 CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
 
 OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")