Mirror of https://github.com/paperless-ngx/paperless-ngx.git
Separates the file ignore patterns from the folder ignore list and updates documentation
@@ -1168,21 +1168,44 @@ don't exist yet.
 
 #### [`PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>`](#PAPERLESS_CONSUMER_IGNORE_PATTERNS) {#PAPERLESS_CONSUMER_IGNORE_PATTERNS}
 
-: By default, paperless ignores certain files and folders in the
-consumption directory, such as system files created by the Mac OS
-or hidden folders some tools use to store data.
-
-This can be adjusted by configuring a custom json array with
-patterns to exclude.
-
-For example, `.DS_STORE/*` will ignore any files found in a folder
-named `.DS_STORE`, including `.DS_STORE/bar.pdf` and `foo/.DS_STORE/bar.pdf`
-
-A pattern like `._*` will ignore anything starting with `._`, including:
-`._foo.pdf` and `._bar/foo.pdf`
-
-Defaults to
-`[".DS_Store", ".DS_STORE", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*", "Thumbs.db"]`.
+: Additional regex patterns for files to ignore in the consumption directory. Patterns are
+standard Python regular expressions matched against the filename only (not the full path);
+use `^` and `$` anchors to match the start or end of the filename.
+
+See the [watchfiles documentation](https://watchfiles.helpmanual.io/api/filters/#watchfiles.BaseFilter.ignore_entity_patterns) for details on how the patterns are applied.
+
+This setting is for additional patterns beyond the built-in defaults. Common system files and directories are already ignored automatically.
+
+Example custom patterns:
+
+```json
+["^temp_", "\\.bak$", "^~"]
+```
+
+This would ignore:
+
+- Files starting with `temp_` (e.g., `temp_scan.pdf`)
+- Files ending with `.bak` (e.g., `document.pdf.bak`)
+- Files starting with `~` (e.g., `~$document.docx`)
+
+Defaults to `[]` (an empty list; only the built-in defaults apply).
+
+The built-in default patterns are `[.DS_Store, .DS_STORE, ._*, desktop.ini, Thumbs.db]` and cannot be overridden.
+
+#### [`PAPERLESS_CONSUMER_IGNORE_DIRS=<json>`](#PAPERLESS_CONSUMER_IGNORE_DIRS) {#PAPERLESS_CONSUMER_IGNORE_DIRS}
+
+: Additional directory names to ignore in the consumption directory. Directories matching these names (and all their contents) will be skipped.
+
+This setting is for additional directories beyond the built-in defaults. Matching is done by directory name only, not full path.
+
+Example:
+
+```json
+["temp", "incoming", ".hidden"]
+```
+
+Defaults to `[]` (an empty list; only the built-in defaults apply).
+
+The built-in default ignored directories are `[.stfolder, .stversions, .localized, @eaDir, .Spotlight-V100, .Trashes, __MACOSX]` and cannot be overridden.
 
 #### [`PAPERLESS_CONSUMER_BARCODE_SCANNER=<string>`](#PAPERLESS_CONSUMER_BARCODE_SCANNER) {#PAPERLESS_CONSUMER_BARCODE_SCANNER}
 
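The patterns above are applied to the final path component only. As a quick way to sanity-check a candidate pattern before adding it to `PAPERLESS_CONSUMER_IGNORE_PATTERNS`, the sketch below applies each regex to the filename with `re.search`, mirroring the behaviour of the linked watchfiles `ignore_entity_patterns` API rather than paperless's own code; the `would_ignore` helper and the sample paths are illustrative only.

```python
import re
from pathlib import PurePath

# Example custom patterns from the documentation above,
# plus one built-in default pattern for comparison.
custom_patterns = [r"^temp_", r"\.bak$", r"^~"]
builtin_patterns = [r"^\._.*"]  # macOS resource forks (built-in default)

compiled = [re.compile(p) for p in custom_patterns + builtin_patterns]


def would_ignore(path: str) -> bool:
    """Return True if any pattern matches the filename (never the full path)."""
    name = PurePath(path).name  # only the last path component is tested
    return any(regex.search(name) for regex in compiled)


samples = [
    "scans/temp_scan.pdf",     # ignored: filename starts with "temp_"
    "scans/document.pdf.bak",  # ignored: filename ends with ".bak"
    "scans/._resource.pdf",    # ignored: built-in "._*" default
    "temp/invoice.pdf",        # kept: "temp" is in the directory part, not the filename
    "scans/invoice.pdf",       # kept
]
for sample in samples:
    print(f"{sample}: {'ignored' if would_ignore(sample) else 'kept'}")
```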
@@ -1283,23 +1306,22 @@ within your documents.
 
 #### [`PAPERLESS_CONSUMER_POLLING_INTERVAL=<num>`](#PAPERLESS_CONSUMER_POLLING_INTERVAL) {#PAPERLESS_CONSUMER_POLLING_INTERVAL}
 
-: If paperless won't find documents added to your consume folder, it
-might not be able to automatically detect filesystem changes. In
-that case, specify a polling interval in seconds here, which will
-then cause paperless to periodically check your consumption
-directory for changes. This will also disable listening for file
-system changes with `inotify`.
-
-Defaults to 0, which disables polling and uses filesystem
-notifications.
+: Configures how the consumer detects new files in the consumption directory.
+
+When set to `0` (the default), paperless uses native filesystem notifications for efficient, immediate detection of new files.
+
+When set to a positive number, paperless polls the consumption directory at that interval in seconds. Use polling for network filesystems (NFS, SMB/CIFS) where native notifications may not work reliably.
+
+Defaults to 0.
 
 #### [`PAPERLESS_CONSUMER_STABILITY_DELAY=<num>`](#PAPERLESS_CONSUMER_STABILITY_DELAY) {#PAPERLESS_CONSUMER_STABILITY_DELAY}
 
-: Once a file has been detected in the consume folder, it must remain unchanged for this
-many seconds before consumption will start on it. If the file is modified, its size changes
-or the watching detects any other change on it, the timer will restart.
-
-Defaults to 5.
+: Sets the time in seconds that a file must remain unchanged (same size and modification time) before paperless will begin consuming it.
+
+Increase this value if you experience issues with files being consumed before they are fully written, particularly on slower network storage or
+with certain scanner quirks.
+
+Defaults to 5.0 seconds.
 
 ## Workflow webhooks
 
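For readers wondering what the polling switch maps to underneath: the consumer is built on watchfiles (see the code changes below), and watchfiles exposes `force_polling` and `poll_delay_ms` options on its `watch()` call. The sketch below shows that mechanism in isolation; the exact way paperless wires `PAPERLESS_CONSUMER_POLLING_INTERVAL` onto these options, and the example directory path, are assumptions for illustration.

```python
from pathlib import Path

from watchfiles import DefaultFilter, watch

# Assumed example values; in paperless these come from
# PAPERLESS_CONSUMER_POLLING_INTERVAL and the consumption directory setting.
polling_interval = 10.0  # seconds; 0 means "rely on native filesystem notifications"
consume_dir = Path("/usr/src/paperless/consume")

for changes in watch(
    consume_dir,
    watch_filter=DefaultFilter(),
    # force_polling and poll_delay_ms are watchfiles options; mapping the paperless
    # setting onto them one-to-one like this is an assumption for illustration.
    force_polling=polling_interval > 0,
    poll_delay_ms=int(polling_interval * 1000) if polling_interval > 0 else 300,
    recursive=True,
):
    for change, path in changes:
        print(change.name, path)
```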
@@ -9,7 +9,6 @@ native OS notifications and polling fallback.
 from __future__ import annotations
 
 import logging
-import re
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Event
@@ -57,7 +56,7 @@ class TrackedFile:
             self.last_mtime = stat.st_mtime
             self.last_size = stat.st_size
             return True
-        except (FileNotFoundError, PermissionError):
+        except (FileNotFoundError, PermissionError, OSError):
             return False
 
     def is_unchanged(self) -> bool:
@@ -68,7 +67,7 @@ class TrackedFile:
         try:
             stat = self.path.stat()
             return stat.st_mtime == self.last_mtime and stat.st_size == self.last_size
-        except (FileNotFoundError, PermissionError):
+        except (FileNotFoundError, PermissionError, OSError):
             return False
 
 
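The hunks above broaden the exception handling in `TrackedFile`, the helper behind the stability delay documented earlier: a file is consumed only after its size and modification time stop changing for the configured number of seconds. The `wait_until_stable` function below is a simplified, assumed sketch of that idea, not the actual class, which tracks many files at once and is driven by watch events rather than a sleep loop.

```python
import time
from pathlib import Path


def wait_until_stable(path: Path, delay: float = 5.0, poll: float = 0.5) -> bool:
    """Return True once `path` keeps the same size and mtime for `delay` seconds."""
    try:
        stat = path.stat()
    except OSError:  # covers FileNotFoundError and PermissionError as well
        return False

    last = (stat.st_size, stat.st_mtime)
    stable_since = time.monotonic()

    while time.monotonic() - stable_since < delay:
        time.sleep(poll)
        try:
            stat = path.stat()
        except OSError:
            return False
        current = (stat.st_size, stat.st_mtime)
        if current != last:
            # The file changed: restart the stability timer
            last = current
            stable_since = time.monotonic()
    return True
```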
@@ -138,7 +137,7 @@ class FileStabilityTracker:
         to_remove: list[Path] = []
         to_yield: list[Path] = []
 
-        for path, tracked in self._tracked.items():
+        for path, tracked in list(self._tracked.items()):
             time_since_event = current_time - tracked.last_event_time
 
             if time_since_event < self.stability_delay:
@@ -165,7 +164,7 @@ class FileStabilityTracker:
                 # Not a regular file (directory, symlink, etc.)
                 to_remove.append(path)
                 logger.debug(f"Path is not a regular file: {path}")
-            except (PermissionError, FileNotFoundError) as e:
+            except (PermissionError, OSError) as e:
                 logger.warning(f"Cannot access {path}: {e}")
                 to_remove.append(path)
 
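The `list(self._tracked.items())` change above takes a snapshot of the tracked-files dictionary before iterating, so entries can be removed while the loop (or anything it triggers) is running without invalidating the iterator. A minimal demonstration of the failure mode it guards against:

```python
# Iterating a dict directly while deleting from it raises RuntimeError;
# iterating over a snapshot (list(...)) is safe. This mirrors the
# list(self._tracked.items()) change above.
tracked = {"a.pdf": 1, "b.pdf": 2, "c.pdf": 3}

try:
    for name, _ in tracked.items():
        if name == "b.pdf":
            del tracked[name]  # mutation during iteration
except RuntimeError as err:
    print(f"direct iteration failed: {err}")

tracked = {"a.pdf": 1, "b.pdf": 2, "c.pdf": 3}
for name, _ in list(tracked.items()):  # snapshot of the items
    if name == "b.pdf":
        del tracked[name]
print(tracked)  # {'a.pdf': 1, 'c.pdf': 3}
```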
@@ -190,34 +189,37 @@ class FileStabilityTracker:
 
 class ConsumerFilter(DefaultFilter):
     """
-    Custom filter for the document consumer.
+    Filter for watchfiles that accepts only supported document types
+    and ignores system files/directories.
 
-    Filters files based on:
-    - Supported file extensions
-    - User-configured ignore patterns (regex)
-    - Default ignore patterns for common system files
+    Extends DefaultFilter leveraging its built-in filtering:
+    - `ignore_dirs`: Directory names to ignore (and all their contents)
+    - `ignore_entity_patterns`: Regex patterns matched against filename/dirname only
+
+    We add custom logic for file extension filtering (only accept supported
+    document types), which the library doesn't provide.
     """
 
-    # Default regex patterns to ignore (matched against filename only)
-    DEFAULT_IGNORE_PATTERNS: Final[frozenset[str]] = frozenset(
-        {
+    # Regex patterns for files to always ignore (matched against filename only)
+    # These are passed to DefaultFilter.ignore_entity_patterns
+    DEFAULT_IGNORE_PATTERNS: Final[tuple[str, ...]] = (
         r"^\.DS_Store$",
         r"^\.DS_STORE$",
         r"^\._.*",
         r"^desktop\.ini$",
         r"^Thumbs\.db$",
-        },
     )
 
-    # Directories to always ignore (matched by name via DefaultFilter)
+    # Directories to always ignore (passed to DefaultFilter.ignore_dirs)
+    # These are matched by directory name, not full path
     DEFAULT_IGNORE_DIRS: Final[tuple[str, ...]] = (
-        ".stfolder",
-        ".stversions",
-        ".localized",
-        "@eaDir",
-        ".Spotlight-V100",
-        ".Trashes",
-        "__MACOSX",
+        ".stfolder",  # Syncthing
+        ".stversions",  # Syncthing
+        ".localized",  # macOS
+        "@eaDir",  # Synology NAS
+        ".Spotlight-V100",  # macOS
+        ".Trashes",  # macOS
+        "__MACOSX",  # macOS archive artifacts
     )
 
     def __init__(
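The rewritten docstring and constants above delegate all ignore handling to watchfiles' `DefaultFilter`. Assuming the documented watchfiles behaviour (`ignore_dirs` matches any path component, while `ignore_entity_patterns` are searched against the final component only), a filter configured the same way behaves roughly as sketched below; the sample paths and expected results are illustrative, not taken from paperless. Note that passing these arguments replaces `DefaultFilter`'s own defaults, which is why `ConsumerFilter` merges its defaults with the user-supplied values first.

```python
from watchfiles import Change, DefaultFilter

# A filter configured the way ConsumerFilter configures its parent class
# (values abbreviated from the DEFAULT_IGNORE_* constants above).
f = DefaultFilter(
    ignore_dirs=(".stfolder", "@eaDir"),
    ignore_entity_patterns=(r"^\.DS_Store$", r"^\._.*"),
)

checks = {
    "/consume/@eaDir/thumb.pdf": False,     # expected rejected: "@eaDir" is a path component
    "/consume/scans/._invoice.pdf": False,  # expected rejected: filename matches "^\\._.*"
    "/consume/scans/invoice.pdf": True,     # expected accepted
}
for path, expected in checks.items():
    result = f(Change.added, path)
    print(f"{path}: {result} (expected {expected})")
```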
@@ -225,38 +227,37 @@ class ConsumerFilter(DefaultFilter):
         *,
         supported_extensions: frozenset[str] | None = None,
         ignore_patterns: list[str] | None = None,
-        consumption_dir: Path | None = None,
+        ignore_dirs: list[str] | None = None,
     ) -> None:
         """
         Initialize the consumer filter.
 
         Args:
-            supported_extensions: Set of supported file extensions (e.g., {".pdf", ".png"}).
+            supported_extensions: Set of file extensions to accept (e.g., {".pdf", ".png"}).
                 If None, uses get_supported_file_extensions().
             ignore_patterns: Additional regex patterns to ignore (matched against filename).
-            consumption_dir: Base consumption directory (unused, kept for API compatibility).
+            ignore_dirs: Additional directory names to ignore (merged with defaults).
         """
-        # Combine default and user patterns
-        all_patterns = set(self.DEFAULT_IGNORE_PATTERNS)
-        if ignore_patterns:
-            all_patterns.update(ignore_patterns)
-
-        # Compile all patterns
-        self._ignore_regexes: list[re.Pattern[str]] = [
-            re.compile(pattern) for pattern in all_patterns
-        ]
-
         # Get supported extensions
         if supported_extensions is None:
             supported_extensions = frozenset(get_supported_file_extensions())
         self._supported_extensions = supported_extensions
 
-        # Call parent with directory ignore list
-        # DefaultFilter.ignore_dirs matches directory names, not full paths
+        # Combine default and user patterns
+        all_patterns: list[str] = list(self.DEFAULT_IGNORE_PATTERNS)
+        if ignore_patterns:
+            all_patterns.extend(ignore_patterns)
+
+        # Combine default and user ignore_dirs
+        all_ignore_dirs: list[str] = list(self.DEFAULT_IGNORE_DIRS)
+        if ignore_dirs:
+            all_ignore_dirs.extend(ignore_dirs)
+
+        # Let DefaultFilter handle all the pattern and directory filtering
         super().__init__(
-            ignore_dirs=self.DEFAULT_IGNORE_DIRS,
-            ignore_entity_patterns=None,
-            ignore_paths=None,
+            ignore_dirs=tuple(all_ignore_dirs),
+            ignore_entity_patterns=tuple(all_patterns),
+            ignore_paths=(),
         )
 
     def __call__(self, change: Change, path: str) -> bool:
@@ -264,39 +265,32 @@ class ConsumerFilter(DefaultFilter):
         Filter function for watchfiles.
 
         Returns True if the path should be watched, False to ignore.
+
+        The parent DefaultFilter handles:
+        - Hidden files/directories (starting with .)
+        - Directories in ignore_dirs
+        - Files/directories matching ignore_entity_patterns
+
+        We additionally filter files by extension.
         """
-        # Let parent filter handle directory ignoring and basic checks
+        # Let parent filter handle directory ignoring and pattern matching
         if not super().__call__(change, path):
             return False
 
         path_obj = Path(path)
 
-        # For directories, parent filter already handled ignore_dirs
+        # For directories, parent filter already handled everything
         if path_obj.is_dir():
             return True
 
         # For files, check extension
-        if not self._has_supported_extension(path_obj):
-            return False
-
-        # Check filename against ignore patterns
-        return not self._matches_ignore_pattern(path_obj.name)
+        return self._has_supported_extension(path_obj)
 
     def _has_supported_extension(self, path: Path) -> bool:
         """Check if the file has a supported extension."""
         suffix = path.suffix.lower()
         return suffix in self._supported_extensions
 
-    def _matches_ignore_pattern(self, filename: str) -> bool:
-        """Check if the filename matches any ignore pattern."""
-        for regex in self._ignore_regexes:
-            if regex.match(filename):
-                logger.debug(
-                    f"Filename {filename} matched ignore pattern {regex.pattern}",
-                )
-                return True
-        return False
-
 
 def _tags_from_path(filepath: Path, consumption_dir: Path) -> list[int]:
     """
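The `__call__` contract shown above (return `True` to keep an event) is what watchfiles expects from a `watch_filter`. The sketch below shows how such a filter plugs into `watchfiles.watch()` with a stop event; it is a loose stand-in for the management command's loop, and the `PdfOnlyFilter` class and the consumption path are assumptions for illustration, not paperless code.

```python
import threading
from pathlib import Path

from watchfiles import Change, DefaultFilter, watch


class PdfOnlyFilter(DefaultFilter):
    """Toy stand-in for ConsumerFilter: parent checks first, then the extension."""

    def __call__(self, change: Change, path: str) -> bool:
        if not super().__call__(change, path):
            return False
        p = Path(path)
        return p.is_dir() or p.suffix.lower() == ".pdf"


stop_event = threading.Event()  # the real command checks a stop flag the same way

for changes in watch(
    "/usr/src/paperless/consume",  # assumed consumption directory
    watch_filter=PdfOnlyFilter(),
    stop_event=stop_event,
    recursive=True,
):
    for change, path in changes:
        # The real command feeds these events into the stability tracker
        print(f"{change.name}: {path}")
```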
@@ -338,7 +332,7 @@ def _consume_file(
         if not filepath.is_file():
             logger.debug(f"Not consuming {filepath}: not a file or doesn't exist")
             return
-    except (PermissionError, FileNotFoundError) as e:
+    except (PermissionError, OSError) as e:
         logger.warning(f"Not consuming {filepath}: {e}")
         return
 
@@ -347,7 +341,7 @@ def _consume_file(
     if subdirs_as_tags:
         try:
             tag_ids = _tags_from_path(filepath, consumption_dir)
-        except Exception:  # pragma: nocover
+        except Exception:
             logger.exception(f"Error creating tags from path for {filepath}")
 
     # Queue for consumption
@@ -404,7 +398,7 @@ class Command(BaseCommand):
         # Resolve consumption directory
         directory = options.get("directory")
         if not directory:
-            directory = settings.CONSUMPTION_DIR
+            directory = getattr(settings, "CONSUMPTION_DIR", None)
         if not directory:
             raise CommandError("CONSUMPTION_DIR is not configured")
 
@@ -425,13 +419,14 @@ class Command(BaseCommand):
         polling_interval: float = settings.CONSUMER_POLLING_INTERVAL
         stability_delay: float = settings.CONSUMER_STABILITY_DELAY
         ignore_patterns: list[str] = settings.CONSUMER_IGNORE_PATTERNS
+        ignore_dirs: list[str] = settings.CONSUMER_IGNORE_DIRS
         is_testing: bool = options.get("testing", False)
         is_oneshot: bool = options.get("oneshot", False)
 
         # Create filter
         consumer_filter = ConsumerFilter(
             ignore_patterns=ignore_patterns,
-            consumption_dir=directory,
+            ignore_dirs=ignore_dirs,
         )
 
         # Process existing files
@@ -559,10 +554,10 @@ class Command(BaseCommand):
                 elif is_testing:
                     # In testing, use short timeout to check stop flag
                     timeout_ms = testing_timeout_ms
-                else:  # pragma: nocover
+                else:
                     # No pending files, wait indefinitely
                     timeout_ms = 0
 
-        except KeyboardInterrupt:  # pragma: nocover
+        except KeyboardInterrupt:
             logger.info("Received interrupt, stopping consumer")
             self.stop_flag.set()
@@ -46,9 +46,6 @@ if TYPE_CHECKING:
     from pytest_mock import MockerFixture
 
 
-# -- Fixtures --
-
-
 @pytest.fixture
 def stability_tracker() -> FileStabilityTracker:
     """Create a FileStabilityTracker with a short delay for testing."""
@@ -355,6 +352,28 @@ class TestConsumerFilter:
         for pattern in ConsumerFilter.DEFAULT_IGNORE_PATTERNS:
             re.compile(pattern)
 
+    def test_custom_ignore_dirs(self, tmp_path: Path) -> None:
+        """Test filter respects custom ignore_dirs."""
+        filter_obj = ConsumerFilter(
+            supported_extensions=frozenset({".pdf"}),
+            ignore_dirs=["custom_ignored_dir"],
+        )
+
+        # Custom ignored directory should be rejected
+        custom_dir = tmp_path / "custom_ignored_dir"
+        custom_dir.mkdir()
+        assert filter_obj(Change.added, str(custom_dir)) is False
+
+        # Normal directory should be accepted
+        normal_dir = tmp_path / "normal_dir"
+        normal_dir.mkdir()
+        assert filter_obj(Change.added, str(normal_dir)) is True
+
+        # Default ignored directories should still be ignored
+        stfolder = tmp_path / ".stfolder"
+        stfolder.mkdir()
+        assert filter_obj(Change.added, str(stfolder)) is False
+
 
 class TestConsumerFilterDefaults:
     """Tests for ConsumerFilter with default settings."""
@@ -617,6 +636,8 @@ class ConsumerThread(Thread):
 
     def run(self) -> None:
         try:
+            # Use override_settings to avoid polluting global settings
+            # which would affect other tests running on the same worker
             with override_settings(
                 SCRATCH_DIR=self.scratch_dir,
                 CONSUMER_RECURSIVE=self.recursive,
@@ -633,8 +654,9 @@ class ConsumerThread(Thread):
         except Exception as e:
             self.exception = e
         finally:
-            Tag.objects.all().delete()
             # Close database connections created in this thread
+            # Important: Do not perform any database operations here (like Tag cleanup)
+            # as they create new connections that won't be properly closed
             db.connections.close_all()
 
     def stop(self) -> None:
@@ -672,7 +694,7 @@ def start_consumer(
     finally:
         # Cleanup all threads that were started
         for thread in threads:
-            thread.stop()
+            thread.stop_and_wait()
 
         failed_threads = []
         for thread in threads:
@@ -680,9 +702,11 @@ def start_consumer(
             if thread.is_alive():
                 failed_threads.append(thread)
 
-        # Clean up any Tags created by threads
+        # Clean up any Tags created by threads (they bypass test transaction isolation)
         Tag.objects.all().delete()
 
+        db.connections.close_all()
+
         if failed_threads:
             pytest.fail(
                 f"{len(failed_threads)} consumer thread(s) did not stop within timeout",
@@ -799,6 +823,8 @@ class TestCommandWatch:
             assert thread.is_alive()
         finally:
             thread.stop_and_wait(timeout=5.0)
+            # Clean up any Tags created by the thread
+            Tag.objects.all().delete()
 
         assert not thread.is_alive()
 
@@ -860,8 +886,15 @@ class TestCommandWatchRecursive:
         sample_pdf: Path,
         mock_consume_file_delay: MagicMock,
         start_consumer: Callable[..., ConsumerThread],
+        mocker: MockerFixture,
     ) -> None:
         """Test subdirs_as_tags creates tags from directory names."""
+        # Mock _tags_from_path to avoid database operations in the consumer thread
+        mock_tags = mocker.patch(
+            "documents.management.commands.document_consumer._tags_from_path",
+            return_value=[1, 2],
+        )
+
         subdir = consumption_dir / "Invoices" / "2024"
         subdir.mkdir(parents=True)
 
@@ -875,6 +908,7 @@ class TestCommandWatchRecursive:
             raise thread.exception
 
         mock_consume_file_delay.delay.assert_called()
+        mock_tags.assert_called()
         call_args = mock_consume_file_delay.delay.call_args
         overrides = call_args[0][1]
         assert overrides.tag_ids is not None
@@ -934,3 +968,5 @@ class TestCommandWatchEdgeCases:
             assert thread.is_alive()
         finally:
             thread.stop_and_wait(timeout=5.0)
+            # Clean up any Tags created by the thread
+            Tag.objects.all().delete()
@@ -1019,7 +1019,7 @@ CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES
 
 CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
 
-# Ignore regex patterns, relative to PAPERLESS_CONSUMPTION_DIR
+# Ignore regex patterns, matched against filename only
 CONSUMER_IGNORE_PATTERNS = list(
     json.loads(
         os.getenv(
@@ -1029,6 +1029,16 @@ CONSUMER_IGNORE_PATTERNS = list(
         ),
     ),
 )
 
+# Directories to always ignore. These are matched by directory name, not full path
+CONSUMER_IGNORE_DIRS = list(
+    json.loads(
+        os.getenv(
+            "PAPERLESS_CONSUMER_IGNORE_DIRS",
+            json.dumps([]),
+        ),
+    ),
+)
+
 CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
 
 CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
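Both settings above are read as JSON arrays from the environment. The sketch below mirrors that parsing shape with the example values from the documentation section; it reimplements the two `json.loads(os.getenv(...))` expressions for illustration rather than importing paperless settings.

```python
import json
import os

# Example environment values matching the documentation examples above.
os.environ["PAPERLESS_CONSUMER_IGNORE_PATTERNS"] = '["^temp_", "\\\\.bak$", "^~"]'
os.environ["PAPERLESS_CONSUMER_IGNORE_DIRS"] = '["temp", "incoming", ".hidden"]'

# Same parsing shape as the settings hunk above: JSON array -> Python list.
ignore_patterns = list(
    json.loads(os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS", json.dumps([]))),
)
ignore_dirs = list(
    json.loads(os.getenv("PAPERLESS_CONSUMER_IGNORE_DIRS", json.dumps([]))),
)

print(ignore_patterns)  # ['^temp_', '\\.bak$', '^~']
print(ignore_dirs)      # ['temp', 'incoming', '.hidden']
```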