Merge branch 'dev' into feature-ai

This commit is contained in:
shamoon
2025-09-30 11:09:24 -07:00
95 changed files with 5000 additions and 2131 deletions

View File

@@ -82,6 +82,13 @@ def _is_ignored(filepath: Path) -> bool:
def _consume(filepath: Path) -> None:
# Check permissions early
try:
filepath.stat()
except (PermissionError, OSError):
logger.warning(f"Not consuming file {filepath}: Permission denied.")
return
if filepath.is_dir() or _is_ignored(filepath):
return
@@ -323,7 +330,12 @@ class Command(BaseCommand):
# Also make sure the file exists still, some scanners might write a
# temporary file first
file_still_exists = filepath.exists() and filepath.is_file()
try:
file_still_exists = filepath.exists() and filepath.is_file()
except (PermissionError, OSError): # pragma: no cover
# If we can't check, let it fail in the _consume function
file_still_exists = True
continue
if waited_long_enough and file_still_exists:
_consume(filepath)

View File

@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# doc to doc is obviously not useful
if first_doc.pk == second_doc.pk:
continue
# Skip empty documents (e.g. password-protected)
if first_doc.content.strip() == "" or second_doc.content.strip() == "":
continue
# Skip matching which have already been matched together
# doc 1 to doc 2 is the same as doc 2 to doc 1
doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)