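Just curious: remove file-extension filtering from the document consumer (drops the `supported_extensions` option of `ConsumerFilter`, plus `get_supported_file_extensions` / `is_file_ext_supported` in `documents.parsers` and their tests)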

2026-02-22 00:49:35 -06:00 · 2026-02-01 21:05:19 -08:00
parent a9c0b06e28
commit c6d85a8d28
4 changed files with 0 additions and 109 deletions
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -28,7 +28,6 @@ from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.data_models import DocumentSource
 from documents.models import Tag
-from documents.parsers import get_supported_file_extensions
 from documents.tasks import consume_file

 if TYPE_CHECKING:
@@ -216,7 +215,6 @@ class ConsumerFilter(DefaultFilter):
    def __init__(
        self,
        *,
-        supported_extensions: frozenset[str] | None = None,
        ignore_patterns: list[str] | None = None,
        ignore_dirs: list[str] | None = None,
    ) -> None:
@@ -224,16 +222,9 @@ class ConsumerFilter(DefaultFilter):
        Initialize the consumer filter.

        Args:
-            supported_extensions: Set of file extensions to accept (e.g., {".pdf", ".png"}).
-                If None, uses get_supported_file_extensions().
            ignore_patterns: Additional regex patterns to ignore (matched against filename).
            ignore_dirs: Additional directory names to ignore (merged with defaults).
        """
-        # Get supported extensions
-        if supported_extensions is None:
-            supported_extensions = frozenset(get_supported_file_extensions())
-        self._supported_extensions = supported_extensions
-
        # Combine default and user patterns
        all_patterns: list[str] = list(self.DEFAULT_IGNORE_PATTERNS)
        if ignore_patterns:
@@ -261,8 +252,6 @@ class ConsumerFilter(DefaultFilter):
        - Hidden files/directories (starting with .)
        - Directories in ignore_dirs
        - Files/directories matching ignore_entity_patterns
-
-        We additionally filter files by extension.
        """
        # Let parent filter handle directory ignoring and pattern matching
        if not super().__call__(change, path):
@@ -274,14 +263,6 @@ class ConsumerFilter(DefaultFilter):
        if path_obj.is_dir():
            return True

-        # For files, check extension
-        return self._has_supported_extension(path_obj)
-
-    def _has_supported_extension(self, path: Path) -> bool:
-        """Check if the file has a supported extension."""
-        suffix = path.suffix.lower()
-        return suffix in self._supported_extensions
-

 def _tags_from_path(filepath: Path, consumption_dir: Path) -> list[int]:
    """
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -85,34 +85,6 @@ def get_default_file_extension(mime_type: str) -> str:
        return ""


-@lru_cache(maxsize=8)
-def is_file_ext_supported(ext: str) -> bool:
-    """
-    Returns True if the file extension is supported, False otherwise
-    TODO: Investigate why this really exists, why not use mimetype
-    """
-    if ext:
-        return ext.lower() in get_supported_file_extensions()
-    else:
-        return False
-
-
-def get_supported_file_extensions() -> set[str]:
-    extensions = set()
-    for response in document_consumer_declaration.send(None):
-        parser_declaration = response[1]
-        supported_mime_types = parser_declaration["mime_types"]
-
-        for mime_type in supported_mime_types:
-            extensions.update(mimetypes.guess_all_extensions(mime_type))
-            # Python's stdlib might be behind, so also add what the parser
-            # says is the default extension
-            # This makes image/webp supported on Python < 3.11
-            extensions.add(supported_mime_types[mime_type])
-
-    return extensions
-
-
 def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
    """
    Returns the best parser (by weight) for the given mimetype or
--- a/src/documents/tests/test_management_consumer.py
+++ b/src/documents/tests/test_management_consumer.py
@@ -90,7 +90,6 @@ def sample_pdf(tmp_path: Path) -> Path:
 def consumer_filter() -> ConsumerFilter:
    """Create a ConsumerFilter for testing."""
    return ConsumerFilter(
-        supported_extensions=frozenset({".pdf", ".png", ".jpg"}),
        ignore_patterns=[r"^custom_ignore"],
    )

@@ -105,15 +104,6 @@ def mock_consume_file_delay(mocker: MockerFixture) -> MagicMock:
    return mock_task


-@pytest.fixture
-def mock_supported_extensions(mocker: MockerFixture) -> MagicMock:
-    """Mock get_supported_file_extensions to return only .pdf."""
-    return mocker.patch(
-        "documents.management.commands.document_consumer.get_supported_file_extensions",
-        return_value={".pdf"},
-    )
-
-
 def wait_for_mock_call(
    mock_obj: MagicMock,
    timeout_s: float = 5.0,
@@ -395,7 +385,6 @@ class TestConsumerFilter:
    def test_custom_ignore_dirs(self, tmp_path: Path) -> None:
        """Test filter respects custom ignore_dirs."""
        filter_obj = ConsumerFilter(
-            supported_extensions=frozenset({".pdf"}),
            ignore_dirs=["custom_ignored_dir"],
        )

@@ -415,25 +404,6 @@ class TestConsumerFilter:
        assert filter_obj(Change.added, str(stfolder)) is False


-class TestConsumerFilterDefaults:
-    """Tests for ConsumerFilter with default settings."""
-
-    def test_filter_with_mocked_extensions(
-        self,
-        tmp_path: Path,
-        mocker: MockerFixture,
-    ) -> None:
-        """Test filter works when using mocked extensions from parser."""
-        mocker.patch(
-            "documents.management.commands.document_consumer.get_supported_file_extensions",
-            return_value={".pdf", ".png"},
-        )
-        filter_obj = ConsumerFilter()
-        test_file = tmp_path / "document.pdf"
-        test_file.touch()
-        assert filter_obj(Change.added, str(test_file)) is True
-
-
 class TestConsumeFile:
    """Tests for the _consume_file function."""

@@ -605,7 +575,6 @@ class TestCommandValidation:
            cmd.handle(directory=str(sample_pdf), oneshot=True, testing=False)


-@pytest.mark.usefixtures("mock_supported_extensions")
 class TestCommandOneshot:
    """Tests for oneshot mode."""

@@ -652,25 +621,6 @@ class TestCommandOneshot:

        mock_consume_file_delay.delay.assert_called_once()

-    def test_ignores_unsupported_extensions(
-        self,
-        consumption_dir: Path,
-        scratch_dir: Path,
-        mock_consume_file_delay: MagicMock,
-        settings: SettingsWrapper,
-    ) -> None:
-        """Test oneshot mode ignores unsupported file extensions."""
-        target = consumption_dir / "document.xyz"
-        target.write_bytes(b"content")
-
-        settings.SCRATCH_DIR = scratch_dir
-        settings.CONSUMER_IGNORE_PATTERNS = []
-
-        cmd = Command()
-        cmd.handle(directory=str(consumption_dir), oneshot=True, testing=False)
-
-        mock_consume_file_delay.delay.assert_not_called()
-

 class ConsumerThread(Thread):
    """Thread wrapper for running the consumer command with proper cleanup."""
@@ -739,7 +689,6 @@ class ConsumerThread(Thread):
 def start_consumer(
    consumption_dir: Path,
    scratch_dir: Path,
-    mock_supported_extensions: MagicMock,
 ) -> Generator[Callable[..., ConsumerThread], None, None]:
    """Start a consumer thread and ensure cleanup."""
    threads: list[ConsumerThread] = []
@@ -875,7 +824,6 @@ class TestCommandWatch:
        assert call_args.original_file.name == "valid.pdf"

    @pytest.mark.django_db
-    @pytest.mark.usefixtures("mock_supported_extensions")
    def test_stop_flag_stops_consumer(
        self,
        consumption_dir: Path,
@@ -1017,7 +965,6 @@ class TestCommandWatchEdgeCases:

        mock_consume_file_delay.delay.assert_not_called()

-    @pytest.mark.usefixtures("mock_supported_extensions")
    def test_handles_task_exception(
        self,
        consumption_dir: Path,
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -7,7 +7,6 @@ from django.test import override_settings

 from documents.parsers import get_default_file_extension
 from documents.parsers import get_parser_class_for_mime_type
-from documents.parsers import get_supported_file_extensions
 from documents.parsers import is_file_ext_supported
 from paperless_tesseract.parsers import RasterisedDocumentParser
 from paperless_text.parsers import TextDocumentParser
@@ -145,10 +144,7 @@ class TestParserAvailability(TestCase):
            ("image/webp", ".webp"),
        ]

-        supported_exts = get_supported_file_extensions()
-
        for mime_type, ext in supported_mimes_and_exts:
-            self.assertIn(ext, supported_exts)
            self.assertEqual(get_default_file_extension(mime_type), ext)
            self.assertIsInstance(
                get_parser_class_for_mime_type(mime_type)(logging_group=None),
@@ -169,10 +165,7 @@ class TestParserAvailability(TestCase):
            ("text/csv", ".csv"),
        ]

-        supported_exts = get_supported_file_extensions()
-
        for mime_type, ext in supported_mimes_and_exts:
-            self.assertIn(ext, supported_exts)
            self.assertEqual(get_default_file_extension(mime_type), ext)
            self.assertIsInstance(
                get_parser_class_for_mime_type(mime_type)(logging_group=None),
@@ -202,10 +195,8 @@ class TestParserAvailability(TestCase):
        with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
            app = apps.get_app_config("paperless_tika")
            app.ready()
-            supported_exts = get_supported_file_extensions()

        for mime_type, ext in supported_mimes_and_exts:
-            self.assertIn(ext, supported_exts)
            self.assertEqual(get_default_file_extension(mime_type), ext)
            self.assertIsInstance(
                get_parser_class_for_mime_type(mime_type)(logging_group=None),