diff --git a/pyproject.toml b/pyproject.toml
index f5c484ae4..64df97c17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.4"
 description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src-ui/package.json b/src-ui/package.json
index dcfe3ed63..9690e86c0 100644
--- a/src-ui/package.json
+++ b/src-ui/package.json
@@ -1,6 +1,6 @@
 {
   "name": "paperless-ngx-ui",
-  "version": "2.20.3",
+  "version": "2.20.4",
   "scripts": {
     "preinstall": "npx only-allow pnpm",
     "ng": "ng",
diff --git a/src-ui/src/environments/environment.prod.ts b/src-ui/src/environments/environment.prod.ts
index c8bb844e9..d27ab9966 100644
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -6,7 +6,7 @@ export const environment = {
   apiVersion: '9', // match src/paperless/settings.py
   appTitle: 'Paperless-ngx',
   tag: 'prod',
-  version: '2.20.3',
+  version: '2.20.4',
   webSocketHost: window.location.host,
   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
   webSocketBaseUrl: base_url.pathname + 'ws/',
diff --git a/src/documents/data_models.py b/src/documents/data_models.py
index 7f98a1f05..2623a6138 100644
--- a/src/documents/data_models.py
+++ b/src/documents/data_models.py
@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
     document_type_id: int | None = None
     tag_ids: list[int] | None = None
     storage_path_id: int | None = None
-    created: datetime.datetime | None = None
+    created: datetime.date | None = None
     asn: int | None = None
     owner_id: int | None = None
     view_users: list[int] | None = None
@@ -100,6 +100,7 @@ class DocumentMetadataOverrides:
         overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
         overrides.owner_id = doc.owner.id if doc.owner else None
         overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
+        overrides.created = doc.created

         overrides.view_users = list(
             get_users_with_perms(
diff --git a/src/documents/index.py b/src/documents/index.py
index 6b994ac8c..ea26ea926 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -10,6 +10,7 @@ from datetime import time
 from datetime import timedelta
 from datetime import timezone
 from shutil import rmtree
+from time import sleep
 from typing import TYPE_CHECKING
 from typing import Literal

@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
 from whoosh.idsets import BitSet
 from whoosh.idsets import DocIdSet
 from whoosh.index import FileIndex
+from whoosh.index import LockError
 from whoosh.index import create_in
 from whoosh.index import exists_in
 from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:


 def open_index(*, recreate=False) -> FileIndex:
-    try:
-        if exists_in(settings.INDEX_DIR) and not recreate:
-            return open_dir(settings.INDEX_DIR, schema=get_schema())
-    except Exception:
-        logger.exception("Error while opening the index, recreating.")
+    transient_exceptions = (FileNotFoundError, LockError)
+    max_retries = 3
+    retry_delay = 0.1
+
+    for attempt in range(max_retries + 1):
+        try:
+            if exists_in(settings.INDEX_DIR) and not recreate:
+                return open_dir(settings.INDEX_DIR, schema=get_schema())
+            break
+        except transient_exceptions as exc:
+            is_last_attempt = attempt == max_retries or recreate
+            if is_last_attempt:
+                logger.exception(
+                    "Error while opening the index after retries, recreating.",
+                )
+                break
+
+            logger.warning(
+                "Transient error while opening the index (attempt %s/%s): %s. Retrying.",
+                attempt + 1,
+                max_retries + 1,
+                exc,
+            )
+            sleep(retry_delay)
+        except Exception:
+            logger.exception("Error while opening the index, recreating.")
+            break

     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
     if settings.INDEX_DIR.is_dir():
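
The retry loop above treats FileNotFoundError and whoosh's LockError as transient (e.g. another
worker holding the write lock, or a segment file briefly missing mid-commit) and only falls back
to recreating the index once the retries are exhausted or the error is of another kind. A minimal
standalone sketch of the same retry-then-fallback shape; open_resource and rebuild_resource are
hypothetical stand-ins for the whoosh open_dir/create_in calls, and TimeoutError stands in for
LockError so the snippet needs no whoosh import:

    import logging
    from time import sleep

    logger = logging.getLogger(__name__)

    TRANSIENT_EXCEPTIONS = (FileNotFoundError, TimeoutError)  # stand-in for (FileNotFoundError, LockError)
    MAX_RETRIES = 3
    RETRY_DELAY = 0.1  # seconds between attempts


    def open_with_retry(open_resource, rebuild_resource):
        """Open a resource, retrying transient errors before rebuilding it."""
        for attempt in range(MAX_RETRIES + 1):
            try:
                return open_resource()
            except TRANSIENT_EXCEPTIONS as exc:
                if attempt == MAX_RETRIES:
                    logger.exception("Transient error persisted after retries, rebuilding.")
                    break
                logger.warning(
                    "Transient error (attempt %s/%s): %s. Retrying.",
                    attempt + 1,
                    MAX_RETRIES + 1,
                    exc,
                )
                sleep(RETRY_DELAY)
            except Exception:
                logger.exception("Non-transient error, rebuilding.")
                break
        return rebuild_resource()
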
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 5c90c6f1c..5c71de9a9 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -18,6 +18,8 @@ from django.core.exceptions import ValidationError
 from django.core.validators import DecimalValidator
 from django.core.validators import EmailValidator
 from django.core.validators import MaxLengthValidator
+from django.core.validators import MaxValueValidator
+from django.core.validators import MinValueValidator
 from django.core.validators import RegexValidator
 from django.core.validators import integer_validator
 from django.db.models import Count
@@ -875,6 +877,13 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
                 uri_validator(data["value"])
             elif field.data_type == CustomField.FieldDataType.INT:
                 integer_validator(data["value"])
+                try:
+                    value_int = int(data["value"])
+                except (TypeError, ValueError):
+                    raise serializers.ValidationError("Enter a valid integer.")
+                # Keep values within the PostgreSQL integer range
+                MinValueValidator(-2147483648)(value_int)
+                MaxValueValidator(2147483647)(value_int)
             elif (
                 field.data_type == CustomField.FieldDataType.MONETARY
                 and data["value"] != ""
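
Integer custom fields land in a PostgreSQL integer column, a signed 32-bit type, so values
outside [-2147483648, 2147483647] previously only failed at the database layer; validating in
the serializer surfaces a clean HTTP 400 instead. A small sketch of how the two Django
validators behave in isolation (the settings.configure() call is only there so the snippet
runs outside a project):

    from django.conf import settings

    settings.configure(USE_I18N=False)  # minimal setup so the sketch runs standalone

    from django.core.exceptions import ValidationError
    from django.core.validators import MaxValueValidator
    from django.core.validators import MinValueValidator

    INT32_MIN, INT32_MAX = -(2**31), 2**31 - 1  # PostgreSQL "integer" bounds


    def check_int32(value: int) -> None:
        MinValueValidator(INT32_MIN)(value)
        MaxValueValidator(INT32_MAX)(value)


    check_int32(INT32_MAX)  # largest storable value: passes
    try:
        check_int32(2**31)  # one past the maximum
    except ValidationError as exc:
        print(exc.messages)  # ['Ensure this value is less than or equal to 2147483647.']
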
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index 5f2c8b4b2..4ec00258a 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -418,7 +418,15 @@ def update_filename_and_move_files(
         return
     instance = instance.document

-    def validate_move(instance, old_path: Path, new_path: Path):
+    def validate_move(instance, old_path: Path, new_path: Path, root: Path):
+        if not new_path.is_relative_to(root):
+            msg = (
+                f"Document {instance!s}: Refusing to move file outside root {root}: "
+                f"{new_path}."
+            )
+            logger.warning(msg)
+            raise CannotMoveFilesException(msg)
+
         if not old_path.is_file():
             # Can't do anything if the old file does not exist anymore.
             msg = f"Document {instance!s}: File {old_path} doesn't exist."
@@ -507,12 +515,22 @@ def update_filename_and_move_files(
             return

         if move_original:
-            validate_move(instance, old_source_path, instance.source_path)
+            validate_move(
+                instance,
+                old_source_path,
+                instance.source_path,
+                settings.ORIGINALS_DIR,
+            )
             create_source_path_directory(instance.source_path)
             shutil.move(old_source_path, instance.source_path)

         if move_archive:
-            validate_move(instance, old_archive_path, instance.archive_path)
+            validate_move(
+                instance,
+                old_archive_path,
+                instance.archive_path,
+                settings.ARCHIVE_DIR,
+            )
             create_source_path_directory(instance.archive_path)
             shutil.move(old_archive_path, instance.archive_path)

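
validate_move now receives the storage root and refuses any destination that does not sit under
it. Path.is_relative_to (stdlib since Python 3.9; the project requires 3.10+) is a purely lexical
prefix check, illustrated below with made-up paths:

    from pathlib import Path

    root = Path("/data/originals")

    print(Path("/data/originals/2024/doc.pdf").is_relative_to(root))  # True
    print(Path("/data/archive/doc.pdf").is_relative_to(root))         # False
    print(Path("/tmp/proof").is_relative_to(root))                    # False

    # The check is lexical; it does not resolve ".." or symlinks:
    print(Path("/data/originals/../archive/x").is_relative_to(root))  # True!

Because of that last case, this guard complements (rather than replaces) the template-level
traversal check added in filepath.py further down.
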
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 606f278db..6c415ad69 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -493,7 +493,7 @@ def check_scheduled_workflows():
                 trigger.schedule_is_recurring
                 and workflow_runs.exists()
                 and (
-                    workflow_runs.last().run_at
+                    workflow_runs.first().run_at
                     > now
                     - datetime.timedelta(
                         days=trigger.schedule_recurring_interval_days,
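
The recurrence guard has to look at the most recent run, not the oldest: assuming the
workflow_runs queryset is ordered newest-first (e.g. order_by("-run_at"), which makes .first()
the latest run), .last() fetched the oldest run, so a workflow would re-fire as soon as any
historical run aged past the interval. A toy recreation of the scenario exercised by the new
test (two runs, 1-day interval):

    import datetime

    now = datetime.datetime(2025, 7, 19, 12, 0)
    interval = datetime.timedelta(days=1)

    # Runs ordered newest-first, mirroring a queryset like order_by("-run_at")
    run_ats = [
        now - datetime.timedelta(hours=1),  # .first(): the latest run
        now - datetime.timedelta(days=2),   # .last(): the oldest run
    ]

    print(run_ats[0] > now - interval)   # True  -> latest run inside the interval: skip
    print(run_ats[-1] > now - interval)  # False -> old behaviour re-fired the workflow
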
diff --git a/src/documents/templating/filepath.py b/src/documents/templating/filepath.py
index 7d76e7f31..805cefbdb 100644
--- a/src/documents/templating/filepath.py
+++ b/src/documents/templating/filepath.py
@@ -262,6 +262,17 @@ def get_custom_fields_context(
     return field_data


+def _is_safe_relative_path(value: str) -> bool:
+    if value == "":
+        return True
+
+    path = PurePath(value)
+    if path.is_absolute() or path.drive:
+        return False
+
+    return ".." not in path.parts
+
+
 def validate_filepath_template_and_render(
     template_string: str,
     document: Document | None = None,
@@ -309,6 +320,12 @@ def validate_filepath_template_and_render(
         )
         rendered_template = template.render(context)

+        if not _is_safe_relative_path(rendered_template):
+            logger.warning(
+                "Template rendered an unsafe path (absolute or containing traversal).",
+            )
+            return None
+
         # We're good!
         return rendered_template
     except UndefinedError:
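
The helper is deliberately lexical: the empty string passes (an empty rendering just means no
subdirectory), absolute and drive-prefixed paths fail, and any literal ".." path segment fails.
Because it inspects PurePath.parts, names that merely contain dots (such as "archive..2024")
remain allowed. A standalone restatement with illustrative inputs:

    from pathlib import PurePath, PureWindowsPath


    def is_safe_relative_path(value: str) -> bool:
        if value == "":
            return True
        path = PurePath(value)
        if path.is_absolute() or path.drive:
            return False
        return ".." not in path.parts


    assert is_safe_relative_path("")                            # empty rendering: allowed
    assert is_safe_relative_path("correspondent/2024/doc")      # plain relative path
    assert is_safe_relative_path("archive..2024/doc")           # ".." inside a name is fine
    assert not is_safe_relative_path("/etc/passwd")             # absolute
    assert not is_safe_relative_path("../../../../tmp/proof")   # traversal

    # On POSIX, PurePath("C:/x") has an empty drive, which is why the explicit
    # drive check matters when the same string is later interpreted on Windows:
    assert PureWindowsPath("C:/x").drive == "C:"
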
diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py
index 87190c23b..f40ef157f 100644
--- a/src/documents/tests/test_api_documents.py
+++ b/src/documents/tests/test_api_documents.py
@@ -1664,6 +1664,44 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):

         self.consume_file_mock.assert_not_called()

+    def test_patch_document_integer_custom_field_out_of_range(self):
+        """
+        GIVEN:
+            - An integer custom field
+            - A document
+        WHEN:
+            - Patching the document with an integer value exceeding PostgreSQL's range
+        THEN:
+            - HTTP 400 is returned (validation catches the overflow)
+            - No custom field instance is created
+        """
+        cf_int = CustomField.objects.create(
+            name="intfield",
+            data_type=CustomField.FieldDataType.INT,
+        )
+        doc = Document.objects.create(
+            title="Doc",
+            checksum="123",
+            mime_type="application/pdf",
+        )
+
+        response = self.client.patch(
+            f"/api/documents/{doc.pk}/",
+            {
+                "custom_fields": [
+                    {
+                        "field": cf_int.pk,
+                        "value": 2**31,  # overflow for PostgreSQL integer fields
+                    },
+                ],
+            },
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("custom_fields", response.data)
+        self.assertEqual(CustomFieldInstance.objects.count(), 0)
+
     def test_upload_with_webui_source(self):
         """
         GIVEN: A document with a source file
diff --git a/src/documents/tests/test_api_objects.py b/src/documents/tests/test_api_objects.py
index 014dd3c2a..0eb99f023 100644
--- a/src/documents/tests/test_api_objects.py
+++ b/src/documents/tests/test_api_objects.py
@@ -219,6 +219,30 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(StoragePath.objects.count(), 1)

+    def test_api_create_storage_path_rejects_traversal(self):
+        """
+        GIVEN:
+            - API request to create a storage path
+            - Storage path attempts directory traversal
+        WHEN:
+            - API is called
+        THEN:
+            - Correct HTTP 400 response
+            - No storage path is created
+        """
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "name": "Traversal path",
+                    "path": "../../../../../tmp/proof",
+                },
+            ),
+            content_type="application/json",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(StoragePath.objects.count(), 1)
+
     def test_api_storage_path_placeholders(self):
         """
         GIVEN:
diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py
index e1379386f..b2cb89b8b 100644
--- a/src/documents/tests/test_bulk_edit.py
+++ b/src/documents/tests/test_bulk_edit.py
@@ -581,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
             - Consume file should be called
         """
         doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
-        metadata_document_id = self.doc1.id
+        metadata_document_id = self.doc2.id
         user = User.objects.create(username="test_user")

         result = bulk_edit.merge(
@@ -606,7 +606,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):

         # With metadata_document_id overrides
         result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
         consume_file_args, _ = mock_consume_file.call_args
-        self.assertEqual(consume_file_args[1].title, "A (merged)")
+        self.assertEqual(consume_file_args[1].title, "B (merged)")
+        self.assertEqual(consume_file_args[1].created, self.doc2.created)

         self.assertEqual(result, "OK")
diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py
index f216feedb..3167bb762 100644
--- a/src/documents/tests/test_index.py
+++ b/src/documents/tests/test_index.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from unittest import mock

+from django.conf import settings
 from django.contrib.auth.models import User
 from django.test import SimpleTestCase
 from django.test import TestCase
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
         result = self._rewrite_with_now("added:today", fixed_now)
         # Should convert to UTC properly
         self.assertIn("added:[20250719", result)
+
+
+class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
+    def _assert_recreate_called(self, mock_create_in):
+        mock_create_in.assert_called_once()
+        path_arg, schema_arg = mock_create_in.call_args.args
+        self.assertEqual(path_arg, settings.INDEX_DIR)
+        self.assertEqual(schema_arg.__class__.__name__, "Schema")
+
+    def test_transient_missing_segment_does_not_force_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError once due to a
+              transient missing segment
+        THEN:
+            - Index is opened successfully on retry
+            - Index is not recreated
+        """
+        file_marker = settings.INDEX_DIR / "file_marker.txt"
+        file_marker.write_text("keep")
+        expected_index = object()
+
+        with (
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=[FileNotFoundError("missing"), expected_index],
+            ) as mock_open_dir,
+            mock.patch(
+                "documents.index.create_in",
+            ) as mock_create_in,
+            mock.patch(
+                "documents.index.rmtree",
+            ) as mock_rmtree,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, expected_index)
+        self.assertGreaterEqual(mock_open_dir.call_count, 2)
+        mock_rmtree.assert_not_called()
+        mock_create_in.assert_not_called()
+        self.assertEqual(file_marker.read_text(), "keep")
+
+    def test_transient_errors_exhaust_retries_and_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError multiple times due to
+              transient missing segments
+        THEN:
+            - Index is recreated after retries are exhausted
+        """
+        recreated_index = object()
+
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=FileNotFoundError("missing"),
+            ) as mock_open_dir,
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, recreated_index)
+        self.assertEqual(mock_open_dir.call_count, 4)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index after retries, recreating.",
+            cm.output[0],
+        )
+
+    def test_non_transient_error_recreates_index(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises a "non-transient" error
+        THEN:
+            - Index is recreated
+        """
+        recreated_index = object()
+
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=RuntimeError("boom"),
+            ),
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, recreated_index)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index, recreating.",
+            cm.output[0],
+        )
diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py
index 249183b6e..deb40a165 100644
--- a/src/documents/tests/test_workflows.py
+++ b/src/documents/tests/test_workflows.py
@@ -2094,6 +2094,68 @@ class TestWorkflows(
         doc.refresh_from_db()
         self.assertIsNone(doc.owner)

+    def test_workflow_scheduled_recurring_respects_latest_run(self):
+        """
+        GIVEN:
+            - Scheduled workflow marked as recurring with a 1-day interval
+            - Document that matches the trigger
+            - Two prior runs exist: one 2 days ago and one 1 hour ago
+        WHEN:
+            - Scheduled workflows are checked again
+        THEN:
+            - Workflow does not run because the most recent run is inside the interval
+        """
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
+            schedule_is_recurring=True,
+            schedule_recurring_interval_days=1,
+        )
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+            created=timezone.now().date() - timedelta(days=3),
+        )
+
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(days=2),
+        )
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(hours=1),
+        )
+
+        tasks.check_scheduled_workflows()
+
+        doc.refresh_from_db()
+        self.assertIsNone(doc.owner)
+        self.assertEqual(
+            WorkflowRun.objects.filter(
+                workflow=w,
+                document=doc,
+                type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            ).count(),
+            2,
+        )
+
     def test_workflow_scheduled_trigger_negative_offset_customfield(self):
         """
         GIVEN:
diff --git a/src/documents/views.py b/src/documents/views.py
index ba265926c..d5910497f 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -708,6 +708,7 @@ class DocumentViewSet(
         "title",
         "correspondent__name",
         "document_type__name",
+        "storage_path__name",
         "created",
         "modified",
         "added",
diff --git a/src/paperless/version.py b/src/paperless/version.py
index c0c6439d4..0ce227357 100644
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1,6 +1,6 @@
 from typing import Final

-__version__: Final[tuple[int, int, int]] = (2, 20, 3)
+__version__: Final[tuple[int, int, int]] = (2, 20, 4)
 # Version string like X.Y.Z
 __full_version_str__: Final[str] = ".".join(map(str, __version__))
 # Version string like X.Y
diff --git a/uv.lock b/uv.lock
index d1cf11ee2..fccc00ada 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2115,7 +2115,7 @@ wheels = [

 [[package]]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.4"
 source = { virtual = "." }
 dependencies = [
     { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },