mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-14 21:54:22 -06:00
Merge branch 'hotfix/v2.20.4'
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "paperless-ngx"
|
||||
version = "2.20.3"
|
||||
version = "2.20.4"
|
||||
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "paperless-ngx-ui",
|
||||
"version": "2.20.3",
|
||||
"version": "2.20.4",
|
||||
"scripts": {
|
||||
"preinstall": "npx only-allow pnpm",
|
||||
"ng": "ng",
|
||||
|
||||
@@ -6,7 +6,7 @@ export const environment = {
|
||||
apiVersion: '9', // match src/paperless/settings.py
|
||||
appTitle: 'Paperless-ngx',
|
||||
tag: 'prod',
|
||||
version: '2.20.3',
|
||||
version: '2.20.4',
|
||||
webSocketHost: window.location.host,
|
||||
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
||||
webSocketBaseUrl: base_url.pathname + 'ws/',
|
||||
|
||||
@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
|
||||
document_type_id: int | None = None
|
||||
tag_ids: list[int] | None = None
|
||||
storage_path_id: int | None = None
|
||||
created: datetime.datetime | None = None
|
||||
created: datetime.date | None = None
|
||||
asn: int | None = None
|
||||
owner_id: int | None = None
|
||||
view_users: list[int] | None = None
|
||||
@@ -100,6 +100,7 @@ class DocumentMetadataOverrides:
|
||||
overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
|
||||
overrides.owner_id = doc.owner.id if doc.owner else None
|
||||
overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
|
||||
overrides.created = doc.created
|
||||
|
||||
overrides.view_users = list(
|
||||
get_users_with_perms(
|
||||
|
||||
@@ -10,6 +10,7 @@ from datetime import time
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from shutil import rmtree
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
|
||||
from whoosh.idsets import BitSet
|
||||
from whoosh.idsets import DocIdSet
|
||||
from whoosh.index import FileIndex
|
||||
from whoosh.index import LockError
|
||||
from whoosh.index import create_in
|
||||
from whoosh.index import exists_in
|
||||
from whoosh.index import open_dir
|
||||
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
|
||||
|
||||
|
||||
def open_index(*, recreate=False) -> FileIndex:
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
except Exception:
|
||||
logger.exception("Error while opening the index, recreating.")
|
||||
transient_exceptions = (FileNotFoundError, LockError)
|
||||
max_retries = 3
|
||||
retry_delay = 0.1
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
break
|
||||
except transient_exceptions as exc:
|
||||
is_last_attempt = attempt == max_retries or recreate
|
||||
if is_last_attempt:
|
||||
logger.exception(
|
||||
"Error while opening the index after retries, recreating.",
|
||||
)
|
||||
break
|
||||
|
||||
logger.warning(
|
||||
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
|
||||
attempt + 1,
|
||||
max_retries + 1,
|
||||
exc,
|
||||
)
|
||||
sleep(retry_delay)
|
||||
except Exception:
|
||||
logger.exception("Error while opening the index, recreating.")
|
||||
break
|
||||
|
||||
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
|
||||
if settings.INDEX_DIR.is_dir():
|
||||
|
||||
@@ -18,6 +18,8 @@ from django.core.exceptions import ValidationError
|
||||
from django.core.validators import DecimalValidator
|
||||
from django.core.validators import EmailValidator
|
||||
from django.core.validators import MaxLengthValidator
|
||||
from django.core.validators import MaxValueValidator
|
||||
from django.core.validators import MinValueValidator
|
||||
from django.core.validators import RegexValidator
|
||||
from django.core.validators import integer_validator
|
||||
from django.db.models import Count
|
||||
@@ -875,6 +877,13 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
|
||||
uri_validator(data["value"])
|
||||
elif field.data_type == CustomField.FieldDataType.INT:
|
||||
integer_validator(data["value"])
|
||||
try:
|
||||
value_int = int(data["value"])
|
||||
except (TypeError, ValueError):
|
||||
raise serializers.ValidationError("Enter a valid integer.")
|
||||
# Keep values within the PostgreSQL integer range
|
||||
MinValueValidator(-2147483648)(value_int)
|
||||
MaxValueValidator(2147483647)(value_int)
|
||||
elif (
|
||||
field.data_type == CustomField.FieldDataType.MONETARY
|
||||
and data["value"] != ""
|
||||
|
||||
@@ -418,7 +418,15 @@ def update_filename_and_move_files(
|
||||
return
|
||||
instance = instance.document
|
||||
|
||||
def validate_move(instance, old_path: Path, new_path: Path):
|
||||
def validate_move(instance, old_path: Path, new_path: Path, root: Path):
|
||||
if not new_path.is_relative_to(root):
|
||||
msg = (
|
||||
f"Document {instance!s}: Refusing to move file outside root {root}: "
|
||||
f"{new_path}."
|
||||
)
|
||||
logger.warning(msg)
|
||||
raise CannotMoveFilesException(msg)
|
||||
|
||||
if not old_path.is_file():
|
||||
# Can't do anything if the old file does not exist anymore.
|
||||
msg = f"Document {instance!s}: File {old_path} doesn't exist."
|
||||
@@ -507,12 +515,22 @@ def update_filename_and_move_files(
|
||||
return
|
||||
|
||||
if move_original:
|
||||
validate_move(instance, old_source_path, instance.source_path)
|
||||
validate_move(
|
||||
instance,
|
||||
old_source_path,
|
||||
instance.source_path,
|
||||
settings.ORIGINALS_DIR,
|
||||
)
|
||||
create_source_path_directory(instance.source_path)
|
||||
shutil.move(old_source_path, instance.source_path)
|
||||
|
||||
if move_archive:
|
||||
validate_move(instance, old_archive_path, instance.archive_path)
|
||||
validate_move(
|
||||
instance,
|
||||
old_archive_path,
|
||||
instance.archive_path,
|
||||
settings.ARCHIVE_DIR,
|
||||
)
|
||||
create_source_path_directory(instance.archive_path)
|
||||
shutil.move(old_archive_path, instance.archive_path)
|
||||
|
||||
|
||||
@@ -493,7 +493,7 @@ def check_scheduled_workflows():
|
||||
trigger.schedule_is_recurring
|
||||
and workflow_runs.exists()
|
||||
and (
|
||||
workflow_runs.last().run_at
|
||||
workflow_runs.first().run_at
|
||||
> now
|
||||
- datetime.timedelta(
|
||||
days=trigger.schedule_recurring_interval_days,
|
||||
|
||||
@@ -262,6 +262,17 @@ def get_custom_fields_context(
|
||||
return field_data
|
||||
|
||||
|
||||
def _is_safe_relative_path(value: str) -> bool:
|
||||
if value == "":
|
||||
return True
|
||||
|
||||
path = PurePath(value)
|
||||
if path.is_absolute() or path.drive:
|
||||
return False
|
||||
|
||||
return ".." not in path.parts
|
||||
|
||||
|
||||
def validate_filepath_template_and_render(
|
||||
template_string: str,
|
||||
document: Document | None = None,
|
||||
@@ -309,6 +320,12 @@ def validate_filepath_template_and_render(
|
||||
)
|
||||
rendered_template = template.render(context)
|
||||
|
||||
if not _is_safe_relative_path(rendered_template):
|
||||
logger.warning(
|
||||
"Template rendered an unsafe path (absolute or containing traversal).",
|
||||
)
|
||||
return None
|
||||
|
||||
# We're good!
|
||||
return rendered_template
|
||||
except UndefinedError:
|
||||
|
||||
@@ -1664,6 +1664,44 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
|
||||
self.consume_file_mock.assert_not_called()
|
||||
|
||||
def test_patch_document_integer_custom_field_out_of_range(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- An integer custom field
|
||||
- A document
|
||||
WHEN:
|
||||
- Patching the document with an integer value exceeding PostgreSQL's range
|
||||
THEN:
|
||||
- HTTP 400 is returned (validation catches the overflow)
|
||||
- No custom field instance is created
|
||||
"""
|
||||
cf_int = CustomField.objects.create(
|
||||
name="intfield",
|
||||
data_type=CustomField.FieldDataType.INT,
|
||||
)
|
||||
doc = Document.objects.create(
|
||||
title="Doc",
|
||||
checksum="123",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
response = self.client.patch(
|
||||
f"/api/documents/{doc.pk}/",
|
||||
{
|
||||
"custom_fields": [
|
||||
{
|
||||
"field": cf_int.pk,
|
||||
"value": 2**31, # overflow for PostgreSQL integer fields
|
||||
},
|
||||
],
|
||||
},
|
||||
format="json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn("custom_fields", response.data)
|
||||
self.assertEqual(CustomFieldInstance.objects.count(), 0)
|
||||
|
||||
def test_upload_with_webui_source(self):
|
||||
"""
|
||||
GIVEN: A document with a source file
|
||||
|
||||
@@ -219,6 +219,30 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertEqual(StoragePath.objects.count(), 1)
|
||||
|
||||
def test_api_create_storage_path_rejects_traversal(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- API request to create a storage paths
|
||||
- Storage path attempts directory traversal
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- Correct HTTP 400 response
|
||||
- No storage path is created
|
||||
"""
|
||||
response = self.client.post(
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{
|
||||
"name": "Traversal path",
|
||||
"path": "../../../../../tmp/proof",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertEqual(StoragePath.objects.count(), 1)
|
||||
|
||||
def test_api_storage_path_placeholders(self):
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
@@ -581,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
- Consume file should be called
|
||||
"""
|
||||
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
|
||||
metadata_document_id = self.doc1.id
|
||||
metadata_document_id = self.doc2.id
|
||||
user = User.objects.create(username="test_user")
|
||||
|
||||
result = bulk_edit.merge(
|
||||
@@ -606,7 +606,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
# With metadata_document_id overrides
|
||||
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].title, "A (merged)")
|
||||
self.assertEqual(consume_file_args[1].title, "B (merged)")
|
||||
self.assertEqual(consume_file_args[1].created, self.doc2.created)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import SimpleTestCase
|
||||
from django.test import TestCase
|
||||
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
|
||||
result = self._rewrite_with_now("added:today", fixed_now)
|
||||
# Should convert to UTC properly
|
||||
self.assertIn("added:[20250719", result)
|
||||
|
||||
|
||||
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
|
||||
def _assert_recreate_called(self, mock_create_in):
|
||||
mock_create_in.assert_called_once()
|
||||
path_arg, schema_arg = mock_create_in.call_args.args
|
||||
self.assertEqual(path_arg, settings.INDEX_DIR)
|
||||
self.assertEqual(schema_arg.__class__.__name__, "Schema")
|
||||
|
||||
def test_transient_missing_segment_does_not_force_recreate(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises FileNotFoundError once due to a
|
||||
transient missing segment
|
||||
THEN:
|
||||
- Index is opened successfully on retry
|
||||
- Index is not recreated
|
||||
"""
|
||||
file_marker = settings.INDEX_DIR / "file_marker.txt"
|
||||
file_marker.write_text("keep")
|
||||
expected_index = object()
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=[FileNotFoundError("missing"), expected_index],
|
||||
) as mock_open_dir,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
) as mock_create_in,
|
||||
mock.patch(
|
||||
"documents.index.rmtree",
|
||||
) as mock_rmtree,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, expected_index)
|
||||
self.assertGreaterEqual(mock_open_dir.call_count, 2)
|
||||
mock_rmtree.assert_not_called()
|
||||
mock_create_in.assert_not_called()
|
||||
self.assertEqual(file_marker.read_text(), "keep")
|
||||
|
||||
def test_transient_errors_exhaust_retries_and_recreate(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises FileNotFoundError multiple times due to
|
||||
transient missing segments
|
||||
THEN:
|
||||
- Index is recreated after retries are exhausted
|
||||
"""
|
||||
recreated_index = object()
|
||||
|
||||
with (
|
||||
self.assertLogs("paperless.index", level="ERROR") as cm,
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=FileNotFoundError("missing"),
|
||||
) as mock_open_dir,
|
||||
mock.patch("documents.index.rmtree") as mock_rmtree,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
return_value=recreated_index,
|
||||
) as mock_create_in,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, recreated_index)
|
||||
self.assertEqual(mock_open_dir.call_count, 4)
|
||||
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
|
||||
self._assert_recreate_called(mock_create_in)
|
||||
self.assertIn(
|
||||
"Error while opening the index after retries, recreating.",
|
||||
cm.output[0],
|
||||
)
|
||||
|
||||
def test_non_transient_error_recreates_index(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises a "non-transient" error
|
||||
THEN:
|
||||
- Index is recreated
|
||||
"""
|
||||
recreated_index = object()
|
||||
|
||||
with (
|
||||
self.assertLogs("paperless.index", level="ERROR") as cm,
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=RuntimeError("boom"),
|
||||
),
|
||||
mock.patch("documents.index.rmtree") as mock_rmtree,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
return_value=recreated_index,
|
||||
) as mock_create_in,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, recreated_index)
|
||||
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
|
||||
self._assert_recreate_called(mock_create_in)
|
||||
self.assertIn(
|
||||
"Error while opening the index, recreating.",
|
||||
cm.output[0],
|
||||
)
|
||||
|
||||
@@ -2094,6 +2094,68 @@ class TestWorkflows(
|
||||
doc.refresh_from_db()
|
||||
self.assertIsNone(doc.owner)
|
||||
|
||||
def test_workflow_scheduled_recurring_respects_latest_run(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Scheduled workflow marked as recurring with a 1-day interval
|
||||
- Document that matches the trigger
|
||||
- Two prior runs exist: one 2 days ago and one 1 hour ago
|
||||
WHEN:
|
||||
- Scheduled workflows are checked again
|
||||
THEN:
|
||||
- Workflow does not run because the most recent run is inside the interval
|
||||
"""
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
|
||||
schedule_is_recurring=True,
|
||||
schedule_recurring_interval_days=1,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
assign_title="Doc assign owner",
|
||||
assign_owner=self.user2,
|
||||
)
|
||||
w = Workflow.objects.create(
|
||||
name="Workflow 1",
|
||||
order=0,
|
||||
)
|
||||
w.triggers.add(trigger)
|
||||
w.actions.add(action)
|
||||
w.save()
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="sample test",
|
||||
correspondent=self.c,
|
||||
original_filename="sample.pdf",
|
||||
created=timezone.now().date() - timedelta(days=3),
|
||||
)
|
||||
|
||||
WorkflowRun.objects.create(
|
||||
workflow=w,
|
||||
document=doc,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
run_at=timezone.now() - timedelta(days=2),
|
||||
)
|
||||
WorkflowRun.objects.create(
|
||||
workflow=w,
|
||||
document=doc,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
run_at=timezone.now() - timedelta(hours=1),
|
||||
)
|
||||
|
||||
tasks.check_scheduled_workflows()
|
||||
|
||||
doc.refresh_from_db()
|
||||
self.assertIsNone(doc.owner)
|
||||
self.assertEqual(
|
||||
WorkflowRun.objects.filter(
|
||||
workflow=w,
|
||||
document=doc,
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
).count(),
|
||||
2,
|
||||
)
|
||||
|
||||
def test_workflow_scheduled_trigger_negative_offset_customfield(self):
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
@@ -708,6 +708,7 @@ class DocumentViewSet(
|
||||
"title",
|
||||
"correspondent__name",
|
||||
"document_type__name",
|
||||
"storage_path__name",
|
||||
"created",
|
||||
"modified",
|
||||
"added",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Final
|
||||
|
||||
__version__: Final[tuple[int, int, int]] = (2, 20, 3)
|
||||
__version__: Final[tuple[int, int, int]] = (2, 20, 4)
|
||||
# Version string like X.Y.Z
|
||||
__full_version_str__: Final[str] = ".".join(map(str, __version__))
|
||||
# Version string like X.Y
|
||||
|
||||
Reference in New Issue
Block a user