Merge branch 'hotfix/v2.20.4'

shamoon
2026-01-13 11:45:55 -08:00
17 changed files with 330 additions and 17 deletions

View File

@@ -1,6 +1,6 @@
 [project]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.4"
 description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
 readme = "README.md"
 requires-python = ">=3.10"

View File

@@ -1,6 +1,6 @@
 {
   "name": "paperless-ngx-ui",
-  "version": "2.20.3",
+  "version": "2.20.4",
   "scripts": {
     "preinstall": "npx only-allow pnpm",
     "ng": "ng",

View File

@@ -6,7 +6,7 @@ export const environment = {
   apiVersion: '9', // match src/paperless/settings.py
   appTitle: 'Paperless-ngx',
   tag: 'prod',
-  version: '2.20.3',
+  version: '2.20.4',
   webSocketHost: window.location.host,
   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
   webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
     document_type_id: int | None = None
     tag_ids: list[int] | None = None
     storage_path_id: int | None = None
-    created: datetime.datetime | None = None
+    created: datetime.date | None = None
     asn: int | None = None
     owner_id: int | None = None
     view_users: list[int] | None = None
@@ -100,6 +100,7 @@ class DocumentMetadataOverrides:
         overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
         overrides.owner_id = doc.owner.id if doc.owner else None
         overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
+        overrides.created = doc.created
         overrides.view_users = list(
             get_users_with_perms(

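The hunk above narrows `created` to a calendar date and copies it into the overrides built from a source document. A minimal sketch with a simplified stand-in dataclass (not the real `DocumentMetadataOverrides`, which carries many more fields):

import datetime
from dataclasses import dataclass


@dataclass
class Overrides:
    # Simplified stand-in: `created` is now a date, no longer a datetime
    created: datetime.date | None = None


# from_document-style copying takes the document's created date verbatim
overrides = Overrides(created=datetime.date(2026, 1, 13))
assert isinstance(overrides.created, datetime.date)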
View File

@@ -10,6 +10,7 @@ from datetime import time
 from datetime import timedelta
 from datetime import timezone
 from shutil import rmtree
+from time import sleep
 from typing import TYPE_CHECKING
 from typing import Literal
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
 from whoosh.idsets import BitSet
 from whoosh.idsets import DocIdSet
 from whoosh.index import FileIndex
+from whoosh.index import LockError
 from whoosh.index import create_in
 from whoosh.index import exists_in
 from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
 def open_index(*, recreate=False) -> FileIndex:
-    try:
-        if exists_in(settings.INDEX_DIR) and not recreate:
-            return open_dir(settings.INDEX_DIR, schema=get_schema())
-    except Exception:
-        logger.exception("Error while opening the index, recreating.")
+    transient_exceptions = (FileNotFoundError, LockError)
+    max_retries = 3
+    retry_delay = 0.1
+
+    for attempt in range(max_retries + 1):
+        try:
+            if exists_in(settings.INDEX_DIR) and not recreate:
+                return open_dir(settings.INDEX_DIR, schema=get_schema())
+            break
+        except transient_exceptions as exc:
+            is_last_attempt = attempt == max_retries or recreate
+            if is_last_attempt:
+                logger.exception(
+                    "Error while opening the index after retries, recreating.",
+                )
+                break
+            logger.warning(
+                "Transient error while opening the index (attempt %s/%s): %s. Retrying.",
+                attempt + 1,
+                max_retries + 1,
+                exc,
+            )
+            sleep(retry_delay)
+        except Exception:
+            logger.exception("Error while opening the index, recreating.")
+            break

     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
     if settings.INDEX_DIR.is_dir():

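In isolation, the retry-then-recreate pattern introduced above looks like this; a generic sketch, assuming a callable that opens the resource (the helper name and signature are illustrative, not paperless-ngx API):

import logging
import time

logger = logging.getLogger(__name__)


def open_with_retries(open_fn, *, transient=(FileNotFoundError,), max_retries=3, delay=0.1):
    for attempt in range(max_retries + 1):
        try:
            return open_fn()
        except transient as exc:
            if attempt == max_retries:
                # Retries exhausted: let the caller fall through to recreation
                logger.exception("Error after retries: %s", exc)
                break
            logger.warning(
                "Transient error (attempt %s/%s): %s. Retrying.",
                attempt + 1,
                max_retries + 1,
                exc,
            )
            time.sleep(delay)
        except Exception:
            # Non-transient errors skip the retries entirely
            logger.exception("Non-transient error")
            break
    return None  # caller recreates the resource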
View File

@@ -18,6 +18,8 @@ from django.core.exceptions import ValidationError
 from django.core.validators import DecimalValidator
 from django.core.validators import EmailValidator
 from django.core.validators import MaxLengthValidator
+from django.core.validators import MaxValueValidator
+from django.core.validators import MinValueValidator
 from django.core.validators import RegexValidator
 from django.core.validators import integer_validator
 from django.db.models import Count
@@ -875,6 +877,13 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
             uri_validator(data["value"])
         elif field.data_type == CustomField.FieldDataType.INT:
             integer_validator(data["value"])
+            try:
+                value_int = int(data["value"])
+            except (TypeError, ValueError):
+                raise serializers.ValidationError("Enter a valid integer.")
+            # Keep values within the PostgreSQL integer range
+            MinValueValidator(-2147483648)(value_int)
+            MaxValueValidator(2147483647)(value_int)
         elif (
             field.data_type == CustomField.FieldDataType.MONETARY
             and data["value"] != ""

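The two bounds are the limits of PostgreSQL's signed 32-bit `integer` column type. A standalone sketch of the same check (the helper name is illustrative):

from django.core.exceptions import ValidationError
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator

PG_INT_MIN = -(2**31)  # -2147483648
PG_INT_MAX = 2**31 - 1  # 2147483647


def validate_pg_integer(value: int) -> None:
    MinValueValidator(PG_INT_MIN)(value)
    MaxValueValidator(PG_INT_MAX)(value)


validate_pg_integer(2**31 - 1)  # fine
try:
    validate_pg_integer(2**31)  # overflows a PostgreSQL integer column
except ValidationError:
    pass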
View File

@@ -418,7 +418,15 @@ def update_filename_and_move_files(
             return
         instance = instance.document

-    def validate_move(instance, old_path: Path, new_path: Path):
+    def validate_move(instance, old_path: Path, new_path: Path, root: Path):
+        if not new_path.is_relative_to(root):
+            msg = (
+                f"Document {instance!s}: Refusing to move file outside root {root}: "
+                f"{new_path}."
+            )
+            logger.warning(msg)
+            raise CannotMoveFilesException(msg)
+
         if not old_path.is_file():
             # Can't do anything if the old file does not exist anymore.
             msg = f"Document {instance!s}: File {old_path} doesn't exist."
@@ -507,12 +515,22 @@
             return

         if move_original:
-            validate_move(instance, old_source_path, instance.source_path)
+            validate_move(
+                instance,
+                old_source_path,
+                instance.source_path,
+                settings.ORIGINALS_DIR,
+            )
             create_source_path_directory(instance.source_path)
             shutil.move(old_source_path, instance.source_path)

         if move_archive:
-            validate_move(instance, old_archive_path, instance.archive_path)
+            validate_move(
+                instance,
+                old_archive_path,
+                instance.archive_path,
+                settings.ARCHIVE_DIR,
+            )
             create_source_path_directory(instance.archive_path)
             shutil.move(old_archive_path, instance.archive_path)

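The new `root` parameter makes the containment check explicit. `Path.is_relative_to` (Python 3.9+) is a purely lexical prefix test, which the short illustration below demonstrates with hypothetical paths:

from pathlib import Path

root = Path("/data/originals")
assert (root / "inbox" / "doc.pdf").is_relative_to(root)
# Unresolved, "/data/originals/../tmp/doc.pdf" still has root as a lexical prefix:
assert (root / ".." / "tmp" / "doc.pdf").is_relative_to(root)
# Resolved, it escapes the root and is rejected:
assert not (root / ".." / "tmp" / "doc.pdf").resolve().is_relative_to(root)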
View File

@@ -493,7 +493,7 @@ def check_scheduled_workflows():
                 trigger.schedule_is_recurring
                 and workflow_runs.exists()
                 and (
-                    workflow_runs.last().run_at
+                    workflow_runs.first().run_at
                     > now
                     - datetime.timedelta(
                         days=trigger.schedule_recurring_interval_days,

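This one-line fix matters because the queryset is evidently ordered newest-first: `.first()` yields the most recent run, while `.last()` compared against the oldest run and let recurring workflows fire again too early. A plain-Python sketch of the intended guard, assuming that ordering:

import datetime


def recurring_run_is_due(
    run_times: list[datetime.datetime],
    now: datetime.datetime,
    interval_days: int,
) -> bool:
    if not run_times:
        return True  # never ran before
    latest = max(run_times)  # most recent run, like .first() on a newest-first queryset
    return latest <= now - datetime.timedelta(days=interval_days)


now = datetime.datetime(2026, 1, 13, 12, 0)
runs = [now - datetime.timedelta(days=2), now - datetime.timedelta(hours=1)]
assert not recurring_run_is_due(runs, now, interval_days=1)  # latest run too recent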
View File

@@ -262,6 +262,17 @@ def get_custom_fields_context(
     return field_data


+def _is_safe_relative_path(value: str) -> bool:
+    if value == "":
+        return True
+    path = PurePath(value)
+    if path.is_absolute() or path.drive:
+        return False
+    return ".." not in path.parts
+
+
 def validate_filepath_template_and_render(
     template_string: str,
     document: Document | None = None,
@@ -309,6 +320,12 @@ def validate_filepath_template_and_render(
         )
         rendered_template = template.render(context)

+        if not _is_safe_relative_path(rendered_template):
+            logger.warning(
+                "Template rendered an unsafe path (absolute or containing traversal).",
+            )
+            return None
+
         # We're good!
         return rendered_template
     except UndefinedError:

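Assembled standalone with quick checks, the helper accepts ordinary relative paths and rejects absolute paths, Windows drive prefixes, and any `..` component:

from pathlib import PurePath


def _is_safe_relative_path(value: str) -> bool:
    if value == "":
        return True
    path = PurePath(value)
    if path.is_absolute() or path.drive:
        return False
    return ".." not in path.parts


assert _is_safe_relative_path("invoices/2026/0001.pdf")
assert _is_safe_relative_path("")  # empty string is allowed
assert not _is_safe_relative_path("/tmp/proof")
assert not _is_safe_relative_path("../../../../../tmp/proof")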
View File

@@ -1664,6 +1664,44 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.consume_file_mock.assert_not_called()

+    def test_patch_document_integer_custom_field_out_of_range(self):
+        """
+        GIVEN:
+            - An integer custom field
+            - A document
+        WHEN:
+            - Patching the document with an integer value exceeding PostgreSQL's range
+        THEN:
+            - HTTP 400 is returned (validation catches the overflow)
+            - No custom field instance is created
+        """
+        cf_int = CustomField.objects.create(
+            name="intfield",
+            data_type=CustomField.FieldDataType.INT,
+        )
+        doc = Document.objects.create(
+            title="Doc",
+            checksum="123",
+            mime_type="application/pdf",
+        )
+
+        response = self.client.patch(
+            f"/api/documents/{doc.pk}/",
+            {
+                "custom_fields": [
+                    {
+                        "field": cf_int.pk,
+                        "value": 2**31,  # overflow for PostgreSQL integer fields
+                    },
+                ],
+            },
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("custom_fields", response.data)
+        self.assertEqual(CustomFieldInstance.objects.count(), 0)
+
     def test_upload_with_webui_source(self):
         """
         GIVEN: A document with a source file

View File

@@ -219,6 +219,30 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(StoragePath.objects.count(), 1)

+    def test_api_create_storage_path_rejects_traversal(self):
+        """
+        GIVEN:
+            - API request to create a storage path
+            - Storage path attempts directory traversal
+        WHEN:
+            - API is called
+        THEN:
+            - Correct HTTP 400 response
+            - No storage path is created
+        """
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "name": "Traversal path",
+                    "path": "../../../../../tmp/proof",
+                },
+            ),
+            content_type="application/json",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(StoragePath.objects.count(), 1)
+
     def test_api_storage_path_placeholders(self):
         """
         GIVEN:

View File

@@ -581,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
             - Consume file should be called
         """
         doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
-        metadata_document_id = self.doc1.id
+        metadata_document_id = self.doc2.id
         user = User.objects.create(username="test_user")

         result = bulk_edit.merge(
@@ -606,7 +606,8 @@
         # With metadata_document_id overrides
         result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
         consume_file_args, _ = mock_consume_file.call_args
-        self.assertEqual(consume_file_args[1].title, "A (merged)")
+        self.assertEqual(consume_file_args[1].title, "B (merged)")
+        self.assertEqual(consume_file_args[1].created, self.doc2.created)

         self.assertEqual(result, "OK")

View File

@@ -1,6 +1,7 @@
 from datetime import datetime
 from unittest import mock

+from django.conf import settings
 from django.contrib.auth.models import User
 from django.test import SimpleTestCase
 from django.test import TestCase
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
         result = self._rewrite_with_now("added:today", fixed_now)

         # Should convert to UTC properly
         self.assertIn("added:[20250719", result)
+
+
+class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
+    def _assert_recreate_called(self, mock_create_in):
+        mock_create_in.assert_called_once()
+        path_arg, schema_arg = mock_create_in.call_args.args
+        self.assertEqual(path_arg, settings.INDEX_DIR)
+        self.assertEqual(schema_arg.__class__.__name__, "Schema")
+
+    def test_transient_missing_segment_does_not_force_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError once due to a
+              transient missing segment
+        THEN:
+            - Index is opened successfully on retry
+            - Index is not recreated
+        """
+        file_marker = settings.INDEX_DIR / "file_marker.txt"
+        file_marker.write_text("keep")
+        expected_index = object()
+
+        with (
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=[FileNotFoundError("missing"), expected_index],
+            ) as mock_open_dir,
+            mock.patch(
+                "documents.index.create_in",
+            ) as mock_create_in,
+            mock.patch(
+                "documents.index.rmtree",
+            ) as mock_rmtree,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, expected_index)
+        self.assertGreaterEqual(mock_open_dir.call_count, 2)
+        mock_rmtree.assert_not_called()
+        mock_create_in.assert_not_called()
+        self.assertEqual(file_marker.read_text(), "keep")
+
+    def test_transient_errors_exhaust_retries_and_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError multiple times due to
+              transient missing segments
+        THEN:
+            - Index is recreated after retries are exhausted
+        """
+        recreated_index = object()
+
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=FileNotFoundError("missing"),
+            ) as mock_open_dir,
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, recreated_index)
+        self.assertEqual(mock_open_dir.call_count, 4)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index after retries, recreating.",
+            cm.output[0],
+        )
+
+    def test_non_transient_error_recreates_index(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises a "non-transient" error
+        THEN:
+            - Index is recreated
+        """
+        recreated_index = object()
+
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=RuntimeError("boom"),
+            ),
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+
+        self.assertIs(ix, recreated_index)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index, recreating.",
+            cm.output[0],
+        )

View File

@@ -2094,6 +2094,68 @@ class TestWorkflows(
         doc.refresh_from_db()
         self.assertIsNone(doc.owner)

+    def test_workflow_scheduled_recurring_respects_latest_run(self):
+        """
+        GIVEN:
+            - Scheduled workflow marked as recurring with a 1-day interval
+            - Document that matches the trigger
+            - Two prior runs exist: one 2 days ago and one 1 hour ago
+        WHEN:
+            - Scheduled workflows are checked again
+        THEN:
+            - Workflow does not run because the most recent run is inside the interval
+        """
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
+            schedule_is_recurring=True,
+            schedule_recurring_interval_days=1,
+        )
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+            created=timezone.now().date() - timedelta(days=3),
+        )
+
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(days=2),
+        )
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(hours=1),
+        )
+
+        tasks.check_scheduled_workflows()
+
+        doc.refresh_from_db()
+        self.assertIsNone(doc.owner)
+        self.assertEqual(
+            WorkflowRun.objects.filter(
+                workflow=w,
+                document=doc,
+                type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            ).count(),
+            2,
+        )
+
     def test_workflow_scheduled_trigger_negative_offset_customfield(self):
         """
         GIVEN:

View File

@@ -708,6 +708,7 @@ class DocumentViewSet(
         "title",
         "correspondent__name",
         "document_type__name",
+        "storage_path__name",
         "created",
         "modified",
         "added",

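Adding `storage_path__name` to the ordering fields lets API clients sort the document list by storage path name. A hedged usage sketch (host and token are placeholders):

import requests

resp = requests.get(
    "http://localhost:8000/api/documents/",
    params={"ordering": "storage_path__name"},  # or "-storage_path__name" for descending
    headers={"Authorization": "Token REPLACE_ME"},
)
resp.raise_for_status()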
View File

@@ -1,6 +1,6 @@
 from typing import Final

-__version__: Final[tuple[int, int, int]] = (2, 20, 3)
+__version__: Final[tuple[int, int, int]] = (2, 20, 4)
 # Version string like X.Y.Z
 __full_version_str__: Final[str] = ".".join(map(str, __version__))
 # Version string like X.Y

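A quick check of the strings derived from the new tuple (assuming the X.Y string is built from the first two components, as the comments suggest):

__version__ = (2, 20, 4)
assert ".".join(map(str, __version__)) == "2.20.4"  # X.Y.Z
assert ".".join(map(str, __version__[:2])) == "2.20"  # X.Y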
uv.lock (generated)
View File

@@ -2115,7 +2115,7 @@ wheels = [

 [[package]]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.4"
 source = { virtual = "." }
 dependencies = [
     { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },