Merge branch 'dev' into feature-pw-removal-workflow-action

shamoon committed 2026-01-09 20:45:05 -08:00 (via GitHub)
55 changed files with 4084 additions and 3908 deletions


@@ -10,6 +10,7 @@ from datetime import time
 from datetime import timedelta
 from datetime import timezone
 from shutil import rmtree
+from time import sleep
 from typing import TYPE_CHECKING
 from typing import Literal
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
 from whoosh.idsets import BitSet
 from whoosh.idsets import DocIdSet
 from whoosh.index import FileIndex
+from whoosh.index import LockError
 from whoosh.index import create_in
 from whoosh.index import exists_in
 from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
 def open_index(*, recreate=False) -> FileIndex:
-    try:
-        if exists_in(settings.INDEX_DIR) and not recreate:
-            return open_dir(settings.INDEX_DIR, schema=get_schema())
-    except Exception:
-        logger.exception("Error while opening the index, recreating.")
+    transient_exceptions = (FileNotFoundError, LockError)
+    max_retries = 3
+    retry_delay = 0.1
+
+    for attempt in range(max_retries + 1):
+        try:
+            if exists_in(settings.INDEX_DIR) and not recreate:
+                return open_dir(settings.INDEX_DIR, schema=get_schema())
+            break
+        except transient_exceptions as exc:
+            is_last_attempt = attempt == max_retries or recreate
+            if is_last_attempt:
+                logger.exception(
+                    "Error while opening the index after retries, recreating.",
+                )
+                break
+            logger.warning(
+                "Transient error while opening the index (attempt %s/%s): %s. Retrying.",
+                attempt + 1,
+                max_retries + 1,
+                exc,
+            )
+            sleep(retry_delay)
+        except Exception:
+            logger.exception("Error while opening the index, recreating.")
+            break
 
     # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
     if settings.INDEX_DIR.is_dir():

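The change above bounds recovery: only FileNotFoundError and whoosh's LockError are treated as transient and retried (up to 3 retries, 0.1 s apart); any other error, or exhausted retries, falls through to the recreation path below the loop. Stripped of the Whoosh specifics, the pattern reduces to the sketch below; the helper name open_with_retry and its parameters are illustrative assumptions, not project code.

import logging
from time import sleep

logger = logging.getLogger(__name__)

def open_with_retry(opener, transient=(FileNotFoundError,), max_retries=3, delay=0.1):
    """Retry `opener` on transient errors; return None to signal 'recreate'."""
    for attempt in range(max_retries + 1):
        try:
            return opener()
        except transient as exc:
            if attempt == max_retries:
                # Retries exhausted: give up and let the caller recreate.
                logger.exception("Error after %s attempts, recreating.", attempt + 1)
                return None
            logger.warning(
                "Transient error (attempt %s/%s): %s. Retrying.",
                attempt + 1,
                max_retries + 1,
                exc,
            )
            sleep(delay)
        except Exception:
            # Non-transient errors skip the retry loop entirely.
            logger.exception("Error while opening the index, recreating.")
            return None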

@@ -18,6 +18,8 @@ from django.core.exceptions import ValidationError
 from django.core.validators import DecimalValidator
 from django.core.validators import EmailValidator
 from django.core.validators import MaxLengthValidator
+from django.core.validators import MaxValueValidator
+from django.core.validators import MinValueValidator
 from django.core.validators import RegexValidator
 from django.core.validators import integer_validator
 from django.db.models import Count
@@ -875,6 +877,13 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
             uri_validator(data["value"])
         elif field.data_type == CustomField.FieldDataType.INT:
             integer_validator(data["value"])
+            try:
+                value_int = int(data["value"])
+            except (TypeError, ValueError):
+                raise serializers.ValidationError("Enter a valid integer.")
+            # Keep values within the PostgreSQL integer range
+            MinValueValidator(-2147483648)(value_int)
+            MaxValueValidator(2147483647)(value_int)
         elif (
             field.data_type == CustomField.FieldDataType.MONETARY
             and data["value"] != ""

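The literals -2147483648 and 2147483647 are the bounds of PostgreSQL's 4-byte signed integer column type (-2**31 through 2**31 - 1), which is why the API test further down submits 2**31 as the overflow value. A minimal standalone sketch of the same check follows; the helper name validate_pg_int is hypothetical, while MinValueValidator and MaxValueValidator are the real Django validators imported above.

from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator

PG_INT_MIN = -(2**31)   # -2147483648, smallest PostgreSQL "integer"
PG_INT_MAX = 2**31 - 1  # 2147483647, largest PostgreSQL "integer"

def validate_pg_int(value) -> int:  # hypothetical helper, not project code
    value_int = int(value)  # TypeError/ValueError here means "not an integer"
    MinValueValidator(PG_INT_MIN)(value_int)  # raises ValidationError if too small
    MaxValueValidator(PG_INT_MAX)(value_int)  # raises ValidationError if too large
    return value_int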

@@ -493,7 +493,7 @@ def check_scheduled_workflows():
                 trigger.schedule_is_recurring
                 and workflow_runs.exists()
                 and (
-                    workflow_runs.last().run_at
+                    workflow_runs.first().run_at
                     > now
                     - datetime.timedelta(
                         days=trigger.schedule_recurring_interval_days,

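This one-line fix swaps .last() for .first() when checking whether a recurring schedule already ran inside its interval. That is only correct if the workflow_runs queryset is ordered newest-first; the recurring-interval test added below (two runs, 2 days and 1 hour old) pins down that .first() must yield the most recent run. A hedged illustration, assuming an explicit "-run_at" ordering that may differ from the actual queryset definition:

# Assumed ordering for illustration; the real queryset's ordering is not shown here.
workflow_runs = WorkflowRun.objects.filter(workflow=workflow).order_by("-run_at")

latest = workflow_runs.first()  # newest run under "-run_at" ordering
oldest = workflow_runs.last()   # oldest run; comparing against this re-ran workflows too early

# Recurring check: skip the workflow if the latest run is still inside the interval.
if latest.run_at > now - datetime.timedelta(days=trigger.schedule_recurring_interval_days):
    pass  # too soon to run again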

@@ -1664,6 +1664,44 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.consume_file_mock.assert_not_called()
 
+    def test_patch_document_integer_custom_field_out_of_range(self):
+        """
+        GIVEN:
+            - An integer custom field
+            - A document
+        WHEN:
+            - Patching the document with an integer value exceeding PostgreSQL's range
+        THEN:
+            - HTTP 400 is returned (validation catches the overflow)
+            - No custom field instance is created
+        """
+        cf_int = CustomField.objects.create(
+            name="intfield",
+            data_type=CustomField.FieldDataType.INT,
+        )
+        doc = Document.objects.create(
+            title="Doc",
+            checksum="123",
+            mime_type="application/pdf",
+        )
+        response = self.client.patch(
+            f"/api/documents/{doc.pk}/",
+            {
+                "custom_fields": [
+                    {
+                        "field": cf_int.pk,
+                        "value": 2**31,  # overflow for PostgreSQL integer fields
+                    },
+                ],
+            },
+            format="json",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("custom_fields", response.data)
+        self.assertEqual(CustomFieldInstance.objects.count(), 0)
+
     def test_upload_with_webui_source(self):
         """
         GIVEN: A document with a source file


@@ -1,6 +1,7 @@
 from datetime import datetime
 from unittest import mock
 
+from django.conf import settings
 from django.contrib.auth.models import User
 from django.test import SimpleTestCase
 from django.test import TestCase
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
         result = self._rewrite_with_now("added:today", fixed_now)
         # Should convert to UTC properly
         self.assertIn("added:[20250719", result)
+
+
+class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
+    def _assert_recreate_called(self, mock_create_in):
+        mock_create_in.assert_called_once()
+        path_arg, schema_arg = mock_create_in.call_args.args
+        self.assertEqual(path_arg, settings.INDEX_DIR)
+        self.assertEqual(schema_arg.__class__.__name__, "Schema")
+
+    def test_transient_missing_segment_does_not_force_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError once due to a
+              transient missing segment
+        THEN:
+            - Index is opened successfully on retry
+            - Index is not recreated
+        """
+        file_marker = settings.INDEX_DIR / "file_marker.txt"
+        file_marker.write_text("keep")
+        expected_index = object()
+        with (
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=[FileNotFoundError("missing"), expected_index],
+            ) as mock_open_dir,
+            mock.patch(
+                "documents.index.create_in",
+            ) as mock_create_in,
+            mock.patch(
+                "documents.index.rmtree",
+            ) as mock_rmtree,
+        ):
+            ix = index.open_index()
+        self.assertIs(ix, expected_index)
+        self.assertGreaterEqual(mock_open_dir.call_count, 2)
+        mock_rmtree.assert_not_called()
+        mock_create_in.assert_not_called()
+        self.assertEqual(file_marker.read_text(), "keep")
+
+    def test_transient_errors_exhaust_retries_and_recreate(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises FileNotFoundError multiple times due to
+              transient missing segments
+        THEN:
+            - Index is recreated after retries are exhausted
+        """
+        recreated_index = object()
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=FileNotFoundError("missing"),
+            ) as mock_open_dir,
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+        self.assertIs(ix, recreated_index)
+        self.assertEqual(mock_open_dir.call_count, 4)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index after retries, recreating.",
+            cm.output[0],
+        )
+
+    def test_non_transient_error_recreates_index(self):
+        """
+        GIVEN:
+            - Index directory exists
+        WHEN:
+            - open_index is called
+            - Opening the index raises a "non-transient" error
+        THEN:
+            - Index is recreated
+        """
+        recreated_index = object()
+        with (
+            self.assertLogs("paperless.index", level="ERROR") as cm,
+            mock.patch("documents.index.exists_in", return_value=True),
+            mock.patch(
+                "documents.index.open_dir",
+                side_effect=RuntimeError("boom"),
+            ),
+            mock.patch("documents.index.rmtree") as mock_rmtree,
+            mock.patch(
+                "documents.index.create_in",
+                return_value=recreated_index,
+            ) as mock_create_in,
+        ):
+            ix = index.open_index()
+        self.assertIs(ix, recreated_index)
+        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
+        self._assert_recreate_called(mock_create_in)
+        self.assertIn(
+            "Error while opening the index, recreating.",
+            cm.output[0],
+        )

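These tests drive open_index's retry loop from the outside using a detail of unittest.mock: when side_effect is a list, each call to the mock consumes the next item, and items that are exception instances (or classes) are raised rather than returned. A minimal self-contained sketch:

from unittest import mock

# First call raises; second call returns the sentinel value.
opener = mock.Mock(side_effect=[FileNotFoundError("missing"), "opened-index"])

try:
    opener()  # raises FileNotFoundError("missing")
except FileNotFoundError:
    result = opener()  # returns "opened-index"

assert result == "opened-index"
assert opener.call_count == 2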

@@ -2096,6 +2096,68 @@ class TestWorkflows(
         doc.refresh_from_db()
         self.assertIsNone(doc.owner)
 
+    def test_workflow_scheduled_recurring_respects_latest_run(self):
+        """
+        GIVEN:
+            - Scheduled workflow marked as recurring with a 1-day interval
+            - Document that matches the trigger
+            - Two prior runs exist: one 2 days ago and one 1 hour ago
+        WHEN:
+            - Scheduled workflows are checked again
+        THEN:
+            - Workflow does not run because the most recent run is inside the interval
+        """
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
+            schedule_is_recurring=True,
+            schedule_recurring_interval_days=1,
+        )
+        action = WorkflowAction.objects.create(
+            assign_title="Doc assign owner",
+            assign_owner=self.user2,
+        )
+        w = Workflow.objects.create(
+            name="Workflow 1",
+            order=0,
+        )
+        w.triggers.add(trigger)
+        w.actions.add(action)
+        w.save()
+        doc = Document.objects.create(
+            title="sample test",
+            correspondent=self.c,
+            original_filename="sample.pdf",
+            created=timezone.now().date() - timedelta(days=3),
+        )
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(days=2),
+        )
+        WorkflowRun.objects.create(
+            workflow=w,
+            document=doc,
+            type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            run_at=timezone.now() - timedelta(hours=1),
+        )
+
+        tasks.check_scheduled_workflows()
+
+        doc.refresh_from_db()
+        self.assertIsNone(doc.owner)
+        self.assertEqual(
+            WorkflowRun.objects.filter(
+                workflow=w,
+                document=doc,
+                type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
+            ).count(),
+            2,
+        )
+
     def test_workflow_scheduled_trigger_negative_offset_customfield(self):
         """
         GIVEN: