mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-23 23:49:08 -06:00
Merge branch 'dev' into feature-ai
This commit is contained in:
@@ -13,6 +13,7 @@ from shutil import rmtree
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
from django.utils import timezone as django_timezone
|
||||
from django.utils.timezone import get_current_timezone
|
||||
@@ -533,32 +534,84 @@ def get_permissions_criterias(user: User | None = None) -> list:
|
||||
def rewrite_natural_date_keywords(query_string: str) -> str:
|
||||
"""
|
||||
Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.
|
||||
This resolves timezone issues with date parsing in Whoosh as well as adding support for more
|
||||
natural date keywords.
|
||||
"""
|
||||
|
||||
tz = get_current_timezone()
|
||||
local_now = now().astimezone(tz)
|
||||
|
||||
today = local_now.date()
|
||||
yesterday = today - timedelta(days=1)
|
||||
|
||||
ranges = {
|
||||
"today": (
|
||||
datetime.combine(today, time.min, tzinfo=tz),
|
||||
datetime.combine(today, time.max, tzinfo=tz),
|
||||
),
|
||||
"yesterday": (
|
||||
datetime.combine(yesterday, time.min, tzinfo=tz),
|
||||
datetime.combine(yesterday, time.max, tzinfo=tz),
|
||||
),
|
||||
}
|
||||
|
||||
pattern = r"(\b(?:added|created))\s*:\s*[\"']?(today|yesterday)[\"']?"
|
||||
# all supported Keywords
|
||||
pattern = r"(\b(?:added|created|modified))\s*:\s*[\"']?(today|yesterday|this month|previous month|previous week|previous quarter|this year|previous year)[\"']?"
|
||||
|
||||
def repl(m):
|
||||
field, keyword = m.group(1), m.group(2)
|
||||
start, end = ranges[keyword]
|
||||
field = m.group(1)
|
||||
keyword = m.group(2).lower()
|
||||
|
||||
match keyword:
|
||||
case "today":
|
||||
start = datetime.combine(today, time.min, tzinfo=tz)
|
||||
end = datetime.combine(today, time.max, tzinfo=tz)
|
||||
|
||||
case "yesterday":
|
||||
yesterday = today - timedelta(days=1)
|
||||
start = datetime.combine(yesterday, time.min, tzinfo=tz)
|
||||
end = datetime.combine(yesterday, time.max, tzinfo=tz)
|
||||
|
||||
case "this month":
|
||||
start = datetime(local_now.year, local_now.month, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = start + relativedelta(months=1) - timedelta(seconds=1)
|
||||
|
||||
case "previous month":
|
||||
this_month_start = datetime(
|
||||
local_now.year,
|
||||
local_now.month,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_month_start - relativedelta(months=1)
|
||||
end = this_month_start - timedelta(seconds=1)
|
||||
|
||||
case "this year":
|
||||
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = datetime.combine(today, time.max, tzinfo=tz)
|
||||
|
||||
case "previous week":
|
||||
days_since_monday = local_now.weekday()
|
||||
this_week_start = datetime.combine(
|
||||
today - timedelta(days=days_since_monday),
|
||||
time.min,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_week_start - timedelta(days=7)
|
||||
end = this_week_start - timedelta(seconds=1)
|
||||
|
||||
case "previous quarter":
|
||||
current_quarter = (local_now.month - 1) // 3 + 1
|
||||
this_quarter_start_month = (current_quarter - 1) * 3 + 1
|
||||
this_quarter_start = datetime(
|
||||
local_now.year,
|
||||
this_quarter_start_month,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_quarter_start - relativedelta(months=3)
|
||||
end = this_quarter_start - timedelta(seconds=1)
|
||||
|
||||
case "previous year":
|
||||
start = datetime(local_now.year - 1, 1, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
||||
|
||||
# Convert to UTC and format
|
||||
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
return f"{field}:[{start_str} TO {end_str}]"
|
||||
|
||||
return re.sub(pattern, repl, query_string)
|
||||
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
||||
|
||||
@@ -399,6 +399,7 @@ class CannotMoveFilesException(Exception):
|
||||
@receiver(models.signals.post_save, sender=CustomFieldInstance, weak=False)
|
||||
@receiver(models.signals.m2m_changed, sender=Document.tags.through, weak=False)
|
||||
@receiver(models.signals.post_save, sender=Document, weak=False)
|
||||
@shared_task
|
||||
def update_filename_and_move_files(
|
||||
sender,
|
||||
instance: Document | CustomFieldInstance,
|
||||
@@ -571,7 +572,7 @@ def check_paths_and_prune_custom_fields(sender, instance: CustomField, **kwargs)
|
||||
cf_instance.save(update_fields=["value_select"])
|
||||
|
||||
# Update the filename and move files if necessary
|
||||
update_filename_and_move_files(sender, cf_instance)
|
||||
update_filename_and_move_files.delay(sender, cf_instance)
|
||||
|
||||
|
||||
@receiver(models.signals.post_delete, sender=CustomField)
|
||||
|
||||
@@ -4,6 +4,7 @@ from unittest.mock import ANY
|
||||
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
@@ -211,6 +212,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
||||
],
|
||||
)
|
||||
|
||||
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
|
||||
def test_custom_field_select_options_pruned(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -242,7 +244,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
||||
CustomFieldInstance.objects.create(
|
||||
document=doc,
|
||||
field=custom_field_select,
|
||||
value_text="abc-123",
|
||||
value_select="def-456",
|
||||
)
|
||||
|
||||
resp = self.client.patch(
|
||||
|
||||
@@ -569,7 +569,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertEqual(generate_filename(doc), Path("document_apple.pdf"))
|
||||
|
||||
# handler should not have been called
|
||||
self.assertEqual(m.call_count, 0)
|
||||
self.assertEqual(m.delay.call_count, 0)
|
||||
cf.extra_data = {
|
||||
"select_options": [
|
||||
{"label": "aubergine", "id": "abc123"},
|
||||
@@ -579,8 +579,8 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
}
|
||||
cf.save()
|
||||
self.assertEqual(generate_filename(doc), Path("document_aubergine.pdf"))
|
||||
# handler should have been called
|
||||
self.assertEqual(m.call_count, 1)
|
||||
# handler should have been called via delay
|
||||
self.assertEqual(m.delay.call_count, 1)
|
||||
|
||||
|
||||
class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import SimpleTestCase
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.utils.timezone import get_current_timezone
|
||||
@@ -127,3 +128,126 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
response = self.client.get("/api/documents/?query=added:yesterday")
|
||||
results = response.json()["results"]
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
|
||||
@override_settings(TIME_ZONE="UTC")
class TestRewriteNaturalDateKeywords(SimpleTestCase):
    """
    Unit tests for the rewrite_natural_date_keywords function.
    """

    def _rewrite_with_now(self, query: str, now_dt: datetime) -> str:
        # Pin "now" as seen by documents.index so the computed ranges are deterministic.
        frozen_clock = mock.patch("documents.index.now", return_value=now_dt)
        with frozen_clock:
            return index.rewrite_natural_date_keywords(query)

    def _assert_rewrite_contains(
        self,
        query: str,
        now_dt: datetime,
        *expected_fragments: str,
    ) -> str:
        # Rewrite the query at the given instant and require every fragment to appear.
        rewritten = self._rewrite_with_now(query, now_dt)
        for expected in expected_fragments:
            self.assertIn(expected, rewritten)
        return rewritten

    def test_range_keywords(self):
        """
        Each supported keyword is rewritten to the expected start and end dates.
        """
        sunday_afternoon = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        mid_july_noon = datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc)
        sunday_noon = datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc)

        # query -> (mocked "now", fragments that must appear in the rewrite)
        expectations = {
            "added:today": (sunday_afternoon, ("added:[20250720", "TO 20250720")),
            "added:yesterday": (sunday_afternoon, ("added:[20250719", "TO 20250719")),
            "added:this month": (mid_july_noon, ("added:[20250701", "TO 20250731")),
            "added:previous month": (
                mid_july_noon,
                ("added:[20250601", "TO 20250630"),
            ),
            "added:this year": (mid_july_noon, ("added:[20250101", "TO 20250715")),
            "added:previous year": (
                mid_july_noon,
                ("added:[20240101", "TO 20241231"),
            ),
            # Previous quarter from July 15, 2025 is April-June.
            "added:previous quarter": (
                mid_july_noon,
                ("added:[20250401", "TO 20250630"),
            ),
            # July 20, 2025 is a Sunday (weekday 6) so previous week is July 7-13.
            "added:previous week": (
                sunday_noon,
                ("added:[20250707", "TO 20250713"),
            ),
        }

        for query, (now_dt, fragments) in expectations.items():
            with self.subTest(query=query):
                self._assert_rewrite_contains(query, now_dt, *fragments)

    def test_additional_fields(self):
        """
        The created and modified fields are rewritten as well.
        """
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        field_expectations = (
            ("created:today", "created:[20250720"),
            ("modified:today", "modified:[20250720"),
        )
        for query, fragment in field_expectations:
            self._assert_rewrite_contains(query, fixed_now, fragment)

    def test_basic_syntax_variants(self):
        """
        Quoted keywords, mixed casing and multi-clause queries are all rewritten.
        """
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)

        # quoted keywords (double and single quotes)
        double_quoted = self._rewrite_with_now('added:"today"', fixed_now)
        single_quoted = self._rewrite_with_now("added:'today'", fixed_now)
        for rewritten in (double_quoted, single_quoted):
            self.assertIn("added:[20250720", rewritten)

        # case insensitivity
        for query in ("added:TODAY", "added:Today", "added:ToDaY"):
            with self.subTest(case_variant=query):
                self._assert_rewrite_contains(query, fixed_now, "added:[20250720")

        # multiple clauses in one query
        self._assert_rewrite_contains(
            "added:today created:yesterday",
            fixed_now,
            "added:[20250720",
            "created:[20250719",
        )

    def test_no_match(self):
        """
        A query without any date keyword comes back unchanged.
        """
        query = "title:test content:example"
        self.assertEqual(query, index.rewrite_natural_date_keywords(query))

    @override_settings(TIME_ZONE="Pacific/Auckland")
    def test_timezone_awareness(self):
        """
        The local day boundary is converted to UTC.
        """
        # July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC
        auckland_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=get_current_timezone())
        # Local midnight falls on the previous UTC day, so the range starts on July 19.
        self._assert_rewrite_contains("added:today", auckland_now, "added:[20250719")
|
||||
@@ -571,7 +571,7 @@ class TestExportImport(
|
||||
with self.assertRaises(CommandError) as e:
|
||||
call_command(*args)
|
||||
|
||||
self.assertEqual("That path isn't a directory", str(e))
|
||||
self.assertEqual("That path doesn't exist", str(e.exception))
|
||||
|
||||
def test_export_target_exists_but_is_file(self):
|
||||
"""
|
||||
@@ -589,7 +589,7 @@ class TestExportImport(
|
||||
with self.assertRaises(CommandError) as e:
|
||||
call_command(*args)
|
||||
|
||||
self.assertEqual("That path isn't a directory", str(e))
|
||||
self.assertEqual("That path isn't a directory", str(e.exception))
|
||||
|
||||
def test_export_target_not_writable(self):
|
||||
"""
|
||||
@@ -608,7 +608,10 @@ class TestExportImport(
|
||||
with self.assertRaises(CommandError) as e:
|
||||
call_command(*args)
|
||||
|
||||
self.assertEqual("That path doesn't appear to be writable", str(e))
|
||||
self.assertEqual(
|
||||
"That path doesn't appear to be writable",
|
||||
str(e.exception),
|
||||
)
|
||||
|
||||
def test_no_archive(self):
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
"""
|
||||
with self.assertRaises(CommandError) as e:
|
||||
self.call_command("--ratio", "-1")
|
||||
self.assertIn("The ratio must be between 0 and 100", str(e))
|
||||
self.assertIn("The ratio must be between 0 and 100", str(e.exception))
|
||||
|
||||
def test_invalid_ratio_upper_limit(self):
|
||||
"""
|
||||
@@ -47,7 +47,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
"""
|
||||
with self.assertRaises(CommandError) as e:
|
||||
self.call_command("--ratio", "101")
|
||||
self.assertIn("The ratio must be between 0 and 100", str(e))
|
||||
self.assertIn("The ratio must be between 0 and 100", str(e.exception))
|
||||
|
||||
def test_invalid_process_count(self):
|
||||
"""
|
||||
@@ -60,7 +60,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
"""
|
||||
with self.assertRaises(CommandError) as e:
|
||||
self.call_command("--processes", "0")
|
||||
self.assertIn("There must be at least 1 process", str(e))
|
||||
self.assertIn("There must be at least 1 process", str(e.exception))
|
||||
|
||||
def test_no_matches(self):
|
||||
"""
|
||||
|
||||
@@ -40,10 +40,10 @@ class TestCommandImport(
|
||||
"--no-progress-bar",
|
||||
str(self.dirs.scratch_dir),
|
||||
)
|
||||
self.assertIn(
|
||||
"That directory doesn't appear to contain a manifest.json file.",
|
||||
str(e),
|
||||
)
|
||||
self.assertIn(
|
||||
"That directory doesn't appear to contain a manifest.json file.",
|
||||
str(e.exception),
|
||||
)
|
||||
|
||||
def test_check_manifest_malformed(self):
|
||||
"""
|
||||
@@ -66,10 +66,10 @@ class TestCommandImport(
|
||||
"--no-progress-bar",
|
||||
str(self.dirs.scratch_dir),
|
||||
)
|
||||
self.assertIn(
|
||||
"The manifest file contains a record which does not refer to an actual document file.",
|
||||
str(e),
|
||||
)
|
||||
self.assertIn(
|
||||
"The manifest file contains a record which does not refer to an actual document file.",
|
||||
str(e.exception),
|
||||
)
|
||||
|
||||
def test_check_manifest_file_not_found(self):
|
||||
"""
|
||||
@@ -95,7 +95,7 @@ class TestCommandImport(
|
||||
"--no-progress-bar",
|
||||
str(self.dirs.scratch_dir),
|
||||
)
|
||||
self.assertIn('The manifest file refers to "noexist.pdf"', str(e))
|
||||
self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))
|
||||
|
||||
def test_import_permission_error(self):
|
||||
"""
|
||||
@@ -129,14 +129,14 @@ class TestCommandImport(
|
||||
cmd.data_only = False
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd.check_manifest_validity()
|
||||
self.assertInt("Failed to read from original file", str(cm.exception))
|
||||
self.assertIn("Failed to read from original file", str(cm.exception))
|
||||
|
||||
original_path.chmod(0o444)
|
||||
archive_path.chmod(0o222)
|
||||
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd.check_manifest_validity()
|
||||
self.assertInt("Failed to read from archive file", str(cm.exception))
|
||||
self.assertIn("Failed to read from archive file", str(cm.exception))
|
||||
|
||||
def test_import_source_not_existing(self):
|
||||
"""
|
||||
@@ -149,7 +149,7 @@ class TestCommandImport(
|
||||
"""
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
call_command("document_importer", Path("/tmp/notapath"))
|
||||
self.assertInt("That path doesn't exist", str(cm.exception))
|
||||
self.assertIn("That path doesn't exist", str(cm.exception))
|
||||
|
||||
def test_import_source_not_readable(self):
|
||||
"""
|
||||
@@ -165,10 +165,10 @@ class TestCommandImport(
|
||||
path.chmod(0o222)
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
call_command("document_importer", path)
|
||||
self.assertInt(
|
||||
"That path doesn't appear to be readable",
|
||||
str(cm.exception),
|
||||
)
|
||||
self.assertIn(
|
||||
"That path doesn't appear to be readable",
|
||||
str(cm.exception),
|
||||
)
|
||||
|
||||
def test_import_source_does_not_exist(self):
|
||||
"""
|
||||
@@ -185,8 +185,7 @@ class TestCommandImport(
|
||||
|
||||
with self.assertRaises(CommandError) as e:
|
||||
call_command("document_importer", "--no-progress-bar", str(path))
|
||||
|
||||
self.assertIn("That path doesn't exist", str(e))
|
||||
self.assertIn("That path doesn't exist", str(e.exception))
|
||||
|
||||
def test_import_files_exist(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user