# Mirror of https://github.com/paperless-ngx/paperless-ngx.git
# Synced 2025-11-21 04:36:53 -06:00
# 254 lines, 9.1 KiB, Python
from datetime import datetime
|
|
from unittest import mock
|
|
|
|
from django.contrib.auth.models import User
|
|
from django.test import SimpleTestCase
|
|
from django.test import TestCase
|
|
from django.test import override_settings
|
|
from django.utils.timezone import get_current_timezone
|
|
from django.utils.timezone import timezone
|
|
|
|
from documents import index
|
|
from documents.models import Document
|
|
from documents.tests.utils import DirectoriesMixin
|
|
|
|
|
|
class TestAutoComplete(DirectoriesMixin, TestCase):
    """
    Integration tests for the search index: autocomplete results, archive
    serial number handling on indexing, and timezone-aware date queries.
    """

    def test_auto_complete(self):
        """
        GIVEN:
            - Three indexed documents whose content contains terms starting
              with "tes"
        WHEN:
            - Autocomplete is requested for "tes" with various limits
        THEN:
            - The matching terms are returned in the asserted order
            - The limit caps the number of returned terms
        """
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
        )
        doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
        doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")

        index.add_or_update_document(doc1)
        index.add_or_update_document(doc2)
        index.add_or_update_document(doc3)

        ix = index.open_index()

        self.assertListEqual(
            index.autocomplete(ix, "tes"),
            [b"test2", b"test", b"test3"],
        )
        self.assertListEqual(
            index.autocomplete(ix, "tes", limit=3),
            [b"test2", b"test", b"test3"],
        )
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test2"])
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

    def test_archive_serial_number_ranging(self):
        """
        GIVEN:
            - Document with an archive serial number above schema allowed size
        WHEN:
            - Document is provided to the index
        THEN:
            - Error is logged
            - Document ASN is reset to 0 for the index
        """
        out_of_range_asn = Document.ARCHIVE_SERIAL_NUMBER_MAX + 1
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
            # yes, this is allowed, unless full_clean is run
            # DRF does call the validators, this test won't
            archive_serial_number=out_of_range_asn,
        )
        with self.assertLogs("paperless.index", level="ERROR") as cm:
            with mock.patch(
                "documents.index.AsyncWriter.update_document",
            ) as mocked_update_doc:
                index.add_or_update_document(doc1)

                mocked_update_doc.assert_called_once()
                _, kwargs = mocked_update_doc.call_args

                self.assertEqual(kwargs["asn"], 0)

                error_str = cm.output[0]
                # Build the expectation from the objects under test rather
                # than hard-coding the ASN and primary key: the pk is only 1
                # when the database sequence happens to start fresh.
                expected_str = (
                    "ERROR:paperless.index:Not indexing Archive Serial Number "
                    f"{out_of_range_asn} of document {doc1.pk}"
                )
                self.assertIn(expected_str, error_str)

    def test_archive_serial_number_is_none(self):
        """
        GIVEN:
            - Document with no archive serial number
        WHEN:
            - Document is provided to the index
        THEN:
            - ASN isn't touched
        """
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
        )
        with mock.patch(
            "documents.index.AsyncWriter.update_document",
        ) as mocked_update_doc:
            index.add_or_update_document(doc1)

            mocked_update_doc.assert_called_once()
            _, kwargs = mocked_update_doc.call_args

            self.assertIsNone(kwargs["asn"])

    @override_settings(TIME_ZONE="Pacific/Auckland")
    def test_added_today_respects_local_timezone_boundary(self):
        """
        GIVEN:
            - Document added at 1 AM local time on 2025-07-20, which is still
              2025-07-19 in UTC
            - Current time mocked to 3 PM local time on 2025-07-20
        WHEN:
            - The API is queried with added:today and added:yesterday
        THEN:
            - The document is returned for added:today
            - No document is returned for added:yesterday
        """
        tz = get_current_timezone()
        fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz)

        # Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day)
        local_dt = datetime(2025, 7, 20, 1, 0, 0, tzinfo=tz)
        utc_dt = local_dt.astimezone(timezone.utc)

        doc = Document.objects.create(
            title="Time zone",
            content="Testing added:today",
            checksum="edgecase123",
            added=utc_dt,
        )

        with index.open_index_writer() as writer:
            index.update_document(writer, doc)

        superuser = User.objects.create_superuser(username="testuser")
        self.client.force_login(superuser)

        # Both queries run under the mocked clock so the outcome never depends
        # on the real date the test suite is executed on.
        with mock.patch("documents.index.now", return_value=fixed_now):
            response = self.client.get("/api/documents/?query=added:today")
            results = response.json()["results"]
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0]["id"], doc.id)

            response = self.client.get("/api/documents/?query=added:yesterday")
            results = response.json()["results"]
            self.assertEqual(len(results), 0)
|
|
|
|
|
|
@override_settings(TIME_ZONE="UTC")
class TestRewriteNaturalDateKeywords(SimpleTestCase):
    """
    Unit tests for rewrite_natural_date_keywords function.
    """

    def _rewrite_with_now(self, query: str, now_dt: datetime) -> str:
        """
        Rewrite ``query`` while ``documents.index.now`` is patched to return
        ``now_dt``, and hand back the rewritten query.
        """
        frozen_clock = mock.patch("documents.index.now", return_value=now_dt)
        with frozen_clock:
            return index.rewrite_natural_date_keywords(query)

    def _assert_rewrite_contains(
        self,
        query: str,
        now_dt: datetime,
        *expected_fragments: str,
    ) -> str:
        """
        Rewrite ``query`` at the mocked time ``now_dt`` and assert that every
        expected fragment occurs in the output. Returns the rewritten query.
        """
        rewritten = self._rewrite_with_now(query, now_dt)
        for expected in expected_fragments:
            self.assertIn(expected, rewritten)
        return rewritten

    def test_range_keywords(self):
        """
        Test various different range keywords
        """
        sunday_afternoon = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        sunday_noon = datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc)
        mid_july = datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc)

        # Each entry: (query, mocked "now", fragments expected in the rewrite)
        cases = [
            ("added:today", sunday_afternoon, ("added:[20250720", "TO 20250720")),
            ("added:yesterday", sunday_afternoon, ("added:[20250719", "TO 20250719")),
            ("added:this month", mid_july, ("added:[20250701", "TO 20250731")),
            ("added:previous month", mid_july, ("added:[20250601", "TO 20250630")),
            ("added:this year", mid_july, ("added:[20250101", "TO 20250715")),
            ("added:previous year", mid_july, ("added:[20240101", "TO 20241231")),
            # Previous quarter from July 15, 2025 is April-June.
            ("added:previous quarter", mid_july, ("added:[20250401", "TO 20250630")),
            # July 20, 2025 is a Sunday (weekday 6) so previous week is July 7-13.
            ("added:previous week", sunday_noon, ("added:[20250707", "TO 20250713")),
        ]

        for query, now_dt, fragments in cases:
            with self.subTest(query=query):
                self._assert_rewrite_contains(query, now_dt, *fragments)

    def test_additional_fields(self):
        """
        Test that the created and modified fields are rewritten as well.
        """
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        field_expectations = (
            # created
            ("created:today", "created:[20250720"),
            # modified
            ("modified:today", "modified:[20250720"),
        )
        for field_query, range_start in field_expectations:
            self._assert_rewrite_contains(field_query, fixed_now, range_start)

    def test_basic_syntax_variants(self):
        """
        Test that quoting, casing, and multi-clause queries are parsed.
        """
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        today_range_start = "added:[20250720"

        # quoted keywords: rewrite both variants first, then check each one
        quoted_rewrites = [
            self._rewrite_with_now(quoted_query, fixed_now)
            for quoted_query in ('added:"today"', "added:'today'")
        ]
        for rewritten in quoted_rewrites:
            self.assertIn(today_range_start, rewritten)

        # case insensitivity
        for query in ("added:TODAY", "added:Today", "added:ToDaY"):
            with self.subTest(case_variant=query):
                self._assert_rewrite_contains(query, fixed_now, today_range_start)

        # multiple clauses
        combined = self._rewrite_with_now("added:today created:yesterday", fixed_now)
        self.assertIn(today_range_start, combined)
        self.assertIn("created:[20250719", combined)

    def test_no_match(self):
        """
        Test that queries without keywords are unchanged.
        """
        untouched_query = "title:test content:example"
        rewritten = index.rewrite_natural_date_keywords(untouched_query)
        self.assertEqual(untouched_query, rewritten)

    @override_settings(TIME_ZONE="Pacific/Auckland")
    def test_timezone_awareness(self):
        """
        Test timezone conversion.
        """
        # July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC
        auckland_tz = get_current_timezone()
        fixed_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=auckland_tz)
        rewritten = self._rewrite_with_now("added:today", fixed_now)
        # Should convert to UTC properly
        self.assertIn("added:[20250719", rewritten)
|