Files
paperless-ngx/src/documents/tests/test_index.py

254 lines
9.1 KiB
Python

from datetime import datetime
from unittest import mock
from django.contrib.auth.models import User
from django.test import SimpleTestCase
from django.test import TestCase
from django.test import override_settings
from django.utils.timezone import get_current_timezone
from django.utils.timezone import timezone
from documents import index
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
class TestAutoComplete(DirectoriesMixin, TestCase):
def test_auto_complete(self):
doc1 = Document.objects.create(
title="doc1",
checksum="A",
content="test test2 test3",
)
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
index.add_or_update_document(doc1)
index.add_or_update_document(doc2)
index.add_or_update_document(doc3)
ix = index.open_index()
self.assertListEqual(
index.autocomplete(ix, "tes"),
[b"test2", b"test", b"test3"],
)
self.assertListEqual(
index.autocomplete(ix, "tes", limit=3),
[b"test2", b"test", b"test3"],
)
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test2"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
def test_archive_serial_number_ranging(self):
"""
GIVEN:
- Document with an archive serial number above schema allowed size
WHEN:
- Document is provided to the index
THEN:
- Error is logged
- Document ASN is reset to 0 for the index
"""
doc1 = Document.objects.create(
title="doc1",
checksum="A",
content="test test2 test3",
# yes, this is allowed, unless full_clean is run
# DRF does call the validators, this test won't
archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
)
with self.assertLogs("paperless.index", level="ERROR") as cm:
with mock.patch(
"documents.index.AsyncWriter.update_document",
) as mocked_update_doc:
index.add_or_update_document(doc1)
mocked_update_doc.assert_called_once()
_, kwargs = mocked_update_doc.call_args
self.assertEqual(kwargs["asn"], 0)
error_str = cm.output[0]
expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
self.assertIn(expected_str, error_str)
def test_archive_serial_number_is_none(self):
"""
GIVEN:
- Document with no archive serial number
WHEN:
- Document is provided to the index
THEN:
- ASN isn't touched
"""
doc1 = Document.objects.create(
title="doc1",
checksum="A",
content="test test2 test3",
)
with mock.patch(
"documents.index.AsyncWriter.update_document",
) as mocked_update_doc:
index.add_or_update_document(doc1)
mocked_update_doc.assert_called_once()
_, kwargs = mocked_update_doc.call_args
self.assertIsNone(kwargs["asn"])
@override_settings(TIME_ZONE="Pacific/Auckland")
def test_added_today_respects_local_timezone_boundary(self):
tz = get_current_timezone()
fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz)
# Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day)
local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz)
utc_dt = local_dt.astimezone(timezone.utc)
doc = Document.objects.create(
title="Time zone",
content="Testing added:today",
checksum="edgecase123",
added=utc_dt,
)
with index.open_index_writer() as writer:
index.update_document(writer, doc)
superuser = User.objects.create_superuser(username="testuser")
self.client.force_login(superuser)
with mock.patch("documents.index.now", return_value=fixed_now):
response = self.client.get("/api/documents/?query=added:today")
results = response.json()["results"]
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], doc.id)
response = self.client.get("/api/documents/?query=added:yesterday")
results = response.json()["results"]
self.assertEqual(len(results), 0)
@override_settings(TIME_ZONE="UTC")
class TestRewriteNaturalDateKeywords(SimpleTestCase):
"""
Unit tests for rewrite_natural_date_keywords function.
"""
def _rewrite_with_now(self, query: str, now_dt: datetime) -> str:
with mock.patch("documents.index.now", return_value=now_dt):
return index.rewrite_natural_date_keywords(query)
def _assert_rewrite_contains(
self,
query: str,
now_dt: datetime,
*expected_fragments: str,
) -> str:
result = self._rewrite_with_now(query, now_dt)
for fragment in expected_fragments:
self.assertIn(fragment, result)
return result
def test_range_keywords(self):
"""
Test various different range keywords
"""
cases = [
(
"added:today",
datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc),
("added:[20250720", "TO 20250720"),
),
(
"added:yesterday",
datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc),
("added:[20250719", "TO 20250719"),
),
(
"added:this month",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250701", "TO 20250731"),
),
(
"added:previous month",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250601", "TO 20250630"),
),
(
"added:this year",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250101", "TO 20250715"),
),
(
"added:previous year",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20240101", "TO 20241231"),
),
# Previous quarter from July 15, 2025 is April-June.
(
"added:previous quarter",
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250401", "TO 20250630"),
),
# July 20, 2025 is a Sunday (weekday 6) so previous week is July 7-13.
(
"added:previous week",
datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc),
("added:[20250707", "TO 20250713"),
),
]
for query, now_dt, fragments in cases:
with self.subTest(query=query):
self._assert_rewrite_contains(query, now_dt, *fragments)
def test_additional_fields(self):
fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
# created
self._assert_rewrite_contains("created:today", fixed_now, "created:[20250720")
# modified
self._assert_rewrite_contains("modified:today", fixed_now, "modified:[20250720")
def test_basic_syntax_variants(self):
"""
Test that quoting, casing, and multi-clause queries are parsed.
"""
fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
# quoted keywords
result1 = self._rewrite_with_now('added:"today"', fixed_now)
result2 = self._rewrite_with_now("added:'today'", fixed_now)
self.assertIn("added:[20250720", result1)
self.assertIn("added:[20250720", result2)
# case insensitivity
for query in ("added:TODAY", "added:Today", "added:ToDaY"):
with self.subTest(case_variant=query):
self._assert_rewrite_contains(query, fixed_now, "added:[20250720")
# multiple clauses
result = self._rewrite_with_now("added:today created:yesterday", fixed_now)
self.assertIn("added:[20250720", result)
self.assertIn("created:[20250719", result)
def test_no_match(self):
"""
Test that queries without keywords are unchanged.
"""
query = "title:test content:example"
result = index.rewrite_natural_date_keywords(query)
self.assertEqual(query, result)
@override_settings(TIME_ZONE="Pacific/Auckland")
def test_timezone_awareness(self):
"""
Test timezone conversion.
"""
# July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC
fixed_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=get_current_timezone())
result = self._rewrite_with_now("added:today", fixed_now)
# Should convert to UTC properly
self.assertIn("added:[20250719", result)