Compare commits

..

2 Commits

Author SHA1 Message Date
shamoon
0b265aadc6 Add error handling w/ retry when opening index 2025-12-17 09:14:26 -08:00
shamoon
84511d07cd Add test to mock issue 2025-12-17 09:07:22 -08:00
2 changed files with 58 additions and 5 deletions

View File

@@ -10,6 +10,7 @@ from datetime import time
from datetime import timedelta from datetime import timedelta
from datetime import timezone from datetime import timezone
from shutil import rmtree from shutil import rmtree
from time import sleep
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Literal from typing import Literal
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
from whoosh.idsets import BitSet from whoosh.idsets import BitSet
from whoosh.idsets import DocIdSet from whoosh.idsets import DocIdSet
from whoosh.index import FileIndex from whoosh.index import FileIndex
from whoosh.index import LockError
from whoosh.index import create_in from whoosh.index import create_in
from whoosh.index import exists_in from whoosh.index import exists_in
from whoosh.index import open_dir from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
def open_index(*, recreate=False) -> FileIndex: def open_index(*, recreate=False) -> FileIndex:
try: transient_exceptions = (FileNotFoundError, LockError)
if exists_in(settings.INDEX_DIR) and not recreate: max_retries = 3
return open_dir(settings.INDEX_DIR, schema=get_schema()) retry_delay = 0.1
except Exception:
logger.exception("Error while opening the index, recreating.") for attempt in range(max_retries + 1):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
break
except transient_exceptions as exc:
is_last_attempt = attempt == max_retries or recreate
if is_last_attempt:
logger.exception(
"Error while opening the index after retries, recreating.",
)
break
logger.warning(
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
attempt + 1,
max_retries + 1,
exc,
)
sleep(retry_delay)
except Exception:
logger.exception("Error while opening the index, recreating.")
break
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
if settings.INDEX_DIR.is_dir(): if settings.INDEX_DIR.is_dir():

View File

@@ -1,6 +1,7 @@
from datetime import datetime from datetime import datetime
from unittest import mock from unittest import mock
from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import SimpleTestCase from django.test import SimpleTestCase
from django.test import TestCase from django.test import TestCase
@@ -251,3 +252,31 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
result = self._rewrite_with_now("added:today", fixed_now) result = self._rewrite_with_now("added:today", fixed_now)
# Should convert to UTC properly # Should convert to UTC properly
self.assertIn("added:[20250719", result) self.assertIn("added:[20250719", result)
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
    """Checks how ``index.open_index`` reacts to a transient failure of ``open_dir``."""

    def test_transient_missing_segment_does_not_force_recreate(self):
        """A single ``FileNotFoundError`` from ``open_dir`` must be retried.

        ``open_dir`` is mocked to fail once and then return a sentinel object.
        The test expects ``open_index`` to hand back that sentinel without
        calling ``rmtree`` or ``create_in``, and expects a marker file placed
        in the index directory to still be there afterwards.
        """
        # NOTE(review): presumably DirectoriesMixin points settings.INDEX_DIR at
        # a scratch directory for the duration of the test - confirm.
        file_marker = settings.INDEX_DIR / "file_marker.txt"
        file_marker.write_text("keep")

        # Any unique object works: only identity is compared below.
        expected_index = object()

        with (
            mock.patch("documents.index.exists_in", return_value=True),
            mock.patch(
                "documents.index.open_dir",
                # First call fails, second call succeeds.
                side_effect=[FileNotFoundError("missing"), expected_index],
            ) as mock_open_dir,
            mock.patch(
                "documents.index.create_in",
            ) as mock_create_in,
            mock.patch(
                "documents.index.rmtree",
            ) as mock_rmtree,
            # Stub the retry back-off so the test does not actually wait
            # between the failed attempt and the successful one.
            mock.patch("documents.index.sleep") as mock_sleep,
        ):
            ix = index.open_index()

        self.assertIs(ix, expected_index)
        self.assertGreaterEqual(mock_open_dir.call_count, 2)
        # Exactly one failed attempt precedes the success, so one back-off.
        mock_sleep.assert_called_once()
        mock_rmtree.assert_not_called()
        mock_create_in.assert_not_called()
        self.assertEqual(file_marker.read_text(), "keep")