mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-08 21:24:26 -06:00
Fixhancement: add error handling and retry when opening index (#11731)
This commit is contained in:
@@ -10,6 +10,7 @@ from datetime import time
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from shutil import rmtree
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
|
||||
from whoosh.idsets import BitSet
|
||||
from whoosh.idsets import DocIdSet
|
||||
from whoosh.index import FileIndex
|
||||
from whoosh.index import LockError
|
||||
from whoosh.index import create_in
|
||||
from whoosh.index import exists_in
|
||||
from whoosh.index import open_dir
|
||||
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
|
||||
|
||||
|
||||
def open_index(*, recreate=False) -> FileIndex:
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
except Exception:
|
||||
logger.exception("Error while opening the index, recreating.")
|
||||
transient_exceptions = (FileNotFoundError, LockError)
|
||||
max_retries = 3
|
||||
retry_delay = 0.1
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
break
|
||||
except transient_exceptions as exc:
|
||||
is_last_attempt = attempt == max_retries or recreate
|
||||
if is_last_attempt:
|
||||
logger.exception(
|
||||
"Error while opening the index after retries, recreating.",
|
||||
)
|
||||
break
|
||||
|
||||
logger.warning(
|
||||
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
|
||||
attempt + 1,
|
||||
max_retries + 1,
|
||||
exc,
|
||||
)
|
||||
sleep(retry_delay)
|
||||
except Exception:
|
||||
logger.exception("Error while opening the index, recreating.")
|
||||
break
|
||||
|
||||
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
|
||||
if settings.INDEX_DIR.is_dir():
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import SimpleTestCase
|
||||
from django.test import TestCase
|
||||
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
|
||||
result = self._rewrite_with_now("added:today", fixed_now)
|
||||
# Should convert to UTC properly
|
||||
self.assertIn("added:[20250719", result)
|
||||
|
||||
|
||||
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
    """
    Exercise the error handling of index.open_index: transient failures are
    retried, and the index is recreated when opening keeps failing or fails
    with a non-transient error.

    Every test patches documents.index.sleep so the retry back-off never
    performs a real delay.
    """

    def _assert_recreate_called(self, mock_create_in):
        """Assert create_in was called exactly once with INDEX_DIR and a Schema."""
        mock_create_in.assert_called_once()
        path_arg, schema_arg = mock_create_in.call_args.args
        self.assertEqual(path_arg, settings.INDEX_DIR)
        # Compared by class name, so the Schema class does not need to be in scope.
        self.assertEqual(schema_arg.__class__.__name__, "Schema")

    def test_transient_missing_segment_does_not_force_recreate(self):
        """
        GIVEN:
            - Index directory exists
        WHEN:
            - open_index is called
            - Opening the index raises FileNotFoundError once due to a
              transient missing segment
        THEN:
            - Index is opened successfully on retry
            - Index is not recreated
        """
        # Marker file proves the index directory was left untouched.
        file_marker = settings.INDEX_DIR / "file_marker.txt"
        file_marker.write_text("keep")
        expected_index = object()

        with (
            mock.patch("documents.index.exists_in", return_value=True),
            mock.patch(
                "documents.index.open_dir",
                side_effect=[FileNotFoundError("missing"), expected_index],
            ) as mock_open_dir,
            mock.patch(
                "documents.index.create_in",
            ) as mock_create_in,
            mock.patch(
                "documents.index.rmtree",
            ) as mock_rmtree,
            mock.patch("documents.index.sleep") as mock_sleep,
        ):
            ix = index.open_index()

        self.assertIs(ix, expected_index)
        # One failed attempt plus one successful retry: exactly two opens
        # separated by a single back-off.
        self.assertEqual(mock_open_dir.call_count, 2)
        mock_sleep.assert_called_once()
        mock_rmtree.assert_not_called()
        mock_create_in.assert_not_called()
        self.assertEqual(file_marker.read_text(), "keep")

    def test_transient_errors_exhaust_retries_and_recreate(self):
        """
        GIVEN:
            - Index directory exists
        WHEN:
            - open_index is called
            - Opening the index raises FileNotFoundError multiple times due to
              transient missing segments
        THEN:
            - Index is recreated after retries are exhausted
        """
        recreated_index = object()

        with (
            self.assertLogs("paperless.index", level="ERROR") as cm,
            mock.patch("documents.index.exists_in", return_value=True),
            mock.patch(
                "documents.index.open_dir",
                side_effect=FileNotFoundError("missing"),
            ) as mock_open_dir,
            mock.patch("documents.index.rmtree") as mock_rmtree,
            mock.patch(
                "documents.index.create_in",
                return_value=recreated_index,
            ) as mock_create_in,
            mock.patch("documents.index.sleep") as mock_sleep,
        ):
            ix = index.open_index()

        self.assertIs(ix, recreated_index)
        # Initial attempt plus three retries; the final attempt gives up
        # without sleeping again, so only three back-offs happen.
        self.assertEqual(mock_open_dir.call_count, 4)
        self.assertEqual(mock_sleep.call_count, 3)
        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
        self._assert_recreate_called(mock_create_in)
        self.assertIn(
            "Error while opening the index after retries, recreating.",
            cm.output[0],
        )

    def test_non_transient_error_recreates_index(self):
        """
        GIVEN:
            - Index directory exists
        WHEN:
            - open_index is called
            - Opening the index raises a "non-transient" error
        THEN:
            - Index is recreated
            - No retry is attempted
        """
        recreated_index = object()

        with (
            self.assertLogs("paperless.index", level="ERROR") as cm,
            mock.patch("documents.index.exists_in", return_value=True),
            mock.patch(
                "documents.index.open_dir",
                side_effect=RuntimeError("boom"),
            ) as mock_open_dir,
            mock.patch("documents.index.rmtree") as mock_rmtree,
            mock.patch(
                "documents.index.create_in",
                return_value=recreated_index,
            ) as mock_create_in,
            mock.patch("documents.index.sleep") as mock_sleep,
        ):
            ix = index.open_index()

        self.assertIs(ix, recreated_index)
        # A non-transient error must skip the retry loop entirely.
        mock_open_dir.assert_called_once()
        mock_sleep.assert_not_called()
        mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
        self._assert_recreate_called(mock_create_in)
        self.assertIn(
            "Error while opening the index, recreating.",
            cm.output[0],
        )
|
||||
|
||||
Reference in New Issue
Block a user