From fba416e8e1d9f6a8d0aa2fa92149757657d9cf96 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:07:17 -0800 Subject: [PATCH] Fixes a corrupted index not being handled by whoosh.create_in (#5950) --- src/documents/index.py | 9 ++++++--- src/documents/tests/test_matchables.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index de651c13d..b787c7635 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -5,6 +5,7 @@ from collections import Counter from contextlib import contextmanager from datetime import datetime from datetime import timezone +from shutil import rmtree from typing import Optional from dateutil.parser import isoparse @@ -36,7 +37,6 @@ from whoosh.searching import Searcher from whoosh.util.times import timespan from whoosh.writing import AsyncWriter -# from documents.models import CustomMetadata from documents.models import CustomFieldInstance from documents.models import Document from documents.models import Note @@ -87,8 +87,11 @@ def open_index(recreate=False) -> FileIndex: except Exception: logger.exception("Error while opening the index, recreating.") - if not os.path.isdir(settings.INDEX_DIR): - settings.INDEX_DIR.mkdir(parents=True, exist_ok=True) + # create_in doesn't handle corrupted indexes very well, remove the directory entirely first + if os.path.isdir(settings.INDEX_DIR): + rmtree(settings.INDEX_DIR) + settings.INDEX_DIR.mkdir(parents=True, exist_ok=True) + return create_in(settings.INDEX_DIR, get_schema()) diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 34bdffe95..4626e2c08 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -1,6 +1,7 @@ import shutil import tempfile from collections.abc import Iterable +from pathlib import Path from random import randint from django.contrib.admin.models import LogEntry @@ -396,7 +397,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase): mime_type="application/pdf", ) - self.index_dir = tempfile.mkdtemp() + self.index_dir = Path(tempfile.mkdtemp()) # TODO: we should not need the index here. override_settings(INDEX_DIR=self.index_dir).enable()