diff --git a/src/documents/index.py b/src/documents/index.py index de651c13d..b787c7635 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -5,6 +5,7 @@ from collections import Counter from contextlib import contextmanager from datetime import datetime from datetime import timezone +from shutil import rmtree from typing import Optional from dateutil.parser import isoparse @@ -36,7 +37,6 @@ from whoosh.searching import Searcher from whoosh.util.times import timespan from whoosh.writing import AsyncWriter -# from documents.models import CustomMetadata from documents.models import CustomFieldInstance from documents.models import Document from documents.models import Note @@ -87,8 +87,11 @@ def open_index(recreate=False) -> FileIndex: except Exception: logger.exception("Error while opening the index, recreating.") - if not os.path.isdir(settings.INDEX_DIR): - settings.INDEX_DIR.mkdir(parents=True, exist_ok=True) + # create_in doesn't handle corrupted indexes very well, remove the directory entirely first + if os.path.isdir(settings.INDEX_DIR): + rmtree(settings.INDEX_DIR) + settings.INDEX_DIR.mkdir(parents=True, exist_ok=True) + return create_in(settings.INDEX_DIR, get_schema()) diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 34bdffe95..4626e2c08 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -1,6 +1,7 @@ import shutil import tempfile from collections.abc import Iterable +from pathlib import Path from random import randint from django.contrib.admin.models import LogEntry @@ -396,7 +397,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase): mime_type="application/pdf", ) - self.index_dir = tempfile.mkdtemp() + self.index_dir = Path(tempfile.mkdtemp()) # TODO: we should not need the index here. override_settings(INDEX_DIR=self.index_dir).enable()