Fixes a corrupted index not being handled by whoosh.create_in (#5950)

This commit is contained in:
Trenton H 2024-02-28 16:07:17 -08:00 committed by GitHub
parent 3d8de50b5a
commit fba416e8e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 4 deletions

View File

@ -5,6 +5,7 @@ from collections import Counter
from contextlib import contextmanager
from datetime import datetime
from datetime import timezone
from shutil import rmtree
from typing import Optional
from dateutil.parser import isoparse
@ -36,7 +37,6 @@ from whoosh.searching import Searcher
from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter
# from documents.models import CustomMetadata
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Note
@ -87,8 +87,11 @@ def open_index(recreate=False) -> FileIndex:
except Exception:
logger.exception("Error while opening the index, recreating.")
if not os.path.isdir(settings.INDEX_DIR):
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
# create_in doesn't handle corrupted indexes very well, so remove the directory entirely first
if os.path.isdir(settings.INDEX_DIR):
rmtree(settings.INDEX_DIR)
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
return create_in(settings.INDEX_DIR, get_schema())

View File

@ -1,6 +1,7 @@
import shutil
import tempfile
from collections.abc import Iterable
from pathlib import Path
from random import randint
from django.contrib.admin.models import LogEntry
@ -396,7 +397,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
mime_type="application/pdf",
)
self.index_dir = tempfile.mkdtemp()
self.index_dir = Path(tempfile.mkdtemp())
# TODO: we should not need the index here.
override_settings(INDEX_DIR=self.index_dir).enable()