Fixes a corrupted index not being handled by whoosh.create_in (#5950)

This commit is contained in:
Trenton H 2024-02-28 16:07:17 -08:00 committed by GitHub
parent 3d8de50b5a
commit fba416e8e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 4 deletions

View File

@@ -5,6 +5,7 @@ from collections import Counter
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
from shutil import rmtree
from typing import Optional from typing import Optional
from dateutil.parser import isoparse from dateutil.parser import isoparse
@@ -36,7 +37,6 @@ from whoosh.searching import Searcher
from whoosh.util.times import timespan from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
# from documents.models import CustomMetadata
from documents.models import CustomFieldInstance from documents.models import CustomFieldInstance
from documents.models import Document from documents.models import Document
from documents.models import Note from documents.models import Note
@@ -87,8 +87,11 @@ def open_index(recreate=False) -> FileIndex:
except Exception: except Exception:
logger.exception("Error while opening the index, recreating.") logger.exception("Error while opening the index, recreating.")
- if not os.path.isdir(settings.INDEX_DIR):
-     settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
+ # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
+ if os.path.isdir(settings.INDEX_DIR):
+     rmtree(settings.INDEX_DIR)
+ settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
return create_in(settings.INDEX_DIR, get_schema()) return create_in(settings.INDEX_DIR, get_schema())

View File

@@ -1,6 +1,7 @@
import shutil import shutil
import tempfile import tempfile
from collections.abc import Iterable from collections.abc import Iterable
from pathlib import Path
from random import randint from random import randint
from django.contrib.admin.models import LogEntry from django.contrib.admin.models import LogEntry
@@ -396,7 +397,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
mime_type="application/pdf", mime_type="application/pdf",
) )
- self.index_dir = tempfile.mkdtemp()
+ self.index_dir = Path(tempfile.mkdtemp())
# TODO: we should not need the index here. # TODO: we should not need the index here.
override_settings(INDEX_DIR=self.index_dir).enable() override_settings(INDEX_DIR=self.index_dir).enable()