Fix loading / error handling

This commit is contained in:
shamoon
2025-08-31 14:52:32 -07:00
parent b9afc9b65d
commit 887b314744

View File

@@ -135,31 +135,44 @@ class DocumentClassifier:
# Catch warnings for processing # Catch warnings for processing
with warnings.catch_warnings(record=True) as w: with warnings.catch_warnings(record=True) as w:
state = None
try: try:
state = joblib.load(settings.MODEL_FILE, mmap_mode="r") state = joblib.load(settings.MODEL_FILE, mmap_mode="r")
except ValueError:
# Some environments may fail to mmap small files; fall back to normal load
state = joblib.load(settings.MODEL_FILE, mmap_mode=None)
except Exception as err: except Exception as err:
# As a fallback, try to detect old pickle-based and mark incompatible # Fallback to old pickle-based format. Try to read the version and a field to
# distinguish truly corrupt files from incompatible versions.
try: try:
with Path(settings.MODEL_FILE).open("rb") as f: with Path(settings.MODEL_FILE).open("rb") as f:
_ = pickle.load(f) _version = pickle.load(f)
raise IncompatibleClassifierVersionError( try:
"Cannot load classifier, incompatible versions.", _ = pickle.load(f)
) from err except Exception as inner:
except IncompatibleClassifierVersionError: raise ClassifierModelCorruptError from inner
# Old, incompatible format
raise IncompatibleClassifierVersionError(
"Cannot load classifier, incompatible versions.",
) from err
except (
IncompatibleClassifierVersionError,
ClassifierModelCorruptError,
):
raise raise
except Exception: except Exception:
# Not even a readable pickle header # Not even a readable pickle header
raise ClassifierModelCorruptError from err raise ClassifierModelCorruptError from err
try: if (
if ( not isinstance(state, dict)
not isinstance(state, dict) or state.get("format_version") != self.FORMAT_VERSION
or state.get("format_version") != self.FORMAT_VERSION ):
): raise IncompatibleClassifierVersionError(
raise IncompatibleClassifierVersionError( "Cannot load classifier, incompatible versions.",
"Cannot load classifier, incompatible versions.", )
)
try:
self.last_doc_change_time = state.get("last_doc_change_time") self.last_doc_change_time = state.get("last_doc_change_time")
self.last_auto_type_hash = state.get("last_auto_type_hash") self.last_auto_type_hash = state.get("last_auto_type_hash")
@@ -171,8 +184,6 @@ class DocumentClassifier:
self.correspondent_classifier = state.get("correspondent_classifier") self.correspondent_classifier = state.get("correspondent_classifier")
self.document_type_classifier = state.get("document_type_classifier") self.document_type_classifier = state.get("document_type_classifier")
self.storage_path_classifier = state.get("storage_path_classifier") self.storage_path_classifier = state.get("storage_path_classifier")
except IncompatibleClassifierVersionError:
raise
except Exception as err: except Exception as err:
raise ClassifierModelCorruptError from err raise ClassifierModelCorruptError from err