Attempt to fix classifier caching: key the loader cache on the model file's path, mtime and size

2025-09-01 01:46:16 +00:00 · 2025-08-31 16:08:32 -07:00
parent fc5b9bdf59
commit bc22a282d6
1 changed files with 36 additions and 7 deletions
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -53,8 +53,24 @@ class ClassifierModelCorruptError(Exception):
    pass


+def _model_cache_token() -> tuple[str, int, int]:
+    """Return (path, mtime in ns, size) of the model file, or (path, 0, 0) if it cannot be stat'ed."""
+    p = Path(settings.MODEL_FILE)
+    try:
+        st = p.stat()
+    except OSError:
+        return (str(p), 0, 0)
+    # Nanosecond mtime: whole seconds would miss a same-size rewrite within one second.
+    return (str(p), st.st_mtime_ns, st.st_size)
+
+
@lru_cache(maxsize=1)
-def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
+def _load_classifier_cached(
+    token: tuple[str, int, int],
+    *,
+    raise_exception: bool = False,
+) -> DocumentClassifier | None:
+    # token used only for cache key; logic depends on current settings
    if not settings.MODEL_FILE.is_file():
        logger.debug(
            "Document classification model does not exist (yet), not "
@@ -65,20 +81,23 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
    classifier = DocumentClassifier()
    try:
        classifier.load()
-
    except IncompatibleClassifierVersionError as e:
        logger.info(f"Classifier version incompatible: {e.message}, will re-train")
-        Path(settings.MODEL_FILE).unlink()
+        try:
+            Path(settings.MODEL_FILE).unlink()
+        except Exception:
+            pass
        classifier = None
        if raise_exception:
            raise e
    except ClassifierModelCorruptError as e:
-        # there's something wrong with the model file.
        logger.exception(
-            "Unrecoverable error while loading document "
-            "classification model, deleting model file.",
+            "Unrecoverable error while loading document classification model, deleting model file.",
        )
-        Path(settings.MODEL_FILE).unlink
+        try:
+            Path(settings.MODEL_FILE).unlink()
+        except Exception:
+            pass
        classifier = None
        if raise_exception:
            raise e
@@ -96,6 +115,11 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
    return classifier


+def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
+    # The token from the model file's current state is the cache key of the cached loader.
+    return _load_classifier_cached(_model_cache_token(), raise_exception=raise_exception)
+
+
 class DocumentClassifier:
    # v7 - Updated scikit-learn package version
    # v8 - Added storage path classifier
@@ -223,6 +247,11 @@ class DocumentClassifier:
        joblib.dump(state, target_file_temp, compress=3)

        target_file_temp.rename(target_file)
+        # Invalidate cached classifier loader so subsequent calls see the new file
+        try:
+            _load_classifier_cached.cache_clear()
+        except Exception:
+            pass

    def train(self) -> bool:
        # Get non-inbox documents