mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-01 01:46:16 +00:00
Fix caching, maybe
This commit is contained in:
@@ -53,8 +53,24 @@ class ClassifierModelCorruptError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _model_cache_token() -> tuple[str, int, int]:
    """Return a cache key describing the current on-disk classifier model file.

    The key is ``(path, mtime_ns, size)`` for ``settings.MODEL_FILE``. It is
    only meant to be compared for equality: any change to the file's
    modification time or size produces a different key.

    Returns:
        ``(str(path), st_mtime_ns, st_size)`` when the file can be stat'ed,
        otherwise ``(str(path), 0, 0)`` (file missing or not accessible).
    """
    p = Path(settings.MODEL_FILE)
    try:
        # A single stat() call: no separate exists() check, so there is no
        # window in which the file can vanish between the check and the stat,
        # and every OSError (missing file, permission problem, ...) is handled
        # in one place.
        st = p.stat()
    except OSError:
        return (str(p), 0, 0)
    # Use the nanosecond timestamp rather than int(st_mtime): truncating to
    # whole seconds can miss a rewrite that happens within the same second
    # and leaves the file size unchanged.
    return (str(p), st.st_mtime_ns, st.st_size)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
@lru_cache(maxsize=1)
|
||||||
def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
|
def _load_classifier_cached(
|
||||||
|
token: tuple[str, int, int],
|
||||||
|
*,
|
||||||
|
raise_exception: bool = False,
|
||||||
|
) -> DocumentClassifier | None:
|
||||||
|
# token used only for cache key; logic depends on current settings
|
||||||
if not settings.MODEL_FILE.is_file():
|
if not settings.MODEL_FILE.is_file():
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Document classification model does not exist (yet), not "
|
"Document classification model does not exist (yet), not "
|
||||||
@@ -65,20 +81,23 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
|
|||||||
classifier = DocumentClassifier()
|
classifier = DocumentClassifier()
|
||||||
try:
|
try:
|
||||||
classifier.load()
|
classifier.load()
|
||||||
|
|
||||||
except IncompatibleClassifierVersionError as e:
|
except IncompatibleClassifierVersionError as e:
|
||||||
logger.info(f"Classifier version incompatible: {e.message}, will re-train")
|
logger.info(f"Classifier version incompatible: {e.message}, will re-train")
|
||||||
Path(settings.MODEL_FILE).unlink()
|
try:
|
||||||
|
Path(settings.MODEL_FILE).unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
classifier = None
|
classifier = None
|
||||||
if raise_exception:
|
if raise_exception:
|
||||||
raise e
|
raise e
|
||||||
except ClassifierModelCorruptError as e:
|
except ClassifierModelCorruptError as e:
|
||||||
# there's something wrong with the model file.
|
|
||||||
logger.exception(
|
logger.exception(
|
||||||
"Unrecoverable error while loading document "
|
"Unrecoverable error while loading document classification model, deleting model file.",
|
||||||
"classification model, deleting model file.",
|
|
||||||
)
|
)
|
||||||
Path(settings.MODEL_FILE).unlink
|
try:
|
||||||
|
Path(settings.MODEL_FILE).unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
classifier = None
|
classifier = None
|
||||||
if raise_exception:
|
if raise_exception:
|
||||||
raise e
|
raise e
|
||||||
@@ -96,6 +115,11 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
|
|||||||
return classifier
|
return classifier
|
||||||
|
|
||||||
|
|
||||||
|
def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
    """Load the document classifier through the token-keyed cached loader.

    A fresh token is computed with ``_model_cache_token()`` on every call and
    passed to ``_load_classifier_cached`` as its first argument, so the cached
    result is reused only while the token stays the same.

    Args:
        raise_exception: Forwarded unchanged to ``_load_classifier_cached``.

    Returns:
        Whatever ``_load_classifier_cached`` returns for the current token.
    """
    return _load_classifier_cached(
        _model_cache_token(),
        raise_exception=raise_exception,
    )
|
||||||
|
|
||||||
|
|
||||||
class DocumentClassifier:
|
class DocumentClassifier:
|
||||||
# v7 - Updated scikit-learn package version
|
# v7 - Updated scikit-learn package version
|
||||||
# v8 - Added storage path classifier
|
# v8 - Added storage path classifier
|
||||||
@@ -223,6 +247,11 @@ class DocumentClassifier:
|
|||||||
joblib.dump(state, target_file_temp, compress=3)
|
joblib.dump(state, target_file_temp, compress=3)
|
||||||
|
|
||||||
target_file_temp.rename(target_file)
|
target_file_temp.rename(target_file)
|
||||||
|
# Invalidate cached classifier loader so subsequent calls see the new file
|
||||||
|
try:
|
||||||
|
_load_classifier_cached.cache_clear()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
def train(self) -> bool:
|
def train(self) -> bool:
|
||||||
# Get non-inbox documents
|
# Get non-inbox documents
|
||||||
|
Reference in New Issue
Block a user