revert a faulty change that caused memory usage to explode

This commit is contained in:
jonaswinkler 2021-02-13 19:51:04 +01:00
parent f91f4d71bb
commit b48e67d714
4 changed files with 18 additions and 37 deletions

@@ -5,7 +5,6 @@ import pickle
import re import re
from django.conf import settings from django.conf import settings
from django.core.cache import cache
from documents.models import Document, MatchingModel from documents.models import Document, MatchingModel
@@ -31,29 +30,23 @@ def load_classifier():
) )
return None return None
version = os.stat(settings.MODEL_FILE).st_mtime classifier = DocumentClassifier()
try:
classifier.load()
classifier = cache.get("paperless-classifier", version=version) except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
if not classifier: logger.exception(
classifier = DocumentClassifier() f"Unrecoverable error while loading document "
try: f"classification model, deleting model file."
classifier.load() )
cache.set("paperless-classifier", classifier, os.unlink(settings.MODEL_FILE)
version=version, timeout=86400) classifier = None
except (EOFError, IncompatibleClassifierVersionError) as e: except OSError as e:
# there's something wrong with the model file. logger.error(
logger.exception( f"Error while loading document classification model: {str(e)}"
f"Unrecoverable error while loading document " )
f"classification model, deleting model file." classifier = None
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
return classifier return classifier

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
import pytest
from django.conf import settings from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
@@ -233,7 +234,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier(self, load): def test_load_classifier(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -242,6 +242,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}}) @override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")) @override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
def test_load_classifier_cached(self): def test_load_classifier_cached(self):
classifier = load_classifier() classifier = load_classifier()
self.assertIsNotNone(classifier) self.assertIsNotNone(classifier)
@@ -250,7 +251,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
classifier2 = load_classifier() classifier2 = load_classifier()
load.assert_not_called() load.assert_not_called()
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load): def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -260,7 +260,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load): def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()

@@ -52,7 +52,6 @@ class TestTasks(DirectoriesMixin, TestCase):
load_classifier.assert_called_once() load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE)) self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
def test_train_classifier(self): def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test") doc = Document.objects.create(correspondent=c, content="test", title="test")

@@ -169,16 +169,6 @@ CHANNEL_LAYERS = {
}, },
} }
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
"OPTIONS": {
"CLIENT_CLASS": "django_redis.client.DefaultClient",
}
}
}
############################################################################### ###############################################################################
# Security # # Security #
############################################################################### ###############################################################################