revert a faulty change that caused memory usage to explode #537

This commit is contained in:
jonaswinkler 2021-02-13 19:51:04 +01:00
parent 269b7aec38
commit c946263f31
4 changed files with 18 additions and 37 deletions

View File

@@ -5,7 +5,6 @@ import pickle
import re import re
from django.conf import settings from django.conf import settings
from django.core.cache import cache
from documents.models import Document, MatchingModel from documents.models import Document, MatchingModel
@@ -31,29 +30,23 @@ def load_classifier():
) )
return None return None
version = os.stat(settings.MODEL_FILE).st_mtime classifier = DocumentClassifier()
try:
classifier.load()
classifier = cache.get("paperless-classifier", version=version) except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
if not classifier: logger.exception(
classifier = DocumentClassifier() f"Unrecoverable error while loading document "
try: f"classification model, deleting model file."
classifier.load() )
cache.set("paperless-classifier", classifier, os.unlink(settings.MODEL_FILE)
version=version, timeout=86400) classifier = None
except (EOFError, IncompatibleClassifierVersionError) as e: except OSError as e:
# there's something wrong with the model file. logger.error(
logger.exception( f"Error while loading document classification model: {str(e)}"
f"Unrecoverable error while loading document " )
f"classification model, deleting model file." classifier = None
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
return classifier return classifier

View File

@@ -3,6 +3,7 @@ import tempfile
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
import pytest
from django.conf import settings from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
@@ -233,7 +234,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier(self, load): def test_load_classifier(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -242,6 +242,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}}) @override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")) @override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
def test_load_classifier_cached(self): def test_load_classifier_cached(self):
classifier = load_classifier() classifier = load_classifier()
self.assertIsNotNone(classifier) self.assertIsNotNone(classifier)
@@ -250,7 +251,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
classifier2 = load_classifier() classifier2 = load_classifier()
load.assert_not_called() load.assert_not_called()
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load): def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
@@ -260,7 +260,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(os.path.exists(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load): def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()

View File

@@ -52,7 +52,6 @@ class TestTasks(DirectoriesMixin, TestCase):
load_classifier.assert_called_once() load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE)) self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}})
def test_train_classifier(self): def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test") c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test") doc = Document.objects.create(correspondent=c, content="test", title="test")

View File

@@ -169,16 +169,6 @@ CHANNEL_LAYERS = {
}, },
} }
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
"OPTIONS": {
"CLIENT_CLASS": "django_redis.client.DefaultClient",
}
}
}
############################################################################### ###############################################################################
# Security # # Security #
############################################################################### ###############################################################################