From 7e88085377ec68edced2ce528eb0b347059abab6 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Mon, 15 Feb 2021 11:25:25 +0100 Subject: [PATCH] load sklearn modules only when training data has changed --- src/documents/classifier.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index b577997e3..47dd7dfc7 100755 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -95,9 +95,6 @@ class DocumentClassifier(object): pickle.dump(self.document_type_classifier, f) def train(self): - from sklearn.feature_extraction.text import CountVectorizer - from sklearn.neural_network import MLPClassifier - from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer data = list() labels_tags = list() @@ -162,6 +159,10 @@ class DocumentClassifier(object): ) ) + from sklearn.feature_extraction.text import CountVectorizer + from sklearn.neural_network import MLPClassifier + from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer + # Step 2: vectorize data logger.debug("Vectorizing data...") self.data_vectorizer = CountVectorizer(