load sklearn modules only when training data has changed

This commit is contained in:
jonaswinkler 2021-02-15 11:25:25 +01:00
parent 5e669534f2
commit 7e88085377

View File

@ -95,9 +95,6 @@ class DocumentClassifier(object):
pickle.dump(self.document_type_classifier, f)
def train(self):
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
data = list()
labels_tags = list()
@ -162,6 +159,10 @@ class DocumentClassifier(object):
)
)
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
# Step 2: vectorize data
logger.debug("Vectorizing data...")
self.data_vectorizer = CountVectorizer(