mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
load sklearn modules only when training data has changed
This commit is contained in:
parent
5e669534f2
commit
7e88085377
@@ -95,9 +95,6 @@ class DocumentClassifier(object):
|
||||
pickle.dump(self.document_type_classifier, f)
|
||||
|
||||
def train(self):
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||
|
||||
data = list()
|
||||
labels_tags = list()
|
||||
@@ -162,6 +159,10 @@ class DocumentClassifier(object):
|
||||
)
|
||||
)
|
||||
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||
|
||||
# Step 2: vectorize data
|
||||
logger.debug("Vectorizing data...")
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
|
Loading…
x
Reference in New Issue
Block a user