Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-04-02 13:45:10 -05:00)
Load sklearn modules only when training data has changed
This commit is contained in:
Parent: 5e669534f2
Commit: 7e88085377
@ -95,9 +95,6 @@ class DocumentClassifier(object):
|
|||||||
pickle.dump(self.document_type_classifier, f)
|
pickle.dump(self.document_type_classifier, f)
|
||||||
|
|
||||||
def train(self):
|
def train(self):
|
||||||
from sklearn.feature_extraction.text import CountVectorizer
|
|
||||||
from sklearn.neural_network import MLPClassifier
|
|
||||||
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
|
||||||
|
|
||||||
data = list()
|
data = list()
|
||||||
labels_tags = list()
|
labels_tags = list()
|
||||||
@ -162,6 +159,10 @@ class DocumentClassifier(object):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
|
from sklearn.neural_network import MLPClassifier
|
||||||
|
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||||
|
|
||||||
# Step 2: vectorize data
|
# Step 2: vectorize data
|
||||||
logger.debug("Vectorizing data...")
|
logger.debug("Vectorizing data...")
|
||||||
self.data_vectorizer = CountVectorizer(
|
self.data_vectorizer = CountVectorizer(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user