removed matching model fields, automatic classifier reloading, added automatic_classification field to matching model

This commit is contained in:
Jonas Winkler
2018-09-04 18:40:26 +02:00
parent 804b3d98f9
commit 9d4155a907
8 changed files with 126 additions and 143 deletions

View File

@@ -42,9 +42,14 @@ class Command(Renderable, BaseCommand):
# Step 2: vectorize data
logging.getLogger(__name__).info("Vectorizing data...")
clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 5), min_df=0.05)
clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(2, 6), min_df=0.1)
data_vectorized = clf.data_vectorizer.fit_transform(data)
print(clf.data_vectorizer.vocabulary_)
logging.getLogger(__name__).info("Shape of vectorized data: {}".format(data_vectorized.shape))
clf.tags_binarizer = MultiLabelBinarizer()
labels_tags_vectorized = clf.tags_binarizer.fit_transform(labels_tags)

View File

@@ -46,7 +46,11 @@ class Command(Renderable, BaseCommand):
documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
logging.getLogger(__name__).info("Loading classifier")
clf = DocumentClassifier.load_classifier()
try:
clf = DocumentClassifier.load_classifier()
except FileNotFoundError:
logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
return
for document in documents: