mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
removed matching model fields, automatic classifier reloading, added automatic_classification field to matching model
This commit is contained in:
@@ -42,9 +42,14 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
# Step 2: vectorize data
|
||||
logging.getLogger(__name__).info("Vectorizing data...")
|
||||
clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 5), min_df=0.05)
|
||||
clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(2, 6), min_df=0.1)
|
||||
data_vectorized = clf.data_vectorizer.fit_transform(data)
|
||||
|
||||
print(clf.data_vectorizer.vocabulary_)
|
||||
|
||||
logging.getLogger(__name__).info("Shape of vectorized data: {}".format(data_vectorized.shape))
|
||||
|
||||
|
||||
clf.tags_binarizer = MultiLabelBinarizer()
|
||||
labels_tags_vectorized = clf.tags_binarizer.fit_transform(labels_tags)
|
||||
|
||||
|
@@ -46,7 +46,11 @@ class Command(Renderable, BaseCommand):
|
||||
documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
|
||||
|
||||
logging.getLogger(__name__).info("Loading classifier")
|
||||
clf = DocumentClassifier.load_classifier()
|
||||
try:
|
||||
clf = DocumentClassifier.load_classifier()
|
||||
except FileNotFoundError:
|
||||
logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
|
||||
return
|
||||
|
||||
|
||||
for document in documents:
|
||||
|
Reference in New Issue
Block a user