mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	removed matching model fields, automatic classifier reloading, added automatic_classification field to matching model
This commit is contained in:
		@@ -42,9 +42,14 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
        # Step 2: vectorize data
 | 
			
		||||
        logging.getLogger(__name__).info("Vectorizing data...")
 | 
			
		||||
        clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 5), min_df=0.05)
 | 
			
		||||
        clf.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(2, 6), min_df=0.1)
 | 
			
		||||
        data_vectorized = clf.data_vectorizer.fit_transform(data)
 | 
			
		||||
 | 
			
		||||
        print(clf.data_vectorizer.vocabulary_)
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info("Shape of vectorized data: {}".format(data_vectorized.shape))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        clf.tags_binarizer = MultiLabelBinarizer()
 | 
			
		||||
        labels_tags_vectorized = clf.tags_binarizer.fit_transform(labels_tags)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -46,7 +46,11 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
            documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info("Loading classifier")
 | 
			
		||||
        clf = DocumentClassifier.load_classifier()
 | 
			
		||||
        try:
 | 
			
		||||
            clf = DocumentClassifier.load_classifier()
 | 
			
		||||
        except FileNotFoundError:
 | 
			
		||||
            logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        for document in documents:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user