mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Load sklearn modules only when training data has changed
This commit is contained in:
		| @@ -95,9 +95,6 @@ class DocumentClassifier(object): | ||||
|             pickle.dump(self.document_type_classifier, f) | ||||
|  | ||||
|     def train(self): | ||||
|         from sklearn.feature_extraction.text import CountVectorizer | ||||
|         from sklearn.neural_network import MLPClassifier | ||||
|         from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer | ||||
|  | ||||
|         data = list() | ||||
|         labels_tags = list() | ||||
| @@ -162,6 +159,10 @@ class DocumentClassifier(object): | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         from sklearn.feature_extraction.text import CountVectorizer | ||||
|         from sklearn.neural_network import MLPClassifier | ||||
|         from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer | ||||
|  | ||||
|         # Step 2: vectorize data | ||||
|         logger.debug("Vectorizing data...") | ||||
|         self.data_vectorizer = CountVectorizer( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler