mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Code style adjustments
This commit is contained in:
		| @@ -18,9 +18,9 @@ def select_action( | ||||
|     if not modeladmin.has_change_permission(request): | ||||
|         raise PermissionDenied | ||||
|  | ||||
|     if request.POST.get('post'): | ||||
|     if request.POST.get("post"): | ||||
|         n = queryset.count() | ||||
|         selected_object = modelclass.objects.get(id=request.POST.get('obj_id')) | ||||
|         selected_object = modelclass.objects.get(id=request.POST.get("obj_id")) | ||||
|         if n: | ||||
|             for document in queryset: | ||||
|                 if document_action: | ||||
| @@ -139,28 +139,52 @@ def remove_correspondent_from_selected(modeladmin, request, queryset): | ||||
|  | ||||
|  | ||||
| def set_document_type_on_selected(modeladmin, request, queryset): | ||||
|     return select_action(modeladmin=modeladmin, request=request, queryset=queryset, | ||||
|     return select_action( | ||||
|         modeladmin=modeladmin, | ||||
|         request=request, | ||||
|         queryset=queryset, | ||||
|         title="Set document type on multiple documents", | ||||
|         action="set_document_type_on_selected", | ||||
|         modelclass=DocumentType, | ||||
|                          success_message="Successfully set document type %(selected_object)s on %(count)d %(items)s.", | ||||
|                          queryset_action=lambda qs, document_type: qs.update(document_type=document_type)) | ||||
|         success_message="Successfully set document type %(selected_object)s " | ||||
|                         "on %(count)d %(items)s.", | ||||
|         queryset_action=lambda qs, document_type: qs.update( | ||||
|             document_type=document_type) | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def remove_document_type_from_selected(modeladmin, request, queryset): | ||||
|     return simple_action(modeladmin=modeladmin, request=request, queryset=queryset, | ||||
|                          success_message="Successfully removed document type from %(count)d %(items)s.", | ||||
|                          queryset_action=lambda qs: qs.update(document_type=None)) | ||||
|     return simple_action( | ||||
|         modeladmin=modeladmin, | ||||
|         request=request, | ||||
|         queryset=queryset, | ||||
|         success_message="Successfully removed document type from %(count)d " | ||||
|                         "%(items)s.", | ||||
|         queryset_action=lambda qs: qs.update(document_type=None) | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def run_document_classifier_on_selected(modeladmin, request, queryset): | ||||
|     try: | ||||
|         clf = DocumentClassifier.load_classifier() | ||||
|         return simple_action(modeladmin=modeladmin, request=request, queryset=queryset, | ||||
|                              success_message="Successfully applied document classifier to %(count)d %(items)s.", | ||||
|                              document_action=lambda doc: clf.classify_document(doc, classify_correspondent=True, classify_tags=True, classify_document_type=True)) | ||||
|         return simple_action( | ||||
|             modeladmin=modeladmin, | ||||
|             request=request, | ||||
|             queryset=queryset, | ||||
|             success_message="Successfully applied document classifier to " | ||||
|                             "%(count)d %(items)s.", | ||||
|             document_action=lambda doc: clf.classify_document( | ||||
|                 doc, | ||||
|                 classify_correspondent=True, | ||||
|                 classify_tags=True, | ||||
|                 classify_document_type=True) | ||||
|         ) | ||||
|     except FileNotFoundError: | ||||
|         modeladmin.message_user(request, "Classifier model file not found.", messages.ERROR) | ||||
|         modeladmin.message_user( | ||||
|             request, | ||||
|             "Classifier model file not found.", | ||||
|             messages.ERROR | ||||
|         ) | ||||
|         return None | ||||
|  | ||||
|  | ||||
| @@ -171,7 +195,10 @@ set_correspondent_on_selected.short_description = \ | ||||
|     "Set correspondent on selected documents" | ||||
| remove_correspondent_from_selected.short_description = \ | ||||
|     "Remove correspondent from selected documents" | ||||
| set_document_type_on_selected.short_description = "Set document type on selected documents" | ||||
| remove_document_type_from_selected.short_description = "Remove document type from selected documents" | ||||
| run_document_classifier_on_selected.short_description = "Run document classifier on selected" | ||||
| set_document_type_on_selected.short_description = \ | ||||
|     "Set document type on selected documents" | ||||
| remove_document_type_from_selected.short_description = \ | ||||
|     "Remove document type from selected documents" | ||||
| run_document_classifier_on_selected.short_description = \ | ||||
|     "Run document classifier on selected" | ||||
|  | ||||
|   | ||||
| @@ -124,7 +124,7 @@ class CorrespondentAdmin(CommonAdmin): | ||||
|         "document_count", | ||||
|         "last_correspondence" | ||||
|     ) | ||||
|     list_editable = ("automatic_classification") | ||||
|     list_editable = ("automatic_classification",) | ||||
|  | ||||
|     def get_queryset(self, request): | ||||
|         qs = super(CorrespondentAdmin, self).get_queryset(request) | ||||
| @@ -145,7 +145,11 @@ class CorrespondentAdmin(CommonAdmin): | ||||
|  | ||||
| class TagAdmin(CommonAdmin): | ||||
|  | ||||
|     list_display = ("name", "colour", "automatic_classification", "document_count") | ||||
|     list_display = ( | ||||
|         "name", | ||||
|         "colour", | ||||
|         "automatic_classification", | ||||
|         "document_count") | ||||
|     list_filter = ("colour",) | ||||
|     list_editable = ("colour", "automatic_classification") | ||||
|  | ||||
| @@ -238,8 +242,8 @@ class DocumentAdmin(CommonAdmin): | ||||
|  | ||||
|         extra_context = extra_context or {} | ||||
|         doc = Document.objects.get(id=object_id) | ||||
|         extra_context['download_url'] = doc.download_url | ||||
|         extra_context['file_type'] = doc.file_type | ||||
|         extra_context["download_url"] = doc.download_url | ||||
|         extra_context["file_type"] = doc.file_type | ||||
|  | ||||
|         if self.document_queue and object_id: | ||||
|             if int(object_id) in self.document_queue: | ||||
|   | ||||
							
								
								
									
										2
									
								
								src/documents/classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										2
									
								
								src/documents/classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -87,7 +87,7 @@ class DocumentClassifier(object): | ||||
|  | ||||
|         # Step 2: vectorize data | ||||
|         logging.getLogger(__name__).info("Vectorizing data...") | ||||
|         self.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 5), min_df=0.1) | ||||
|         self.data_vectorizer = CountVectorizer(analyzer="char", ngram_range=(3, 5), min_df=0.1) | ||||
|         data_vectorized = self.data_vectorizer.fit_transform(data) | ||||
|  | ||||
|         self.tags_binarizer = MultiLabelBinarizer() | ||||
|   | ||||
							
								
								
									
										0
									
								
								src/documents/filters.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										0
									
								
								src/documents/filters.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
								
								
									
										8
									
								
								src/documents/management/commands/document_create_classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										8
									
								
								src/documents/management/commands/document_create_classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -1,6 +1,4 @@ | ||||
| import logging | ||||
| import os.path | ||||
| import pickle | ||||
|  | ||||
| from django.core.management.base import BaseCommand | ||||
| from documents.classifier import DocumentClassifier | ||||
| @@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand): | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|         clf = DocumentClassifier() | ||||
|  | ||||
|         clf.train() | ||||
|  | ||||
|         logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...") | ||||
|  | ||||
|         logging.getLogger(__name__).info("Saving models to " + | ||||
|                                          settings.MODEL_FILE + "...") | ||||
|         clf.save_classifier() | ||||
|   | ||||
| @@ -1,40 +0,0 @@ | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from documents.classifier import preprocess_content | ||||
| from documents.models import Document | ||||
| from ...mixins import Renderable | ||||
|  | ||||
|  | ||||
| class Command(Renderable, BaseCommand): | ||||
|  | ||||
|     help = """ | ||||
|         There is no help. | ||||
|     """.replace("    ", "") | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         BaseCommand.__init__(self, *args, **kwargs) | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|         with open("dataset_tags.txt", "w") as f: | ||||
|             for doc in Document.objects.exclude(tags__is_inbox_tag=True): | ||||
|                 labels = [] | ||||
|                 for tag in doc.tags.filter(automatic_classification=True): | ||||
|                     labels.append(tag.name) | ||||
|                 f.write(",".join(labels)) | ||||
|                 f.write(";") | ||||
|                 f.write(preprocess_content(doc.content)) | ||||
|                 f.write("\n") | ||||
|  | ||||
|         with open("dataset_types.txt", "w") as f: | ||||
|             for doc in Document.objects.exclude(tags__is_inbox_tag=True): | ||||
|                 f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-") | ||||
|                 f.write(";") | ||||
|                 f.write(preprocess_content(doc.content)) | ||||
|                 f.write("\n") | ||||
|  | ||||
|         with open("dataset_correspondents.txt", "w") as f: | ||||
|             for doc in Document.objects.exclude(tags__is_inbox_tag=True): | ||||
|                 f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-") | ||||
|                 f.write(";") | ||||
|                 f.write(preprocess_content(doc.content)) | ||||
|                 f.write("\n") | ||||
							
								
								
									
										9
									
								
								src/documents/management/commands/document_retagger.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										9
									
								
								src/documents/management/commands/document_retagger.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -11,7 +11,10 @@ from ...mixins import Renderable | ||||
| class Command(Renderable, BaseCommand): | ||||
|  | ||||
|     help = """ | ||||
|         There is no help. #TODO | ||||
|         Using the current classification model, assigns correspondents, tags | ||||
|         and document types to all documents, effectively allowing you to | ||||
|         back-tag all previously indexed documents with metadata created (or | ||||
|         modified) after their initial import. | ||||
|     """.replace("    ", "") | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
| @@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand): | ||||
|  | ||||
|         self.verbosity = options["verbosity"] | ||||
|  | ||||
|         if options['inbox_only']: | ||||
|         if options["inbox_only"]: | ||||
|             documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct() | ||||
|         else: | ||||
|             documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct() | ||||
| @@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand): | ||||
|  | ||||
|         for document in documents: | ||||
|             logging.getLogger(__name__).info("Processing document {}".format(document.title)) | ||||
|             clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags']) | ||||
|             clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"]) | ||||
|   | ||||
							
								
								
									
										19
									
								
								src/documents/models.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										19
									
								
								src/documents/models.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -26,7 +26,11 @@ class MatchingModel(models.Model): | ||||
|     name = models.CharField(max_length=128, unique=True) | ||||
|     slug = models.SlugField(blank=True) | ||||
|  | ||||
|     automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.') | ||||
|     automatic_classification = models.BooleanField( | ||||
|         default=False, | ||||
|         help_text="Automatically assign to newly added documents based on " | ||||
|                   "current usage in your document collection." | ||||
|     ) | ||||
|  | ||||
|     class Meta: | ||||
|         abstract = True | ||||
| @@ -75,11 +79,16 @@ class Tag(MatchingModel): | ||||
|  | ||||
|     is_inbox_tag = models.BooleanField( | ||||
|         default=False, | ||||
|         help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.") | ||||
|         help_text="Marks this tag as an inbox tag: All newly consumed " | ||||
|                   "documents will be tagged with inbox tags." | ||||
|     ) | ||||
|  | ||||
|     is_archived_tag = models.BooleanField( | ||||
|         default=False, | ||||
|         help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)") | ||||
|         help_text="Marks this tag as an archive tag: All documents tagged " | ||||
|                   "with archive tags will never be modified automatically " | ||||
|                   "(i.e., modifying tags by matching rules)" | ||||
|     ) | ||||
|  | ||||
|  | ||||
| class DocumentType(MatchingModel): | ||||
| @@ -170,7 +179,9 @@ class Document(models.Model): | ||||
|         null=True, | ||||
|         unique=True, | ||||
|         db_index=True, | ||||
|         help_text="The position of this document in your physical document archive.") | ||||
|         help_text="The position of this document in your physical document " | ||||
|                   "archive." | ||||
|     ) | ||||
|  | ||||
|     class Meta: | ||||
|         ordering = ("correspondent", "title") | ||||
|   | ||||
							
								
								
									
										11
									
								
								src/documents/signals/handlers.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										11
									
								
								src/documents/signals/handlers.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -23,9 +23,16 @@ def classify_document(sender, document=None, logging_group=None, **kwargs): | ||||
|     global classifier | ||||
|     try: | ||||
|         classifier.reload() | ||||
|         classifier.classify_document(document, classify_correspondent=True, classify_tags=True, classify_document_type=True) | ||||
|         classifier.classify_document( | ||||
|             document, | ||||
|             classify_correspondent=True, | ||||
|             classify_tags=True, | ||||
|             classify_document_type=True | ||||
|         ) | ||||
|     except FileNotFoundError: | ||||
|         logging.getLogger(__name__).fatal("Cannot classify document, classifier model file was not found.") | ||||
|         logging.getLogger(__name__).fatal( | ||||
|             "Cannot classify document, classifier model file was not found." | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def add_inbox_tags(sender, document=None, logging_group=None, **kwargs): | ||||
|   | ||||
							
								
								
									
										3
									
								
								src/documents/views.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										3
									
								
								src/documents/views.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -28,7 +28,8 @@ from .serialisers import ( | ||||
|     DocumentSerializer, | ||||
|     LogSerializer, | ||||
|     TagSerializer, | ||||
|     DocumentTypeSerializer) | ||||
|     DocumentTypeSerializer | ||||
| ) | ||||
|  | ||||
|  | ||||
| class IndexView(TemplateView): | ||||
|   | ||||
| @@ -201,7 +201,10 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/") | ||||
|  | ||||
| # Document classification models location | ||||
| MODEL_FILE = os.getenv( | ||||
|     "PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle")) | ||||
|     "PAPERLESS_MODEL_FILE", os.path.join( | ||||
|         BASE_DIR, "..", "models", "model.pickle" | ||||
|     ) | ||||
| ) | ||||
|  | ||||
|  | ||||
| # Paperless-specific stuff | ||||
|   | ||||
							
								
								
									
										3
									
								
								src/paperless/urls.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										3
									
								
								src/paperless/urls.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -13,7 +13,8 @@ from documents.views import ( | ||||
|     LogViewSet, | ||||
|     PushView, | ||||
|     TagViewSet, | ||||
|     DocumentTypeViewSet) | ||||
|     DocumentTypeViewSet | ||||
| ) | ||||
| from reminders.views import ReminderViewSet | ||||
|  | ||||
| router = DefaultRouter() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler