mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Code style adjustments
This commit is contained in:
		@@ -18,9 +18,9 @@ def select_action(
 | 
			
		||||
    if not modeladmin.has_change_permission(request):
 | 
			
		||||
        raise PermissionDenied
 | 
			
		||||
 | 
			
		||||
    if request.POST.get('post'):
 | 
			
		||||
    if request.POST.get("post"):
 | 
			
		||||
        n = queryset.count()
 | 
			
		||||
        selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
 | 
			
		||||
        selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
 | 
			
		||||
        if n:
 | 
			
		||||
            for document in queryset:
 | 
			
		||||
                if document_action:
 | 
			
		||||
@@ -139,28 +139,52 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_document_type_on_selected(modeladmin, request, queryset):
 | 
			
		||||
    return select_action(modeladmin=modeladmin, request=request, queryset=queryset,
 | 
			
		||||
                         title="Set document type on multiple documents",
 | 
			
		||||
                         action="set_document_type_on_selected",
 | 
			
		||||
                         modelclass=DocumentType,
 | 
			
		||||
                         success_message="Successfully set document type %(selected_object)s on %(count)d %(items)s.",
 | 
			
		||||
                         queryset_action=lambda qs, document_type: qs.update(document_type=document_type))
 | 
			
		||||
    return select_action(
 | 
			
		||||
        modeladmin=modeladmin,
 | 
			
		||||
        request=request,
 | 
			
		||||
        queryset=queryset,
 | 
			
		||||
        title="Set document type on multiple documents",
 | 
			
		||||
        action="set_document_type_on_selected",
 | 
			
		||||
        modelclass=DocumentType,
 | 
			
		||||
        success_message="Successfully set document type %(selected_object)s "
 | 
			
		||||
                        "on %(count)d %(items)s.",
 | 
			
		||||
        queryset_action=lambda qs, document_type: qs.update(
 | 
			
		||||
            document_type=document_type)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def remove_document_type_from_selected(modeladmin, request, queryset):
 | 
			
		||||
    return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
 | 
			
		||||
                         success_message="Successfully removed document type from %(count)d %(items)s.",
 | 
			
		||||
                         queryset_action=lambda qs: qs.update(document_type=None))
 | 
			
		||||
    return simple_action(
 | 
			
		||||
        modeladmin=modeladmin,
 | 
			
		||||
        request=request,
 | 
			
		||||
        queryset=queryset,
 | 
			
		||||
        success_message="Successfully removed document type from %(count)d "
 | 
			
		||||
                        "%(items)s.",
 | 
			
		||||
        queryset_action=lambda qs: qs.update(document_type=None)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def run_document_classifier_on_selected(modeladmin, request, queryset):
 | 
			
		||||
    try:
 | 
			
		||||
        clf = DocumentClassifier.load_classifier()
 | 
			
		||||
        return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
 | 
			
		||||
                             success_message="Successfully applied document classifier to %(count)d %(items)s.",
 | 
			
		||||
                             document_action=lambda doc: clf.classify_document(doc, classify_correspondent=True, classify_tags=True, classify_document_type=True))
 | 
			
		||||
        return simple_action(
 | 
			
		||||
            modeladmin=modeladmin,
 | 
			
		||||
            request=request,
 | 
			
		||||
            queryset=queryset,
 | 
			
		||||
            success_message="Successfully applied document classifier to "
 | 
			
		||||
                            "%(count)d %(items)s.",
 | 
			
		||||
            document_action=lambda doc: clf.classify_document(
 | 
			
		||||
                doc,
 | 
			
		||||
                classify_correspondent=True,
 | 
			
		||||
                classify_tags=True,
 | 
			
		||||
                classify_document_type=True)
 | 
			
		||||
        )
 | 
			
		||||
    except FileNotFoundError:
 | 
			
		||||
        modeladmin.message_user(request, "Classifier model file not found.", messages.ERROR)
 | 
			
		||||
        modeladmin.message_user(
 | 
			
		||||
            request,
 | 
			
		||||
            "Classifier model file not found.",
 | 
			
		||||
            messages.ERROR
 | 
			
		||||
        )
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -171,7 +195,10 @@ set_correspondent_on_selected.short_description = \
 | 
			
		||||
    "Set correspondent on selected documents"
 | 
			
		||||
remove_correspondent_from_selected.short_description = \
 | 
			
		||||
    "Remove correspondent from selected documents"
 | 
			
		||||
set_document_type_on_selected.short_description = "Set document type on selected documents"
 | 
			
		||||
remove_document_type_from_selected.short_description = "Remove document type from selected documents"
 | 
			
		||||
run_document_classifier_on_selected.short_description = "Run document classifier on selected"
 | 
			
		||||
set_document_type_on_selected.short_description = \
 | 
			
		||||
    "Set document type on selected documents"
 | 
			
		||||
remove_document_type_from_selected.short_description = \
 | 
			
		||||
    "Remove document type from selected documents"
 | 
			
		||||
run_document_classifier_on_selected.short_description = \
 | 
			
		||||
    "Run document classifier on selected"
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -124,7 +124,7 @@ class CorrespondentAdmin(CommonAdmin):
 | 
			
		||||
        "document_count",
 | 
			
		||||
        "last_correspondence"
 | 
			
		||||
    )
 | 
			
		||||
    list_editable = ("automatic_classification")
 | 
			
		||||
    list_editable = ("automatic_classification",)
 | 
			
		||||
 | 
			
		||||
    def get_queryset(self, request):
 | 
			
		||||
        qs = super(CorrespondentAdmin, self).get_queryset(request)
 | 
			
		||||
@@ -145,7 +145,11 @@ class CorrespondentAdmin(CommonAdmin):
 | 
			
		||||
 | 
			
		||||
class TagAdmin(CommonAdmin):
 | 
			
		||||
 | 
			
		||||
    list_display = ("name", "colour", "automatic_classification", "document_count")
 | 
			
		||||
    list_display = (
 | 
			
		||||
        "name",
 | 
			
		||||
        "colour",
 | 
			
		||||
        "automatic_classification",
 | 
			
		||||
        "document_count")
 | 
			
		||||
    list_filter = ("colour",)
 | 
			
		||||
    list_editable = ("colour", "automatic_classification")
 | 
			
		||||
 | 
			
		||||
@@ -238,8 +242,8 @@ class DocumentAdmin(CommonAdmin):
 | 
			
		||||
 | 
			
		||||
        extra_context = extra_context or {}
 | 
			
		||||
        doc = Document.objects.get(id=object_id)
 | 
			
		||||
        extra_context['download_url'] = doc.download_url
 | 
			
		||||
        extra_context['file_type'] = doc.file_type
 | 
			
		||||
        extra_context["download_url"] = doc.download_url
 | 
			
		||||
        extra_context["file_type"] = doc.file_type
 | 
			
		||||
 | 
			
		||||
        if self.document_queue and object_id:
 | 
			
		||||
            if int(object_id) in self.document_queue:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								src/documents/classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										2
									
								
								src/documents/classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -87,7 +87,7 @@ class DocumentClassifier(object):
 | 
			
		||||
 | 
			
		||||
        # Step 2: vectorize data
 | 
			
		||||
        logging.getLogger(__name__).info("Vectorizing data...")
 | 
			
		||||
        self.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 5), min_df=0.1)
 | 
			
		||||
        self.data_vectorizer = CountVectorizer(analyzer="char", ngram_range=(3, 5), min_df=0.1)
 | 
			
		||||
        data_vectorized = self.data_vectorizer.fit_transform(data)
 | 
			
		||||
 | 
			
		||||
        self.tags_binarizer = MultiLabelBinarizer()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										0
									
								
								src/documents/filters.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										0
									
								
								src/documents/filters.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
								
								
									
										8
									
								
								src/documents/management/commands/document_create_classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										8
									
								
								src/documents/management/commands/document_create_classifier.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -1,6 +1,4 @@
 | 
			
		||||
import logging
 | 
			
		||||
import os.path
 | 
			
		||||
import pickle
 | 
			
		||||
 | 
			
		||||
from django.core.management.base import BaseCommand
 | 
			
		||||
from documents.classifier import DocumentClassifier
 | 
			
		||||
@@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
    def handle(self, *args, **options):
 | 
			
		||||
        clf = DocumentClassifier()
 | 
			
		||||
 | 
			
		||||
        clf.train()
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...")
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info("Saving models to " +
 | 
			
		||||
                                         settings.MODEL_FILE + "...")
 | 
			
		||||
        clf.save_classifier()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,40 +0,0 @@
 | 
			
		||||
from django.core.management.base import BaseCommand
 | 
			
		||||
 | 
			
		||||
from documents.classifier import preprocess_content
 | 
			
		||||
from documents.models import Document
 | 
			
		||||
from ...mixins import Renderable
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
    help = """
 | 
			
		||||
        There is no help.
 | 
			
		||||
    """.replace("    ", "")
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        BaseCommand.__init__(self, *args, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def handle(self, *args, **options):
 | 
			
		||||
        with open("dataset_tags.txt", "w") as f:
 | 
			
		||||
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
 | 
			
		||||
                labels = []
 | 
			
		||||
                for tag in doc.tags.filter(automatic_classification=True):
 | 
			
		||||
                    labels.append(tag.name)
 | 
			
		||||
                f.write(",".join(labels))
 | 
			
		||||
                f.write(";")
 | 
			
		||||
                f.write(preprocess_content(doc.content))
 | 
			
		||||
                f.write("\n")
 | 
			
		||||
 | 
			
		||||
        with open("dataset_types.txt", "w") as f:
 | 
			
		||||
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
 | 
			
		||||
                f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
 | 
			
		||||
                f.write(";")
 | 
			
		||||
                f.write(preprocess_content(doc.content))
 | 
			
		||||
                f.write("\n")
 | 
			
		||||
 | 
			
		||||
        with open("dataset_correspondents.txt", "w") as f:
 | 
			
		||||
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
 | 
			
		||||
                f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
 | 
			
		||||
                f.write(";")
 | 
			
		||||
                f.write(preprocess_content(doc.content))
 | 
			
		||||
                f.write("\n")
 | 
			
		||||
							
								
								
									
										9
									
								
								src/documents/management/commands/document_retagger.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										9
									
								
								src/documents/management/commands/document_retagger.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -11,7 +11,10 @@ from ...mixins import Renderable
 | 
			
		||||
class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
    help = """
 | 
			
		||||
        There is no help. #TODO
 | 
			
		||||
        Using the current classification model, assigns correspondents, tags
 | 
			
		||||
        and document types to all documents, effectively allowing you to
 | 
			
		||||
        back-tag all previously indexed documents with metadata created (or
 | 
			
		||||
        modified) after their initial import.
 | 
			
		||||
    """.replace("    ", "")
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
@@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
        self.verbosity = options["verbosity"]
 | 
			
		||||
 | 
			
		||||
        if options['inbox_only']:
 | 
			
		||||
        if options["inbox_only"]:
 | 
			
		||||
            documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct()
 | 
			
		||||
        else:
 | 
			
		||||
            documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
 | 
			
		||||
@@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
        for document in documents:
 | 
			
		||||
            logging.getLogger(__name__).info("Processing document {}".format(document.title))
 | 
			
		||||
            clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
 | 
			
		||||
            clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"])
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										19
									
								
								src/documents/models.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										19
									
								
								src/documents/models.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -26,7 +26,11 @@ class MatchingModel(models.Model):
 | 
			
		||||
    name = models.CharField(max_length=128, unique=True)
 | 
			
		||||
    slug = models.SlugField(blank=True)
 | 
			
		||||
 | 
			
		||||
    automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
 | 
			
		||||
    automatic_classification = models.BooleanField(
 | 
			
		||||
        default=False,
 | 
			
		||||
        help_text="Automatically assign to newly added documents based on "
 | 
			
		||||
                  "current usage in your document collection."
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    class Meta:
 | 
			
		||||
        abstract = True
 | 
			
		||||
@@ -75,11 +79,16 @@ class Tag(MatchingModel):
 | 
			
		||||
 | 
			
		||||
    is_inbox_tag = models.BooleanField(
 | 
			
		||||
        default=False,
 | 
			
		||||
        help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")
 | 
			
		||||
        help_text="Marks this tag as an inbox tag: All newly consumed "
 | 
			
		||||
                  "documents will be tagged with inbox tags."
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    is_archived_tag = models.BooleanField(
 | 
			
		||||
        default=False,
 | 
			
		||||
        help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")
 | 
			
		||||
        help_text="Marks this tag as an archive tag: All documents tagged "
 | 
			
		||||
                  "with archive tags will never be modified automatically "
 | 
			
		||||
                  "(i.e., modifying tags by matching rules)"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DocumentType(MatchingModel):
 | 
			
		||||
@@ -170,7 +179,9 @@ class Document(models.Model):
 | 
			
		||||
        null=True,
 | 
			
		||||
        unique=True,
 | 
			
		||||
        db_index=True,
 | 
			
		||||
        help_text="The position of this document in your physical document archive.")
 | 
			
		||||
        help_text="The position of this document in your physical document "
 | 
			
		||||
                  "archive."
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    class Meta:
 | 
			
		||||
        ordering = ("correspondent", "title")
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										11
									
								
								src/documents/signals/handlers.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										11
									
								
								src/documents/signals/handlers.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -23,9 +23,16 @@ def classify_document(sender, document=None, logging_group=None, **kwargs):
 | 
			
		||||
    global classifier
 | 
			
		||||
    try:
 | 
			
		||||
        classifier.reload()
 | 
			
		||||
        classifier.classify_document(document, classify_correspondent=True, classify_tags=True, classify_document_type=True)
 | 
			
		||||
        classifier.classify_document(
 | 
			
		||||
            document,
 | 
			
		||||
            classify_correspondent=True,
 | 
			
		||||
            classify_tags=True,
 | 
			
		||||
            classify_document_type=True
 | 
			
		||||
        )
 | 
			
		||||
    except FileNotFoundError:
 | 
			
		||||
        logging.getLogger(__name__).fatal("Cannot classify document, classifier model file was not found.")
 | 
			
		||||
        logging.getLogger(__name__).fatal(
 | 
			
		||||
            "Cannot classify document, classifier model file was not found."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3
									
								
								src/documents/views.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										3
									
								
								src/documents/views.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -28,7 +28,8 @@ from .serialisers import (
 | 
			
		||||
    DocumentSerializer,
 | 
			
		||||
    LogSerializer,
 | 
			
		||||
    TagSerializer,
 | 
			
		||||
    DocumentTypeSerializer)
 | 
			
		||||
    DocumentTypeSerializer
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class IndexView(TemplateView):
 | 
			
		||||
 
 | 
			
		||||
@@ -201,7 +201,10 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
 | 
			
		||||
 | 
			
		||||
# Document classification models location
 | 
			
		||||
MODEL_FILE = os.getenv(
 | 
			
		||||
    "PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle"))
 | 
			
		||||
    "PAPERLESS_MODEL_FILE", os.path.join(
 | 
			
		||||
        BASE_DIR, "..", "models", "model.pickle"
 | 
			
		||||
    )
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Paperless-specific stuff
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3
									
								
								src/paperless/urls.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										3
									
								
								src/paperless/urls.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							@@ -13,7 +13,8 @@ from documents.views import (
 | 
			
		||||
    LogViewSet,
 | 
			
		||||
    PushView,
 | 
			
		||||
    TagViewSet,
 | 
			
		||||
    DocumentTypeViewSet)
 | 
			
		||||
    DocumentTypeViewSet
 | 
			
		||||
)
 | 
			
		||||
from reminders.views import ReminderViewSet
 | 
			
		||||
 | 
			
		||||
router = DefaultRouter()
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user