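Code style adjustments (also fixes the `list_editable` tuple in CorrespondentAdmin, removes the `document_create_dataset` management command, and adds real help text to `document_retagger`)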

2026-02-01 23:19:00 -06:00 · 2018-09-25 16:09:33 +02:00
parent ddff8a0450
commit efc7bf1d23
12 changed files with 94 additions and 81 deletions
--- a/src/documents/actions.py
+++ b/src/documents/actions.py
@@ -18,9 +18,9 @@ def select_action(
    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

-    if request.POST.get('post'):
+    if request.POST.get("post"):
        n = queryset.count()
-        selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
+        selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
        if n:
            for document in queryset:
                if document_action:
@@ -139,28 +139,52 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):


 def set_document_type_on_selected(modeladmin, request, queryset):
-    return select_action(modeladmin=modeladmin, request=request, queryset=queryset,
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
        title="Set document type on multiple documents",
        action="set_document_type_on_selected",
        modelclass=DocumentType,
-                         success_message="Successfully set document type %(selected_object)s on %(count)d %(items)s.",
-                         queryset_action=lambda qs, document_type: qs.update(document_type=document_type))
+        success_message="Successfully set document type %(selected_object)s "
+                        "on %(count)d %(items)s.",
+        queryset_action=lambda qs, document_type: qs.update(
+            document_type=document_type)
+    )


 def remove_document_type_from_selected(modeladmin, request, queryset):
-    return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
-                         success_message="Successfully removed document type from %(count)d %(items)s.",
-                         queryset_action=lambda qs: qs.update(document_type=None))
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully removed document type from %(count)d "
+                        "%(items)s.",
+        queryset_action=lambda qs: qs.update(document_type=None)
+    )


 def run_document_classifier_on_selected(modeladmin, request, queryset):
    try:
        clf = DocumentClassifier.load_classifier()
-        return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
-                             success_message="Successfully applied document classifier to %(count)d %(items)s.",
-                             document_action=lambda doc: clf.classify_document(doc, classify_correspondent=True, classify_tags=True, classify_document_type=True))
+        return simple_action(
+            modeladmin=modeladmin,
+            request=request,
+            queryset=queryset,
+            success_message="Successfully applied document classifier to "
+                            "%(count)d %(items)s.",
+            document_action=lambda doc: clf.classify_document(
+                doc,
+                classify_correspondent=True,
+                classify_tags=True,
+                classify_document_type=True)
+        )
    except FileNotFoundError:
-        modeladmin.message_user(request, "Classifier model file not found.", messages.ERROR)
+        modeladmin.message_user(
+            request,
+            "Classifier model file not found.",
+            messages.ERROR
+        )
        return None


@@ -171,7 +195,10 @@ set_correspondent_on_selected.short_description = \
    "Set correspondent on selected documents"
 remove_correspondent_from_selected.short_description = \
    "Remove correspondent from selected documents"
-set_document_type_on_selected.short_description = "Set document type on selected documents"
-remove_document_type_from_selected.short_description = "Remove document type from selected documents"
-run_document_classifier_on_selected.short_description = "Run document classifier on selected"
+set_document_type_on_selected.short_description = \
+    "Set document type on selected documents"
+remove_document_type_from_selected.short_description = \
+    "Remove document type from selected documents"
+run_document_classifier_on_selected.short_description = \
+    "Run document classifier on selected"

--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -124,7 +124,7 @@ class CorrespondentAdmin(CommonAdmin):
        "document_count",
        "last_correspondence"
    )
-    list_editable = ("automatic_classification")
+    list_editable = ("automatic_classification",)

    def get_queryset(self, request):
        qs = super(CorrespondentAdmin, self).get_queryset(request)
@@ -145,7 +145,11 @@ class CorrespondentAdmin(CommonAdmin):

 class TagAdmin(CommonAdmin):

-    list_display = ("name", "colour", "automatic_classification", "document_count")
+    list_display = (
+        "name",
+        "colour",
+        "automatic_classification",
+        "document_count")
    list_filter = ("colour",)
    list_editable = ("colour", "automatic_classification")

@@ -238,8 +242,8 @@ class DocumentAdmin(CommonAdmin):

        extra_context = extra_context or {}
        doc = Document.objects.get(id=object_id)
-        extra_context['download_url'] = doc.download_url
-        extra_context['file_type'] = doc.file_type
+        extra_context["download_url"] = doc.download_url
+        extra_context["file_type"] = doc.file_type

        if self.document_queue and object_id:
            if int(object_id) in self.document_queue:
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -87,7 +87,7 @@ class DocumentClassifier(object):

        # Step 2: vectorize data
        logging.getLogger(__name__).info("Vectorizing data...")
-        self.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 5), min_df=0.1)
+        self.data_vectorizer = CountVectorizer(analyzer="char", ngram_range=(3, 5), min_df=0.1)
        data_vectorized = self.data_vectorizer.fit_transform(data)

        self.tags_binarizer = MultiLabelBinarizer()
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -1,6 +1,4 @@
 import logging
-import os.path
-import pickle

 from django.core.management.base import BaseCommand
 from documents.classifier import DocumentClassifier
@@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand):

    def handle(self, *args, **options):
        clf = DocumentClassifier()
-
        clf.train()
-
-        logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...")
-
+        logging.getLogger(__name__).info("Saving models to " +
+                                         settings.MODEL_FILE + "...")
        clf.save_classifier()
--- a/src/documents/management/commands/document_create_dataset.py
+++ b/src/documents/management/commands/document_create_dataset.py
@@ -1,40 +0,0 @@
-from django.core.management.base import BaseCommand
-
-from documents.classifier import preprocess_content
-from documents.models import Document
-from ...mixins import Renderable
-
-
-class Command(Renderable, BaseCommand):
-
-    help = """
-        There is no help.
-    """.replace("    ", "")
-
-    def __init__(self, *args, **kwargs):
-        BaseCommand.__init__(self, *args, **kwargs)
-
-    def handle(self, *args, **options):
-        with open("dataset_tags.txt", "w") as f:
-            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
-                labels = []
-                for tag in doc.tags.filter(automatic_classification=True):
-                    labels.append(tag.name)
-                f.write(",".join(labels))
-                f.write(";")
-                f.write(preprocess_content(doc.content))
-                f.write("\n")
-
-        with open("dataset_types.txt", "w") as f:
-            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
-                f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
-                f.write(";")
-                f.write(preprocess_content(doc.content))
-                f.write("\n")
-
-        with open("dataset_correspondents.txt", "w") as f:
-            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
-                f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
-                f.write(";")
-                f.write(preprocess_content(doc.content))
-                f.write("\n")
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -11,7 +11,10 @@ from ...mixins import Renderable
 class Command(Renderable, BaseCommand):

    help = """
-        There is no help. #TODO
+        Using the current classification model, assigns correspondents, tags
+        and document types to all documents, effectively allowing you to
+        back-tag all previously indexed documents with metadata created (or
+        modified) after their initial import.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
@@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand):

        self.verbosity = options["verbosity"]

-        if options['inbox_only']:
+        if options["inbox_only"]:
            documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct()
        else:
            documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
@@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand):

        for document in documents:
            logging.getLogger(__name__).info("Processing document {}".format(document.title))
-            clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
+            clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"])
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -26,7 +26,11 @@ class MatchingModel(models.Model):
    name = models.CharField(max_length=128, unique=True)
    slug = models.SlugField(blank=True)

-    automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
+    automatic_classification = models.BooleanField(
+        default=False,
+        help_text="Automatically assign to newly added documents based on "
+                  "current usage in your document collection."
+    )

    class Meta:
        abstract = True
@@ -75,11 +79,16 @@ class Tag(MatchingModel):

    is_inbox_tag = models.BooleanField(
        default=False,
-        help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")
+        help_text="Marks this tag as an inbox tag: All newly consumed "
+                  "documents will be tagged with inbox tags."
+    )

    is_archived_tag = models.BooleanField(
        default=False,
-        help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")
+        help_text="Marks this tag as an archive tag: All documents tagged "
+                  "with archive tags will never be modified automatically "
+                  "(i.e., modifying tags by matching rules)"
+    )


 class DocumentType(MatchingModel):
@@ -170,7 +179,9 @@ class Document(models.Model):
        null=True,
        unique=True,
        db_index=True,
-        help_text="The position of this document in your physical document archive.")
+        help_text="The position of this document in your physical document "
+                  "archive."
+    )

    class Meta:
        ordering = ("correspondent", "title")
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -23,9 +23,16 @@ def classify_document(sender, document=None, logging_group=None, **kwargs):
    global classifier
    try:
        classifier.reload()
-        classifier.classify_document(document, classify_correspondent=True, classify_tags=True, classify_document_type=True)
+        classifier.classify_document(
+            document,
+            classify_correspondent=True,
+            classify_tags=True,
+            classify_document_type=True
+        )
    except FileNotFoundError:
-        logging.getLogger(__name__).fatal("Cannot classify document, classifier model file was not found.")
+        logging.getLogger(__name__).fatal(
+            "Cannot classify document, classifier model file was not found."
+        )


 def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -28,7 +28,8 @@ from .serialisers import (
    DocumentSerializer,
    LogSerializer,
    TagSerializer,
-    DocumentTypeSerializer)
+    DocumentTypeSerializer
+)


 class IndexView(TemplateView):
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -201,7 +201,10 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")

 # Document classification models location
 MODEL_FILE = os.getenv(
-    "PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle"))
+    "PAPERLESS_MODEL_FILE", os.path.join(
+        BASE_DIR, "..", "models", "model.pickle"
+    )
+)


 # Paperless-specific stuff
--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -13,7 +13,8 @@ from documents.views import (
    LogViewSet,
    PushView,
    TagViewSet,
-    DocumentTypeViewSet)
+    DocumentTypeViewSet
+)
 from reminders.views import ReminderViewSet

 router = DefaultRouter()