Code style adjustments

This commit is contained in:
Jonas Winkler 2018-09-25 16:09:33 +02:00
parent 779ea6a015
commit 60618381f8
12 changed files with 94 additions and 81 deletions

View File

@ -18,9 +18,9 @@ def select_action(
if not modeladmin.has_change_permission(request):
raise PermissionDenied
if request.POST.get('post'):
if request.POST.get("post"):
n = queryset.count()
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
if n:
for document in queryset:
if document_action:
@ -139,28 +139,52 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
def set_document_type_on_selected(modeladmin, request, queryset):
return select_action(modeladmin=modeladmin, request=request, queryset=queryset,
title="Set document type on multiple documents",
action="set_document_type_on_selected",
modelclass=DocumentType,
success_message="Successfully set document type %(selected_object)s on %(count)d %(items)s.",
queryset_action=lambda qs, document_type: qs.update(document_type=document_type))
return select_action(
modeladmin=modeladmin,
request=request,
queryset=queryset,
title="Set document type on multiple documents",
action="set_document_type_on_selected",
modelclass=DocumentType,
success_message="Successfully set document type %(selected_object)s "
"on %(count)d %(items)s.",
queryset_action=lambda qs, document_type: qs.update(
document_type=document_type)
)
def remove_document_type_from_selected(modeladmin, request, queryset):
return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
success_message="Successfully removed document type from %(count)d %(items)s.",
queryset_action=lambda qs: qs.update(document_type=None))
return simple_action(
modeladmin=modeladmin,
request=request,
queryset=queryset,
success_message="Successfully removed document type from %(count)d "
"%(items)s.",
queryset_action=lambda qs: qs.update(document_type=None)
)
def run_document_classifier_on_selected(modeladmin, request, queryset):
try:
clf = DocumentClassifier.load_classifier()
return simple_action(modeladmin=modeladmin, request=request, queryset=queryset,
success_message="Successfully applied document classifier to %(count)d %(items)s.",
document_action=lambda doc: clf.classify_document(doc, classify_correspondent=True, classify_tags=True, classify_document_type=True))
return simple_action(
modeladmin=modeladmin,
request=request,
queryset=queryset,
success_message="Successfully applied document classifier to "
"%(count)d %(items)s.",
document_action=lambda doc: clf.classify_document(
doc,
classify_correspondent=True,
classify_tags=True,
classify_document_type=True)
)
except FileNotFoundError:
modeladmin.message_user(request, "Classifier model file not found.", messages.ERROR)
modeladmin.message_user(
request,
"Classifier model file not found.",
messages.ERROR
)
return None
@ -171,7 +195,10 @@ set_correspondent_on_selected.short_description = \
"Set correspondent on selected documents"
remove_correspondent_from_selected.short_description = \
"Remove correspondent from selected documents"
set_document_type_on_selected.short_description = "Set document type on selected documents"
remove_document_type_from_selected.short_description = "Remove document type from selected documents"
run_document_classifier_on_selected.short_description = "Run document classifier on selected"
set_document_type_on_selected.short_description = \
"Set document type on selected documents"
remove_document_type_from_selected.short_description = \
"Remove document type from selected documents"
run_document_classifier_on_selected.short_description = \
"Run document classifier on selected"

View File

@ -124,7 +124,7 @@ class CorrespondentAdmin(CommonAdmin):
"document_count",
"last_correspondence"
)
list_editable = ("automatic_classification")
list_editable = ("automatic_classification",)
def get_queryset(self, request):
qs = super(CorrespondentAdmin, self).get_queryset(request)
@ -145,7 +145,11 @@ class CorrespondentAdmin(CommonAdmin):
class TagAdmin(CommonAdmin):
list_display = ("name", "colour", "automatic_classification", "document_count")
list_display = (
"name",
"colour",
"automatic_classification",
"document_count")
list_filter = ("colour",)
list_editable = ("colour", "automatic_classification")
@ -238,8 +242,8 @@ class DocumentAdmin(CommonAdmin):
extra_context = extra_context or {}
doc = Document.objects.get(id=object_id)
extra_context['download_url'] = doc.download_url
extra_context['file_type'] = doc.file_type
extra_context["download_url"] = doc.download_url
extra_context["file_type"] = doc.file_type
if self.document_queue and object_id:
if int(object_id) in self.document_queue:

2
src/documents/classifier.py Normal file → Executable file
View File

@ -87,7 +87,7 @@ class DocumentClassifier(object):
# Step 2: vectorize data
logging.getLogger(__name__).info("Vectorizing data...")
self.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 5), min_df=0.1)
self.data_vectorizer = CountVectorizer(analyzer="char", ngram_range=(3, 5), min_df=0.1)
data_vectorized = self.data_vectorizer.fit_transform(data)
self.tags_binarizer = MultiLabelBinarizer()

0
src/documents/filters.py Normal file → Executable file
View File

View File

@ -1,6 +1,4 @@
import logging
import os.path
import pickle
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier
@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand):
def handle(self, *args, **options):
clf = DocumentClassifier()
clf.train()
logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...")
logging.getLogger(__name__).info("Saving models to " +
settings.MODEL_FILE + "...")
clf.save_classifier()

View File

@ -1,40 +0,0 @@
from django.core.management.base import BaseCommand
from documents.classifier import preprocess_content
from documents.models import Document
from ...mixins import Renderable
# Django management command that dumps three plain-text dataset files
# (tags, document types, correspondents) built from the stored documents.
# NOTE(review): this view of the file has lost its leading whitespace, so the
# nesting described in the comments below is inferred from statement order --
# confirm against the real source before relying on it.
class Command(Renderable, BaseCommand):
# Help text for the command; the author left it as a placeholder.
# NOTE(review): the argument to replace() is shown as a single space here,
# which may be a whitespace-collapsed rendering of a wider indent string --
# TODO confirm.
help = """
There is no help.
""".replace(" ", "")
# Only forwards to BaseCommand.__init__ with the arguments unchanged.
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
# Writes the three dataset files into the current working directory.
# Every query excludes documents matching tags__is_inbox_tag=True.
# Each record is written as "<label(s)>;<preprocess_content(doc.content)>\n",
# presumably one record per document.
# The files are opened in text mode "w" with no explicit encoding, so the
# platform default encoding applies and existing files are overwritten.
def handle(self, *args, **options):
# Tags dataset: the label field is the comma-joined names of the
# document's tags that have automatic_classification=True (it may be empty).
with open("dataset_tags.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
labels = []
for tag in doc.tags.filter(automatic_classification=True):
labels.append(tag.name)
f.write(",".join(labels))
f.write(";")
f.write(preprocess_content(doc.content))
f.write("\n")
# Document-type dataset: the label is the type's name, or "-" when the
# document has no type or the type has automatic_classification disabled.
with open("dataset_types.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
f.write(";")
f.write(preprocess_content(doc.content))
f.write("\n")
# Correspondent dataset: same layout as the document-type file, with the
# correspondent's name (or "-") as the label.
with open("dataset_correspondents.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
f.write(";")
f.write(preprocess_content(doc.content))
f.write("\n")

9
src/documents/management/commands/document_retagger.py Normal file → Executable file
View File

@ -11,7 +11,10 @@ from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
There is no help. #TODO
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand):
self.verbosity = options["verbosity"]
if options['inbox_only']:
if options["inbox_only"]:
documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct()
else:
documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand):
for document in documents:
logging.getLogger(__name__).info("Processing document {}".format(document.title))
clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"])

19
src/documents/models.py Normal file → Executable file
View File

@ -26,7 +26,11 @@ class MatchingModel(models.Model):
name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True)
automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
automatic_classification = models.BooleanField(
default=False,
help_text="Automatically assign to newly added documents based on "
"current usage in your document collection."
)
class Meta:
abstract = True
@ -75,11 +79,16 @@ class Tag(MatchingModel):
is_inbox_tag = models.BooleanField(
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")
help_text="Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags."
)
is_archived_tag = models.BooleanField(
default=False,
help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")
help_text="Marks this tag as an archive tag: All documents tagged "
"with archive tags will never be modified automatically "
"(i.e., modifying tags by matching rules)"
)
class DocumentType(MatchingModel):
@ -170,7 +179,9 @@ class Document(models.Model):
null=True,
unique=True,
db_index=True,
help_text="The position of this document in your physical document archive.")
help_text="The position of this document in your physical document "
"archive."
)
class Meta:
ordering = ("correspondent", "title")

11
src/documents/signals/handlers.py Normal file → Executable file
View File

@ -23,9 +23,16 @@ def classify_document(sender, document=None, logging_group=None, **kwargs):
global classifier
try:
classifier.reload()
classifier.classify_document(document, classify_correspondent=True, classify_tags=True, classify_document_type=True)
classifier.classify_document(
document,
classify_correspondent=True,
classify_tags=True,
classify_document_type=True
)
except FileNotFoundError:
logging.getLogger(__name__).fatal("Cannot classify document, classifier model file was not found.")
logging.getLogger(__name__).fatal(
"Cannot classify document, classifier model file was not found."
)
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):

3
src/documents/views.py Normal file → Executable file
View File

@ -28,7 +28,8 @@ from .serialisers import (
DocumentSerializer,
LogSerializer,
TagSerializer,
DocumentTypeSerializer)
DocumentTypeSerializer
)
class IndexView(TemplateView):

View File

@ -201,7 +201,10 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
# Document classification models location
MODEL_FILE = os.getenv(
"PAPERLESS_MODEL_FILE", os.path.join(BASE_DIR, "..", "models", "model.pickle"))
"PAPERLESS_MODEL_FILE", os.path.join(
BASE_DIR, "..", "models", "model.pickle"
)
)
# Paperless-specific stuff

3
src/paperless/urls.py Normal file → Executable file
View File

@ -13,7 +13,8 @@ from documents.views import (
LogViewSet,
PushView,
TagViewSet,
DocumentTypeViewSet)
DocumentTypeViewSet
)
from reminders.views import ReminderViewSet
router = DefaultRouter()