mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Code style adjustments
This commit is contained in:
parent
779ea6a015
commit
60618381f8
@ -18,9 +18,9 @@ def select_action(
|
||||
if not modeladmin.has_change_permission(request):
|
||||
raise PermissionDenied
|
||||
|
||||
if request.POST.get('post'):
|
||||
if request.POST.get("post"):
|
||||
n = queryset.count()
|
||||
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
|
||||
selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
|
||||
if n:
|
||||
for document in queryset:
|
||||
if document_action:
|
||||
@ -139,28 +139,52 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
|
||||
|
||||
|
||||
def set_document_type_on_selected(modeladmin, request, queryset):
    """Admin action: set one document type on all selected documents.

    Thin wrapper around ``select_action``: it supplies ``DocumentType`` as
    the model class and a ``queryset_action`` that takes the queryset and a
    document type and applies it with a single bulk ``update()``.

    The three arguments are forwarded unchanged and the return value is
    whatever ``select_action`` returns.
    """
    return select_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        title="Set document type on multiple documents",
        action="set_document_type_on_selected",
        modelclass=DocumentType,
        success_message="Successfully set document type %(selected_object)s "
                        "on %(count)d %(items)s.",
        # Bulk update: one query for the whole selection instead of one
        # save() per document.
        queryset_action=lambda qs, document_type: qs.update(
            document_type=document_type
        ),
    )
|
||||
|
||||
|
||||
def remove_document_type_from_selected(modeladmin, request, queryset):
    """Admin action: clear the document type of all selected documents.

    Thin wrapper around ``simple_action``: the supplied ``queryset_action``
    sets ``document_type`` to ``None`` on the whole queryset with a single
    bulk ``update()``.

    The three arguments are forwarded unchanged and the return value is
    whatever ``simple_action`` returns.
    """
    return simple_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        success_message="Successfully removed document type from %(count)d "
                        "%(items)s.",
        queryset_action=lambda qs: qs.update(document_type=None),
    )
|
||||
|
||||
|
||||
def run_document_classifier_on_selected(modeladmin, request, queryset):
    """Admin action: run the document classifier on all selected documents.

    Loads the classifier via ``DocumentClassifier.load_classifier()`` and
    hands ``simple_action`` a per-document callback that classifies
    correspondent, tags and document type.

    Returns whatever ``simple_action`` returns, or ``None`` after reporting
    an error message to the user when a ``FileNotFoundError`` is raised.
    """
    # The try block deliberately spans both the load and the action itself,
    # so a FileNotFoundError from either is reported the same way.
    try:
        clf = DocumentClassifier.load_classifier()
        return simple_action(
            modeladmin=modeladmin,
            request=request,
            queryset=queryset,
            success_message="Successfully applied document classifier to "
                            "%(count)d %(items)s.",
            document_action=lambda doc: clf.classify_document(
                doc,
                classify_correspondent=True,
                classify_tags=True,
                classify_document_type=True,
            ),
        )
    except FileNotFoundError:
        modeladmin.message_user(
            request,
            "Classifier model file not found.",
            messages.ERROR,
        )
        return None
|
||||
|
||||
|
||||
@ -171,7 +195,10 @@ set_correspondent_on_selected.short_description = \
|
||||
"Set correspondent on selected documents"
|
||||
remove_correspondent_from_selected.short_description = \
|
||||
"Remove correspondent from selected documents"
|
||||
set_document_type_on_selected.short_description = "Set document type on selected documents"
|
||||
remove_document_type_from_selected.short_description = "Remove document type from selected documents"
|
||||
run_document_classifier_on_selected.short_description = "Run document classifier on selected"
|
||||
set_document_type_on_selected.short_description = \
|
||||
"Set document type on selected documents"
|
||||
remove_document_type_from_selected.short_description = \
|
||||
"Remove document type from selected documents"
|
||||
run_document_classifier_on_selected.short_description = \
|
||||
"Run document classifier on selected"
|
||||
|
||||
|
@ -124,7 +124,7 @@ class CorrespondentAdmin(CommonAdmin):
|
||||
"document_count",
|
||||
"last_correspondence"
|
||||
)
|
||||
list_editable = ("automatic_classification")
|
||||
list_editable = ("automatic_classification",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(CorrespondentAdmin, self).get_queryset(request)
|
||||
@ -145,7 +145,11 @@ class CorrespondentAdmin(CommonAdmin):
|
||||
|
||||
class TagAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "colour", "automatic_classification", "document_count")
|
||||
list_display = (
|
||||
"name",
|
||||
"colour",
|
||||
"automatic_classification",
|
||||
"document_count")
|
||||
list_filter = ("colour",)
|
||||
list_editable = ("colour", "automatic_classification")
|
||||
|
||||
@ -238,8 +242,8 @@ class DocumentAdmin(CommonAdmin):
|
||||
|
||||
extra_context = extra_context or {}
|
||||
doc = Document.objects.get(id=object_id)
|
||||
extra_context['download_url'] = doc.download_url
|
||||
extra_context['file_type'] = doc.file_type
|
||||
extra_context["download_url"] = doc.download_url
|
||||
extra_context["file_type"] = doc.file_type
|
||||
|
||||
if self.document_queue and object_id:
|
||||
if int(object_id) in self.document_queue:
|
||||
|
2
src/documents/classifier.py
Normal file → Executable file
2
src/documents/classifier.py
Normal file → Executable file
@ -87,7 +87,7 @@ class DocumentClassifier(object):
|
||||
|
||||
# Step 2: vectorize data
|
||||
logging.getLogger(__name__).info("Vectorizing data...")
|
||||
self.data_vectorizer = CountVectorizer(analyzer='char', ngram_range=(3, 5), min_df=0.1)
|
||||
self.data_vectorizer = CountVectorizer(analyzer="char", ngram_range=(3, 5), min_df=0.1)
|
||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||
|
||||
self.tags_binarizer = MultiLabelBinarizer()
|
||||
|
0
src/documents/filters.py
Normal file → Executable file
0
src/documents/filters.py
Normal file → Executable file
8
src/documents/management/commands/document_create_classifier.py
Normal file → Executable file
8
src/documents/management/commands/document_create_classifier.py
Normal file → Executable file
@ -1,6 +1,4 @@
|
||||
import logging
|
||||
import os.path
|
||||
import pickle
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from documents.classifier import DocumentClassifier
|
||||
@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
def handle(self, *args, **options):
|
||||
clf = DocumentClassifier()
|
||||
|
||||
clf.train()
|
||||
|
||||
logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...")
|
||||
|
||||
logging.getLogger(__name__).info("Saving models to " +
|
||||
settings.MODEL_FILE + "...")
|
||||
clf.save_classifier()
|
||||
|
@ -1,40 +0,0 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.classifier import preprocess_content
|
||||
from documents.models import Document
|
||||
from ...mixins import Renderable
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
    """Dump classifier training data to three text files.

    Writes ``dataset_tags.txt``, ``dataset_types.txt`` and
    ``dataset_correspondents.txt`` into the current working directory. Each
    file has one line per document in the form ``<label>;<content>``, where
    the content is passed through ``preprocess_content``. Documents carrying
    an inbox tag are excluded.
    """

    # Strip the source-code indentation from the help text.
    help = """
        There is no help.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)

    def handle(self, *args, **options):
        """Write the tag, document type and correspondent datasets."""
        self._write_dataset("dataset_tags.txt", self._tag_label)
        self._write_dataset(
            "dataset_types.txt",
            lambda doc: self._name_or_placeholder(doc.document_type)
        )
        self._write_dataset(
            "dataset_correspondents.txt",
            lambda doc: self._name_or_placeholder(doc.correspondent)
        )

    @staticmethod
    def _write_dataset(path, label_for):
        """Write one ``<label>;<content>`` line per non-inbox document."""
        # Explicit encoding: document content is arbitrary text and must not
        # depend on the platform's default locale encoding.
        with open(path, "w", encoding="utf-8") as f:
            for doc in Document.objects.exclude(tags__is_inbox_tag=True):
                f.write("{};{}\n".format(
                    label_for(doc),
                    preprocess_content(doc.content)
                ))

    @staticmethod
    def _tag_label(doc):
        """Return the comma-joined names of the auto-classified tags."""
        return ",".join(
            tag.name
            for tag in doc.tags.filter(automatic_classification=True)
        )

    @staticmethod
    def _name_or_placeholder(obj):
        """Return ``obj.name`` if it is auto-classified, otherwise ``"-"``."""
        if obj is not None and obj.automatic_classification:
            return obj.name
        return "-"
|
9
src/documents/management/commands/document_retagger.py
Normal file → Executable file
9
src/documents/management/commands/document_retagger.py
Normal file → Executable file
@ -11,7 +11,10 @@ from ...mixins import Renderable
|
||||
class Command(Renderable, BaseCommand):
|
||||
|
||||
help = """
|
||||
There is no help. #TODO
|
||||
Using the current classification model, assigns correspondents, tags
|
||||
and document types to all documents, effectively allowing you to
|
||||
back-tag all previously indexed documents with metadata created (or
|
||||
modified) after their initial import.
|
||||
""".replace(" ", "")
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
self.verbosity = options["verbosity"]
|
||||
|
||||
if options['inbox_only']:
|
||||
if options["inbox_only"]:
|
||||
documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct()
|
||||
else:
|
||||
documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
|
||||
@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
for document in documents:
|
||||
logging.getLogger(__name__).info("Processing document {}".format(document.title))
|
||||
clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
|
||||
clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"])
|
||||
|
19
src/documents/models.py
Normal file → Executable file
19
src/documents/models.py
Normal file → Executable file
@ -26,7 +26,11 @@ class MatchingModel(models.Model):
|
||||
name = models.CharField(max_length=128, unique=True)
|
||||
slug = models.SlugField(blank=True)
|
||||
|
||||
automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
|
||||
automatic_classification = models.BooleanField(
|
||||
default=False,
|
||||
help_text="Automatically assign to newly added documents based on "
|
||||
"current usage in your document collection."
|
||||
)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
@ -75,11 +79,16 @@ class Tag(MatchingModel):
|
||||
|
||||
is_inbox_tag = models.BooleanField(
|
||||
default=False,
|
||||
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.")
|
||||
help_text="Marks this tag as an inbox tag: All newly consumed "
|
||||
"documents will be tagged with inbox tags."
|
||||
)
|
||||
|
||||
is_archived_tag = models.BooleanField(
|
||||
default=False,
|
||||
help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")
|
||||
help_text="Marks this tag as an archive tag: All documents tagged "
|
||||
"with archive tags will never be modified automatically "
|
||||
"(i.e., modifying tags by matching rules)"
|
||||
)
|
||||
|
||||
|
||||
class DocumentType(MatchingModel):
|
||||
@ -170,7 +179,9 @@ class Document(models.Model):
|
||||
null=True,
|
||||
unique=True,
|
||||
db_index=True,
|
||||
help_text="The position of this document in your physical document archive.")
|
||||
help_text="The position of this document in your physical document "
|
||||
"archive."
|
||||
)
|
||||
|
||||
class Meta:
|
||||
ordering = ("correspondent", "title")
|
||||
|
11
src/documents/signals/handlers.py
Normal file → Executable file
11
src/documents/signals/handlers.py
Normal file → Executable file
@ -23,9 +23,16 @@ def classify_document(sender, document=None, logging_group=None, **kwargs):
|
||||
global classifier
|
||||
try:
|
||||
classifier.reload()
|
||||
classifier.classify_document(document, classify_correspondent=True, classify_tags=True, classify_document_type=True)
|
||||
classifier.classify_document(
|
||||
document,
|
||||
classify_correspondent=True,
|
||||
classify_tags=True,
|
||||
classify_document_type=True
|
||||
)
|
||||
except FileNotFoundError:
|
||||
logging.getLogger(__name__).fatal("Cannot classify document, classifier model file was not found.")
|
||||
logging.getLogger(__name__).fatal(
|
||||
"Cannot classify document, classifier model file was not found."
|
||||
)
|
||||
|
||||
|
||||
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
|
||||
|
3
src/documents/views.py
Normal file → Executable file
3
src/documents/views.py
Normal file → Executable file
@ -28,7 +28,8 @@ from .serialisers import (
|
||||
DocumentSerializer,
|
||||
LogSerializer,
|
||||
TagSerializer,
|
||||
DocumentTypeSerializer)
|
||||
DocumentTypeSerializer
|
||||
)
|
||||
|
||||
|
||||
class IndexView(TemplateView):
|
||||
|
@ -201,7 +201,10 @@ MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
||||
|
||||
# Document classification models location.
# Taken from the PAPERLESS_MODEL_FILE environment variable when set;
# otherwise defaults to models/model.pickle one level above BASE_DIR.
MODEL_FILE = os.getenv(
    "PAPERLESS_MODEL_FILE",
    os.path.join(BASE_DIR, "..", "models", "model.pickle")
)
|
||||
|
||||
|
||||
# Paperless-specific stuff
|
||||
|
3
src/paperless/urls.py
Normal file → Executable file
3
src/paperless/urls.py
Normal file → Executable file
@ -13,7 +13,8 @@ from documents.views import (
|
||||
LogViewSet,
|
||||
PushView,
|
||||
TagViewSet,
|
||||
DocumentTypeViewSet)
|
||||
DocumentTypeViewSet
|
||||
)
|
||||
from reminders.views import ReminderViewSet
|
||||
|
||||
router = DefaultRouter()
|
||||
|
Loading…
x
Reference in New Issue
Block a user