Code style adjustments

This commit is contained in:
Jonas Winkler
2018-09-25 16:09:33 +02:00
parent ddff8a0450
commit efc7bf1d23
12 changed files with 94 additions and 81 deletions

View File

@@ -1,6 +1,4 @@
import logging
import os.path
import pickle
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier
@@ -19,9 +17,7 @@ class Command(Renderable, BaseCommand):
def handle(self, *args, **options):
clf = DocumentClassifier()
clf.train()
logging.getLogger(__name__).info("Saving models to " + settings.MODEL_FILE + "...")
logging.getLogger(__name__).info("Saving models to " +
settings.MODEL_FILE + "...")
clf.save_classifier()

View File

@@ -1,40 +0,0 @@
from django.core.management.base import BaseCommand
from documents.classifier import preprocess_content
from documents.models import Document
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
    """Management command that exports documents as plain-text datasets.

    Running the command writes three files into the current working
    directory: ``dataset_tags.txt``, ``dataset_types.txt`` and
    ``dataset_correspondents.txt``. Each file holds one line per document
    in the form ``<label(s)>;<preprocessed content>``.
    """

    # NOTE(review): as written, replace(" ", "") removes every space from
    # the help text, including the ones between words. Presumably only the
    # indentation was meant to be stripped -- confirm against the original
    # file before changing the literal.
    help = """
There is no help.
""".replace(" ", "")

    def __init__(self, *args, **kwargs):
        # Deliberately calls BaseCommand.__init__ directly (not super()),
        # exactly as the original did.
        BaseCommand.__init__(self, *args, **kwargs)

    @staticmethod
    def _label(related):
        """Return ``related.name`` when it may be used as a label, else "-".

        ``related`` is a document's ``document_type`` or ``correspondent``;
        it may be ``None``. The name is used only when the object has
        ``automatic_classification`` set.
        """
        if related is not None and related.automatic_classification:
            return related.name
        return "-"

    def handle(self, *args, **options):
        """Write the tag, document type and correspondent datasets.

        All documents except those carrying an inbox tag are exported.
        The files are written in a single pass so that the queryset is
        evaluated once and each document's content is preprocessed once
        instead of three times.
        """
        row = "{};{}\n"
        documents = Document.objects.exclude(tags__is_inbox_tag=True)

        # Explicit UTF-8: document content is arbitrary text, and the
        # locale's default encoding may not be able to represent it.
        with open("dataset_tags.txt", "w", encoding="utf-8") as tags_file, \
                open("dataset_types.txt", "w",
                     encoding="utf-8") as types_file, \
                open("dataset_correspondents.txt", "w",
                     encoding="utf-8") as correspondents_file:
            for doc in documents:
                content = preprocess_content(doc.content)

                # Only tags flagged for automatic classification are
                # exported; a document without any gets an empty label
                # field.
                tag_names = ",".join(
                    tag.name
                    for tag in doc.tags.filter(automatic_classification=True)
                )

                tags_file.write(row.format(tag_names, content))
                types_file.write(
                    row.format(self._label(doc.document_type), content))
                correspondents_file.write(
                    row.format(self._label(doc.correspondent), content))

9
src/documents/management/commands/document_retagger.py Normal file → Executable file
View File

@@ -11,7 +11,10 @@ from ...mixins import Renderable
class Command(Renderable, BaseCommand):
help = """
There is no help. #TODO
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
@@ -44,7 +47,7 @@ class Command(Renderable, BaseCommand):
self.verbosity = options["verbosity"]
if options['inbox_only']:
if options["inbox_only"]:
documents = Document.objects.filter(tags__is_inbox_tag=True).exclude(tags__is_archived_tag=True).distinct()
else:
documents = Document.objects.all().exclude(tags__is_archived_tag=True).distinct()
@@ -58,4 +61,4 @@ class Command(Renderable, BaseCommand):
for document in documents:
logging.getLogger(__name__).info("Processing document {}".format(document.title))
clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
clf.classify_document(document, classify_document_type=options["type"], classify_tags=options["tags"], classify_correspondent=options["correspondent"], replace_tags=options["replace_tags"])