Merge branch 'machine-learning' into dev

This commit is contained in:
Jonas Winkler
2018-09-11 14:36:21 +02:00
7 changed files with 66 additions and 45 deletions

View File

@@ -18,7 +18,7 @@ class Command(Renderable, BaseCommand):
with open("dataset_tags.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
labels = []
for tag in doc.tags.all():
for tag in doc.tags.filter(automatic_classification=True):
labels.append(tag.name)
f.write(",".join(labels))
f.write(";")
@@ -27,14 +27,14 @@ class Command(Renderable, BaseCommand):
with open("dataset_types.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
f.write(doc.document_type.name if doc.document_type is not None else "None")
f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
f.write(";")
f.write(preprocess_content(doc.content))
f.write("\n")
with open("dataset_correspondents.txt", "w") as f:
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
f.write(doc.correspondent.name if doc.correspondent is not None else "None")
f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
f.write(";")
f.write(preprocess_content(doc.content))
f.write("\n")

View File

@@ -35,6 +35,10 @@ class Command(Renderable, BaseCommand):
"-i", "--inbox-only",
action="store_true"
)
parser.add_argument(
"-r", "--replace-tags",
action="store_true"
)
def handle(self, *args, **options):
@@ -52,7 +56,6 @@ class Command(Renderable, BaseCommand):
logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
return
for document in documents:
logging.getLogger(__name__).info("Processing document {}".format(document.title))
clf.classify_document(document, classify_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'])
clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])