mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'machine-learning' into dev
This commit is contained in:
@@ -18,7 +18,7 @@ class Command(Renderable, BaseCommand):
|
||||
with open("dataset_tags.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
labels = []
|
||||
for tag in doc.tags.all():
|
||||
for tag in doc.tags.filter(automatic_classification=True):
|
||||
labels.append(tag.name)
|
||||
f.write(",".join(labels))
|
||||
f.write(";")
|
||||
@@ -27,14 +27,14 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
with open("dataset_types.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
f.write(doc.document_type.name if doc.document_type is not None else "None")
|
||||
f.write(doc.document_type.name if doc.document_type is not None and doc.document_type.automatic_classification else "-")
|
||||
f.write(";")
|
||||
f.write(preprocess_content(doc.content))
|
||||
f.write("\n")
|
||||
|
||||
with open("dataset_correspondents.txt", "w") as f:
|
||||
for doc in Document.objects.exclude(tags__is_inbox_tag=True):
|
||||
f.write(doc.correspondent.name if doc.correspondent is not None else "None")
|
||||
f.write(doc.correspondent.name if doc.correspondent is not None and doc.correspondent.automatic_classification else "-")
|
||||
f.write(";")
|
||||
f.write(preprocess_content(doc.content))
|
||||
f.write("\n")
|
||||
|
@@ -35,6 +35,10 @@ class Command(Renderable, BaseCommand):
|
||||
"-i", "--inbox-only",
|
||||
action="store_true"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-r", "--replace-tags",
|
||||
action="store_true"
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
@@ -52,7 +56,6 @@ class Command(Renderable, BaseCommand):
|
||||
logging.getLogger(__name__).fatal("Cannot classify documents, classifier model file was not found.")
|
||||
return
|
||||
|
||||
|
||||
for document in documents:
|
||||
logging.getLogger(__name__).info("Processing document {}".format(document.title))
|
||||
clf.classify_document(document, classify_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'])
|
||||
clf.classify_document(document, classify_document_type=options['type'], classify_tags=options['tags'], classify_correspondent=options['correspondent'], replace_tags=options['replace_tags'])
|
||||
|
Reference in New Issue
Block a user