From 77aee832e4abb92b3c986088408fd5ecb76e7c09 Mon Sep 17 00:00:00 2001 From: Dashie Date: Tue, 31 Jan 2017 22:18:05 +0100 Subject: [PATCH 1/3] Add manager command to re-tag documents without correspondent --- .../commands/document_correspondents.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/documents/management/commands/document_correspondents.py diff --git a/src/documents/management/commands/document_correspondents.py b/src/documents/management/commands/document_correspondents.py new file mode 100644 index 000000000..596379784 --- /dev/null +++ b/src/documents/management/commands/document_correspondents.py @@ -0,0 +1,43 @@ +from django.core.management.base import BaseCommand + +from documents.models import Document, Correspondent + +from ...mixins import Renderable + + +class Command(Renderable, BaseCommand): + + help = """ + Using the current set of correspondent rules, apply said rules to all + documents in the database, effectively allowing you to back-tag all + previously indexed documents with correspondent created (or modified) after + their initial import. + """.replace(" ", "") + + def __init__(self, *args, **kwargs): + self.verbosity = 0 + BaseCommand.__init__(self, *args, **kwargs) + + def handle(self, *args, **options): + + self.verbosity = options["verbosity"] + + for document in Document.objects.all(): + # No matching correspondents, so no need to continue + if document.correspondent: + continue + + potential_correspondents = list(Correspondent.match_all(document.content)) + if not potential_correspondents: + continue + + potential_count = len(potential_correspondents) + + selected = potential_correspondents[0] + if potential_count > 1: + message = "Detected {} potential correspondents for {}, so we've opted for {}" + print(message.format(potential_count, document, selected)) + + print('Tagging {} with correspondent "{}"'.format(document, selected)) + document.correspondent = selected + document.save(update_fields=("correspondent",)) From 11accaff7f7e72d6d47b263d814985d7b08b4941 Mon Sep 17 00:00:00 2001 From: Dashie Date: Tue, 31 Jan 2017 22:37:48 +0100 Subject: [PATCH 2/3] Fix line length --- .../management/commands/document_correspondents.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/documents/management/commands/document_correspondents.py b/src/documents/management/commands/document_correspondents.py index 596379784..243eb9b19 100644 --- a/src/documents/management/commands/document_correspondents.py +++ b/src/documents/management/commands/document_correspondents.py @@ -6,12 +6,11 @@ from ...mixins import Renderable class Command(Renderable, BaseCommand): - help = """ Using the current set of correspondent rules, apply said rules to all documents in the database, effectively allowing you to back-tag all - previously indexed documents with correspondent created (or modified) after - their initial import. + previously indexed documents with correspondent created (or modified) + after their initial import. """.replace(" ", "") def __init__(self, *args, **kwargs): @@ -27,7 +26,8 @@ class Command(Renderable, BaseCommand): if document.correspondent: continue - potential_correspondents = list(Correspondent.match_all(document.content)) + potential_correspondents = list( + Correspondent.match_all(document.content)) if not potential_correspondents: continue @@ -35,9 +35,11 @@ class Command(Renderable, BaseCommand): selected = potential_correspondents[0] if potential_count > 1: - message = "Detected {} potential correspondents for {}, so we've opted for {}" + message = "Detected {} potential correspondents for {}, " \ + "so we've opted for {}" print(message.format(potential_count, document, selected)) - print('Tagging {} with correspondent "{}"'.format(document, selected)) + print('Tagging {} with correspondent "{}"'.format(document, + selected)) document.correspondent = selected document.save(update_fields=("correspondent",)) From 11a9c756b3dc514c89d2cd68d37994a350f3467a Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Thu, 8 Feb 2018 20:03:29 +0000 Subject: [PATCH 3/3] Updated for style and to add a --use-first option --- .../commands/document_correspondents.py | 61 +++++++++++++++---- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/documents/management/commands/document_correspondents.py b/src/documents/management/commands/document_correspondents.py index 243eb9b19..0709c49d2 100644 --- a/src/documents/management/commands/document_correspondents.py +++ b/src/documents/management/commands/document_correspondents.py @@ -1,11 +1,14 @@ +import sys + from django.core.management.base import BaseCommand -from documents.models import Document, Correspondent +from documents.models import Correspondent, Document from ...mixins import Renderable class Command(Renderable, BaseCommand): + help = """ Using the current set of correspondent rules, apply said rules to all documents in the database, effectively allowing you to back-tag all @@ -13,33 +16,67 @@ class Command(Renderable, BaseCommand): after their initial import. """.replace(" ", "") + TOO_MANY_CONTINUE = ( + "Detected {} potential correspondents for {}, so we've opted for {}") + TOO_MANY_SKIP = ( + "Detected {} potential correspondents for {}, so we're skipping it") + CHANGE_MESSAGE = ( + 'Document {}: "{}" was given the correspondent id {}: "{}"') + def __init__(self, *args, **kwargs): self.verbosity = 0 BaseCommand.__init__(self, *args, **kwargs) + def add_arguments(self, parser): + parser.add_argument( + "--use-first", + default=False, + action="store_true", + help="By default this command won't try to assign a correspondent " + "if more than one matches the document. Use this flag if " + "you'd rather it just pick the first one it finds." + ) + def handle(self, *args, **options): self.verbosity = options["verbosity"] - for document in Document.objects.all(): - # No matching correspondents, so no need to continue - if document.correspondent: - continue + for document in Document.objects.filter(correspondent__isnull=True): potential_correspondents = list( Correspondent.match_all(document.content)) + if not potential_correspondents: continue potential_count = len(potential_correspondents) + correspondent = potential_correspondents[0] - selected = potential_correspondents[0] if potential_count > 1: - message = "Detected {} potential correspondents for {}, " \ - "so we've opted for {}" - print(message.format(potential_count, document, selected)) + if not options["use_first"]: + print( + self.TOO_MANY_SKIP.format(potential_count, document), + file=sys.stderr + ) + continue + print( + self.TOO_MANY_CONTINUE.format( + potential_count, + document, + correspondent + ), + file=sys.stderr + ) - print('Tagging {} with correspondent "{}"'.format(document, - selected)) - document.correspondent = selected + document.correspondent = correspondent document.save(update_fields=("correspondent",)) + + print( + self.CHANGE_MESSAGE.format( + document.pk, + document.title, + correspondent.pk, + correspondent.name + ), + file=sys.stderr + )