From d7ab69fed973e368bdeb4b3da22359670d625b49 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Fri, 24 Aug 2018 13:45:15 +0200 Subject: [PATCH] Added document type --- src/documents/actions.py | 66 ++++++++++++++++++- src/documents/admin.py | 27 ++++++-- src/documents/apps.py | 2 + .../migrations/0023_auto_20180823_1155.py | 33 ++++++++++ src/documents/models.py | 14 ++++ src/documents/signals/handlers.py | 31 ++++++++- .../document/change_list_results.html | 13 +++- .../documents/document/set_document_type.html | 46 +++++++++++++ src/documents/templatetags/hacks.py | 0 9 files changed, 222 insertions(+), 10 deletions(-) mode change 100644 => 100755 src/documents/apps.py create mode 100644 src/documents/migrations/0023_auto_20180823_1155.py create mode 100755 src/documents/templates/admin/documents/document/set_document_type.html mode change 100644 => 100755 src/documents/templatetags/hacks.py diff --git a/src/documents/actions.py b/src/documents/actions.py index 7f23baabd..144d3df03 100755 --- a/src/documents/actions.py +++ b/src/documents/actions.py @@ -4,7 +4,7 @@ from django.contrib.admin.utils import model_ngettext from django.core.exceptions import PermissionDenied from django.template.response import TemplateResponse -from documents.models import Tag, Correspondent +from documents.models import Tag, Correspondent, DocumentType def add_tag_to_selected(modeladmin, request, queryset): @@ -159,3 +159,67 @@ def remove_correspondent_from_selected(modeladmin, request, queryset): remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents" + + +def set_document_type_on_selected(modeladmin, request, queryset): + opts = modeladmin.model._meta + app_label = opts.app_label + + if not modeladmin.has_change_permission(request): + raise PermissionDenied + + if request.POST.get('post'): + n = queryset.count() + document_type = DocumentType.objects.get(id=request.POST.get('document_type_id')) + if n: + for obj in queryset: + 
obj_display = str(obj) + modeladmin.log_change(request, obj, obj_display) + queryset.update(document_type=document_type) + modeladmin.message_user(request, "Successfully set document type %(document_type)s on %(count)d %(items)s." % { + "document_type": document_type.name, "count": n, "items": model_ngettext(modeladmin.opts, n) + }, messages.SUCCESS) + + # Return None to display the change list page again. + return None + + title = "Set document type on multiple documents" + + context = dict( + modeladmin.admin_site.each_context(request), + title=title, + queryset=queryset, + opts=opts, + action_checkbox_name=helpers.ACTION_CHECKBOX_NAME, + media=modeladmin.media, + document_types=DocumentType.objects.all() + ) + + request.current_app = modeladmin.admin_site.name + + return TemplateResponse(request, + "admin/%s/%s/set_document_type.html" % (app_label, opts.model_name) + , context) + + +set_document_type_on_selected.short_description = "Set document type on selected documents" + + +def remove_document_type_from_selected(modeladmin, request, queryset): + if not modeladmin.has_change_permission(request): + raise PermissionDenied + + n = queryset.count() + if n: + for obj in queryset: + obj_display = str(obj) + modeladmin.log_change(request, obj, obj_display) + queryset.update(document_type=None) + modeladmin.message_user(request, "Successfully removed document type from %(count)d %(items)s." 
% { + "count": n, "items": model_ngettext(modeladmin.opts, n) + }, messages.SUCCESS) + + return None + + +remove_document_type_from_selected.short_description = "Remove document type from selected documents" diff --git a/src/documents/admin.py b/src/documents/admin.py index 4097fcb3b..d0cd23e9b 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -12,8 +12,8 @@ from django.utils.http import urlquote from django.utils.safestring import mark_safe from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \ - remove_correspondent_from_selected -from .models import Correspondent, Tag, Document, Log + remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected +from .models import Correspondent, Tag, Document, Log, DocumentType class FinancialYearFilter(admin.SimpleListFilter): @@ -120,6 +120,22 @@ class TagAdmin(CommonAdmin): def document_count(self, obj): return obj.documents.count() +class DocumentTypeAdmin(CommonAdmin): + + list_display = ("name", "match", "matching_algorithm", "document_count") + list_filter = ("matching_algorithm",) + list_editable = ("match", "matching_algorithm") + + def save_model(self, request, obj, form, change): + super().save_model(request, obj, form, change) + + for document in Document.objects.filter(document_type__isnull=True).exclude(tags__is_archived_tag=True): + if obj.matches(document.content): + document.document_type = obj + document.save(update_fields=("document_type",)) + + def document_count(self, obj): + return obj.documents.count() class DocumentAdmin(CommonAdmin): @@ -132,12 +148,12 @@ class DocumentAdmin(CommonAdmin): search_fields = ("correspondent__name", "title", "content", "tags__name") readonly_fields = ("added",) list_display = ("title", "created", "added", "thumbnail", "correspondent", - "tags_", "archive_serial_number") - list_filter = ("tags", "correspondent", FinancialYearFilter) + "tags_", 
"archive_serial_number", "document_type") + list_filter = ("document_type", "tags", "correspondent", FinancialYearFilter) ordering = ["-created", "correspondent"] - actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected] + actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected] date_hierarchy = 'created' @@ -273,6 +289,7 @@ class LogAdmin(CommonAdmin): admin.site.register(Correspondent, CorrespondentAdmin) admin.site.register(Tag, TagAdmin) +admin.site.register(DocumentType, DocumentTypeAdmin) admin.site.register(Document, DocumentAdmin) admin.site.register(Log, LogAdmin) diff --git a/src/documents/apps.py b/src/documents/apps.py old mode 100644 new mode 100755 index 435e1afa5..7b2d50f31 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -13,6 +13,7 @@ class DocumentsConfig(AppConfig): from .signals.handlers import ( set_correspondent, set_tags, + set_document_type, run_pre_consume_script, run_post_consume_script, cleanup_document_deletion, @@ -23,6 +24,7 @@ class DocumentsConfig(AppConfig): document_consumption_finished.connect(set_tags) document_consumption_finished.connect(set_correspondent) + document_consumption_finished.connect(set_document_type) document_consumption_finished.connect(set_log_entry) document_consumption_finished.connect(run_post_consume_script) diff --git a/src/documents/migrations/0023_auto_20180823_1155.py b/src/documents/migrations/0023_auto_20180823_1155.py new file mode 100644 index 000000000..e8d8098f4 --- /dev/null +++ b/src/documents/migrations/0023_auto_20180823_1155.py @@ -0,0 +1,33 @@ +# Generated by Django 2.0.7 on 2018-08-23 11:55 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0022_workflow_improvements'), + ] 
+ + operations = [ + migrations.CreateModel( + name='DocumentType', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128, unique=True)), + ('slug', models.SlugField(blank=True)), + ('match', models.CharField(blank=True, max_length=256)), + ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')), + ('is_insensitive', models.BooleanField(default=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='document', + name='document_type', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 2da28da57..da30ce4bd 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -15,6 +15,7 @@ from django.db import models from django.template.defaultfilters import slugify from django.utils import timezone +from reminders.models import Reminder from .managers import LogManager @@ -189,6 +190,11 @@ class Tag(MatchingModel): help_text="Marks this tag as an archive tag: All documents tagged with 
archive tags will never be modified automatically (i.e., modifying tags by matching rules)") +class DocumentType(MatchingModel): + + pass + + class Document(models.Model): TYPE_PDF = "pdf" @@ -215,6 +221,14 @@ class Document(models.Model): title = models.CharField(max_length=128, blank=True, db_index=True) + document_type = models.ForeignKey( + DocumentType, + blank=True, + null=True, + related_name="documents", + on_delete=models.SET_NULL + ) + content = models.TextField( db_index=True, blank=True, diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index b3579b567..77713333e 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -8,7 +8,7 @@ from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.utils import timezone -from ..models import Correspondent, Document, Tag +from ..models import Correspondent, Document, Tag, DocumentType def logger(message, group): @@ -44,6 +44,35 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs): document.save(update_fields=("correspondent",)) +def set_document_type(sender, document=None, logging_group=None, **kwargs): + + # No sense in assigning a document type when one is already set. 
+ if document.document_type: + return + + # No matching document types, so no need to continue + potential_document_types = list(DocumentType.match_all(document.content)) + if not potential_document_types: + return + + potential_count = len(potential_document_types) + selected = potential_document_types[0] + if potential_count > 1: + message = "Detected {} potential document types, so we've opted for {}" + logger( + message.format(potential_count, selected), + logging_group + ) + + logger( + 'Assigning document type "{}" to "{}" '.format(selected, document), + logging_group + ) + + document.document_type = selected + document.save(update_fields=("document_type",)) + + def set_tags(sender, document=None, logging_group=None, **kwargs): current_tags = set(document.tags.all()) diff --git a/src/documents/templates/admin/documents/document/change_list_results.html b/src/documents/templates/admin/documents/document/change_list_results.html index d295ce601..a669e4b06 100755 --- a/src/documents/templates/admin/documents/document/change_list_results.html +++ b/src/documents/templates/admin/documents/document/change_list_results.html @@ -24,7 +24,7 @@ border: 1px solid #cccccc; border-radius: 2%; overflow: hidden; - height: 300px; + height: 350px; position: relative; } .result .header { @@ -61,6 +61,11 @@ .result a.tag { color: #ffffff; } + .result .documentType { + padding: 5px; + background-color: #eeeeee; + text-align: center; + } .result .date { padding: 5px; } @@ -163,7 +168,8 @@ {# 5: Correspondent #} {# 6: Tags #} {# 7: Archive serial number #} - {# 8: Document edit url #} + {# 8: Document type #} + {# 9: Document edit url #}
@@ -177,7 +183,7 @@ selection would not be possible with mouse click + drag. Instead, the underlying link would be dragged. {% endcomment %} - +
{{ result.0 }}
{{ result.5 }} @@ -185,6 +191,7 @@ {{ result.1 }}
+ {% if '>-<' not in result.8 %}
{{ result.8 }}
{% endif %}
{{ result.6 }}
{{ result.2 }}
diff --git a/src/documents/templates/admin/documents/document/set_document_type.html b/src/documents/templates/admin/documents/document/set_document_type.html new file mode 100755 index 000000000..0419e0ebb --- /dev/null +++ b/src/documents/templates/admin/documents/document/set_document_type.html @@ -0,0 +1,46 @@ +{% extends "admin/base_site.html" %} +{% load i18n l10n admin_urls static %} +{% load staticfiles %} + +{% block extrahead %} +{{ block.super }} +{{ media }} + + +{% endblock %} + +{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %} + +{% block breadcrumbs %} + +{% endblock %} + +{% block content %} +

Please select the document type.

+
{% csrf_token %} +
+ {% for obj in queryset %} + + {% endfor %} +

+ +

+ + + +

+ + {% trans "Go back" %} +

+
+
+{% endblock %} \ No newline at end of file diff --git a/src/documents/templatetags/hacks.py b/src/documents/templatetags/hacks.py old mode 100644 new mode 100755