Added document type

This commit is contained in:
Jonas Winkler 2018-08-24 13:45:15 +02:00
parent dfa5ea423f
commit d7ab69fed9
9 changed files with 222 additions and 10 deletions

View File

@ -4,7 +4,7 @@ from django.contrib.admin.utils import model_ngettext
from django.core.exceptions import PermissionDenied from django.core.exceptions import PermissionDenied
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from documents.models import Tag, Correspondent from documents.models import Tag, Correspondent, DocumentType
def add_tag_to_selected(modeladmin, request, queryset): def add_tag_to_selected(modeladmin, request, queryset):
@ -159,3 +159,67 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents" remove_correspondent_from_selected.short_description = "Remove correspondent from selected documents"
def set_document_type_on_selected(modeladmin, request, queryset):
    """Admin action: set one document type on every selected document.

    The action runs in two steps. Without a ``post`` value in the request it
    renders a confirmation page listing all document types. Once the
    confirmation form is submitted (``post`` is set), the document type named
    by ``document_type_id`` is applied to the whole selection.

    Args:
        modeladmin: The ``ModelAdmin`` the action was triggered from.
        request: The current request; ``request.POST`` carries ``post`` and
            ``document_type_id`` on the confirmation step.
        queryset: The documents selected in the change list.

    Returns:
        ``None`` to redisplay the change list after the confirmation step,
        otherwise a ``TemplateResponse`` rendering the selection form.

    Raises:
        PermissionDenied: If the user lacks change permission.
    """
    opts = modeladmin.model._meta
    app_label = opts.app_label

    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

    if request.POST.get('post'):
        n = queryset.count()
        if not n:
            # Nothing is selected, so there is nothing to look up or update.
            # Return None to display the change list page again.
            return None

        try:
            document_type = DocumentType.objects.get(
                id=request.POST.get('document_type_id')
            )
        except (DocumentType.DoesNotExist, ValueError):
            # A missing, non-numeric or stale id must not turn into a
            # server error; report it and go back to the change list.
            modeladmin.message_user(
                request,
                "The selected document type does not exist.",
                messages.ERROR,
            )
            return None

        # Record a change entry per document before the bulk update.
        for obj in queryset:
            modeladmin.log_change(request, obj, str(obj))

        queryset.update(document_type=document_type)

        modeladmin.message_user(request, "Successfully set document type %(document_type)s on %(count)d %(items)s." % {
            "document_type": document_type.name, "count": n, "items": model_ngettext(modeladmin.opts, n)
        }, messages.SUCCESS)

        # Return None to display the change list page again.
        return None

    title = "Set document type on multiple documents"

    context = dict(
        modeladmin.admin_site.each_context(request),
        title=title,
        queryset=queryset,
        opts=opts,
        action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
        media=modeladmin.media,
        document_types=DocumentType.objects.all()
    )

    request.current_app = modeladmin.admin_site.name

    return TemplateResponse(
        request,
        "admin/%s/%s/set_document_type.html" % (app_label, opts.model_name),
        context,
    )


set_document_type_on_selected.short_description = "Set document type on selected documents"
def remove_document_type_from_selected(modeladmin, request, queryset):
    """Admin action: clear the document type of every selected document.

    Each document gets a change-log entry, the selection is updated in one
    query, and a success message is shown. An empty selection does nothing.

    Raises:
        PermissionDenied: If the user lacks change permission.
    """
    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

    selected_count = queryset.count()
    if not selected_count:
        return None

    # Record a change entry per document before the bulk update.
    for document in queryset:
        modeladmin.log_change(request, document, str(document))

    queryset.update(document_type=None)

    success_text = "Successfully removed document type from %(count)d %(items)s." % {
        "count": selected_count,
        "items": model_ngettext(modeladmin.opts, selected_count),
    }
    modeladmin.message_user(request, success_text, messages.SUCCESS)

    return None


remove_document_type_from_selected.short_description = "Remove document type from selected documents"

View File

@ -12,8 +12,8 @@ from django.utils.http import urlquote
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \ from documents.actions import add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, \
remove_correspondent_from_selected remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected
from .models import Correspondent, Tag, Document, Log from .models import Correspondent, Tag, Document, Log, DocumentType
class FinancialYearFilter(admin.SimpleListFilter): class FinancialYearFilter(admin.SimpleListFilter):
@ -120,6 +120,22 @@ class TagAdmin(CommonAdmin):
def document_count(self, obj): def document_count(self, obj):
return obj.documents.count() return obj.documents.count()
class DocumentTypeAdmin(CommonAdmin):
    """Admin for document types, with inline editing of the matching rule."""

    list_display = ("name", "match", "matching_algorithm", "document_count")
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

    def save_model(self, request, obj, form, change):
        """Save the document type, then apply it to matching untyped documents.

        Only documents that have no document type yet and carry no archive
        tag are considered; each one whose content matches ``obj`` is
        assigned ``obj`` as its document type.
        """
        super().save_model(request, obj, form, change)

        untyped_documents = Document.objects.filter(
            document_type__isnull=True
        ).exclude(tags__is_archived_tag=True)

        for candidate in untyped_documents:
            if not obj.matches(candidate.content):
                continue
            candidate.document_type = obj
            candidate.save(update_fields=("document_type",))

    def document_count(self, obj):
        """Return the number of documents that use this document type."""
        return obj.documents.count()
class DocumentAdmin(CommonAdmin): class DocumentAdmin(CommonAdmin):
@ -132,12 +148,12 @@ class DocumentAdmin(CommonAdmin):
search_fields = ("correspondent__name", "title", "content", "tags__name") search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added",) readonly_fields = ("added",)
list_display = ("title", "created", "added", "thumbnail", "correspondent", list_display = ("title", "created", "added", "thumbnail", "correspondent",
"tags_", "archive_serial_number") "tags_", "archive_serial_number", "document_type")
list_filter = ("tags", "correspondent", FinancialYearFilter) list_filter = ("document_type", "tags", "correspondent", FinancialYearFilter)
ordering = ["-created", "correspondent"] ordering = ["-created", "correspondent"]
actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected] actions = [add_tag_to_selected, remove_tag_from_selected, set_correspondent_on_selected, remove_correspondent_from_selected, set_document_type_on_selected, remove_document_type_from_selected]
date_hierarchy = 'created' date_hierarchy = 'created'
@ -273,6 +289,7 @@ class LogAdmin(CommonAdmin):
admin.site.register(Correspondent, CorrespondentAdmin) admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin) admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin) admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin) admin.site.register(Log, LogAdmin)

2
src/documents/apps.py Normal file → Executable file
View File

@ -13,6 +13,7 @@ class DocumentsConfig(AppConfig):
from .signals.handlers import ( from .signals.handlers import (
set_correspondent, set_correspondent,
set_tags, set_tags,
set_document_type,
run_pre_consume_script, run_pre_consume_script,
run_post_consume_script, run_post_consume_script,
cleanup_document_deletion, cleanup_document_deletion,
@ -23,6 +24,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_tags) document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_correspondent) document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_log_entry) document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(run_post_consume_script) document_consumption_finished.connect(run_post_consume_script)

View File

@ -0,0 +1,33 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55
from django.db import migrations, models
import django.db.models.deletion
# Schema migration: creates the DocumentType model and adds an optional
# document_type foreign key to Document.
class Migration(migrations.Migration):
# Must be applied after migration 0022_workflow_improvements of the
# "documents" app.
dependencies = [
('documents', '0022_workflow_improvements'),
]
operations = [
# New table for document types: a unique name, a slug, and the text
# matching configuration (match string plus algorithm).
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True)),
('match', models.CharField(blank=True, max_length=256)),
# Integer choice 1-5, defaulting to 1 ("Any"). The help text is kept
# verbatim, typos included, so it stays in sync with the model field.
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
# NOTE(review): presumably toggles case-insensitive matching; confirm
# against the MatchingModel base class.
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
# Optional link from Document to DocumentType. SET_NULL means deleting a
# document type leaves its documents in place without a type; the reverse
# accessor on DocumentType is "documents".
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
),
]

View File

@ -15,6 +15,7 @@ from django.db import models
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from django.utils import timezone from django.utils import timezone
from reminders.models import Reminder
from .managers import LogManager from .managers import LogManager
@ -189,6 +190,11 @@ class Tag(MatchingModel):
help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)") help_text="Marks this tag as an archive tag: All documents tagged with archive tags will never be modified automatically (i.e., modifying tags by matching rules)")
class DocumentType(MatchingModel):
    """A kind of document that can be assigned to a ``Document``.

    Adds no fields or behaviour of its own; everything comes from
    ``MatchingModel``.
    """
class Document(models.Model): class Document(models.Model):
TYPE_PDF = "pdf" TYPE_PDF = "pdf"
@ -215,6 +221,14 @@ class Document(models.Model):
title = models.CharField(max_length=128, blank=True, db_index=True) title = models.CharField(max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(
DocumentType,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
)
content = models.TextField( content = models.TextField(
db_index=True, db_index=True,
blank=True, blank=True,

View File

@ -8,7 +8,7 @@ from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.utils import timezone from django.utils import timezone
from ..models import Correspondent, Document, Tag from ..models import Correspondent, Document, Tag, DocumentType
def logger(message, group): def logger(message, group):
@ -44,6 +44,35 @@ def set_correspondent(sender, document=None, logging_group=None, **kwargs):
document.save(update_fields=("correspondent",)) document.save(update_fields=("correspondent",))
def set_document_type(sender, document=None, logging_group=None, **kwargs):
    """Signal handler: assign the first matching document type to a document.

    Does nothing when the document already has a document type or when no
    document type matches its content. When several types match, the first
    one is used and the ambiguity is logged.

    Args:
        sender: The signal sender (unused).
        document: The document to classify.
        logging_group: Group passed through to ``logger`` for each message.
        **kwargs: Extra signal arguments (unused).
    """
    # A document type that is already set is never overwritten.
    if document.document_type:
        return

    candidates = list(DocumentType.match_all(document.content))
    if not candidates:
        # Nothing matched the content, so the document stays untyped.
        return

    chosen = candidates[0]
    candidate_count = len(candidates)

    if candidate_count > 1:
        ambiguity_template = "Detected {} potential document types, so we've opted for {}"
        logger(
            ambiguity_template.format(candidate_count, chosen),
            logging_group,
        )

    logger(
        'Assigning document type "{}" to "{}" '.format(chosen, document),
        logging_group,
    )

    document.document_type = chosen
    document.save(update_fields=("document_type",))
def set_tags(sender, document=None, logging_group=None, **kwargs): def set_tags(sender, document=None, logging_group=None, **kwargs):
current_tags = set(document.tags.all()) current_tags = set(document.tags.all())

View File

@ -24,7 +24,7 @@
border: 1px solid #cccccc; border: 1px solid #cccccc;
border-radius: 2%; border-radius: 2%;
overflow: hidden; overflow: hidden;
height: 300px; height: 350px;
position: relative; position: relative;
} }
.result .header { .result .header {
@ -61,6 +61,11 @@
.result a.tag { .result a.tag {
color: #ffffff; color: #ffffff;
} }
/* Centered, light-grey band that shows the document type inside a result card. */
.result .documentType {
padding: 5px;
background-color: #eeeeee;
text-align: center;
}
.result .date { .result .date {
padding: 5px; padding: 5px;
} }
@ -163,7 +168,8 @@
{# 5: Correspondent #} {# 5: Correspondent #}
{# 6: Tags #} {# 6: Tags #}
{# 7: Archive serial number #} {# 7: Archive serial number #}
{# 8: Document edit url #} {# 8: Document type #}
{# 9: Document edit url #}
<div class="box"> <div class="box">
<div class="result"> <div class="result">
<div class="header"> <div class="header">
@ -177,7 +183,7 @@
selection would not be possible with mouse click + drag. Instead, selection would not be possible with mouse click + drag. Instead,
the underlying link would be dragged. the underlying link would be dragged.
{% endcomment %} {% endcomment %}
<div class="headerLink" onclick="location.href='{{ result.8 }}';"></div> <div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
<div class="checkbox">{{ result.0 }}</div> <div class="checkbox">{{ result.0 }}</div>
<div class="info"> <div class="info">
{{ result.5 }} {{ result.5 }}
@ -185,6 +191,7 @@
{{ result.1 }} {{ result.1 }}
<div style="clear: both;"></div> <div style="clear: both;"></div>
</div> </div>
{% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
<div class="tags">{{ result.6 }}</div> <div class="tags">{{ result.6 }}</div>
<div class="date">{{ result.2 }}</div> <div class="date">{{ result.2 }}</div>
<div style="clear: both;"></div> <div style="clear: both;"></div>

View File

@ -0,0 +1,46 @@
{% extends "admin/base_site.html" %}
{% load i18n l10n admin_urls static %}

{# Confirmation page for the "set document type on selected documents" admin action. #}
{# The {% static %} tag comes from the "static" library loaded above, so the deprecated "staticfiles" library is not loaded. #}

{% block extrahead %}
    {{ block.super }}
    {{ media }}
    <script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
{% endblock %}

{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}

{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
&rsaquo; {{title}}
</div>
{% endblock %}

{% block content %}
    <p>Please select the document type.</p>
    <form method="post">{% csrf_token %}
    <div>
    {# Re-submit the original selection so the action receives the same queryset. #}
    {% for obj in queryset %}
    <input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
    {% endfor %}
    <p>
        <select name="document_type_id">
        {% for document_type in document_types %}
            <option value="{{document_type.id}}">{{document_type.name}}</option>
        {% endfor %}
        </select>
    </p>

    {# "action" must name the admin action function; "post" marks the confirmation step. #}
    <input type="hidden" name="action" value="set_document_type_on_selected"/>
    <input type="hidden" name="post" value="yes"/>
    <p>
        <input type="submit" value="{% trans " Confirm" %}" />
        <a href="#" class="button cancel-link">{% trans "Go back" %}</a>
    </p>
    </div>
    </form>
{% endblock %}

0
src/documents/templatetags/hacks.py Normal file → Executable file
View File