Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Jonas Winkler
2018-12-11 12:06:15 +01:00
46 changed files with 1171 additions and 695 deletions

View File

@@ -64,12 +64,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
# To keep it simple we use the same string for both
# query parameter and the display.
return (query, query)
return query, query
else:
query = "{0}-{0}".format(date.year)
display = "{}".format(date.year)
return (query, display)
return query, display
def lookups(self, request, model_admin):
if not settings.FY_START or not settings.FY_END:
@@ -91,25 +91,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.title = "correspondent (recent)"
"""
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
correspondents to documents sent our way over the past ``n`` years.
"""
def field_choices(self, field, request, model_admin):
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
days = 365 * years
correspondents = Correspondent.objects.all()
lookups = []
if years and years > 0:
correspondents = Correspondent.objects.filter(
self.title = "Correspondent (Recent)"
days = 365 * years
correspondents = correspondents.filter(
documents__created__gte=datetime.now() - timedelta(days=days)
).distinct()
for c in correspondents:
lookups.append((c.id, c.name))
return lookups
return [(c.id, c.name) for c in correspondents]
class CommonAdmin(admin.ModelAdmin):
@@ -124,7 +123,9 @@ class CorrespondentAdmin(CommonAdmin):
"document_count",
"last_correspondence"
)
list_editable = ("automatic_classification")
list_editable = ("automatic_classification",)
readonly_fields = ("slug",)
def get_queryset(self, request):
qs = super(CorrespondentAdmin, self).get_queryset(request)
@@ -149,6 +150,11 @@ class TagAdmin(CommonAdmin):
list_filter = ("colour",)
list_editable = ("colour", "automatic_classification")
readonly_fields = ("slug",)
class Media:
js = ("js/colours.js",)
def get_queryset(self, request):
qs = super(TagAdmin, self).get_queryset(request)
qs = qs.annotate(document_count=models.Count("documents"))
@@ -164,6 +170,8 @@ class DocumentTypeAdmin(CommonAdmin):
list_display = ("name", "automatic_classification", "document_count")
list_editable = ("automatic_classification",)
readonly_fields = ("slug",)
def get_queryset(self, request):
qs = super(DocumentTypeAdmin, self).get_queryset(request)
qs = qs.annotate(document_count=models.Count("documents"))
@@ -182,14 +190,13 @@ class DocumentAdmin(CommonAdmin):
}
search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added",)
readonly_fields = ("added", "file_type", "storage_type",)
list_display = ("title", "created", "added", "thumbnail", "correspondent",
"tags_", "archive_serial_number", "document_type")
list_filter = (
"document_type",
"tags",
("correspondent", RecentCorrespondentFilter),
"correspondent",
FinancialYearFilter
)

View File

@@ -1,3 +1,4 @@
from django.db import transaction
import datetime
import hashlib
import logging
@@ -111,8 +112,11 @@ class Consumer:
if not self.try_consume_file(file):
self._ignore.append((file, mtime))
@transaction.atomic
def try_consume_file(self, file):
"Return True if file was consumed"
"""
Return True if file was consumed
"""
if not re.match(FileInfo.REGEXES["title"], file):
return False
@@ -145,7 +149,7 @@ class Consumer:
parsed_document = parser_class(doc)
try:
thumbnail = parsed_document.get_thumbnail()
thumbnail = parsed_document.get_optimised_thumbnail()
date = parsed_document.get_date()
document = self._store(
parsed_document.get_text(),

View File

@@ -1,4 +1,4 @@
from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter, ModelChoiceFilter
from django_filters.rest_framework import BooleanFilter, FilterSet
from .models import Correspondent, Document, Tag, DocumentType

View File

@@ -0,0 +1,52 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations, models
from django.utils.text import slugify
def re_slug_all_the_things(apps, schema_editor):
    """
    Pass the stored slug of every Tag and Correspondent through ``slugify``
    so each value is a well-formed slug before the field is made
    non-editable.

    ``apps`` is the migration-time app registry used to look up the models;
    ``schema_editor`` is accepted because RunPython supplies it, but it is
    not used here.
    """

    # Resolve both models up front, before any row is touched.
    historical_models = tuple(
        apps.get_model("documents", model_name)
        for model_name in ("Tag", "Correspondent")
    )

    for model in historical_models:
        for record in model.objects.all():
            cleaned_slug = slugify(record.slug)
            # Write through the queryset so only the slug column is updated.
            model.objects.filter(pk=record.pk).update(slug=cleaned_slug)
class Migration(migrations.Migration):
    # Schema operations come first; the data pass that rewrites existing
    # slugs runs last.

    dependencies = [
        ('documents', '0021_document_storage_type'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='tag',
            options={'ordering': ('name',)},
        ),
        migrations.AlterField(
            model_name='correspondent',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        migrations.AlterField(
            model_name='document',
            name='file_type',
            field=models.CharField(
                choices=[
                    ('pdf', 'PDF'),
                    ('png', 'PNG'),
                    ('jpg', 'JPG'),
                    ('gif', 'GIF'),
                    ('tiff', 'TIFF'),
                    ('txt', 'TXT'),
                    ('csv', 'CSV'),
                    ('md', 'MD'),
                ],
                editable=False,
                max_length=4,
            ),
        ),
        migrations.AlterField(
            model_name='tag',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        # The reverse direction is a no-op: slugs are not restored.
        migrations.RunPython(
            re_slug_all_the_things,
            migrations.RunPython.noop,
        ),
    ]

View File

@@ -6,7 +6,7 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '0021_document_storage_type'),
('documents', '0022_auto_20181007_1420'),
]
operations = [

View File

@@ -7,7 +7,7 @@ import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('documents', '0022_workflow_improvements'),
('documents', '1001_workflow_improvements'),
]
operations = [

View File

@@ -18,7 +18,7 @@ def reverse_automatic_classification(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_auto_20180823_1155'),
('documents', '1002_auto_20180823_1155'),
]
operations = [

View File

@@ -0,0 +1,36 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations, models
from django.utils.text import slugify
def re_slug_all_the_things(apps, schema_editor):
    """
    Pass the stored slug of every DocumentType through ``slugify`` so each
    value is a well-formed slug before the field is made non-editable.

    ``apps`` is the migration-time app registry used to look up the model;
    ``schema_editor`` is accepted because RunPython supplies it, but it is
    not used here.
    """

    document_type_model = apps.get_model("documents", "DocumentType")
    rows = document_type_model.objects

    for record in rows.all():
        cleaned_slug = slugify(record.slug)
        # Write through the queryset so only the slug column is updated.
        rows.filter(pk=record.pk).update(slug=cleaned_slug)
class Migration(migrations.Migration):
    # The slug field is altered first; the data pass that rewrites existing
    # DocumentType slugs runs afterwards.

    dependencies = [
        ('documents', '1003_auto_20180904_1425'),
    ]

    operations = [
        migrations.AlterField(
            model_name='documenttype',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        # The reverse direction is a no-op: slugs are not restored.
        migrations.RunPython(
            re_slug_all_the_things,
            migrations.RunPython.noop,
        ),
    ]

View File

@@ -11,6 +11,7 @@ from django.conf import settings
from django.db import models
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify
from fuzzywuzzy import fuzz
from .managers import LogManager
@@ -24,7 +25,7 @@ except ImportError:
class MatchingModel(models.Model):
name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True)
slug = models.SlugField(blank=True, editable=False)
automatic_classification = models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.')
@@ -37,8 +38,7 @@ class MatchingModel(models.Model):
def save(self, *args, **kwargs):
if not self.slug:
self.slug = slugify(self.name)
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs)
@@ -369,7 +369,7 @@ class FileInfo:
r = []
for t in tags.split(","):
r.append(Tag.objects.get_or_create(
slug=t.lower(),
slug=slugify(t),
defaults={"name": t}
)[0])
return tuple(r)

View File

@@ -1,23 +1,31 @@
import logging
import shutil
import tempfile
import os
import re
import shutil
import subprocess
import tempfile
import dateparser
from django.conf import settings
from django.utils import timezone
# This regular expression will try to find dates in the document at
# hand and will match the following formats:
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
# Every alternative is fenced on both sides by a word boundary or by an
# adjacent "_" / "-".  The extra fence is needed because "_" is a word
# character, so \b alone never fires between "_" and a digit (think of file
# names such as "scan_2018-12-11_invoice.pdf").
#
# The leading fence has to be a lookbehind, ``(?<=...)``.  Spelling it
# ``(?!=...)`` produces a negative lookahead for a literal "=" which always
# succeeds in front of a date and silently disables the leading boundary,
# letting a match begin in the middle of a longer number.
#
# The capture groups are kept as they are so group numbering is unchanged for
# any caller that indexes into the match.
DATE_REGEX = re.compile(
    r'(\b|(?<=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?<=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?<=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)
@@ -32,6 +40,9 @@ class DocumentParser:
"""
SCRATCH = settings.SCRATCH_DIR
DATE_ORDER = settings.DATE_ORDER
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
OPTIPNG = settings.OPTIPNG_BINARY
def __init__(self, path):
self.document_path = path
@@ -45,6 +56,19 @@ class DocumentParser:
"""
raise NotImplementedError()
def optimise_thumbnail(self, in_path):
    """
    Run the configured optipng binary on ``in_path`` and return the path of
    the file it was told to write, ``optipng.png`` inside ``self.tempdir``.

    Raises ParseError when the process exits with a non-zero status.
    """

    optimised_path = os.path.join(self.tempdir, "optipng.png")
    command = (self.OPTIPNG, "-o5", in_path, "-out", optimised_path)

    # Block until the child process has finished and inspect its exit code.
    exit_status = subprocess.Popen(command).wait()
    if exit_status != 0:
        raise ParseError("Optipng failed at {}".format(command))

    return optimised_path
def get_optimised_thumbnail(self):
    """
    Build the thumbnail with ``get_thumbnail`` and return whatever
    ``optimise_thumbnail`` returns for it.
    """
    raw_thumbnail = self.get_thumbnail()
    return self.optimise_thumbnail(raw_thumbnail)
def get_text(self):
"""
Returns the text from the document and only the text.
@@ -55,7 +79,82 @@ class DocumentParser:
"""
Returns the date of the document.
"""
raise NotImplementedError()
def __parser(ds, date_order):
"""
Call dateparser.parse with a particular date ordering
"""
return dateparser.parse(
ds,
settings={
"DATE_ORDER": date_order,
"PREFER_DAY_OF_MONTH": "first",
"RETURN_AS_TIMEZONE_AWARE":
True
}
)
date = None
date_string = None
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
title = os.path.basename(self.document_path)
# if filename date parsing is enabled, search there first:
if self.FILENAME_DATE_ORDER:
self.log("info", "Checking document title for date")
for m in re.finditer(DATE_REGEX, title):
date_string = m.group(0)
try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
self.log(
"info",
"Detected document date {} based on string {} "
"from document title"
"".format(date.isoformat(), date_string)
)
return date
try:
# getting text after checking filename will save time if only
# looking at the filename instead of the whole text
text = self.get_text()
except ParseError:
return None
# Iterate through all regex matches in text and try to parse the date
for m in re.finditer(DATE_REGEX, text):
date_string = m.group(0)
try:
date = __parser(date_string, self.DATE_ORDER)
except TypeError:
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
break
else:
date = None
if date is not None:
self.log(
"info",
"Detected document date {} based on string {}".format(
date.isoformat(),
date_string
)
)
else:
self.log("info", "Unable to detect date for document")
return date
def log(self, level, message):
getattr(self.logger, level)(message, extra={

View File

@@ -0,0 +1,66 @@
// The following jQuery snippet will add a small square next to the selection
// drop-down on the `Add tag` page that will update to show the selected tag
// color as the drop-down value is changed.
django.jQuery(document).ready(function () {
    const $ = django.jQuery;

    // Label of the option currently selected in `select` (a DOM <select>),
    // or '' when nothing is selected.  Reading the selected option directly
    // avoids assuming that option values are contiguous and 1-based.
    function selectedColour(select) {
        const option = select.options[select.selectedIndex];
        return option ? option.text : '';
    }

    const $single = $('#id_colour');

    if ($single.length) {
        // Single drop-down with id "id_colour": one inline-styled square
        // placed right after it.
        const $square = $('<div class="colour_square"></div>').css({
            'float': 'left',
            'width': '20px',
            'height': '20px',
            'margin': '5px',
            'border': '1px solid rgba(0, 0, 0, .2)',
            'background': selectedColour($single[0])
        });
        $single.after($square);

        $single.change(function () {
            $square.css({'background': selectedColour($single[0])});
        });
        return;
    }

    // Otherwise: every drop-down whose id contains "-colour" gets its own
    // square, identified as colour_square_<index>.
    const $rowSelects = $('select[id*="-colour"]');
    if (!$rowSelects.length) {
        return;
    }

    // The shared geometry lives in one stylesheet rule; inject it once
    // instead of once per drop-down.
    $(
        "<style type='text/css'>" +
        ".colour_square{" +
        " float: left;" +
        " width: 20px;" +
        " height: 20px;" +
        " margin: 5px;" +
        " border: 1px solid rgba(0,0,0,.2);" +
        " }" +
        "</style>"
    ).appendTo("head");

    $rowSelects.each(function (index, element) {
        const id = "colour_square_" + index;
        const $square = $('<div class="colour_square" id="' + id + '"></div>');

        $(element).after($square);
        $square.css({'background': selectedColour(element)});

        // Keep the square in sync with the drop-down.
        $(element).change(function () {
            $square.css({'background': selectedColour(element)});
        });
    });
});