mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'master' of github.com:danielquinn/paperless into ENH_filename_date_parsing
This commit is contained in:
146
src/documents/actions.py
Normal file
146
src/documents/actions.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from django.contrib import messages
|
||||
from django.contrib.admin import helpers
|
||||
from django.contrib.admin.utils import model_ngettext
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.template.response import TemplateResponse
|
||||
|
||||
from documents.models import Correspondent, Tag
|
||||
|
||||
|
||||
def select_action(
|
||||
modeladmin, request, queryset, title, action, modelclass,
|
||||
success_message="", document_action=None, queryset_action=None):
|
||||
|
||||
opts = modeladmin.model._meta
|
||||
app_label = opts.app_label
|
||||
|
||||
if not modeladmin.has_change_permission(request):
|
||||
raise PermissionDenied
|
||||
|
||||
if request.POST.get('post'):
|
||||
n = queryset.count()
|
||||
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
|
||||
if n:
|
||||
for document in queryset:
|
||||
if document_action:
|
||||
document_action(document, selected_object)
|
||||
document_display = str(document)
|
||||
modeladmin.log_change(request, document, document_display)
|
||||
if queryset_action:
|
||||
queryset_action(queryset, selected_object)
|
||||
|
||||
modeladmin.message_user(request, success_message % {
|
||||
"selected_object": selected_object.name,
|
||||
"count": n,
|
||||
"items": model_ngettext(modeladmin.opts, n)
|
||||
}, messages.SUCCESS)
|
||||
|
||||
# Return None to display the change list page again.
|
||||
return None
|
||||
|
||||
context = dict(
|
||||
modeladmin.admin_site.each_context(request),
|
||||
title=title,
|
||||
queryset=queryset,
|
||||
opts=opts,
|
||||
action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
|
||||
media=modeladmin.media,
|
||||
action=action,
|
||||
objects=modelclass.objects.all(),
|
||||
itemname=model_ngettext(modelclass, 1)
|
||||
)
|
||||
|
||||
request.current_app = modeladmin.admin_site.name
|
||||
|
||||
return TemplateResponse(
|
||||
request,
|
||||
"admin/{}/{}/select_object.html".format(app_label, opts.model_name),
|
||||
context
|
||||
)
|
||||
|
||||
|
||||
def simple_action(
|
||||
modeladmin, request, queryset, success_message="",
|
||||
document_action=None, queryset_action=None):
|
||||
|
||||
if not modeladmin.has_change_permission(request):
|
||||
raise PermissionDenied
|
||||
|
||||
n = queryset.count()
|
||||
if n:
|
||||
for document in queryset:
|
||||
if document_action:
|
||||
document_action(document)
|
||||
document_display = str(document)
|
||||
modeladmin.log_change(request, document, document_display)
|
||||
if queryset_action:
|
||||
queryset_action(queryset)
|
||||
modeladmin.message_user(request, success_message % {
|
||||
"count": n, "items": model_ngettext(modeladmin.opts, n)
|
||||
}, messages.SUCCESS)
|
||||
|
||||
# Return None to display the change list page again.
|
||||
return None
|
||||
|
||||
|
||||
def add_tag_to_selected(modeladmin, request, queryset):
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Add tag to multiple documents",
|
||||
action="add_tag_to_selected",
|
||||
modelclass=Tag,
|
||||
success_message="Successfully added tag %(selected_object)s to "
|
||||
"%(count)d %(items)s.",
|
||||
document_action=lambda doc, tag: doc.tags.add(tag)
|
||||
)
|
||||
|
||||
|
||||
def remove_tag_from_selected(modeladmin, request, queryset):
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Remove tag from multiple documents",
|
||||
action="remove_tag_from_selected",
|
||||
modelclass=Tag,
|
||||
success_message="Successfully removed tag %(selected_object)s from "
|
||||
"%(count)d %(items)s.",
|
||||
document_action=lambda doc, tag: doc.tags.remove(tag)
|
||||
)
|
||||
|
||||
|
||||
def set_correspondent_on_selected(modeladmin, request, queryset):
|
||||
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Set correspondent on multiple documents",
|
||||
action="set_correspondent_on_selected",
|
||||
modelclass=Correspondent,
|
||||
success_message="Successfully set correspondent %(selected_object)s "
|
||||
"on %(count)d %(items)s.",
|
||||
queryset_action=lambda qs, corr: qs.update(correspondent=corr)
|
||||
)
|
||||
|
||||
|
||||
def remove_correspondent_from_selected(modeladmin, request, queryset):
|
||||
return simple_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
success_message="Successfully removed correspondent from %(count)d "
|
||||
"%(items)s.",
|
||||
queryset_action=lambda qs: qs.update(correspondent=None)
|
||||
)
|
||||
|
||||
|
||||
add_tag_to_selected.short_description = "Add tag to selected documents"
|
||||
remove_tag_from_selected.short_description = \
|
||||
"Remove tag from selected documents"
|
||||
set_correspondent_on_selected.short_description = \
|
||||
"Set correspondent on selected documents"
|
||||
remove_correspondent_from_selected.short_description = \
|
||||
"Remove correspondent from selected documents"
|
@@ -1,42 +1,25 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.models import User, Group
|
||||
try:
|
||||
from django.core.urlresolvers import reverse
|
||||
except ImportError:
|
||||
from django.urls import reverse
|
||||
from django.contrib import admin, messages
|
||||
from django.contrib.admin.templatetags.admin_urls import add_preserved_filters
|
||||
from django.contrib.auth.models import Group, User
|
||||
from django.db import models
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.templatetags.static import static
|
||||
from django.utils.safestring import mark_safe
|
||||
from django.urls import reverse
|
||||
from django.utils.html import format_html, format_html_join
|
||||
from django.utils.http import urlquote
|
||||
from django.utils.safestring import mark_safe
|
||||
|
||||
from .models import Correspondent, Tag, Document, Log
|
||||
from documents.actions import (
|
||||
add_tag_to_selected,
|
||||
remove_correspondent_from_selected,
|
||||
remove_tag_from_selected,
|
||||
set_correspondent_on_selected
|
||||
)
|
||||
|
||||
|
||||
class MonthListFilter(admin.SimpleListFilter):
|
||||
|
||||
title = "Month"
|
||||
|
||||
# Parameter for the filter that will be used in the URL query.
|
||||
parameter_name = "month"
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
r = []
|
||||
for document in Document.objects.all():
|
||||
r.append((
|
||||
document.created.strftime("%Y-%m"),
|
||||
document.created.strftime("%B %Y")
|
||||
))
|
||||
return sorted(set(r), key=lambda x: x[0], reverse=True)
|
||||
|
||||
def queryset(self, request, queryset):
|
||||
|
||||
if not self.value():
|
||||
return None
|
||||
|
||||
year, month = self.value().split("-")
|
||||
return queryset.filter(created__year=year, created__month=month)
|
||||
from .models import Correspondent, Document, Log, Tag
|
||||
|
||||
|
||||
class FinancialYearFilter(admin.SimpleListFilter):
|
||||
@@ -78,12 +61,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
|
||||
# To keep it simple we use the same string for both
|
||||
# query parameter and the display.
|
||||
return (query, query)
|
||||
return query, query
|
||||
|
||||
else:
|
||||
query = "{0}-{0}".format(date.year)
|
||||
display = "{}".format(date.year)
|
||||
return (query, display)
|
||||
return query, display
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
if not settings.FY_START or not settings.FY_END:
|
||||
@@ -104,29 +87,79 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
created__lte=self._fy_end(end))
|
||||
|
||||
|
||||
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
||||
"""
|
||||
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
|
||||
correspondents to documents sent our way over the past ``n`` years.
|
||||
"""
|
||||
|
||||
def field_choices(self, field, request, model_admin):
|
||||
|
||||
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
||||
correspondents = Correspondent.objects.all()
|
||||
|
||||
if years and years > 0:
|
||||
self.title = "Correspondent (Recent)"
|
||||
days = 365 * years
|
||||
correspondents = correspondents.filter(
|
||||
documents__created__gte=datetime.now() - timedelta(days=days)
|
||||
).distinct()
|
||||
|
||||
return [(c.id, c.name) for c in correspondents]
|
||||
|
||||
|
||||
class CommonAdmin(admin.ModelAdmin):
|
||||
list_per_page = settings.PAPERLESS_LIST_PER_PAGE
|
||||
|
||||
|
||||
class CorrespondentAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "match", "matching_algorithm", "document_count")
|
||||
list_display = (
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"document_count",
|
||||
"last_correspondence"
|
||||
)
|
||||
list_filter = ("matching_algorithm",)
|
||||
list_editable = ("match", "matching_algorithm")
|
||||
|
||||
readonly_fields = ("slug",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(CorrespondentAdmin, self).get_queryset(request)
|
||||
qs = qs.annotate(
|
||||
document_count=models.Count("documents"),
|
||||
last_correspondence=models.Max("documents__created")
|
||||
)
|
||||
return qs
|
||||
|
||||
def document_count(self, obj):
|
||||
return obj.documents.count()
|
||||
return obj.document_count
|
||||
document_count.admin_order_field = "document_count"
|
||||
|
||||
def last_correspondence(self, obj):
|
||||
return obj.last_correspondence
|
||||
last_correspondence.admin_order_field = "last_correspondence"
|
||||
|
||||
|
||||
class TagAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "colour", "match", "matching_algorithm",
|
||||
"document_count")
|
||||
list_display = (
|
||||
"name", "colour", "match", "matching_algorithm", "document_count")
|
||||
list_filter = ("colour", "matching_algorithm")
|
||||
list_editable = ("colour", "match", "matching_algorithm")
|
||||
|
||||
readonly_fields = ("slug",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(TagAdmin, self).get_queryset(request)
|
||||
qs = qs.annotate(document_count=models.Count("documents"))
|
||||
return qs
|
||||
|
||||
def document_count(self, obj):
|
||||
return obj.documents.count()
|
||||
return obj.document_count
|
||||
document_count.admin_order_field = "document_count"
|
||||
|
||||
|
||||
class DocumentAdmin(CommonAdmin):
|
||||
@@ -137,15 +170,32 @@ class DocumentAdmin(CommonAdmin):
|
||||
}
|
||||
|
||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||
readonly_fields = ("added",)
|
||||
readonly_fields = ("added", "file_type", "storage_type",)
|
||||
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
||||
"tags_")
|
||||
list_filter = ("tags", "correspondent", FinancialYearFilter,
|
||||
MonthListFilter)
|
||||
list_filter = (
|
||||
"tags",
|
||||
("correspondent", RecentCorrespondentFilter),
|
||||
FinancialYearFilter
|
||||
)
|
||||
|
||||
filter_horizontal = ("tags",)
|
||||
|
||||
ordering = ["-created", "correspondent"]
|
||||
|
||||
actions = [
|
||||
add_tag_to_selected,
|
||||
remove_tag_from_selected,
|
||||
set_correspondent_on_selected,
|
||||
remove_correspondent_from_selected
|
||||
]
|
||||
|
||||
date_hierarchy = "created"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.document_queue = []
|
||||
|
||||
def has_add_permission(self, request):
|
||||
return False
|
||||
|
||||
@@ -153,6 +203,79 @@ class DocumentAdmin(CommonAdmin):
|
||||
return obj.created.date().strftime("%Y-%m-%d")
|
||||
created_.short_description = "Created"
|
||||
|
||||
def changelist_view(self, request, extra_context=None):
|
||||
|
||||
response = super().changelist_view(
|
||||
request,
|
||||
extra_context=extra_context
|
||||
)
|
||||
|
||||
if request.method == "GET":
|
||||
cl = self.get_changelist_instance(request)
|
||||
self.document_queue = [doc.id for doc in cl.queryset]
|
||||
|
||||
return response
|
||||
|
||||
def change_view(self, request, object_id=None, form_url='',
|
||||
extra_context=None):
|
||||
|
||||
extra_context = extra_context or {}
|
||||
|
||||
if self.document_queue and object_id:
|
||||
if int(object_id) in self.document_queue:
|
||||
# There is a queue of documents
|
||||
current_index = self.document_queue.index(int(object_id))
|
||||
if current_index < len(self.document_queue) - 1:
|
||||
# ... and there are still documents in the queue
|
||||
extra_context["next_object"] = self.document_queue[
|
||||
current_index + 1
|
||||
]
|
||||
|
||||
return super(DocumentAdmin, self).change_view(
|
||||
request,
|
||||
object_id,
|
||||
form_url,
|
||||
extra_context=extra_context,
|
||||
)
|
||||
|
||||
def response_change(self, request, obj):
|
||||
|
||||
# This is mostly copied from ModelAdmin.response_change()
|
||||
opts = self.model._meta
|
||||
preserved_filters = self.get_preserved_filters(request)
|
||||
|
||||
msg_dict = {
|
||||
"name": opts.verbose_name,
|
||||
"obj": format_html(
|
||||
'<a href="{}">{}</a>',
|
||||
urlquote(request.path),
|
||||
obj
|
||||
),
|
||||
}
|
||||
if "_saveandeditnext" in request.POST:
|
||||
msg = format_html(
|
||||
'The {name} "{obj}" was changed successfully. '
|
||||
'Editing next object.',
|
||||
**msg_dict
|
||||
)
|
||||
self.message_user(request, msg, messages.SUCCESS)
|
||||
redirect_url = reverse(
|
||||
"admin:{}_{}_change".format(opts.app_label, opts.model_name),
|
||||
args=(request.POST["_next_object"],),
|
||||
current_app=self.admin_site.name
|
||||
)
|
||||
redirect_url = add_preserved_filters(
|
||||
{
|
||||
"preserved_filters": preserved_filters,
|
||||
"opts": opts
|
||||
},
|
||||
redirect_url
|
||||
)
|
||||
return HttpResponseRedirect(redirect_url)
|
||||
|
||||
return super().response_change(request, obj)
|
||||
|
||||
@mark_safe
|
||||
def thumbnail(self, obj):
|
||||
return self._html_tag(
|
||||
"a",
|
||||
@@ -165,8 +288,8 @@ class DocumentAdmin(CommonAdmin):
|
||||
),
|
||||
href=obj.download_url
|
||||
)
|
||||
thumbnail.allow_tags = True
|
||||
|
||||
@mark_safe
|
||||
def tags_(self, obj):
|
||||
r = ""
|
||||
for tag in obj.tags.all():
|
||||
@@ -183,10 +306,11 @@ class DocumentAdmin(CommonAdmin):
|
||||
)
|
||||
}
|
||||
)
|
||||
return mark_safe(r)
|
||||
tags_.allow_tags = True
|
||||
return r
|
||||
|
||||
@mark_safe
|
||||
def document(self, obj):
|
||||
# TODO: is this method even used anymore?
|
||||
return self._html_tag(
|
||||
"a",
|
||||
self._html_tag(
|
||||
@@ -199,7 +323,6 @@ class DocumentAdmin(CommonAdmin):
|
||||
),
|
||||
href=obj.download_url
|
||||
)
|
||||
document.allow_tags = True
|
||||
|
||||
@staticmethod
|
||||
def _html_tag(kind, inside=None, **kwargs):
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from django.db import transaction
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
@@ -111,8 +112,11 @@ class Consumer:
|
||||
if not self.try_consume_file(file):
|
||||
self._ignore.append((file, mtime))
|
||||
|
||||
@transaction.atomic
|
||||
def try_consume_file(self, file):
|
||||
"Return True if file was consumed"
|
||||
"""
|
||||
Return True if file was consumed
|
||||
"""
|
||||
|
||||
if not re.match(FileInfo.REGEXES["title"], file):
|
||||
return False
|
||||
@@ -145,7 +149,7 @@ class Consumer:
|
||||
parsed_document = parser_class(doc)
|
||||
|
||||
try:
|
||||
thumbnail = parsed_document.get_thumbnail()
|
||||
thumbnail = parsed_document.get_optimised_thumbnail()
|
||||
date = parsed_document.get_date()
|
||||
document = self._store(
|
||||
parsed_document.get_text(),
|
||||
|
@@ -1,8 +1,14 @@
|
||||
from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter
|
||||
from django_filters.rest_framework import BooleanFilter, FilterSet
|
||||
|
||||
from .models import Correspondent, Document, Tag
|
||||
|
||||
|
||||
CHAR_KWARGS = (
|
||||
"startswith", "endswith", "contains",
|
||||
"istartswith", "iendswith", "icontains"
|
||||
)
|
||||
|
||||
|
||||
class CorrespondentFilterSet(FilterSet):
|
||||
|
||||
class Meta:
|
||||
@@ -31,34 +37,24 @@ class TagFilterSet(FilterSet):
|
||||
|
||||
class DocumentFilterSet(FilterSet):
|
||||
|
||||
CHAR_KWARGS = {
|
||||
"lookup_expr": (
|
||||
"startswith",
|
||||
"endswith",
|
||||
"contains",
|
||||
"istartswith",
|
||||
"iendswith",
|
||||
"icontains"
|
||||
)
|
||||
}
|
||||
|
||||
correspondent__name = CharFilter(
|
||||
field_name="correspondent__name", **CHAR_KWARGS)
|
||||
correspondent__slug = CharFilter(
|
||||
field_name="correspondent__slug", **CHAR_KWARGS)
|
||||
tags__name = CharFilter(
|
||||
field_name="tags__name", **CHAR_KWARGS)
|
||||
tags__slug = CharFilter(
|
||||
field_name="tags__slug", **CHAR_KWARGS)
|
||||
tags__empty = BooleanFilter(
|
||||
field_name="tags", lookup_expr="isnull", distinct=True)
|
||||
tags_empty = BooleanFilter(
|
||||
label="Is tagged",
|
||||
field_name="tags",
|
||||
lookup_expr="isnull",
|
||||
exclude=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = Document
|
||||
fields = {
|
||||
"title": [
|
||||
"startswith", "endswith", "contains",
|
||||
"istartswith", "iendswith", "icontains"
|
||||
],
|
||||
"content": ["contains", "icontains"],
|
||||
|
||||
"title": CHAR_KWARGS,
|
||||
"content": ("contains", "icontains"),
|
||||
|
||||
"correspondent__name": CHAR_KWARGS,
|
||||
"correspondent__slug": CHAR_KWARGS,
|
||||
|
||||
"tags__name": CHAR_KWARGS,
|
||||
"tags__slug": CHAR_KWARGS,
|
||||
|
||||
}
|
||||
|
@@ -55,7 +55,12 @@ class Command(Renderable, BaseCommand):
|
||||
documents = Document.objects.all()
|
||||
document_map = {d.pk: d for d in documents}
|
||||
manifest = json.loads(serializers.serialize("json", documents))
|
||||
for document_dict in manifest:
|
||||
|
||||
for index, document_dict in enumerate(manifest):
|
||||
|
||||
# Force output to unencrypted as that will be the current state.
|
||||
# The importer will make the decision to encrypt or not.
|
||||
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
|
||||
|
||||
document = document_map[document_dict["pk"]]
|
||||
|
||||
|
@@ -94,7 +94,7 @@ class Command(Renderable, BaseCommand):
|
||||
document_path = os.path.join(self.source, doc_file)
|
||||
thumbnail_path = os.path.join(self.source, thumb_file)
|
||||
|
||||
if document.storage_type == Document.STORAGE_TYPE_GPG:
|
||||
if settings.PASSPHRASE:
|
||||
|
||||
with open(document_path, "rb") as unencrypted:
|
||||
with open(document.source_path, "wb") as encrypted:
|
||||
@@ -112,3 +112,15 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
shutil.copy(document_path, document.source_path)
|
||||
shutil.copy(thumbnail_path, document.thumbnail_path)
|
||||
|
||||
# Reset the storage type to whatever we've used while importing
|
||||
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
if settings.PASSPHRASE:
|
||||
storage_type = Document.STORAGE_TYPE_GPG
|
||||
|
||||
Document.objects.filter(
|
||||
pk__in=[r["pk"] for r in self.manifest]
|
||||
).update(
|
||||
storage_type=storage_type
|
||||
)
|
||||
|
@@ -158,9 +158,4 @@ class Migration(migrations.Migration):
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True, db_index=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
||||
),
|
||||
]
|
||||
|
@@ -12,6 +12,11 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='correspondent',
|
||||
name='is_insensitive',
|
||||
|
52
src/documents/migrations/0022_auto_20181007_1420.py
Normal file
52
src/documents/migrations/0022_auto_20181007_1420.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||
|
||||
from django.db import migrations, models
|
||||
from django.utils.text import slugify
|
||||
|
||||
|
||||
def re_slug_all_the_things(apps, schema_editor):
|
||||
"""
|
||||
Rewrite all slug values to make sure they're actually slugs before we brand
|
||||
them as uneditable.
|
||||
"""
|
||||
|
||||
Tag = apps.get_model("documents", "Tag")
|
||||
Correspondent = apps.get_model("documents", "Tag")
|
||||
|
||||
for klass in (Tag, Correspondent):
|
||||
for instance in klass.objects.all():
|
||||
klass.objects.filter(
|
||||
pk=instance.pk
|
||||
).update(
|
||||
slug=slugify(instance.slug)
|
||||
)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0021_document_storage_type'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='slug',
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
||||
]
|
@@ -11,6 +11,7 @@ from django.conf import settings
|
||||
from django.db import models
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.utils import timezone
|
||||
from django.utils.text import slugify
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
from .managers import LogManager
|
||||
@@ -37,7 +38,7 @@ class MatchingModel(models.Model):
|
||||
)
|
||||
|
||||
name = models.CharField(max_length=128, unique=True)
|
||||
slug = models.SlugField(blank=True)
|
||||
slug = models.SlugField(blank=True, editable=False)
|
||||
|
||||
match = models.CharField(max_length=256, blank=True)
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
@@ -147,9 +148,7 @@ class MatchingModel(models.Model):
|
||||
def save(self, *args, **kwargs):
|
||||
|
||||
self.match = self.match.lower()
|
||||
|
||||
if not self.slug:
|
||||
self.slug = slugify(self.name)
|
||||
self.slug = slugify(self.name)
|
||||
|
||||
models.Model.save(self, *args, **kwargs)
|
||||
|
||||
@@ -452,7 +451,7 @@ class FileInfo:
|
||||
r = []
|
||||
for t in tags.split(","):
|
||||
r.append(Tag.objects.get_or_create(
|
||||
slug=t.lower(),
|
||||
slug=slugify(t),
|
||||
defaults={"name": t}
|
||||
)[0])
|
||||
return tuple(r)
|
||||
|
@@ -1,9 +1,13 @@
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import dateparser
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
# This regular expression will try to find dates in the document at
|
||||
# hand and will match the following formats:
|
||||
@@ -36,6 +40,9 @@ class DocumentParser:
|
||||
"""
|
||||
|
||||
SCRATCH = settings.SCRATCH_DIR
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
|
||||
OPTIPNG = settings.OPTIPNG_BINARY
|
||||
|
||||
def __init__(self, path):
|
||||
self.document_path = path
|
||||
@@ -49,6 +56,19 @@ class DocumentParser:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def optimise_thumbnail(self, in_path):
|
||||
|
||||
out_path = os.path.join(self.tempdir, "optipng.png")
|
||||
|
||||
args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
|
||||
if not subprocess.Popen(args).wait() == 0:
|
||||
raise ParseError("Optipng failed at {}".format(args))
|
||||
|
||||
return out_path
|
||||
|
||||
def get_optimised_thumbnail(self):
|
||||
return self.optimise_thumbnail(self.get_thumbnail())
|
||||
|
||||
def get_text(self):
|
||||
"""
|
||||
Returns the text from the document and only the text.
|
||||
@@ -59,7 +79,75 @@ class DocumentParser:
|
||||
"""
|
||||
Returns the date of the document.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def __parser__(ds, date_order):
|
||||
"""
|
||||
Call dateparser.parse with a particular date ordering
|
||||
"""
|
||||
return dateparser.parse(ds,
|
||||
settings={"DATE_ORDER": date_order,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE":
|
||||
True})
|
||||
date = None
|
||||
date_string = None
|
||||
|
||||
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||
title = os.path.basename(self.document_path)
|
||||
|
||||
# if filename date parsing is enabled, search there first:
|
||||
if self.FILENAME_DATE_ORDER:
|
||||
self.log("info", "Checking document title for date")
|
||||
for m in re.finditer(DATE_REGEX, title):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser__(date_string, self.FILENAME_DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None and next_year > date.year > 1900:
|
||||
self.log("info",
|
||||
"Detected document date {} based on string {} "
|
||||
"from document title"
|
||||
"".format(date.isoformat(), date_string))
|
||||
return date
|
||||
|
||||
try:
|
||||
# getting text after checking filename will save time if only
|
||||
# looking at the filename instead of the whole text
|
||||
text = self.get_text()
|
||||
except ParseError:
|
||||
return None
|
||||
|
||||
# Iterate through all regex matches in text and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser__(date_string, self.DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None and next_year > date.year > 1900:
|
||||
break
|
||||
else:
|
||||
date = None
|
||||
|
||||
if date is not None:
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {}".format(
|
||||
date.isoformat(),
|
||||
date_string
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
return date
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
|
@@ -1,5 +1,21 @@
|
||||
{% extends 'admin/change_form.html' %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
{{ block.super }}
|
||||
|
||||
{% if next_object %}
|
||||
<script type="text/javascript">//<![CDATA[
|
||||
(function($){
|
||||
$('<input type="submit" value="Save and edit next" name="_saveandeditnext" />')
|
||||
.prependTo('div.submit-row');
|
||||
$('<input type="hidden" value="{{next_object}}" name="_next_object" />')
|
||||
.prependTo('div.submit-row');
|
||||
})(django.jQuery);
|
||||
//]]></script>
|
||||
{% endif %}
|
||||
|
||||
{% endblock content %}
|
||||
|
||||
{% block footer %}
|
||||
|
||||
@@ -10,4 +26,4 @@
|
||||
django.jQuery(".field-created input").first().attr("type", "date")
|
||||
</script>
|
||||
|
||||
{% endblock footer %}
|
||||
{% endblock footer %}
|
||||
|
@@ -0,0 +1,50 @@
|
||||
{% extends "admin/base_site.html" %}
|
||||
|
||||
|
||||
{% load i18n l10n admin_urls static %}
|
||||
{% load staticfiles %}
|
||||
|
||||
|
||||
{% block extrahead %}
|
||||
{{ block.super }}
|
||||
{{ media }}
|
||||
<script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}
|
||||
|
||||
|
||||
{% block breadcrumbs %}
|
||||
<div class="breadcrumbs">
|
||||
<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
|
||||
› <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
|
||||
› <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
|
||||
› {{ title }}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<p>Please select the {{itemname}}.</p>
|
||||
<form method="post">{% csrf_token %}
|
||||
<div>
|
||||
{% for obj in queryset %}
|
||||
<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
|
||||
{% endfor %}
|
||||
<p>
|
||||
<select name="obj_id">
|
||||
{% for obj in objects %}
|
||||
<option value="{{ obj.id }}">{{ obj.name }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</p>
|
||||
|
||||
<input type="hidden" name="action" value="{{ action }}"/>
|
||||
<input type="hidden" name="post" value="yes" />
|
||||
<p>
|
||||
<input type="submit" value="{% trans 'Confirm' %}" />
|
||||
<a href="#" class="button cancel-link">{% trans "Go back" %}</a>
|
||||
</p>
|
||||
</div>
|
||||
</form>
|
||||
{% endblock %}
|
Reference in New Issue
Block a user