This commit is contained in:
Daniel Quinn 2016-02-21 00:14:50 +00:00
parent a5124cade6
commit 422ae9303a
13 changed files with 89 additions and 40 deletions

@ -56,26 +56,35 @@ class DocumentAdmin(admin.ModelAdmin):
def tags_(self, obj):
r = ""
for tag in obj.tags.all():
r += '<a class="tag" style="background-color: {};" href="{}">{}</a>'.format(
tag.get_colour_display(),
"{}?tags__id__exact={}".format(
reverse("admin:documents_document_changelist"),
tag.pk
),
tag.slug
colour = tag.get_colour_display()
r += html_tag(
"a",
tag.slug,
**{
"class": "tag",
"style": "background-color: {};".format(colour),
"href": "{}?tags__id__exact={}".format(
reverse("admin:documents_document_changelist"),
tag.pk
)
}
)
return r
tags_.allow_tags = True
def document(self, obj):
return '<a href="{}">' \
'<img src="{}" width="22" height="22" alt="{} icon" title="{}">' \
'</a>'.format(
obj.download_url,
static("documents/img/{}.png".format(obj.file_type)),
obj.file_type,
obj.file_name
)
return html_tag(
"a",
html_tag(
"img",
src=static("documents/img/{}.png".format(obj.file_type)),
width=22,
height=22,
alt=obj.file_type,
title=obj.file_name
),
href=obj.download_url
)
document.allow_tags = True
admin.site.register(Sender)
@ -85,3 +94,16 @@ admin.site.register(Document, DocumentAdmin)
# Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group)
admin.site.unregister(User)
def html_tag(kind, inside=None, **kwargs):
    """
    Render a single HTML element as a string.

    ``kind`` is the tag name and every keyword argument becomes an
    attribute of that tag.  If ``inside`` is not None it is placed between
    an opening and a closing tag; otherwise a self-closing tag is returned.

    Attribute values are converted to text and HTML-escaped so that quotes,
    angle brackets and ampersands in a value cannot terminate the attribute
    or inject markup.  ``inside`` is inserted verbatim on purpose: callers
    nest the output of another html_tag() call there, so escaping it would
    break that usage.  Callers remain responsible for escaping any plain
    text they pass as ``inside``.
    """
    # Imported locally so the helper does not rely on the module's import
    # block, which is not changed by this edit.
    from html import escape

    attributes = " ".join(
        '{}="{}"'.format(name, escape(str(value)))
        for name, value in kwargs.items()
    )

    if inside is not None:
        return "<{kind} {attributes}>{inside}</{kind}>".format(
            kind=kind, attributes=attributes, inside=inside)

    return "<{} {}/>".format(kind, attributes)

@ -127,7 +127,8 @@ class Consumer(object):
self._store(text, doc)
except OCRError:
self._ignore.append(doc)
Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
Log.error(
"OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
self._cleanup_tempdir(tempdir)
continue
else:
@ -190,8 +191,8 @@ class Consumer(object):
Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
if settings.FORGIVING_OCR:
Log.warning(
"As FORGIVING_OCR is enabled, we're going to make the best "
"with what we have.",
"As FORGIVING_OCR is enabled, we're going to make the "
"best with what we have.",
Log.COMPONENT_CONSUMER
)
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@ -246,8 +247,8 @@ class Consumer(object):
def _guess_attributes_from_name(self, parseable):
"""
We use a crude naming convention to make handling the sender, title, and
tags easier:
We use a crude naming convention to make handling the sender, title,
and tags easier:
"<sender> - <title> - <tags>.<suffix>"
"<sender> - <title>.<suffix>"
"<title>.<suffix>"

@ -26,15 +26,17 @@ class UploadForm(forms.Form):
sender = forms.CharField(
max_length=Sender._meta.get_field("name").max_length, required=False)
title = forms.CharField(
max_length=Document._meta.get_field("title").max_length, required=False)
max_length=Document._meta.get_field("title").max_length,
required=False
)
document = forms.FileField()
signature = forms.CharField(max_length=256)
def clean_sender(self):
"""
I suppose it might look cleaner to use .get_or_create() here, but that
would also allow someone to fill up the db with bogus senders before all
validation was met.
would also allow someone to fill up the db with bogus senders before
all validation was met.
"""
sender = self.cleaned_data.get("sender")
if not sender:

@ -185,10 +185,10 @@ ISO639 = {
"yo": "yor",
"za": "zha",
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I have
# no idea which one is better, so I just picked the bigger file.
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
# have no idea which one is better, so I just picked the bigger file.
"zh": "chi_tra",
"zu": "zul"
}
}

@ -10,8 +10,8 @@ class Command(Renderable, BaseCommand):
help = """
Using the current set of tagging rules, apply said rules to all
documents in the database, effectively allowing you to back-tag all
previously indexed documents with tags created (or modified) after their
initial import.
previously indexed documents with tags created (or modified) after
their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):

@ -13,7 +13,7 @@ from django.core.management.commands.loaddata import Command as LoadDataCommand
class Command(LoadDataCommand):
def parse_name(self, fixture_name):
self.compression_formats['stdin'] = (lambda x,y: sys.stdin, None)
self.compression_formats['stdin'] = (lambda x, y: sys.stdin, None)
if fixture_name == '-':
return '-', 'json', 'stdin'

@ -1,7 +1,7 @@
class Renderable(object):
"""
A handy mixin to make it easier/cleaner to print output based on a verbosity
value.
A handy mixin to make it easier/cleaner to print output based on a
verbosity value.
"""
def _render(self, text, verbosity):

@ -36,7 +36,7 @@ class Sender(SluggedModel):
class Tag(SluggedModel):
COLOURS = (
(1, "#a6cee3"),
(2, "#1f78b4"),
@ -71,9 +71,9 @@ class Tag(SluggedModel):
default=MATCH_ANY,
help_text=(
"Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word provided "
"in the PDF, while \"all\" requires that every word provided "
"appear in the PDF, albeit not in the order provided. A "
"PDF. Here, \"any\" looks for any occurrence of any word "
"provided in the PDF, while \"all\" requires that every word "
"provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. If you don't know what a regex "

@ -14,7 +14,8 @@ class TagSerializer(serializers.ModelSerializer):
class Meta(object):
model = Tag
fields = ("id", "slug", "name", "colour", "match", "matching_algorithm")
fields = (
"id", "slug", "name", "colour", "match", "matching_algorithm")
class DocumentSerializer(serializers.ModelSerializer):

@ -4,10 +4,10 @@ from ..consumer import Consumer
class TestAttachment(TestCase):
TAGS = ("tag1", "tag2", "tag3")
CONSUMER = Consumer()
def _test_guess_attributes_from_name(self, path, sender, title, tags):
for suffix in ("pdf", "png", "jpg", "jpeg", "gif"):
f = path.format(suffix)

@ -117,4 +117,3 @@ class TestTagMatching(TestCase):
self.assertFalse(t.matches("I have alpha, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas in me"))

@ -29,10 +29,20 @@ router.register(r'tags', TagViewSet)
router.register(r'documents', DocumentViewSet)
urlpatterns = [
url(r"^api/auth/", include('rest_framework.urls', namespace='rest_framework')),
# API
url(
r"^api/auth/",
include('rest_framework.urls', namespace='rest_framework')
),
url(r"^api/", include(router.urls)),
# File downloads
url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
# The Django admin
url(r"", admin.site.urls),
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
if settings.UPLOAD_SHARED_SECRET:

14
tox.ini Normal file

@ -0,0 +1,14 @@
# Tox (http://tox.testrun.org/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported Python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
#[tox]
#envlist = py34, py35
#[testenv]
#commands = {envpython} src/manage.py test
#deps =
[pep8]
exclude=migrations,src/paperless/settings.py