This commit is contained in:
Daniel Quinn 2016-02-21 00:14:50 +00:00
parent a5124cade6
commit 422ae9303a
13 changed files with 89 additions and 40 deletions

View File

@ -56,26 +56,35 @@ class DocumentAdmin(admin.ModelAdmin):
def tags_(self, obj): def tags_(self, obj):
r = "" r = ""
for tag in obj.tags.all(): for tag in obj.tags.all():
r += '<a class="tag" style="background-color: {};" href="{}">{}</a>'.format( colour = tag.get_colour_display()
tag.get_colour_display(), r += html_tag(
"{}?tags__id__exact={}".format( "a",
reverse("admin:documents_document_changelist"), tag.slug,
tag.pk **{
), "class": "tag",
tag.slug "style": "background-color: {};".format(colour),
"href": "{}?tags__id__exact={}".format(
reverse("admin:documents_document_changelist"),
tag.pk
)
}
) )
return r return r
tags_.allow_tags = True tags_.allow_tags = True
def document(self, obj): def document(self, obj):
return '<a href="{}">' \ return html_tag(
'<img src="{}" width="22" height="22" alt="{} icon" title="{}">' \ "a",
'</a>'.format( html_tag(
obj.download_url, "img",
static("documents/img/{}.png".format(obj.file_type)), src=static("documents/img/{}.png".format(obj.file_type)),
obj.file_type, width=22,
obj.file_name height=22,
) alt=obj.file_type,
title=obj.file_name
),
href=obj.download_url
)
document.allow_tags = True document.allow_tags = True
admin.site.register(Sender) admin.site.register(Sender)
@ -85,3 +94,16 @@ admin.site.register(Document, DocumentAdmin)
# Unless we implement multi-user, these default registrations don't make sense. # Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group) admin.site.unregister(Group)
admin.site.unregister(User) admin.site.unregister(User)
def html_tag(kind, inside=None, **kwargs):
    """
    Render a single HTML element as a string.

    :param kind: The element name, e.g. ``"a"`` or ``"img"``.
    :param inside: Markup placed between the opening and closing tags.  It
        is inserted verbatim so that the output of a nested ``html_tag()``
        call can be passed in; callers supplying plain text are responsible
        for escaping it themselves.  When ``None``, a self-closing tag is
        produced instead.
    :param kwargs: Attribute names mapped to their values.  Values are
        converted to text and HTML-escaped (including quotes) so that they
        cannot terminate the attribute or inject markup.
    :return: The rendered element as a ``str``.
    """
    # Imported locally so the helper carries its own dependency and does not
    # rely on the module's import block.
    from html import escape

    attributes = " ".join(
        '{}="{}"'.format(name, escape(str(value), quote=True))
        for name, value in kwargs.items()
    )

    if inside is not None:
        return "<{kind} {attributes}>{inside}</{kind}>".format(
            kind=kind, attributes=attributes, inside=inside)

    # No content supplied: emit the self-closing form, e.g. <img src="..."/>.
    return "<{} {}/>".format(kind, attributes)

View File

@ -127,7 +127,8 @@ class Consumer(object):
self._store(text, doc) self._store(text, doc)
except OCRError: except OCRError:
self._ignore.append(doc) self._ignore.append(doc)
Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER) Log.error(
"OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
self._cleanup_tempdir(tempdir) self._cleanup_tempdir(tempdir)
continue continue
else: else:
@ -190,8 +191,8 @@ class Consumer(object):
Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER) Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
if settings.FORGIVING_OCR: if settings.FORGIVING_OCR:
Log.warning( Log.warning(
"As FORGIVING_OCR is enabled, we're going to make the best " "As FORGIVING_OCR is enabled, we're going to make the "
"with what we have.", "best with what we have.",
Log.COMPONENT_CONSUMER Log.COMPONENT_CONSUMER
) )
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text) raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@ -246,8 +247,8 @@ class Consumer(object):
def _guess_attributes_from_name(self, parseable): def _guess_attributes_from_name(self, parseable):
""" """
We use a crude naming convention to make handling the sender, title, and We use a crude naming convention to make handling the sender, title,
tags easier: and tags easier:
"<sender> - <title> - <tags>.<suffix>" "<sender> - <title> - <tags>.<suffix>"
"<sender> - <title>.<suffix>" "<sender> - <title>.<suffix>"
"<title>.<suffix>" "<title>.<suffix>"

View File

@ -26,15 +26,17 @@ class UploadForm(forms.Form):
sender = forms.CharField( sender = forms.CharField(
max_length=Sender._meta.get_field("name").max_length, required=False) max_length=Sender._meta.get_field("name").max_length, required=False)
title = forms.CharField( title = forms.CharField(
max_length=Document._meta.get_field("title").max_length, required=False) max_length=Document._meta.get_field("title").max_length,
required=False
)
document = forms.FileField() document = forms.FileField()
signature = forms.CharField(max_length=256) signature = forms.CharField(max_length=256)
def clean_sender(self): def clean_sender(self):
""" """
I suppose it might look cleaner to use .get_or_create() here, but that I suppose it might look cleaner to use .get_or_create() here, but that
would also allow someone to fill up the db with bogus senders before all would also allow someone to fill up the db with bogus senders before
validation was met. all validation was met.
""" """
sender = self.cleaned_data.get("sender") sender = self.cleaned_data.get("sender")
if not sender: if not sender:

View File

@ -185,10 +185,10 @@ ISO639 = {
"yo": "yor", "yo": "yor",
"za": "zha", "za": "zha",
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I have # Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
# no idea which one is better, so I just picked the bigger file. # have no idea which one is better, so I just picked the bigger file.
"zh": "chi_tra", "zh": "chi_tra",
"zu": "zul" "zu": "zul"
} }

View File

@ -10,8 +10,8 @@ class Command(Renderable, BaseCommand):
help = """ help = """
Using the current set of tagging rules, apply said rules to all Using the current set of tagging rules, apply said rules to all
documents in the database, effectively allowing you to back-tag all documents in the database, effectively allowing you to back-tag all
previously indexed documents with tags created (or modified) after their previously indexed documents with tags created (or modified) after
initial import. their initial import.
""".replace(" ", "") """.replace(" ", "")
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):

View File

@ -13,7 +13,7 @@ from django.core.management.commands.loaddata import Command as LoadDataCommand
class Command(LoadDataCommand): class Command(LoadDataCommand):
def parse_name(self, fixture_name): def parse_name(self, fixture_name):
self.compression_formats['stdin'] = (lambda x,y: sys.stdin, None) self.compression_formats['stdin'] = (lambda x, y: sys.stdin, None)
if fixture_name == '-': if fixture_name == '-':
return '-', 'json', 'stdin' return '-', 'json', 'stdin'

View File

@ -1,7 +1,7 @@
class Renderable(object): class Renderable(object):
""" """
A handy mixin to make it easier/cleaner to print output based on a verbosity A handy mixin to make it easier/cleaner to print output based on a
value. verbosity value.
""" """
def _render(self, text, verbosity): def _render(self, text, verbosity):

View File

@ -36,7 +36,7 @@ class Sender(SluggedModel):
class Tag(SluggedModel): class Tag(SluggedModel):
COLOURS = ( COLOURS = (
(1, "#a6cee3"), (1, "#a6cee3"),
(2, "#1f78b4"), (2, "#1f78b4"),
@ -71,9 +71,9 @@ class Tag(SluggedModel):
default=MATCH_ANY, default=MATCH_ANY,
help_text=( help_text=(
"Which algorithm you want to use when matching text to the OCR'd " "Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word provided " "PDF. Here, \"any\" looks for any occurrence of any word "
"in the PDF, while \"all\" requires that every word provided " "provided in the PDF, while \"all\" requires that every word "
"appear in the PDF, albeit not in the order provided. A " "provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in " "\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" " "the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. If you don't know what a regex " "uses a regex to match the PDF. If you don't know what a regex "

View File

@ -14,7 +14,8 @@ class TagSerializer(serializers.ModelSerializer):
class Meta(object): class Meta(object):
model = Tag model = Tag
fields = ("id", "slug", "name", "colour", "match", "matching_algorithm") fields = (
"id", "slug", "name", "colour", "match", "matching_algorithm")
class DocumentSerializer(serializers.ModelSerializer): class DocumentSerializer(serializers.ModelSerializer):

View File

@ -4,10 +4,10 @@ from ..consumer import Consumer
class TestAttachment(TestCase): class TestAttachment(TestCase):
TAGS = ("tag1", "tag2", "tag3") TAGS = ("tag1", "tag2", "tag3")
CONSUMER = Consumer() CONSUMER = Consumer()
def _test_guess_attributes_from_name(self, path, sender, title, tags): def _test_guess_attributes_from_name(self, path, sender, title, tags):
for suffix in ("pdf", "png", "jpg", "jpeg", "gif"): for suffix in ("pdf", "png", "jpg", "jpeg", "gif"):
f = path.format(suffix) f = path.format(suffix)

View File

@ -117,4 +117,3 @@ class TestTagMatching(TestCase):
self.assertFalse(t.matches("I have alpha, charlie, and gamma in me")) self.assertFalse(t.matches("I have alpha, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas, charlie, and gamma in me")) self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas in me")) self.assertFalse(t.matches("I have alphas in me"))

View File

@ -29,10 +29,20 @@ router.register(r'tags', TagViewSet)
router.register(r'documents', DocumentViewSet) router.register(r'documents', DocumentViewSet)
urlpatterns = [ urlpatterns = [
url(r"^api/auth/", include('rest_framework.urls', namespace='rest_framework')),
# API
url(
r"^api/auth/",
include('rest_framework.urls', namespace='rest_framework')
),
url(r"^api/", include(router.urls)), url(r"^api/", include(router.urls)),
# File downloads
url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"), url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
# The Django admin
url(r"", admin.site.urls), url(r"", admin.site.urls),
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) ] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
if settings.UPLOAD_SHARED_SECRET: if settings.UPLOAD_SHARED_SECRET:

14
tox.ini Normal file
View File

@ -0,0 +1,14 @@
# Tox (http://tox.testrun.org/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported Python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
#[tox]
#envlist = py34, py35
#[testenv]
#commands = {envpython} src/manage.py test
#deps =
[pep8]
exclude=migrations,src/paperless/settings.py