This commit is contained in:
Daniel Quinn 2016-02-21 00:14:50 +00:00
parent a5124cade6
commit 422ae9303a
13 changed files with 89 additions and 40 deletions

View File

@ -56,25 +56,34 @@ class DocumentAdmin(admin.ModelAdmin):
def tags_(self, obj):
r = ""
for tag in obj.tags.all():
r += '<a class="tag" style="background-color: {};" href="{}">{}</a>'.format(
tag.get_colour_display(),
"{}?tags__id__exact={}".format(
colour = tag.get_colour_display()
r += html_tag(
"a",
tag.slug,
**{
"class": "tag",
"style": "background-color: {};".format(colour),
"href": "{}?tags__id__exact={}".format(
reverse("admin:documents_document_changelist"),
tag.pk
),
tag.slug
)
}
)
return r
tags_.allow_tags = True
def document(self, obj):
return '<a href="{}">' \
'<img src="{}" width="22" height="22" alt="{} icon" title="{}">' \
'</a>'.format(
obj.download_url,
static("documents/img/{}.png".format(obj.file_type)),
obj.file_type,
obj.file_name
return html_tag(
"a",
html_tag(
"img",
src=static("documents/img/{}.png".format(obj.file_type)),
width=22,
height=22,
alt=obj.file_type,
title=obj.file_name
),
href=obj.download_url
)
document.allow_tags = True
@ -85,3 +94,16 @@ admin.site.register(Document, DocumentAdmin)
# Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group)
admin.site.unregister(User)
def html_tag(kind, inside=None, **kwargs):
    """
    Build an HTML element as a string.

    ``kind`` is the tag name and is emitted verbatim.  Every keyword argument
    becomes a ``name="value"`` attribute, in the order ``kwargs`` yields them;
    values are converted with ``str()`` and HTML-escaped (including quotes) so
    that a value cannot terminate the attribute or inject markup.

    ``inside`` is the element's content.  It is inserted verbatim and NOT
    escaped, so that the output of a nested ``html_tag()`` call can be passed
    through; callers are responsible for escaping plain text they pass here.
    When ``inside`` is ``None`` a self-closing tag (``<kind .../>``) is
    returned; any other value, including ``""``, yields an open/close pair.
    """
    # Imported locally so the helper does not depend on a module-level import.
    from html import escape

    attributes = " ".join(
        '{}="{}"'.format(name, escape(str(value), quote=True))
        for name, value in kwargs.items()
    )

    # Only separate the tag name from the attributes when there are some,
    # which avoids output such as "<a >".
    opening = "{} {}".format(kind, attributes) if attributes else kind

    if inside is not None:
        return "<{opening}>{inside}</{kind}>".format(
            opening=opening, inside=inside, kind=kind)
    return "<{}/>".format(opening)

View File

@ -127,7 +127,8 @@ class Consumer(object):
self._store(text, doc)
except OCRError:
self._ignore.append(doc)
Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
Log.error(
"OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
self._cleanup_tempdir(tempdir)
continue
else:
@ -190,8 +191,8 @@ class Consumer(object):
Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
if settings.FORGIVING_OCR:
Log.warning(
"As FORGIVING_OCR is enabled, we're going to make the best "
"with what we have.",
"As FORGIVING_OCR is enabled, we're going to make the "
"best with what we have.",
Log.COMPONENT_CONSUMER
)
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@ -246,8 +247,8 @@ class Consumer(object):
def _guess_attributes_from_name(self, parseable):
"""
We use a crude naming convention to make handling the sender, title, and
tags easier:
We use a crude naming convention to make handling the sender, title,
and tags easier:
"<sender> - <title> - <tags>.<suffix>"
"<sender> - <title>.<suffix>"
"<title>.<suffix>"

View File

@ -26,15 +26,17 @@ class UploadForm(forms.Form):
sender = forms.CharField(
max_length=Sender._meta.get_field("name").max_length, required=False)
title = forms.CharField(
max_length=Document._meta.get_field("title").max_length, required=False)
max_length=Document._meta.get_field("title").max_length,
required=False
)
document = forms.FileField()
signature = forms.CharField(max_length=256)
def clean_sender(self):
"""
I suppose it might look cleaner to use .get_or_create() here, but that
would also allow someone to fill up the db with bogus senders before all
validation was met.
would also allow someone to fill up the db with bogus senders before
all validation was met.
"""
sender = self.cleaned_data.get("sender")
if not sender:

View File

@ -185,8 +185,8 @@ ISO639 = {
"yo": "yor",
"za": "zha",
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I have
# no idea which one is better, so I just picked the bigger file.
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
# have no idea which one is better, so I just picked the bigger file.
"zh": "chi_tra",
"zu": "zul"

View File

@ -10,8 +10,8 @@ class Command(Renderable, BaseCommand):
help = """
Using the current set of tagging rules, apply said rules to all
documents in the database, effectively allowing you to back-tag all
previously indexed documents with tags created (or modified) after their
initial import.
previously indexed documents with tags created (or modified) after
their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):

View File

@ -1,7 +1,7 @@
class Renderable(object):
"""
A handy mixin to make it easier/cleaner to print output based on a verbosity
value.
A handy mixin to make it easier/cleaner to print output based on a
verbosity value.
"""
def _render(self, text, verbosity):

View File

@ -71,9 +71,9 @@ class Tag(SluggedModel):
default=MATCH_ANY,
help_text=(
"Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word provided "
"in the PDF, while \"all\" requires that every word provided "
"appear in the PDF, albeit not in the order provided. A "
"PDF. Here, \"any\" looks for any occurrence of any word "
"provided in the PDF, while \"all\" requires that every word "
"provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. If you don't know what a regex "

View File

@ -14,7 +14,8 @@ class TagSerializer(serializers.ModelSerializer):
class Meta(object):
model = Tag
fields = ("id", "slug", "name", "colour", "match", "matching_algorithm")
fields = (
"id", "slug", "name", "colour", "match", "matching_algorithm")
class DocumentSerializer(serializers.ModelSerializer):

View File

@ -117,4 +117,3 @@ class TestTagMatching(TestCase):
self.assertFalse(t.matches("I have alpha, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas, charlie, and gamma in me"))
self.assertFalse(t.matches("I have alphas in me"))

View File

@ -29,10 +29,20 @@ router.register(r'tags', TagViewSet)
router.register(r'documents', DocumentViewSet)
urlpatterns = [
url(r"^api/auth/", include('rest_framework.urls', namespace='rest_framework')),
# API
url(
r"^api/auth/",
include('rest_framework.urls', namespace='rest_framework')
),
url(r"^api/", include(router.urls)),
# File downloads
url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
# The Django admin
url(r"", admin.site.urls),
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
if settings.UPLOAD_SHARED_SECRET:

14
tox.ini Normal file
View File

@ -0,0 +1,14 @@
# Tox (http://tox.testrun.org/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
#[tox]
#envlist = py34, py35
#[testenv]
#commands = {envpython} src/manage.py test
#deps =
[pep8]
exclude=migrations,src/paperless/settings.py