Added and implemented a rudimentary logger

This commit is contained in:
Daniel Quinn 2016-02-14 16:09:52 +00:00
parent 88acf50fe0
commit 7843ea5037
14 changed files with 142 additions and 36 deletions

View File

@ -17,9 +17,9 @@ from django.conf import settings
from django.utils import timezone
from django.template.defaultfilters import slugify
from logger.models import Log
from paperless.db import GnuPG
from .mixins import Renderable
from .models import Sender, Tag, Document
from .languages import ISO639
@ -27,7 +27,6 @@ from .languages import ISO639
def image_to_string(args):
self, png, lang = args
with Image.open(os.path.join(self.SCRATCH, png)) as f:
self._render(" {}".format(f.filename), 3)
return self.OCR.image_to_string(f, lang=lang)
@ -39,7 +38,7 @@ class ConsumerError(Exception):
pass
class Consumer(Renderable):
class Consumer(object):
"""
Loop over every file found in CONSUMPTION_DIR and:
1. Convert it to a greyscale png
@ -110,7 +109,7 @@ class Consumer(Renderable):
if self._is_ready(doc):
continue
self._render("Consuming {}".format(doc), 1)
Log.info("Consuming {}".format(doc), Log.COMPONENT_CONSUMER)
pngs = self._get_greyscale(doc)
@ -118,7 +117,7 @@ class Consumer(Renderable):
text = self._get_ocr(pngs)
except OCRError:
self._ignore.append(doc)
self._render("OCR FAILURE: {}".format(doc), 0)
Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
continue
self._store(text, doc)
@ -126,7 +125,10 @@ class Consumer(Renderable):
def _get_greyscale(self, doc):
self._render(" Generating greyscale image from {}".format(doc), 2)
Log.debug(
"Generating greyscale image from {}".format(doc),
Log.COMPONENT_CONSUMER
)
i = random.randint(1000000, 9999999)
png = os.path.join(self.SCRATCH, "{}.png".format(i))
@ -141,7 +143,10 @@ class Consumer(Renderable):
def _guess_language(self, text):
try:
guess = langdetect.detect(text)
self._render(" Language detected: {}".format(guess), 2)
Log.debug(
"Language detected: {}".format(guess),
Log.COMPONENT_CONSUMER
)
return guess
except Exception:
return None
@ -152,19 +157,19 @@ class Consumer(Renderable):
simple language detection trial & error.
"""
self._render(" OCRing the document", 2)
Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)
raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE)
guessed_language = self._guess_language(raw_text)
if not guessed_language or guessed_language not in ISO639:
self._render("Language detection failed!", 0)
Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
if settings.FORGIVING_OCR:
self._render(
Log.warning(
"As FORGIVING_OCR is enabled, we're going to make the best "
"with what we have.",
1
Log.COMPONENT_CONSUMER
)
return raw_text
raise OCRError
@ -176,12 +181,12 @@ class Consumer(Renderable):
return self._ocr(pngs, ISO639[guessed_language])
except pyocr.pyocr.tesseract.TesseractError:
if settings.FORGIVING_OCR:
self._render(
Log.warning(
"OCR for {} failed, but we're going to stick with what "
"we've got since FORGIVING_OCR is enabled.".format(
guessed_language
),
0
Log.COMPONENT_CONSUMER
)
return raw_text
raise OCRError
@ -191,12 +196,12 @@ class Consumer(Renderable):
Performs a single OCR attempt.
"""
self._render(" Parsing for {}".format(lang), 2)
Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER)
with Pool(processes=self.THREADS) as pool:
r = pool.map(image_to_string,
itertools.product([self], pngs, [lang]))
r = "".join(r)
r = pool.map(
image_to_string, itertools.product([self], pngs, [lang]))
r = " ".join(r)
# Strip out excess white space to allow matching to go smoother
return re.sub(r"\s+", " ", r)
@ -251,7 +256,7 @@ class Consumer(Renderable):
stats = os.stat(doc)
self._render(" Saving record to database", 2)
Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)
document = Document.objects.create(
sender=sender,
@ -266,12 +271,13 @@ class Consumer(Renderable):
if relevant_tags:
tag_names = ", ".join([t.slug for t in relevant_tags])
self._render(" Tagging with {}".format(tag_names), 2)
Log.debug(
"Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
document.tags.add(*relevant_tags)
with open(doc, "rb") as unencrypted:
with open(document.source_path, "wb") as encrypted:
self._render(" Encrypting", 3)
Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
encrypted.write(GnuPG.encrypted(unencrypted))
def _cleanup(self, pngs, doc):
@ -280,11 +286,9 @@ class Consumer(Renderable):
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
for f in list(glob.glob(png_glob)) + [doc]:
self._render(" Deleting {}".format(f), 2)
Log.debug("Deleting {}".format(f), Log.COMPONENT_CONSUMER)
os.unlink(f)
self._render("", 2)
def _is_ready(self, doc):
"""
Detect whether `doc` is ready to consume or if it's still being written

View File

@ -11,8 +11,9 @@ from dateutil import parser
from django.conf import settings
from logger.models import Log
from .consumer import Consumer
from .mixins import Renderable
from .models import Sender
@ -24,7 +25,7 @@ class InvalidMessageError(Exception):
pass
class Message(Renderable):
class Message(object):
"""
A crude, but simple email message class. We assume that there's a subject
and n attachments, and that we don't care about the message body.
@ -53,7 +54,8 @@ class Message(Renderable):
self._set_time(message)
self._render('Fetching email: "{}"'.format(self.subject), 1)
Log.info(
'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)
attachments = []
for part in message.walk():
@ -132,7 +134,7 @@ class Attachment(object):
return self.data
class MailFetcher(Renderable):
class MailFetcher(object):
def __init__(self, verbosity=1):
@ -157,11 +159,14 @@ class MailFetcher(Renderable):
if self._enabled:
self._render("Checking mail", 1)
Log.info("Checking mail", Log.COMPONENT_MAIL)
for message in self._get_messages():
self._render(' Storing email: "{}"'.format(message.subject), 1)
Log.debug(
'Storing email: "{}"'.format(message.subject),
Log.COMPONENT_MAIL
)
t = int(time.mktime(message.time.timetuple()))
file_name = os.path.join(Consumer.CONSUME, message.file_name)
@ -188,7 +193,7 @@ class MailFetcher(Renderable):
self._connection.logout()
except Exception as e:
self._render(e, 0)
Log.error(e, Log.COMPONENT_MAIL)
return r
@ -215,7 +220,7 @@ class MailFetcher(Renderable):
try:
message = Message(data[0][1], self.verbosity)
except InvalidMessageError as e:
self._render(e, 0)
Log.error(e, Log.COMPONENT_MAIL)
else:
self._connection.store(num, "+FLAGS", "\\Deleted")

View File

@ -7,10 +7,9 @@ from django.core.management.base import BaseCommand, CommandError
from ...consumer import Consumer, ConsumerError
from ...mail import MailFetcher, MailFetcherError
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
"""
On every iteration of an infinite loop, consume what we can from the
consumption directory, and fetch any mail available.

View File

@ -46,7 +46,7 @@ class Command(Renderable, BaseCommand):
target = os.path.join(self.target, document.parseable_file_name)
self._render("Exporting: {}".format(target), 1)
print("Exporting: {}".format(target))
with open(target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file))

View File

@ -27,6 +27,5 @@ class Command(Renderable, BaseCommand):
pk__in=document.tags.values_list("pk", flat=True))
for tag in tags:
if tag.matches(document.content):
self._render(
'Tagging {} with "{}"'.format(document, tag), 1)
print('Tagging {} with "{}"'.format(document, tag))
document.tags.add(tag)

0
src/logger/__init__.py Normal file
View File

12
src/logger/admin.py Normal file
View File

@ -0,0 +1,12 @@
from django.contrib import admin
from .models import Log
@admin.register(Log)
class LogAdmin(admin.ModelAdmin):
    """Admin configuration for browsing ``Log`` entries."""

    # Columns shown for each entry in the changelist.
    list_display = (
        "message",
        "level",
        "component",
    )

    # Sidebar filters offered in the changelist.
    list_filter = (
        "level",
        "component",
    )

5
src/logger/apps.py Normal file
View File

@ -0,0 +1,5 @@
from django.apps import AppConfig
class LoggerConfig(AppConfig):
    """Application configuration for the ``logger`` Django app."""

    name = "logger"

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2016-02-14 16:08
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration for the logger app: creates the ``Log`` model."""

    initial = True

    # First migration of the app, so it depends on nothing.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Log",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "time",
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    "message",
                    models.TextField(),
                ),
                (
                    "level",
                    models.PositiveIntegerField(
                        choices=[
                            (1, "Error"),
                            (2, "Warning"),
                            (3, "Informational"),
                            (4, "Debugging"),
                        ],
                        default=3,
                    ),
                ),
                (
                    "component",
                    models.PositiveIntegerField(
                        choices=[
                            (1, "Consumer"),
                            (2, "Mail Fetcher"),
                        ],
                    ),
                ),
            ],
        ),
    ]

View File

47
src/logger/models.py Normal file
View File

@ -0,0 +1,47 @@
from django.db import models
class Log(models.Model):
    """
    A single log entry persisted to the database.

    Each entry records a message together with a severity level (one of the
    ``LEVEL_*`` constants) and the component that emitted it (one of the
    ``COMPONENT_*`` constants).  Entries are normally created through the
    ``error``/``warning``/``info``/``debug`` classmethods rather than by
    instantiating the model directly.
    """

    # Severity levels; a lower number means a more severe entry.
    LEVEL_ERROR = 1
    LEVEL_WARNING = 2
    LEVEL_INFO = 3
    LEVEL_DEBUG = 4
    LEVELS = (
        (LEVEL_ERROR, "Error"),
        (LEVEL_WARNING, "Warning"),
        (LEVEL_INFO, "Informational"),
        (LEVEL_DEBUG, "Debugging"),
    )

    # Components that can emit log entries.
    COMPONENT_CONSUMER = 1
    COMPONENT_MAIL = 2
    COMPONENTS = (
        (COMPONENT_CONSUMER, "Consumer"),
        (COMPONENT_MAIL, "Mail Fetcher")
    )

    # Set once, when the row is first created.
    time = models.DateTimeField(auto_now_add=True)
    message = models.TextField()
    level = models.PositiveIntegerField(choices=LEVELS, default=LEVEL_INFO)
    component = models.PositiveIntegerField(choices=COMPONENTS)

    def __str__(self):
        """Return a readable one-line form, e.g. ``[Error] OCR FAILURE``."""
        return "[{}] {}".format(self.get_level_display(), self.message)

    @classmethod
    def _record(cls, message, level, component):
        """
        Create and save one entry.

        Shared implementation behind the level-specific helpers below, so
        the ``objects.create`` call lives in exactly one place.
        """
        cls.objects.create(message=message, level=level, component=component)

    @classmethod
    def error(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_ERROR``."""
        cls._record(message, cls.LEVEL_ERROR, component)

    @classmethod
    def warning(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_WARNING``."""
        cls._record(message, cls.LEVEL_WARNING, component)

    @classmethod
    def info(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_INFO``."""
        cls._record(message, cls.LEVEL_INFO, component)

    @classmethod
    def debug(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_DEBUG``."""
        cls._record(message, cls.LEVEL_DEBUG, component)

3
src/logger/tests.py Normal file
View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

3
src/logger/views.py Normal file
View File

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

View File

@ -31,6 +31,7 @@ ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
@ -41,6 +42,8 @@ INSTALLED_APPS = [
"django_extensions",
"documents",
"logger",
]
MIDDLEWARE_CLASSES = [