Added and implemented a rudimentary logger

This commit is contained in:
Daniel Quinn 2016-02-14 16:09:52 +00:00
parent 88acf50fe0
commit 7843ea5037
14 changed files with 142 additions and 36 deletions

View File

@ -17,9 +17,9 @@ from django.conf import settings
from django.utils import timezone from django.utils import timezone
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from logger.models import Log
from paperless.db import GnuPG from paperless.db import GnuPG
from .mixins import Renderable
from .models import Sender, Tag, Document from .models import Sender, Tag, Document
from .languages import ISO639 from .languages import ISO639
@ -27,7 +27,6 @@ from .languages import ISO639
def image_to_string(args): def image_to_string(args):
self, png, lang = args self, png, lang = args
with Image.open(os.path.join(self.SCRATCH, png)) as f: with Image.open(os.path.join(self.SCRATCH, png)) as f:
self._render(" {}".format(f.filename), 3)
return self.OCR.image_to_string(f, lang=lang) return self.OCR.image_to_string(f, lang=lang)
@ -39,7 +38,7 @@ class ConsumerError(Exception):
pass pass
class Consumer(Renderable): class Consumer(object):
""" """
Loop over every file found in CONSUMPTION_DIR and: Loop over every file found in CONSUMPTION_DIR and:
1. Convert it to a greyscale png 1. Convert it to a greyscale png
@ -110,7 +109,7 @@ class Consumer(Renderable):
if self._is_ready(doc): if self._is_ready(doc):
continue continue
self._render("Consuming {}".format(doc), 1) Log.info("Consuming {}".format(doc), Log.COMPONENT_CONSUMER)
pngs = self._get_greyscale(doc) pngs = self._get_greyscale(doc)
@ -118,7 +117,7 @@ class Consumer(Renderable):
text = self._get_ocr(pngs) text = self._get_ocr(pngs)
except OCRError: except OCRError:
self._ignore.append(doc) self._ignore.append(doc)
self._render("OCR FAILURE: {}".format(doc), 0) Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
continue continue
self._store(text, doc) self._store(text, doc)
@ -126,7 +125,10 @@ class Consumer(Renderable):
def _get_greyscale(self, doc): def _get_greyscale(self, doc):
self._render(" Generating greyscale image from {}".format(doc), 2) Log.debug(
"Generating greyscale image from {}".format(doc),
Log.COMPONENT_CONSUMER
)
i = random.randint(1000000, 9999999) i = random.randint(1000000, 9999999)
png = os.path.join(self.SCRATCH, "{}.png".format(i)) png = os.path.join(self.SCRATCH, "{}.png".format(i))
@ -141,7 +143,10 @@ class Consumer(Renderable):
def _guess_language(self, text): def _guess_language(self, text):
try: try:
guess = langdetect.detect(text) guess = langdetect.detect(text)
self._render(" Language detected: {}".format(guess), 2) Log.debug(
"Language detected: {}".format(guess),
Log.COMPONENT_CONSUMER
)
return guess return guess
except Exception: except Exception:
return None return None
@ -152,19 +157,19 @@ class Consumer(Renderable):
simple language detection trial & error. simple language detection trial & error.
""" """
self._render(" OCRing the document", 2) Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)
raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE) raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE)
guessed_language = self._guess_language(raw_text) guessed_language = self._guess_language(raw_text)
if not guessed_language or guessed_language not in ISO639: if not guessed_language or guessed_language not in ISO639:
self._render("Language detection failed!", 0) Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
if settings.FORGIVING_OCR: if settings.FORGIVING_OCR:
self._render( Log.warning(
"As FORGIVING_OCR is enabled, we're going to make the best " "As FORGIVING_OCR is enabled, we're going to make the best "
"with what we have.", "with what we have.",
1 Log.COMPONENT_CONSUMER
) )
return raw_text return raw_text
raise OCRError raise OCRError
@ -176,12 +181,12 @@ class Consumer(Renderable):
return self._ocr(pngs, ISO639[guessed_language]) return self._ocr(pngs, ISO639[guessed_language])
except pyocr.pyocr.tesseract.TesseractError: except pyocr.pyocr.tesseract.TesseractError:
if settings.FORGIVING_OCR: if settings.FORGIVING_OCR:
self._render( Log.warning(
"OCR for {} failed, but we're going to stick with what " "OCR for {} failed, but we're going to stick with what "
"we've got since FORGIVING_OCR is enabled.".format( "we've got since FORGIVING_OCR is enabled.".format(
guessed_language guessed_language
), ),
0 Log.COMPONENT_CONSUMER
) )
return raw_text return raw_text
raise OCRError raise OCRError
@ -191,12 +196,12 @@ class Consumer(Renderable):
Performs a single OCR attempt. Performs a single OCR attempt.
""" """
self._render(" Parsing for {}".format(lang), 2) Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER)
with Pool(processes=self.THREADS) as pool: with Pool(processes=self.THREADS) as pool:
r = pool.map(image_to_string, r = pool.map(
itertools.product([self], pngs, [lang])) image_to_string, itertools.product([self], pngs, [lang]))
r = "".join(r) r = " ".join(r)
# Strip out excess white space to allow matching to go smoother # Strip out excess white space to allow matching to go smoother
return re.sub(r"\s+", " ", r) return re.sub(r"\s+", " ", r)
@ -251,7 +256,7 @@ class Consumer(Renderable):
stats = os.stat(doc) stats = os.stat(doc)
self._render(" Saving record to database", 2) Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)
document = Document.objects.create( document = Document.objects.create(
sender=sender, sender=sender,
@ -266,12 +271,13 @@ class Consumer(Renderable):
if relevant_tags: if relevant_tags:
tag_names = ", ".join([t.slug for t in relevant_tags]) tag_names = ", ".join([t.slug for t in relevant_tags])
self._render(" Tagging with {}".format(tag_names), 2) Log.debug(
"Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
document.tags.add(*relevant_tags) document.tags.add(*relevant_tags)
with open(doc, "rb") as unencrypted: with open(doc, "rb") as unencrypted:
with open(document.source_path, "wb") as encrypted: with open(document.source_path, "wb") as encrypted:
self._render(" Encrypting", 3) Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
encrypted.write(GnuPG.encrypted(unencrypted)) encrypted.write(GnuPG.encrypted(unencrypted))
def _cleanup(self, pngs, doc): def _cleanup(self, pngs, doc):
@ -280,11 +286,9 @@ class Consumer(Renderable):
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0])) self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
for f in list(glob.glob(png_glob)) + [doc]: for f in list(glob.glob(png_glob)) + [doc]:
self._render(" Deleting {}".format(f), 2) Log.debug("Deleting {}".format(f), Log.COMPONENT_CONSUMER)
os.unlink(f) os.unlink(f)
self._render("", 2)
def _is_ready(self, doc): def _is_ready(self, doc):
""" """
Detect whether `doc` is ready to consume or if it's still being written Detect whether `doc` is ready to consume or if it's still being written

View File

@ -11,8 +11,9 @@ from dateutil import parser
from django.conf import settings from django.conf import settings
from logger.models import Log
from .consumer import Consumer from .consumer import Consumer
from .mixins import Renderable
from .models import Sender from .models import Sender
@ -24,7 +25,7 @@ class InvalidMessageError(Exception):
pass pass
class Message(Renderable): class Message(object):
""" """
A crude, but simple email message class. We assume that there's a subject A crude, but simple email message class. We assume that there's a subject
and n attachments, and that we don't care about the message body. and n attachments, and that we don't care about the message body.
@ -53,7 +54,8 @@ class Message(Renderable):
self._set_time(message) self._set_time(message)
self._render('Fetching email: "{}"'.format(self.subject), 1) Log.info(
'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)
attachments = [] attachments = []
for part in message.walk(): for part in message.walk():
@ -132,7 +134,7 @@ class Attachment(object):
return self.data return self.data
class MailFetcher(Renderable): class MailFetcher(object):
def __init__(self, verbosity=1): def __init__(self, verbosity=1):
@ -157,11 +159,14 @@ class MailFetcher(Renderable):
if self._enabled: if self._enabled:
self._render("Checking mail", 1) Log.info("Checking mail", Log.COMPONENT_MAIL)
for message in self._get_messages(): for message in self._get_messages():
self._render(' Storing email: "{}"'.format(message.subject), 1) Log.debug(
'Storing email: "{}"'.format(message.subject),
Log.COMPONENT_MAIL
)
t = int(time.mktime(message.time.timetuple())) t = int(time.mktime(message.time.timetuple()))
file_name = os.path.join(Consumer.CONSUME, message.file_name) file_name = os.path.join(Consumer.CONSUME, message.file_name)
@ -188,7 +193,7 @@ class MailFetcher(Renderable):
self._connection.logout() self._connection.logout()
except Exception as e: except Exception as e:
self._render(e, 0) Log.error(e, Log.COMPONENT_MAIL)
return r return r
@ -215,7 +220,7 @@ class MailFetcher(Renderable):
try: try:
message = Message(data[0][1], self.verbosity) message = Message(data[0][1], self.verbosity)
except InvalidMessageError as e: except InvalidMessageError as e:
self._render(e, 0) Log.error(e, Log.COMPONENT_MAIL)
else: else:
self._connection.store(num, "+FLAGS", "\\Deleted") self._connection.store(num, "+FLAGS", "\\Deleted")

View File

@ -7,10 +7,9 @@ from django.core.management.base import BaseCommand, CommandError
from ...consumer import Consumer, ConsumerError from ...consumer import Consumer, ConsumerError
from ...mail import MailFetcher, MailFetcherError from ...mail import MailFetcher, MailFetcherError
from ...mixins import Renderable
class Command(Renderable, BaseCommand): class Command(BaseCommand):
""" """
On every iteration of an infinite loop, consume what we can from the On every iteration of an infinite loop, consume what we can from the
consumption directory, and fetch any mail available. consumption directory, and fetch any mail available.

View File

@ -46,7 +46,7 @@ class Command(Renderable, BaseCommand):
target = os.path.join(self.target, document.parseable_file_name) target = os.path.join(self.target, document.parseable_file_name)
self._render("Exporting: {}".format(target), 1) print("Exporting: {}".format(target))
with open(target, "wb") as f: with open(target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file)) f.write(GnuPG.decrypted(document.source_file))

View File

@ -27,6 +27,5 @@ class Command(Renderable, BaseCommand):
pk__in=document.tags.values_list("pk", flat=True)) pk__in=document.tags.values_list("pk", flat=True))
for tag in tags: for tag in tags:
if tag.matches(document.content): if tag.matches(document.content):
self._render( print('Tagging {} with "{}"'.format(document, tag))
'Tagging {} with "{}"'.format(document, tag), 1)
document.tags.add(tag) document.tags.add(tag)

0
src/logger/__init__.py Normal file
View File

12
src/logger/admin.py Normal file
View File

@ -0,0 +1,12 @@
from django.contrib import admin
from .models import Log
@admin.register(Log)
class LogAdmin(admin.ModelAdmin):
    """Admin configuration for browsing ``Log`` entries."""

    # Columns shown in the change list.
    list_display = (
        "message",
        "level",
        "component",
    )

    # Sidebar filters offered on the change list.
    list_filter = (
        "level",
        "component",
    )

5
src/logger/apps.py Normal file
View File

@ -0,0 +1,5 @@
from django.apps import AppConfig
class LoggerConfig(AppConfig):
    """Django application configuration for the ``logger`` app."""

    name = "logger"

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2016-02-14 16:08
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration: creates the ``Log`` model."""

    initial = True

    # First migration for this model, so nothing to depend on.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Log',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'time',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'message',
                    models.TextField(),
                ),
                (
                    'level',
                    models.PositiveIntegerField(
                        choices=[
                            (1, 'Error'),
                            (2, 'Warning'),
                            (3, 'Informational'),
                            (4, 'Debugging'),
                        ],
                        default=3,
                    ),
                ),
                (
                    'component',
                    models.PositiveIntegerField(
                        choices=[
                            (1, 'Consumer'),
                            (2, 'Mail Fetcher'),
                        ],
                    ),
                ),
            ],
        ),
    ]

View File

47
src/logger/models.py Normal file
View File

@ -0,0 +1,47 @@
from django.db import models
class Log(models.Model):
    """
    A single log entry stored in the database.

    Every entry carries a creation timestamp, the message text, a severity
    level (one of ``LEVELS``) and the component that emitted it (one of
    ``COMPONENTS``).  Entries are normally written through the ``error``,
    ``warning``, ``info`` and ``debug`` classmethods, e.g.::

        Log.info("Checking mail", Log.COMPONENT_MAIL)
    """

    # Severity levels; a lower number means a more severe entry.
    LEVEL_ERROR = 1
    LEVEL_WARNING = 2
    LEVEL_INFO = 3
    LEVEL_DEBUG = 4

    LEVELS = (
        (LEVEL_ERROR, "Error"),
        (LEVEL_WARNING, "Warning"),
        (LEVEL_INFO, "Informational"),
        (LEVEL_DEBUG, "Debugging"),
    )

    # Parts of the application that are allowed to write log entries.
    COMPONENT_CONSUMER = 1
    COMPONENT_MAIL = 2

    COMPONENTS = (
        (COMPONENT_CONSUMER, "Consumer"),
        (COMPONENT_MAIL, "Mail Fetcher"),
    )

    time = models.DateTimeField(auto_now_add=True)
    message = models.TextField()
    level = models.PositiveIntegerField(choices=LEVELS, default=LEVEL_INFO)
    component = models.PositiveIntegerField(choices=COMPONENTS)

    def __str__(self):
        """Return a short human-readable form: ``<level label>: <message>``."""
        return "{}: {}".format(self.get_level_display(), self.message)

    @classmethod
    def _write(cls, message, level, component):
        """
        Create and save one entry with the given level and component.

        ``message`` is converted with ``str()`` first, because callers may
        hand over non-string objects such as a caught exception.
        """
        cls.objects.create(
            message=str(message), level=level, component=component)

    @classmethod
    def error(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_ERROR``."""
        cls._write(message, cls.LEVEL_ERROR, component)

    @classmethod
    def warning(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_WARNING``."""
        cls._write(message, cls.LEVEL_WARNING, component)

    @classmethod
    def info(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_INFO``."""
        cls._write(message, cls.LEVEL_INFO, component)

    @classmethod
    def debug(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_DEBUG``."""
        cls._write(message, cls.LEVEL_DEBUG, component)

3
src/logger/tests.py Normal file
View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

3
src/logger/views.py Normal file
View File

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

View File

@ -31,6 +31,7 @@ ALLOWED_HOSTS = []
# Application definition # Application definition
INSTALLED_APPS = [ INSTALLED_APPS = [
'django.contrib.admin', 'django.contrib.admin',
'django.contrib.auth', 'django.contrib.auth',
'django.contrib.contenttypes', 'django.contrib.contenttypes',
@ -41,6 +42,8 @@ INSTALLED_APPS = [
"django_extensions", "django_extensions",
"documents", "documents",
"logger",
] ]
MIDDLEWARE_CLASSES = [ MIDDLEWARE_CLASSES = [