diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 3f3b9e9a3..8a7729ffb 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -17,9 +17,9 @@ from django.conf import settings from django.utils import timezone from django.template.defaultfilters import slugify +from logger.models import Log from paperless.db import GnuPG -from .mixins import Renderable from .models import Sender, Tag, Document from .languages import ISO639 @@ -27,7 +27,6 @@ from .languages import ISO639 def image_to_string(args): self, png, lang = args with Image.open(os.path.join(self.SCRATCH, png)) as f: - self._render(" {}".format(f.filename), 3) return self.OCR.image_to_string(f, lang=lang) @@ -39,7 +38,7 @@ class ConsumerError(Exception): pass -class Consumer(Renderable): +class Consumer(object): """ Loop over every file found in CONSUMPTION_DIR and: 1. Convert it to a greyscale png @@ -110,7 +109,7 @@ class Consumer(Renderable): if self._is_ready(doc): continue - self._render("Consuming {}".format(doc), 1) + Log.info("Consuming {}".format(doc), Log.COMPONENT_CONSUMER) pngs = self._get_greyscale(doc) @@ -118,7 +117,7 @@ class Consumer(Renderable): text = self._get_ocr(pngs) except OCRError: self._ignore.append(doc) - self._render("OCR FAILURE: {}".format(doc), 0) + Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER) continue self._store(text, doc) @@ -126,7 +125,10 @@ class Consumer(Renderable): def _get_greyscale(self, doc): - self._render(" Generating greyscale image from {}".format(doc), 2) + Log.debug( + "Generating greyscale image from {}".format(doc), + Log.COMPONENT_CONSUMER + ) i = random.randint(1000000, 9999999) png = os.path.join(self.SCRATCH, "{}.png".format(i)) @@ -141,7 +143,10 @@ class Consumer(Renderable): def _guess_language(self, text): try: guess = langdetect.detect(text) - self._render(" Language detected: {}".format(guess), 2) + Log.debug( + "Language detected: {}".format(guess), + Log.COMPONENT_CONSUMER + ) return guess except Exception: return None @@ -152,19 +157,19 @@ class Consumer(Renderable): simple language detection trial & error. """ - self._render(" OCRing the document", 2) + Log.debug("OCRing the document", Log.COMPONENT_CONSUMER) raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE) guessed_language = self._guess_language(raw_text) if not guessed_language or guessed_language not in ISO639: - self._render("Language detection failed!", 0) + Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER) if settings.FORGIVING_OCR: - self._render( + Log.warning( "As FORGIVING_OCR is enabled, we're going to make the best " "with what we have.", - 1 + Log.COMPONENT_CONSUMER ) return raw_text raise OCRError @@ -176,12 +181,12 @@ class Consumer(Renderable): return self._ocr(pngs, ISO639[guessed_language]) except pyocr.pyocr.tesseract.TesseractError: if settings.FORGIVING_OCR: - self._render( + Log.warning( "OCR for {} failed, but we're going to stick with what " "we've got since FORGIVING_OCR is enabled.".format( guessed_language ), - 0 + Log.COMPONENT_CONSUMER ) return raw_text raise OCRError @@ -191,12 +196,12 @@ class Consumer(Renderable): Performs a single OCR attempt. """ - self._render(" Parsing for {}".format(lang), 2) + Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER) with Pool(processes=self.THREADS) as pool: - r = pool.map(image_to_string, - itertools.product([self], pngs, [lang])) - r = "".join(r) + r = pool.map( + image_to_string, itertools.product([self], pngs, [lang])) + r = " ".join(r) # Strip out excess white space to allow matching to go smoother return re.sub(r"\s+", " ", r) @@ -251,7 +256,7 @@ class Consumer(Renderable): stats = os.stat(doc) - self._render(" Saving record to database", 2) + Log.debug("Saving record to database", Log.COMPONENT_CONSUMER) document = Document.objects.create( sender=sender, @@ -266,12 +271,13 @@ class Consumer(Renderable): if relevant_tags: tag_names = ", ".join([t.slug for t in relevant_tags]) - self._render(" Tagging with {}".format(tag_names), 2) + Log.debug( + "Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER) document.tags.add(*relevant_tags) with open(doc, "rb") as unencrypted: with open(document.source_path, "wb") as encrypted: - self._render(" Encrypting", 3) + Log.debug("Encrypting", Log.COMPONENT_CONSUMER) encrypted.write(GnuPG.encrypted(unencrypted)) def _cleanup(self, pngs, doc): @@ -280,11 +286,9 @@ class Consumer(Renderable): self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0])) for f in list(glob.glob(png_glob)) + [doc]: - self._render(" Deleting {}".format(f), 2) + Log.debug("Deleting {}".format(f), Log.COMPONENT_CONSUMER) os.unlink(f) - self._render("", 2) - def _is_ready(self, doc): """ Detect whether `doc` is ready to consume or if it's still being written diff --git a/src/documents/mail.py b/src/documents/mail.py index 550c4bd83..384567e60 100644 --- a/src/documents/mail.py +++ b/src/documents/mail.py @@ -11,8 +11,9 @@ from dateutil import parser from django.conf import settings +from logger.models import Log + from .consumer import Consumer -from .mixins import Renderable from .models import Sender @@ -24,7 +25,7 @@ class InvalidMessageError(Exception): pass -class Message(Renderable): +class Message(object): """ A crude, but simple email message class. We assume that there's a subject and n attachments, and that we don't care about the message body. @@ -53,7 +54,8 @@ class Message(Renderable): self._set_time(message) - self._render('Fetching email: "{}"'.format(self.subject), 1) + Log.info( + 'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL) attachments = [] for part in message.walk(): @@ -132,7 +134,7 @@ class Attachment(object): return self.data -class MailFetcher(Renderable): +class MailFetcher(object): def __init__(self, verbosity=1): @@ -157,11 +159,14 @@ class MailFetcher(Renderable): if self._enabled: - self._render("Checking mail", 1) + Log.info("Checking mail", Log.COMPONENT_MAIL) for message in self._get_messages(): - self._render(' Storing email: "{}"'.format(message.subject), 1) + Log.debug( + 'Storing email: "{}"'.format(message.subject), + Log.COMPONENT_MAIL + ) t = int(time.mktime(message.time.timetuple())) file_name = os.path.join(Consumer.CONSUME, message.file_name) @@ -188,7 +193,7 @@ class MailFetcher(Renderable): self._connection.logout() except Exception as e: - self._render(e, 0) + Log.error(e, Log.COMPONENT_MAIL) return r @@ -215,7 +220,7 @@ class MailFetcher(Renderable): try: message = Message(data[0][1], self.verbosity) except InvalidMessageError as e: - self._render(e, 0) + Log.error(e, Log.COMPONENT_MAIL) else: self._connection.store(num, "+FLAGS", "\\Deleted") diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 067323c44..ae72381e2 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -7,10 +7,9 @@ from django.core.management.base import BaseCommand, CommandError from ...consumer import Consumer, ConsumerError from ...mail import MailFetcher, MailFetcherError -from ...mixins import Renderable -class Command(Renderable, BaseCommand): +class Command(BaseCommand): """ On every iteration of an infinite loop, consume what we can from the consumption directory, and fetch any mail available. diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index c8129ad7b..f64aaa39f 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -46,7 +46,7 @@ class Command(Renderable, BaseCommand): target = os.path.join(self.target, document.parseable_file_name) - self._render("Exporting: {}".format(target), 1) + print("Exporting: {}".format(target)) with open(target, "wb") as f: f.write(GnuPG.decrypted(document.source_file)) diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index 84a887de0..d7519f53b 100644 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -27,6 +27,5 @@ class Command(Renderable, BaseCommand): pk__in=document.tags.values_list("pk", flat=True)) for tag in tags: if tag.matches(document.content): - self._render( - 'Tagging {} with "{}"'.format(document, tag), 1) + print('Tagging {} with "{}"'.format(document, tag)) document.tags.add(tag) diff --git a/src/logger/__init__.py b/src/logger/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/logger/admin.py b/src/logger/admin.py new file mode 100644 index 000000000..dc9446821 --- /dev/null +++ b/src/logger/admin.py @@ -0,0 +1,12 @@ +from django.contrib import admin + +from .models import Log + + +class LogAdmin(admin.ModelAdmin): + + list_display = ("message", "level", "component") + list_filter = ("level", "component",) + + +admin.site.register(Log, LogAdmin) diff --git a/src/logger/apps.py b/src/logger/apps.py new file mode 100644 index 000000000..2c1a7d735 --- /dev/null +++ b/src/logger/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class LoggerConfig(AppConfig): + name = 'logger' diff --git a/src/logger/migrations/0001_initial.py b/src/logger/migrations/0001_initial.py new file mode 100644 index 000000000..b9b81c296 --- /dev/null +++ b/src/logger/migrations/0001_initial.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9 on 2016-02-14 16:08 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Log', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('time', models.DateTimeField(auto_now_add=True)), + ('message', models.TextField()), + ('level', models.PositiveIntegerField(choices=[(1, 'Error'), (2, 'Warning'), (3, 'Informational'), (4, 'Debugging')], default=3)), + ('component', models.PositiveIntegerField(choices=[(1, 'Consumer'), (2, 'Mail Fetcher')])), + ], + ), + ] diff --git a/src/logger/migrations/__init__.py b/src/logger/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/logger/models.py b/src/logger/models.py new file mode 100644 index 000000000..e301b5ada --- /dev/null +++ b/src/logger/models.py @@ -0,0 +1,47 @@ +from django.db import models + + +class Log(models.Model): + + LEVEL_ERROR = 1 + LEVEL_WARNING = 2 + LEVEL_INFO = 3 + LEVEL_DEBUG = 4 + LEVELS = ( + (LEVEL_ERROR, "Error"), + (LEVEL_WARNING, "Warning"), + (LEVEL_INFO, "Informational"), + (LEVEL_DEBUG, "Debugging"), + ) + + COMPONENT_CONSUMER = 1 + COMPONENT_MAIL = 2 + COMPONENTS = ( + (COMPONENT_CONSUMER, "Consumer"), + (COMPONENT_MAIL, "Mail Fetcher") + ) + + time = models.DateTimeField(auto_now_add=True) + message = models.TextField() + level = models.PositiveIntegerField(choices=LEVELS, default=LEVEL_INFO) + component = models.PositiveIntegerField(choices=COMPONENTS) + + @classmethod + def error(cls, message, component): + cls.objects.create( + message=message, level=cls.LEVEL_ERROR, component=component) + + @classmethod + def warning(cls, message, component): + cls.objects.create( + message=message, level=cls.LEVEL_WARNING, component=component) + + @classmethod + def info(cls, message, component): + cls.objects.create( + message=message, level=cls.LEVEL_INFO, component=component) + + @classmethod + def debug(cls, message, component): + cls.objects.create( + message=message, level=cls.LEVEL_DEBUG, component=component) diff --git a/src/logger/tests.py b/src/logger/tests.py new file mode 100644 index 000000000..7ce503c2d --- /dev/null +++ b/src/logger/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/src/logger/views.py b/src/logger/views.py new file mode 100644 index 000000000..91ea44a21 --- /dev/null +++ b/src/logger/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 88be33a8c..444989990 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -31,6 +31,7 @@ ALLOWED_HOSTS = [] # Application definition INSTALLED_APPS = [ + 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', @@ -41,6 +42,8 @@ INSTALLED_APPS = [ "django_extensions", "documents", + "logger", + ] MIDDLEWARE_CLASSES = [