mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Added and implemented a rudimentary logger
This commit is contained in:
parent
88acf50fe0
commit
7843ea5037
@ -17,9 +17,9 @@ from django.conf import settings
|
|||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from django.template.defaultfilters import slugify
|
from django.template.defaultfilters import slugify
|
||||||
|
|
||||||
|
from logger.models import Log
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
|
|
||||||
from .mixins import Renderable
|
|
||||||
from .models import Sender, Tag, Document
|
from .models import Sender, Tag, Document
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
|
||||||
@ -27,7 +27,6 @@ from .languages import ISO639
|
|||||||
def image_to_string(args):
|
def image_to_string(args):
|
||||||
self, png, lang = args
|
self, png, lang = args
|
||||||
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
||||||
self._render(" {}".format(f.filename), 3)
|
|
||||||
return self.OCR.image_to_string(f, lang=lang)
|
return self.OCR.image_to_string(f, lang=lang)
|
||||||
|
|
||||||
|
|
||||||
@ -39,7 +38,7 @@ class ConsumerError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Consumer(Renderable):
|
class Consumer(object):
|
||||||
"""
|
"""
|
||||||
Loop over every file found in CONSUMPTION_DIR and:
|
Loop over every file found in CONSUMPTION_DIR and:
|
||||||
1. Convert it to a greyscale png
|
1. Convert it to a greyscale png
|
||||||
@ -110,7 +109,7 @@ class Consumer(Renderable):
|
|||||||
if self._is_ready(doc):
|
if self._is_ready(doc):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._render("Consuming {}".format(doc), 1)
|
Log.info("Consuming {}".format(doc), Log.COMPONENT_CONSUMER)
|
||||||
|
|
||||||
pngs = self._get_greyscale(doc)
|
pngs = self._get_greyscale(doc)
|
||||||
|
|
||||||
@ -118,7 +117,7 @@ class Consumer(Renderable):
|
|||||||
text = self._get_ocr(pngs)
|
text = self._get_ocr(pngs)
|
||||||
except OCRError:
|
except OCRError:
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
self._render("OCR FAILURE: {}".format(doc), 0)
|
Log.error("OCR FAILURE: {}".format(doc), Log.COMPONENT_CONSUMER)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._store(text, doc)
|
self._store(text, doc)
|
||||||
@ -126,7 +125,10 @@ class Consumer(Renderable):
|
|||||||
|
|
||||||
def _get_greyscale(self, doc):
|
def _get_greyscale(self, doc):
|
||||||
|
|
||||||
self._render(" Generating greyscale image from {}".format(doc), 2)
|
Log.debug(
|
||||||
|
"Generating greyscale image from {}".format(doc),
|
||||||
|
Log.COMPONENT_CONSUMER
|
||||||
|
)
|
||||||
|
|
||||||
i = random.randint(1000000, 9999999)
|
i = random.randint(1000000, 9999999)
|
||||||
png = os.path.join(self.SCRATCH, "{}.png".format(i))
|
png = os.path.join(self.SCRATCH, "{}.png".format(i))
|
||||||
@ -141,7 +143,10 @@ class Consumer(Renderable):
|
|||||||
def _guess_language(self, text):
|
def _guess_language(self, text):
|
||||||
try:
|
try:
|
||||||
guess = langdetect.detect(text)
|
guess = langdetect.detect(text)
|
||||||
self._render(" Language detected: {}".format(guess), 2)
|
Log.debug(
|
||||||
|
"Language detected: {}".format(guess),
|
||||||
|
Log.COMPONENT_CONSUMER
|
||||||
|
)
|
||||||
return guess
|
return guess
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
@ -152,19 +157,19 @@ class Consumer(Renderable):
|
|||||||
simple language detection trial & error.
|
simple language detection trial & error.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._render(" OCRing the document", 2)
|
Log.debug("OCRing the document", Log.COMPONENT_CONSUMER)
|
||||||
|
|
||||||
raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE)
|
raw_text = self._ocr(pngs, self.DEFAULT_OCR_LANGUAGE)
|
||||||
|
|
||||||
guessed_language = self._guess_language(raw_text)
|
guessed_language = self._guess_language(raw_text)
|
||||||
|
|
||||||
if not guessed_language or guessed_language not in ISO639:
|
if not guessed_language or guessed_language not in ISO639:
|
||||||
self._render("Language detection failed!", 0)
|
Log.warning("Language detection failed!", Log.COMPONENT_CONSUMER)
|
||||||
if settings.FORGIVING_OCR:
|
if settings.FORGIVING_OCR:
|
||||||
self._render(
|
Log.warning(
|
||||||
"As FORGIVING_OCR is enabled, we're going to make the best "
|
"As FORGIVING_OCR is enabled, we're going to make the best "
|
||||||
"with what we have.",
|
"with what we have.",
|
||||||
1
|
Log.COMPONENT_CONSUMER
|
||||||
)
|
)
|
||||||
return raw_text
|
return raw_text
|
||||||
raise OCRError
|
raise OCRError
|
||||||
@ -176,12 +181,12 @@ class Consumer(Renderable):
|
|||||||
return self._ocr(pngs, ISO639[guessed_language])
|
return self._ocr(pngs, ISO639[guessed_language])
|
||||||
except pyocr.pyocr.tesseract.TesseractError:
|
except pyocr.pyocr.tesseract.TesseractError:
|
||||||
if settings.FORGIVING_OCR:
|
if settings.FORGIVING_OCR:
|
||||||
self._render(
|
Log.warning(
|
||||||
"OCR for {} failed, but we're going to stick with what "
|
"OCR for {} failed, but we're going to stick with what "
|
||||||
"we've got since FORGIVING_OCR is enabled.".format(
|
"we've got since FORGIVING_OCR is enabled.".format(
|
||||||
guessed_language
|
guessed_language
|
||||||
),
|
),
|
||||||
0
|
Log.COMPONENT_CONSUMER
|
||||||
)
|
)
|
||||||
return raw_text
|
return raw_text
|
||||||
raise OCRError
|
raise OCRError
|
||||||
@ -191,12 +196,12 @@ class Consumer(Renderable):
|
|||||||
Performs a single OCR attempt.
|
Performs a single OCR attempt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._render(" Parsing for {}".format(lang), 2)
|
Log.debug("Parsing for {}".format(lang), Log.COMPONENT_CONSUMER)
|
||||||
|
|
||||||
with Pool(processes=self.THREADS) as pool:
|
with Pool(processes=self.THREADS) as pool:
|
||||||
r = pool.map(image_to_string,
|
r = pool.map(
|
||||||
itertools.product([self], pngs, [lang]))
|
image_to_string, itertools.product([self], pngs, [lang]))
|
||||||
r = "".join(r)
|
r = " ".join(r)
|
||||||
|
|
||||||
# Strip out excess white space to allow matching to go smoother
|
# Strip out excess white space to allow matching to go smoother
|
||||||
return re.sub(r"\s+", " ", r)
|
return re.sub(r"\s+", " ", r)
|
||||||
@ -251,7 +256,7 @@ class Consumer(Renderable):
|
|||||||
|
|
||||||
stats = os.stat(doc)
|
stats = os.stat(doc)
|
||||||
|
|
||||||
self._render(" Saving record to database", 2)
|
Log.debug("Saving record to database", Log.COMPONENT_CONSUMER)
|
||||||
|
|
||||||
document = Document.objects.create(
|
document = Document.objects.create(
|
||||||
sender=sender,
|
sender=sender,
|
||||||
@ -266,12 +271,13 @@ class Consumer(Renderable):
|
|||||||
|
|
||||||
if relevant_tags:
|
if relevant_tags:
|
||||||
tag_names = ", ".join([t.slug for t in relevant_tags])
|
tag_names = ", ".join([t.slug for t in relevant_tags])
|
||||||
self._render(" Tagging with {}".format(tag_names), 2)
|
Log.debug(
|
||||||
|
"Tagging with {}".format(tag_names), Log.COMPONENT_CONSUMER)
|
||||||
document.tags.add(*relevant_tags)
|
document.tags.add(*relevant_tags)
|
||||||
|
|
||||||
with open(doc, "rb") as unencrypted:
|
with open(doc, "rb") as unencrypted:
|
||||||
with open(document.source_path, "wb") as encrypted:
|
with open(document.source_path, "wb") as encrypted:
|
||||||
self._render(" Encrypting", 3)
|
Log.debug("Encrypting", Log.COMPONENT_CONSUMER)
|
||||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||||
|
|
||||||
def _cleanup(self, pngs, doc):
|
def _cleanup(self, pngs, doc):
|
||||||
@ -280,11 +286,9 @@ class Consumer(Renderable):
|
|||||||
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
|
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
|
||||||
|
|
||||||
for f in list(glob.glob(png_glob)) + [doc]:
|
for f in list(glob.glob(png_glob)) + [doc]:
|
||||||
self._render(" Deleting {}".format(f), 2)
|
Log.debug("Deleting {}".format(f), Log.COMPONENT_CONSUMER)
|
||||||
os.unlink(f)
|
os.unlink(f)
|
||||||
|
|
||||||
self._render("", 2)
|
|
||||||
|
|
||||||
def _is_ready(self, doc):
|
def _is_ready(self, doc):
|
||||||
"""
|
"""
|
||||||
Detect whether `doc` is ready to consume or if it's still being written
|
Detect whether `doc` is ready to consume or if it's still being written
|
||||||
|
@ -11,8 +11,9 @@ from dateutil import parser
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
|
from logger.models import Log
|
||||||
|
|
||||||
from .consumer import Consumer
|
from .consumer import Consumer
|
||||||
from .mixins import Renderable
|
|
||||||
from .models import Sender
|
from .models import Sender
|
||||||
|
|
||||||
|
|
||||||
@ -24,7 +25,7 @@ class InvalidMessageError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Message(Renderable):
|
class Message(object):
|
||||||
"""
|
"""
|
||||||
A crude, but simple email message class. We assume that there's a subject
|
A crude, but simple email message class. We assume that there's a subject
|
||||||
and n attachments, and that we don't care about the message body.
|
and n attachments, and that we don't care about the message body.
|
||||||
@ -53,7 +54,8 @@ class Message(Renderable):
|
|||||||
|
|
||||||
self._set_time(message)
|
self._set_time(message)
|
||||||
|
|
||||||
self._render('Fetching email: "{}"'.format(self.subject), 1)
|
Log.info(
|
||||||
|
'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)
|
||||||
|
|
||||||
attachments = []
|
attachments = []
|
||||||
for part in message.walk():
|
for part in message.walk():
|
||||||
@ -132,7 +134,7 @@ class Attachment(object):
|
|||||||
return self.data
|
return self.data
|
||||||
|
|
||||||
|
|
||||||
class MailFetcher(Renderable):
|
class MailFetcher(object):
|
||||||
|
|
||||||
def __init__(self, verbosity=1):
|
def __init__(self, verbosity=1):
|
||||||
|
|
||||||
@ -157,11 +159,14 @@ class MailFetcher(Renderable):
|
|||||||
|
|
||||||
if self._enabled:
|
if self._enabled:
|
||||||
|
|
||||||
self._render("Checking mail", 1)
|
Log.info("Checking mail", Log.COMPONENT_MAIL)
|
||||||
|
|
||||||
for message in self._get_messages():
|
for message in self._get_messages():
|
||||||
|
|
||||||
self._render(' Storing email: "{}"'.format(message.subject), 1)
|
Log.debug(
|
||||||
|
'Storing email: "{}"'.format(message.subject),
|
||||||
|
Log.COMPONENT_MAIL
|
||||||
|
)
|
||||||
|
|
||||||
t = int(time.mktime(message.time.timetuple()))
|
t = int(time.mktime(message.time.timetuple()))
|
||||||
file_name = os.path.join(Consumer.CONSUME, message.file_name)
|
file_name = os.path.join(Consumer.CONSUME, message.file_name)
|
||||||
@ -188,7 +193,7 @@ class MailFetcher(Renderable):
|
|||||||
self._connection.logout()
|
self._connection.logout()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._render(e, 0)
|
Log.error(e, Log.COMPONENT_MAIL)
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
@ -215,7 +220,7 @@ class MailFetcher(Renderable):
|
|||||||
try:
|
try:
|
||||||
message = Message(data[0][1], self.verbosity)
|
message = Message(data[0][1], self.verbosity)
|
||||||
except InvalidMessageError as e:
|
except InvalidMessageError as e:
|
||||||
self._render(e, 0)
|
Log.error(e, Log.COMPONENT_MAIL)
|
||||||
else:
|
else:
|
||||||
self._connection.store(num, "+FLAGS", "\\Deleted")
|
self._connection.store(num, "+FLAGS", "\\Deleted")
|
||||||
|
|
||||||
|
@ -7,10 +7,9 @@ from django.core.management.base import BaseCommand, CommandError
|
|||||||
|
|
||||||
from ...consumer import Consumer, ConsumerError
|
from ...consumer import Consumer, ConsumerError
|
||||||
from ...mail import MailFetcher, MailFetcherError
|
from ...mail import MailFetcher, MailFetcherError
|
||||||
from ...mixins import Renderable
|
|
||||||
|
|
||||||
|
|
||||||
class Command(Renderable, BaseCommand):
|
class Command(BaseCommand):
|
||||||
"""
|
"""
|
||||||
On every iteration of an infinite loop, consume what we can from the
|
On every iteration of an infinite loop, consume what we can from the
|
||||||
consumption directory, and fetch any mail available.
|
consumption directory, and fetch any mail available.
|
||||||
|
@ -46,7 +46,7 @@ class Command(Renderable, BaseCommand):
|
|||||||
|
|
||||||
target = os.path.join(self.target, document.parseable_file_name)
|
target = os.path.join(self.target, document.parseable_file_name)
|
||||||
|
|
||||||
self._render("Exporting: {}".format(target), 1)
|
print("Exporting: {}".format(target))
|
||||||
|
|
||||||
with open(target, "wb") as f:
|
with open(target, "wb") as f:
|
||||||
f.write(GnuPG.decrypted(document.source_file))
|
f.write(GnuPG.decrypted(document.source_file))
|
||||||
|
@ -27,6 +27,5 @@ class Command(Renderable, BaseCommand):
|
|||||||
pk__in=document.tags.values_list("pk", flat=True))
|
pk__in=document.tags.values_list("pk", flat=True))
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
if tag.matches(document.content):
|
if tag.matches(document.content):
|
||||||
self._render(
|
print('Tagging {} with "{}"'.format(document, tag))
|
||||||
'Tagging {} with "{}"'.format(document, tag), 1)
|
|
||||||
document.tags.add(tag)
|
document.tags.add(tag)
|
||||||
|
0
src/logger/__init__.py
Normal file
0
src/logger/__init__.py
Normal file
12
src/logger/admin.py
Normal file
12
src/logger/admin.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from django.contrib import admin
|
||||||
|
|
||||||
|
from .models import Log
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(Log)
class LogAdmin(admin.ModelAdmin):
    """Admin configuration for ``Log`` entries, registered on the default site."""

    # Columns shown for each entry in the change list.
    list_display = ("message", "level", "component")

    # Fields offered as filters alongside the change list.
    list_filter = ("level", "component")
|
5
src/logger/apps.py
Normal file
5
src/logger/apps.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class LoggerConfig(AppConfig):
    """Django application configuration for the ``logger`` app."""

    name = "logger"
|
26
src/logger/migrations/0001_initial.py
Normal file
26
src/logger/migrations/0001_initial.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.9 on 2016-02-14 16:08
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Initial migration for the logger app: creates the ``Log`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Log",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("time", models.DateTimeField(auto_now_add=True)),
                ("message", models.TextField()),
                (
                    "level",
                    models.PositiveIntegerField(
                        choices=[
                            (1, "Error"),
                            (2, "Warning"),
                            (3, "Informational"),
                            (4, "Debugging"),
                        ],
                        default=3,
                    ),
                ),
                (
                    "component",
                    models.PositiveIntegerField(
                        choices=[(1, "Consumer"), (2, "Mail Fetcher")],
                    ),
                ),
            ],
        ),
    ]
|
0
src/logger/migrations/__init__.py
Normal file
0
src/logger/migrations/__init__.py
Normal file
47
src/logger/models.py
Normal file
47
src/logger/models.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Log(models.Model):
    """
    A single log entry persisted to the database.

    Every entry carries a message, a severity level (one of the ``LEVEL_*``
    constants) and the component that emitted it (one of the ``COMPONENT_*``
    constants).  The ``error``, ``warning``, ``info`` and ``debug``
    classmethods create and save an entry at the matching level.
    """

    LEVEL_ERROR = 1
    LEVEL_WARNING = 2
    LEVEL_INFO = 3
    LEVEL_DEBUG = 4
    LEVELS = (
        (LEVEL_ERROR, "Error"),
        (LEVEL_WARNING, "Warning"),
        (LEVEL_INFO, "Informational"),
        (LEVEL_DEBUG, "Debugging"),
    )

    COMPONENT_CONSUMER = 1
    COMPONENT_MAIL = 2
    COMPONENTS = (
        (COMPONENT_CONSUMER, "Consumer"),
        (COMPONENT_MAIL, "Mail Fetcher"),
    )

    # Set once, when the row is first saved.
    time = models.DateTimeField(auto_now_add=True)
    message = models.TextField()
    level = models.PositiveIntegerField(choices=LEVELS, default=LEVEL_INFO)
    component = models.PositiveIntegerField(choices=COMPONENTS)

    def __str__(self):
        """Return ``"[<level label>] <message>"`` for display purposes."""
        return "[{}] {}".format(self.get_level_display(), self.message)

    @classmethod
    def _record(cls, level, message, component):
        """Create and save one entry; shared by the level shortcuts below."""
        cls.objects.create(message=message, level=level, component=component)

    @classmethod
    def error(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_ERROR``."""
        cls._record(cls.LEVEL_ERROR, message, component)

    @classmethod
    def warning(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_WARNING``."""
        cls._record(cls.LEVEL_WARNING, message, component)

    @classmethod
    def info(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_INFO``."""
        cls._record(cls.LEVEL_INFO, message, component)

    @classmethod
    def debug(cls, message, component):
        """Store ``message`` for ``component`` at ``LEVEL_DEBUG``."""
        cls._record(cls.LEVEL_DEBUG, message, component)
|
3
src/logger/tests.py
Normal file
3
src/logger/tests.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
# Create your tests here.
|
3
src/logger/views.py
Normal file
3
src/logger/views.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from django.shortcuts import render
|
||||||
|
|
||||||
|
# Create your views here.
|
@ -31,6 +31,7 @@ ALLOWED_HOSTS = []
|
|||||||
# Application definition
|
# Application definition
|
||||||
|
|
||||||
INSTALLED_APPS = [
|
INSTALLED_APPS = [
|
||||||
|
|
||||||
'django.contrib.admin',
|
'django.contrib.admin',
|
||||||
'django.contrib.auth',
|
'django.contrib.auth',
|
||||||
'django.contrib.contenttypes',
|
'django.contrib.contenttypes',
|
||||||
@ -41,6 +42,8 @@ INSTALLED_APPS = [
|
|||||||
"django_extensions",
|
"django_extensions",
|
||||||
|
|
||||||
"documents",
|
"documents",
|
||||||
|
"logger",
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
MIDDLEWARE_CLASSES = [
|
MIDDLEWARE_CLASSES = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user