rework most of the logging

This commit is contained in:
jonaswinkler 2021-02-05 01:10:29 +01:00
parent 0d3ab3aaf7
commit 431d4fd8e4
18 changed files with 80 additions and 71 deletions

View File

@ -13,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception):
pass pass
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.classifier")
def preprocess_content(content): def preprocess_content(content):
@ -115,7 +115,7 @@ class DocumentClassifier(object):
labels_document_type = list() labels_document_type = list()
# Step 1: Extract and preprocess training data from the database. # Step 1: Extract and preprocess training data from the database.
logging.getLogger(__name__).debug("Gathering data from database...") logger.debug("Gathering data from database...")
m = hashlib.sha1() m = hashlib.sha1()
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501 for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
preprocessed_content = preprocess_content(doc.content) preprocessed_content = preprocess_content(doc.content)
@ -162,7 +162,7 @@ class DocumentClassifier(object):
num_correspondents = len(set(labels_correspondent) | {-1}) - 1 num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1 num_document_types = len(set(labels_document_type) | {-1}) - 1
logging.getLogger(__name__).debug( logger.debug(
"{} documents, {} tag(s), {} correspondent(s), " "{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format( "{} document type(s).".format(
len(data), len(data),
@ -173,7 +173,7 @@ class DocumentClassifier(object):
) )
# Step 2: vectorize data # Step 2: vectorize data
logging.getLogger(__name__).debug("Vectorizing data...") logger.debug("Vectorizing data...")
self.data_vectorizer = CountVectorizer( self.data_vectorizer = CountVectorizer(
analyzer="word", analyzer="word",
ngram_range=(1, 2), ngram_range=(1, 2),
@ -183,7 +183,7 @@ class DocumentClassifier(object):
# Step 3: train the classifiers # Step 3: train the classifiers
if num_tags > 0: if num_tags > 0:
logging.getLogger(__name__).debug("Training tags classifier...") logger.debug("Training tags classifier...")
if num_tags == 1: if num_tags == 1:
# Special case where only one tag has auto: # Special case where only one tag has auto:
@ -202,12 +202,12 @@ class DocumentClassifier(object):
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized) self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
else: else:
self.tags_classifier = None self.tags_classifier = None
logging.getLogger(__name__).debug( logger.debug(
"There are no tags. Not training tags classifier." "There are no tags. Not training tags classifier."
) )
if num_correspondents > 0: if num_correspondents > 0:
logging.getLogger(__name__).debug( logger.debug(
"Training correspondent classifier..." "Training correspondent classifier..."
) )
self.correspondent_classifier = MLPClassifier(tol=0.01) self.correspondent_classifier = MLPClassifier(tol=0.01)
@ -217,13 +217,13 @@ class DocumentClassifier(object):
) )
else: else:
self.correspondent_classifier = None self.correspondent_classifier = None
logging.getLogger(__name__).debug( logger.debug(
"There are no correspondents. Not training correspondent " "There are no correspondents. Not training correspondent "
"classifier." "classifier."
) )
if num_document_types > 0: if num_document_types > 0:
logging.getLogger(__name__).debug( logger.debug(
"Training document type classifier..." "Training document type classifier..."
) )
self.document_type_classifier = MLPClassifier(tol=0.01) self.document_type_classifier = MLPClassifier(tol=0.01)
@ -233,7 +233,7 @@ class DocumentClassifier(object):
) )
else: else:
self.document_type_classifier = None self.document_type_classifier = None
logging.getLogger(__name__).debug( logger.debug(
"There are no document types. Not training document type " "There are no document types. Not training document type "
"classifier." "classifier."
) )

View File

@ -47,6 +47,8 @@ MESSAGE_FINISHED = "finished"
class Consumer(LoggingMixin): class Consumer(LoggingMixin):
logging_name = "paperless.consumer"
def _send_progress(self, current_progress, max_progress, status, def _send_progress(self, current_progress, max_progress, status,
message=None, document_id=None): message=None, document_id=None):
payload = { payload = {

View File

@ -8,6 +8,9 @@ from django.conf import settings
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
logger = logging.getLogger("paperless.filehandling")
class defaultdictNoStr(defaultdict): class defaultdictNoStr(defaultdict):
def __str__(self): def __str__(self):
@ -140,7 +143,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
path = path.strip(os.sep) path = path.strip(os.sep)
except (ValueError, KeyError, IndexError): except (ValueError, KeyError, IndexError):
logging.getLogger(__name__).warning( logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: " f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

View File

@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.index")
class JsonFormatter(Formatter): class JsonFormatter(Formatter):

View File

@ -4,33 +4,21 @@ import uuid
from django.conf import settings from django.conf import settings
class PaperlessHandler(logging.Handler):
def emit(self, record):
if settings.DISABLE_DBHANDLER:
return
# We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point
from .models import Log
kwargs = {"message": record.msg, "level": record.levelno}
if hasattr(record, "group"):
kwargs["group"] = record.group
Log.objects.create(**kwargs)
class LoggingMixin: class LoggingMixin:
logging_group = None logging_group = None
logging_name = None
def renew_logging_group(self): def renew_logging_group(self):
self.logging_group = uuid.uuid4() self.logging_group = uuid.uuid4()
def log(self, level, message, **kwargs): def log(self, level, message, **kwargs):
target = ".".join([self.__class__.__module__, self.__class__.__name__]) if self.logging_name:
logger = logging.getLogger(target) logger = logging.getLogger(self.logging_name)
else:
name = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(name)
getattr(logger, level)(message, extra={ getattr(logger, level)(message, extra={
"group": self.logging_group "group": self.logging_group

View File

@ -20,7 +20,7 @@ from ...file_handling import create_source_path_directory
from ...parsers import get_parser_class_for_mime_type from ...parsers import get_parser_class_for_mime_type
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.management.archiver")
def handle_document(document_id): def handle_document(document_id):

View File

@ -17,7 +17,7 @@ try:
except ImportError: except ImportError:
INotify = flags = None INotify = flags = None
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath): def _tags_from_path(filepath):
@ -108,12 +108,7 @@ class Command(BaseCommand):
# This is here primarily for the tests and is irrelevant in production. # This is here primarily for the tests and is irrelevant in production.
stop_flag = False stop_flag = False
def __init__(self, *args, **kwargs): observer = None
self.logger = logging.getLogger(__name__)
BaseCommand.__init__(self, *args, **kwargs)
self.observer = None
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(
@ -161,7 +156,7 @@ class Command(BaseCommand):
logger.debug("Consumer exiting.") logger.debug("Consumer exiting.")
def handle_polling(self, directory, recursive): def handle_polling(self, directory, recursive):
logging.getLogger(__name__).info( logger.info(
f"Polling directory for changes: {directory}") f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING) self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=recursive) self.observer.schedule(Handler(), directory, recursive=recursive)
@ -176,7 +171,7 @@ class Command(BaseCommand):
self.observer.join() self.observer.join()
def handle_inotify(self, directory, recursive): def handle_inotify(self, directory, recursive):
logging.getLogger(__name__).info( logger.info(
f"Using inotify to watch directory for changes: {directory}") f"Using inotify to watch directory for changes: {directory}")
inotify = INotify() inotify = INotify()

View File

@ -7,6 +7,9 @@ from documents.models import Document
from ...signals.handlers import set_correspondent, set_document_type, set_tags from ...signals.handlers import set_correspondent, set_document_type, set_tags
logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = """
@ -65,7 +68,7 @@ class Command(BaseCommand):
classifier = load_classifier() classifier = load_classifier()
for document in documents: for document in documents:
logging.getLogger(__name__).info( logger.info(
f"Processing document {document.title}") f"Processing document {document.title}")
if options['correspondent']: if options['correspondent']:

View File

@ -6,7 +6,7 @@ from fuzzywuzzy import fuzz
from documents.models import MatchingModel, Correspondent, DocumentType, Tag from documents.models import MatchingModel, Correspondent, DocumentType, Tag
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.matching")
def log_reason(matching_model, document, reason): def log_reason(matching_model, document, reason):

View File

@ -36,7 +36,7 @@ DATE_REGEX = re.compile(
) )
logger = logging.getLogger(__name__) logger = logging.getLogger("paperless.parsing")
def is_mime_type_supported(mime_type): def is_mime_type_supported(mime_type):
@ -261,6 +261,8 @@ class DocumentParser(LoggingMixin):
`paperless_tesseract.parsers` for inspiration. `paperless_tesseract.parsers` for inspiration.
""" """
logging_name = "paperless.parsing"
def __init__(self, logging_group, progress_callback=None): def __init__(self, logging_group, progress_callback=None):
super().__init__() super().__init__()
self.logging_group = logging_group self.logging_group = logging_group
@ -316,5 +318,5 @@ class DocumentParser(LoggingMixin):
return self.date return self.date
def cleanup(self): def cleanup(self):
self.log("debug", "Deleting directory {}".format(self.tempdir)) self.log("debug", f"Deleting directory {self.tempdir}")
shutil.rmtree(self.tempdir) shutil.rmtree(self.tempdir)

View File

@ -19,8 +19,7 @@ from ..file_handling import delete_empty_directories, \
from ..models import Document, Tag from ..models import Document, Tag
def logger(message, group): logger = logging.getLogger("paperless.handlers")
logging.getLogger(__name__).debug(message, extra={"group": group})
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs): def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
@ -48,23 +47,23 @@ def set_correspondent(sender,
selected = None selected = None
if potential_count > 1: if potential_count > 1:
if use_first: if use_first:
logger( logger.info(
f"Detected {potential_count} potential correspondents, " f"Detected {potential_count} potential correspondents, "
f"so we've opted for {selected}", f"so we've opted for {selected}",
logging_group extra={'group': logging_group}
) )
else: else:
logger( logger.info(
f"Detected {potential_count} potential correspondents, " f"Detected {potential_count} potential correspondents, "
f"not assigning any correspondent", f"not assigning any correspondent",
logging_group extra={'group': logging_group}
) )
return return
if selected or replace: if selected or replace:
logger( logger.info(
f"Assigning correspondent {selected} to {document}", f"Assigning correspondent {selected} to {document}",
logging_group extra={'group': logging_group}
) )
document.correspondent = selected document.correspondent = selected
@ -92,23 +91,23 @@ def set_document_type(sender,
if potential_count > 1: if potential_count > 1:
if use_first: if use_first:
logger( logger.info(
f"Detected {potential_count} potential document types, " f"Detected {potential_count} potential document types, "
f"so we've opted for {selected}", f"so we've opted for {selected}",
logging_group extra={'group': logging_group}
) )
else: else:
logger( logger.info(
f"Detected {potential_count} potential document types, " f"Detected {potential_count} potential document types, "
f"not assigning any document type", f"not assigning any document type",
logging_group extra={'group': logging_group}
) )
return return
if selected or replace: if selected or replace:
logger( logger.info(
f"Assigning document type {selected} to {document}", f"Assigning document type {selected} to {document}",
logging_group extra={'group': logging_group}
) )
document.document_type = selected document.document_type = selected
@ -138,9 +137,9 @@ def set_tags(sender,
return return
message = 'Tagging "{}" with "{}"' message = 'Tagging "{}" with "{}"'
logger( logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])), message.format(document, ", ".join([t.name for t in relevant_tags])),
logging_group extra={'group': logging_group}
) )
document.tags.add(*relevant_tags) document.tags.add(*relevant_tags)
@ -155,10 +154,10 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
if os.path.isfile(f): if os.path.isfile(f):
try: try:
os.unlink(f) os.unlink(f)
logging.getLogger(__name__).debug( logger.debug(
f"Deleted file {f}.") f"Deleted file {f}.")
except OSError as e: except OSError as e:
logging.getLogger(__name__).warning( logger.warning(
f"While deleting document {str(instance)}, the file " f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}" f"{f} could not be deleted: {e}"
) )
@ -177,13 +176,13 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
def validate_move(instance, old_path, new_path): def validate_move(instance, old_path, new_path):
if not os.path.isfile(old_path): if not os.path.isfile(old_path):
# Can't do anything if the old file does not exist anymore. # Can't do anything if the old file does not exist anymore.
logging.getLogger(__name__).fatal( logger.fatal(
f"Document {str(instance)}: File {old_path} has gone.") f"Document {str(instance)}: File {old_path} has gone.")
return False return False
if os.path.isfile(new_path): if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file. # Can't do anything if the new file already exists. Skip updating file.
logging.getLogger(__name__).warning( logger.warning(
f"Document {str(instance)}: Cannot rename file " f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists.") f"since target path {new_path} already exists.")
return False return False

View File

@ -12,6 +12,9 @@ from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import SanityFailedError from documents.sanity_checker import SanityFailedError
logger = logging.getLogger("paperless.tasks")
def index_optimize(): def index_optimize():
ix = index.open_index() ix = index.open_index()
writer = AsyncWriter(ix) writer = AsyncWriter(ix)
@ -45,18 +48,18 @@ def train_classifier():
try: try:
if classifier.train(): if classifier.train():
logging.getLogger(__name__).info( logger.info(
"Saving updated classifier model to {}...".format( "Saving updated classifier model to {}...".format(
settings.MODEL_FILE) settings.MODEL_FILE)
) )
classifier.save_classifier() classifier.save_classifier()
else: else:
logging.getLogger(__name__).debug( logger.debug(
"Training data unchanged." "Training data unchanged."
) )
except Exception as e: except Exception as e:
logging.getLogger(__name__).warning( logger.warning(
"Classifier error: " + str(e) "Classifier error: " + str(e)
) )

View File

@ -58,6 +58,9 @@ from .serialisers import (
) )
logger = logging.getLogger("paperless.api")
class IndexView(TemplateView): class IndexView(TemplateView):
template_name = "index.html" template_name = "index.html"
@ -488,7 +491,7 @@ class SearchView(APIView):
try: try:
doc = Document.objects.get(id=r['id']) doc = Document.objects.get(id=r['id'])
except Document.DoesNotExist: except Document.DoesNotExist:
logging.getLogger(__name__).warning( logger.warning(
f"Search index returned a non-existing document: " f"Search index returned a non-existing document: "
f"id: {r['id']}, title: {r['title']}. " f"id: {r['id']}, title: {r['title']}. "
f"Search index needs reindex." f"Search index needs reindex."

View File

@ -102,6 +102,8 @@ def get_mailbox(server, port, security):
class MailAccountHandler(LoggingMixin): class MailAccountHandler(LoggingMixin):
logging_name = "paperless.mail"
def _correspondent_from_name(self, name): def _correspondent_from_name(self, name):
try: try:
return Correspondent.objects.get_or_create(name=name)[0] return Correspondent.objects.get_or_create(name=name)[0]

View File

@ -4,6 +4,9 @@ from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
logger = logging.getLogger("paperless.mail.tasks")
def process_mail_accounts(): def process_mail_accounts():
total_new_documents = 0 total_new_documents = 0
for account in MailAccount.objects.all(): for account in MailAccount.objects.all():
@ -11,7 +14,7 @@ def process_mail_accounts():
total_new_documents += MailAccountHandler().handle_mail_account( total_new_documents += MailAccountHandler().handle_mail_account(
account) account)
except MailError as e: except MailError as e:
logging.getLogger(__name__).error( logger.error(
f"Error while processing mail account {account}: {e}", f"Error while processing mail account {account}: {e}",
exc_info=True exc_info=True
) )
@ -27,4 +30,4 @@ def process_mail_account(name):
account = MailAccount.objects.get(name=name) account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account) MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist: except MailAccount.DoesNotExist:
logging.getLogger(__name__).error(f"Unknown mail acccount: {name}") logger.error(f"Unknown mail acccount: {name}")

View File

@ -19,6 +19,8 @@ class RasterisedDocumentParser(DocumentParser):
image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.) image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
""" """
logging_name = "paperless.parsing.tesseract"
def extract_metadata(self, document_path, mime_type): def extract_metadata(self, document_path, mime_type):
namespace_pattern = re.compile(r"\{(.*)\}(.*)") namespace_pattern = re.compile(r"\{(.*)\}(.*)")

View File

@ -11,6 +11,8 @@ class TextDocumentParser(DocumentParser):
This parser directly parses a text document (.txt, .md, or .csv) This parser directly parses a text document (.txt, .md, or .csv)
""" """
logging_name = "paperless.parsing.text"
def get_thumbnail(self, document_path, mime_type): def get_thumbnail(self, document_path, mime_type):
def read_text(): def read_text():

View File

@ -14,6 +14,8 @@ class TikaDocumentParser(DocumentParser):
This parser sends documents to a local tika server This parser sends documents to a local tika server
""" """
logging_name = "paperless.parsing.tika"
def get_thumbnail(self, document_path, mime_type): def get_thumbnail(self, document_path, mime_type):
if not self.archive_path: if not self.archive_path:
self.archive_path = self.convert_to_pdf(document_path) self.archive_path = self.convert_to_pdf(document_path)