From 431d4fd8e460bf9e93b912fb5a55b76aaa6933df Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 5 Feb 2021 01:10:29 +0100 Subject: [PATCH] rework most of the logging --- src/documents/classifier.py | 20 +++++----- src/documents/consumer.py | 2 + src/documents/file_handling.py | 5 ++- src/documents/index.py | 2 +- src/documents/loggers.py | 26 ++++--------- .../management/commands/document_archiver.py | 2 +- .../management/commands/document_consumer.py | 13 ++----- .../management/commands/document_retagger.py | 5 ++- src/documents/matching.py | 2 +- src/documents/parsers.py | 6 ++- src/documents/signals/handlers.py | 39 +++++++++---------- src/documents/tasks.py | 9 +++-- src/documents/views.py | 5 ++- src/paperless_mail/mail.py | 2 + src/paperless_mail/tasks.py | 7 +++- src/paperless_tesseract/parsers.py | 2 + src/paperless_text/parsers.py | 2 + src/paperless_tika/parsers.py | 2 + 18 files changed, 80 insertions(+), 71 deletions(-) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 3051b82a5..2acebe04a 100755 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -13,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception): pass -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.classifier") def preprocess_content(content): @@ -115,7 +115,7 @@ class DocumentClassifier(object): labels_document_type = list() # Step 1: Extract and preprocess training data from the database. - logging.getLogger(__name__).debug("Gathering data from database...") + logger.debug("Gathering data from database...") m = hashlib.sha1() for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501 preprocessed_content = preprocess_content(doc.content) @@ -162,7 +162,7 @@ class DocumentClassifier(object): num_correspondents = len(set(labels_correspondent) | {-1}) - 1 num_document_types = len(set(labels_document_type) | {-1}) - 1 - logging.getLogger(__name__).debug( + logger.debug( "{} documents, {} tag(s), {} correspondent(s), " "{} document type(s).".format( len(data), @@ -173,7 +173,7 @@ class DocumentClassifier(object): ) # Step 2: vectorize data - logging.getLogger(__name__).debug("Vectorizing data...") + logger.debug("Vectorizing data...") self.data_vectorizer = CountVectorizer( analyzer="word", ngram_range=(1, 2), @@ -183,7 +183,7 @@ class DocumentClassifier(object): # Step 3: train the classifiers if num_tags > 0: - logging.getLogger(__name__).debug("Training tags classifier...") + logger.debug("Training tags classifier...") if num_tags == 1: # Special case where only one tag has auto: @@ -202,12 +202,12 @@ class DocumentClassifier(object): self.tags_classifier.fit(data_vectorized, labels_tags_vectorized) else: self.tags_classifier = None - logging.getLogger(__name__).debug( + logger.debug( "There are no tags. Not training tags classifier." ) if num_correspondents > 0: - logging.getLogger(__name__).debug( + logger.debug( "Training correspondent classifier..." ) self.correspondent_classifier = MLPClassifier(tol=0.01) @@ -217,13 +217,13 @@ class DocumentClassifier(object): ) else: self.correspondent_classifier = None - logging.getLogger(__name__).debug( + logger.debug( "There are no correspondents. Not training correspondent " "classifier." ) if num_document_types > 0: - logging.getLogger(__name__).debug( + logger.debug( "Training document type classifier..." ) self.document_type_classifier = MLPClassifier(tol=0.01) @@ -233,7 +233,7 @@ class DocumentClassifier(object): ) else: self.document_type_classifier = None - logging.getLogger(__name__).debug( + logger.debug( "There are no document types. Not training document type " "classifier." ) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f8f7576ef..23988c7c1 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -47,6 +47,8 @@ MESSAGE_FINISHED = "finished" class Consumer(LoggingMixin): + logging_name = "paperless.consumer" + def _send_progress(self, current_progress, max_progress, status, message=None, document_id=None): payload = { diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index dd798e8ab..6ee97f5c3 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -8,6 +8,9 @@ from django.conf import settings from django.template.defaultfilters import slugify +logger = logging.getLogger("paperless.filehandling") + + class defaultdictNoStr(defaultdict): def __str__(self): @@ -140,7 +143,7 @@ def generate_filename(doc, counter=0, append_gpg=True): path = path.strip(os.sep) except (ValueError, KeyError, IndexError): - logging.getLogger(__name__).warning( + logger.warning( f"Invalid PAPERLESS_FILENAME_FORMAT: " f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") diff --git a/src/documents/index.py b/src/documents/index.py index 51197e252..bc17daeb1 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin from whoosh.writing import AsyncWriter -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.index") class JsonFormatter(Formatter): diff --git a/src/documents/loggers.py b/src/documents/loggers.py index 863bc0c34..4b0f03552 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -4,33 +4,21 @@ import uuid from django.conf import settings -class PaperlessHandler(logging.Handler): - def emit(self, record): - if settings.DISABLE_DBHANDLER: - return - - # We have to do the import here or Django will barf when it tries to - # load this because the apps aren't loaded at that point - from .models import Log - - kwargs = {"message": record.msg, "level": record.levelno} - - if hasattr(record, "group"): - kwargs["group"] = record.group - - Log.objects.create(**kwargs) - - class LoggingMixin: logging_group = None + logging_name = None + def renew_logging_group(self): self.logging_group = uuid.uuid4() def log(self, level, message, **kwargs): - target = ".".join([self.__class__.__module__, self.__class__.__name__]) - logger = logging.getLogger(target) + if self.logging_name: + logger = logging.getLogger(self.logging_name) + else: + name = ".".join([self.__class__.__module__, self.__class__.__name__]) + logger = logging.getLogger(name) getattr(logger, level)(message, extra={ "group": self.logging_group diff --git a/src/documents/management/commands/document_archiver.py b/src/documents/management/commands/document_archiver.py index 9e0d8fabf..d2ff9c8c2 100644 --- a/src/documents/management/commands/document_archiver.py +++ b/src/documents/management/commands/document_archiver.py @@ -20,7 +20,7 @@ from ...file_handling import create_source_path_directory from ...parsers import get_parser_class_for_mime_type -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.management.archiver") def handle_document(document_id): diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 232a5b23c..c01743628 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -17,7 +17,7 @@ try: except ImportError: INotify = flags = None -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.management.consumer") def _tags_from_path(filepath): @@ -108,12 +108,7 @@ class Command(BaseCommand): # This is here primarily for the tests and is irrelevant in production. stop_flag = False - def __init__(self, *args, **kwargs): - - self.logger = logging.getLogger(__name__) - - BaseCommand.__init__(self, *args, **kwargs) - self.observer = None + observer = None def add_arguments(self, parser): parser.add_argument( @@ -161,7 +156,7 @@ class Command(BaseCommand): logger.debug("Consumer exiting.") def handle_polling(self, directory, recursive): - logging.getLogger(__name__).info( + logger.info( f"Polling directory for changes: {directory}") self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING) self.observer.schedule(Handler(), directory, recursive=recursive) @@ -176,7 +171,7 @@ class Command(BaseCommand): self.observer.join() def handle_inotify(self, directory, recursive): - logging.getLogger(__name__).info( + logger.info( f"Using inotify to watch directory for changes: {directory}") inotify = INotify() diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index a37ab9ef2..854117fdd 100755 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -7,6 +7,9 @@ from documents.models import Document from ...signals.handlers import set_correspondent, set_document_type, set_tags +logger = logging.getLogger("paperless.management.retagger") + + class Command(BaseCommand): help = """ @@ -65,7 +68,7 @@ class Command(BaseCommand): classifier = load_classifier() for document in documents: - logging.getLogger(__name__).info( + logger.info( f"Processing document {document.title}") if options['correspondent']: diff --git a/src/documents/matching.py b/src/documents/matching.py index fa9ce6af3..21102bc98 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -6,7 +6,7 @@ from fuzzywuzzy import fuzz from documents.models import MatchingModel, Correspondent, DocumentType, Tag -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.matching") def log_reason(matching_model, document, reason): diff --git a/src/documents/parsers.py b/src/documents/parsers.py index a2eb14cef..98af4f080 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -36,7 +36,7 @@ DATE_REGEX = re.compile( ) -logger = logging.getLogger(__name__) +logger = logging.getLogger("paperless.parsing") def is_mime_type_supported(mime_type): @@ -261,6 +261,8 @@ class DocumentParser(LoggingMixin): `paperless_tesseract.parsers` for inspiration. """ + logging_name = "paperless.parsing" + def __init__(self, logging_group, progress_callback=None): super().__init__() self.logging_group = logging_group @@ -316,5 +318,5 @@ class DocumentParser(LoggingMixin): return self.date def cleanup(self): - self.log("debug", "Deleting directory {}".format(self.tempdir)) + self.log("debug", f"Deleting directory {self.tempdir}") shutil.rmtree(self.tempdir) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 76c1e76ed..848bcb900 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -19,8 +19,7 @@ from ..file_handling import delete_empty_directories, \ from ..models import Document, Tag -def logger(message, group): - logging.getLogger(__name__).debug(message, extra={"group": group}) +logger = logging.getLogger("paperless.handlers") def add_inbox_tags(sender, document=None, logging_group=None, **kwargs): @@ -48,23 +47,23 @@ def set_correspondent(sender, selected = None if potential_count > 1: if use_first: - logger( + logger.info( f"Detected {potential_count} potential correspondents, " f"so we've opted for {selected}", - logging_group + extra={'group': logging_group} ) else: - logger( + logger.info( f"Detected {potential_count} potential correspondents, " f"not assigning any correspondent", - logging_group + extra={'group': logging_group} ) return if selected or replace: - logger( + logger.info( f"Assigning correspondent {selected} to {document}", - logging_group + extra={'group': logging_group} ) document.correspondent = selected @@ -92,23 +91,23 @@ def set_document_type(sender, if potential_count > 1: if use_first: - logger( + logger.info( f"Detected {potential_count} potential document types, " f"so we've opted for {selected}", - logging_group + extra={'group': logging_group} ) else: - logger( + logger.info( f"Detected {potential_count} potential document types, " f"not assigning any document type", - logging_group + extra={'group': logging_group} ) return if selected or replace: - logger( + logger.info( f"Assigning document type {selected} to {document}", - logging_group + extra={'group': logging_group} ) document.document_type = selected @@ -138,9 +137,9 @@ def set_tags(sender, return message = 'Tagging "{}" with "{}"' - logger( + logger.info( message.format(document, ", ".join([t.name for t in relevant_tags])), - logging_group + extra={'group': logging_group} ) document.tags.add(*relevant_tags) @@ -155,10 +154,10 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): if os.path.isfile(f): try: os.unlink(f) - logging.getLogger(__name__).debug( + logger.debug( f"Deleted file {f}.") except OSError as e: - logging.getLogger(__name__).warning( + logger.warning( f"While deleting document {str(instance)}, the file " f"{f} could not be deleted: {e}" ) @@ -177,13 +176,13 @@ def cleanup_document_deletion(sender, instance, using, **kwargs): def validate_move(instance, old_path, new_path): if not os.path.isfile(old_path): # Can't do anything if the old file does not exist anymore. - logging.getLogger(__name__).fatal( + logger.fatal( f"Document {str(instance)}: File {old_path} has gone.") return False if os.path.isfile(new_path): # Can't do anything if the new file already exists. Skip updating file. - logging.getLogger(__name__).warning( + logger.warning( f"Document {str(instance)}: Cannot rename file " f"since target path {new_path} already exists.") return False diff --git a/src/documents/tasks.py b/src/documents/tasks.py index e0d726d3e..8c7d91585 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -12,6 +12,9 @@ from documents.models import Document, Tag, DocumentType, Correspondent from documents.sanity_checker import SanityFailedError +logger = logging.getLogger("paperless.tasks") + + def index_optimize(): ix = index.open_index() writer = AsyncWriter(ix) @@ -45,18 +48,18 @@ def train_classifier(): try: if classifier.train(): - logging.getLogger(__name__).info( + logger.info( "Saving updated classifier model to {}...".format( settings.MODEL_FILE) ) classifier.save_classifier() else: - logging.getLogger(__name__).debug( + logger.debug( "Training data unchanged." ) except Exception as e: - logging.getLogger(__name__).warning( + logger.warning( "Classifier error: " + str(e) ) diff --git a/src/documents/views.py b/src/documents/views.py index c2b31c996..e413d4b35 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -58,6 +58,9 @@ from .serialisers import ( ) +logger = logging.getLogger("paperless.api") + + class IndexView(TemplateView): template_name = "index.html" @@ -488,7 +491,7 @@ class SearchView(APIView): try: doc = Document.objects.get(id=r['id']) except Document.DoesNotExist: - logging.getLogger(__name__).warning( + logger.warning( f"Search index returned a non-existing document: " f"id: {r['id']}, title: {r['title']}. " f"Search index needs reindex." diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index e1916c1dd..4a62ffd95 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -102,6 +102,8 @@ def get_mailbox(server, port, security): class MailAccountHandler(LoggingMixin): + logging_name = "paperless.mail" + def _correspondent_from_name(self, name): try: return Correspondent.objects.get_or_create(name=name)[0] diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index 68fb859a4..086edef7b 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -4,6 +4,9 @@ from paperless_mail.mail import MailAccountHandler, MailError from paperless_mail.models import MailAccount +logger = logging.getLogger("paperless.mail.tasks") + + def process_mail_accounts(): total_new_documents = 0 for account in MailAccount.objects.all(): @@ -11,7 +14,7 @@ def process_mail_accounts(): total_new_documents += MailAccountHandler().handle_mail_account( account) except MailError as e: - logging.getLogger(__name__).error( + logger.error( f"Error while processing mail account {account}: {e}", exc_info=True ) @@ -27,4 +30,4 @@ def process_mail_account(name): account = MailAccount.objects.get(name=name) MailAccountHandler().handle_mail_account(account) except MailAccount.DoesNotExist: - logging.getLogger(__name__).error(f"Unknown mail acccount: {name}") + logger.error(f"Unknown mail acccount: {name}") diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 5a08c601b..0a976b569 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -19,6 +19,8 @@ class RasterisedDocumentParser(DocumentParser): image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.) """ + logging_name = "paperless.parsing.tesseract" + def extract_metadata(self, document_path, mime_type): namespace_pattern = re.compile(r"\{(.*)\}(.*)") diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py index c1afe07fc..c307bf10b 100644 --- a/src/paperless_text/parsers.py +++ b/src/paperless_text/parsers.py @@ -11,6 +11,8 @@ class TextDocumentParser(DocumentParser): This parser directly parses a text document (.txt, .md, or .csv) """ + logging_name = "paperless.parsing.text" + def get_thumbnail(self, document_path, mime_type): def read_text(): diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index cde6543be..b888af820 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -14,6 +14,8 @@ class TikaDocumentParser(DocumentParser): This parser sends documents to a local tika server """ + logging_name = "paperless.parsing.tika" + def get_thumbnail(self, document_path, mime_type): if not self.archive_path: self.archive_path = self.convert_to_pdf(document_path)