rework most of the logging

This commit is contained in:
jonaswinkler 2021-02-05 01:10:29 +01:00
parent 0d3ab3aaf7
commit 431d4fd8e4
18 changed files with 80 additions and 71 deletions

View File

@@ -13,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception):
pass
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.classifier")
def preprocess_content(content):
@@ -115,7 +115,7 @@ class DocumentClassifier(object):
labels_document_type = list()
# Step 1: Extract and preprocess training data from the database.
logging.getLogger(__name__).debug("Gathering data from database...")
logger.debug("Gathering data from database...")
m = hashlib.sha1()
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
preprocessed_content = preprocess_content(doc.content)
@@ -162,7 +162,7 @@ class DocumentClassifier(object):
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1
logging.getLogger(__name__).debug(
logger.debug(
"{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format(
len(data),
@@ -173,7 +173,7 @@
)
# Step 2: vectorize data
logging.getLogger(__name__).debug("Vectorizing data...")
logger.debug("Vectorizing data...")
self.data_vectorizer = CountVectorizer(
analyzer="word",
ngram_range=(1, 2),
@@ -183,7 +183,7 @@
# Step 3: train the classifiers
if num_tags > 0:
logging.getLogger(__name__).debug("Training tags classifier...")
logger.debug("Training tags classifier...")
if num_tags == 1:
# Special case where only one tag has auto:
@@ -202,12 +202,12 @@ class DocumentClassifier(object):
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
else:
self.tags_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no tags. Not training tags classifier."
)
if num_correspondents > 0:
logging.getLogger(__name__).debug(
logger.debug(
"Training correspondent classifier..."
)
self.correspondent_classifier = MLPClassifier(tol=0.01)
@@ -217,13 +217,13 @@
)
else:
self.correspondent_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no correspondents. Not training correspondent "
"classifier."
)
if num_document_types > 0:
logging.getLogger(__name__).debug(
logger.debug(
"Training document type classifier..."
)
self.document_type_classifier = MLPClassifier(tol=0.01)
@@ -233,7 +233,7 @@
)
else:
self.document_type_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no document types. Not training document type "
"classifier."
)

View File

@@ -47,6 +47,8 @@ MESSAGE_FINISHED = "finished"
class Consumer(LoggingMixin):
logging_name = "paperless.consumer"
def _send_progress(self, current_progress, max_progress, status,
message=None, document_id=None):
payload = {

View File

@@ -8,6 +8,9 @@ from django.conf import settings
from django.template.defaultfilters import slugify
logger = logging.getLogger("paperless.filehandling")
class defaultdictNoStr(defaultdict):
def __str__(self):
@@ -140,7 +143,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logging.getLogger(__name__).warning(
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

View File

@@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.writing import AsyncWriter
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.index")
class JsonFormatter(Formatter):

View File

@@ -4,33 +4,21 @@ import uuid
from django.conf import settings
class PaperlessHandler(logging.Handler):
def emit(self, record):
if settings.DISABLE_DBHANDLER:
return
# We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point
from .models import Log
kwargs = {"message": record.msg, "level": record.levelno}
if hasattr(record, "group"):
kwargs["group"] = record.group
Log.objects.create(**kwargs)
class LoggingMixin:
logging_group = None
logging_name = None
def renew_logging_group(self):
self.logging_group = uuid.uuid4()
def log(self, level, message, **kwargs):
target = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(target)
if self.logging_name:
logger = logging.getLogger(self.logging_name)
else:
name = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(name)
getattr(logger, level)(message, extra={
"group": self.logging_group

View File

@@ -20,7 +20,7 @@ from ...file_handling import create_source_path_directory
from ...parsers import get_parser_class_for_mime_type
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.management.archiver")
def handle_document(document_id):

View File

@@ -17,7 +17,7 @@ try:
except ImportError:
INotify = flags = None
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath):
@@ -108,12 +108,7 @@ class Command(BaseCommand):
# This is here primarily for the tests and is irrelevant in production.
stop_flag = False
def __init__(self, *args, **kwargs):
self.logger = logging.getLogger(__name__)
BaseCommand.__init__(self, *args, **kwargs)
self.observer = None
observer = None
def add_arguments(self, parser):
parser.add_argument(
@@ -161,7 +156,7 @@
logger.debug("Consumer exiting.")
def handle_polling(self, directory, recursive):
logging.getLogger(__name__).info(
logger.info(
f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=recursive)
@@ -176,7 +171,7 @@
self.observer.join()
def handle_inotify(self, directory, recursive):
logging.getLogger(__name__).info(
logger.info(
f"Using inotify to watch directory for changes: {directory}")
inotify = INotify()

View File

@@ -7,6 +7,9 @@ from documents.models import Document
from ...signals.handlers import set_correspondent, set_document_type, set_tags
logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand):
help = """
@@ -65,7 +68,7 @@ class Command(BaseCommand):
classifier = load_classifier()
for document in documents:
logging.getLogger(__name__).info(
logger.info(
f"Processing document {document.title}")
if options['correspondent']:

View File

@@ -6,7 +6,7 @@ from fuzzywuzzy import fuzz
from documents.models import MatchingModel, Correspondent, DocumentType, Tag
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.matching")
def log_reason(matching_model, document, reason):

View File

@@ -36,7 +36,7 @@ DATE_REGEX = re.compile(
)
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.parsing")
def is_mime_type_supported(mime_type):
@@ -261,6 +261,8 @@ class DocumentParser(LoggingMixin):
`paperless_tesseract.parsers` for inspiration.
"""
logging_name = "paperless.parsing"
def __init__(self, logging_group, progress_callback=None):
super().__init__()
self.logging_group = logging_group
@@ -316,5 +318,5 @@ class DocumentParser(LoggingMixin):
return self.date
def cleanup(self):
self.log("debug", "Deleting directory {}".format(self.tempdir))
self.log("debug", f"Deleting directory {self.tempdir}")
shutil.rmtree(self.tempdir)

View File

@@ -19,8 +19,7 @@ from ..file_handling import delete_empty_directories, \
from ..models import Document, Tag
def logger(message, group):
logging.getLogger(__name__).debug(message, extra={"group": group})
logger = logging.getLogger("paperless.handlers")
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
@@ -48,23 +47,23 @@ def set_correspondent(sender,
selected = None
if potential_count > 1:
if use_first:
logger(
logger.info(
f"Detected {potential_count} potential correspondents, "
f"so we've opted for {selected}",
logging_group
extra={'group': logging_group}
)
else:
logger(
logger.info(
f"Detected {potential_count} potential correspondents, "
f"not assigning any correspondent",
logging_group
extra={'group': logging_group}
)
return
if selected or replace:
logger(
logger.info(
f"Assigning correspondent {selected} to {document}",
logging_group
extra={'group': logging_group}
)
document.correspondent = selected
@@ -92,23 +91,23 @@ def set_document_type(sender,
if potential_count > 1:
if use_first:
logger(
logger.info(
f"Detected {potential_count} potential document types, "
f"so we've opted for {selected}",
logging_group
extra={'group': logging_group}
)
else:
logger(
logger.info(
f"Detected {potential_count} potential document types, "
f"not assigning any document type",
logging_group
extra={'group': logging_group}
)
return
if selected or replace:
logger(
logger.info(
f"Assigning document type {selected} to {document}",
logging_group
extra={'group': logging_group}
)
document.document_type = selected
@@ -138,9 +137,9 @@ def set_tags(sender,
return
message = 'Tagging "{}" with "{}"'
logger(
logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])),
logging_group
extra={'group': logging_group}
)
document.tags.add(*relevant_tags)
@@ -155,10 +154,10 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
if os.path.isfile(f):
try:
os.unlink(f)
logging.getLogger(__name__).debug(
logger.debug(
f"Deleted file {f}.")
except OSError as e:
logging.getLogger(__name__).warning(
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{f} could not be deleted: {e}"
)
@@ -177,13 +176,13 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
def validate_move(instance, old_path, new_path):
if not os.path.isfile(old_path):
# Can't do anything if the old file does not exist anymore.
logging.getLogger(__name__).fatal(
logger.fatal(
f"Document {str(instance)}: File {old_path} has gone.")
return False
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
logging.getLogger(__name__).warning(
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists.")
return False

View File

@@ -12,6 +12,9 @@ from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import SanityFailedError
logger = logging.getLogger("paperless.tasks")
def index_optimize():
ix = index.open_index()
writer = AsyncWriter(ix)
@@ -45,18 +48,18 @@ def train_classifier():
try:
if classifier.train():
logging.getLogger(__name__).info(
logger.info(
"Saving updated classifier model to {}...".format(
settings.MODEL_FILE)
)
classifier.save_classifier()
else:
logging.getLogger(__name__).debug(
logger.debug(
"Training data unchanged."
)
except Exception as e:
logging.getLogger(__name__).warning(
logger.warning(
"Classifier error: " + str(e)
)

View File

@@ -58,6 +58,9 @@ from .serialisers import (
)
logger = logging.getLogger("paperless.api")
class IndexView(TemplateView):
template_name = "index.html"
@@ -488,7 +491,7 @@ class SearchView(APIView):
try:
doc = Document.objects.get(id=r['id'])
except Document.DoesNotExist:
logging.getLogger(__name__).warning(
logger.warning(
f"Search index returned a non-existing document: "
f"id: {r['id']}, title: {r['title']}. "
f"Search index needs reindex."

View File

@@ -102,6 +102,8 @@ def get_mailbox(server, port, security):
class MailAccountHandler(LoggingMixin):
logging_name = "paperless.mail"
def _correspondent_from_name(self, name):
try:
return Correspondent.objects.get_or_create(name=name)[0]

View File

@@ -4,6 +4,9 @@ from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.models import MailAccount
logger = logging.getLogger("paperless.mail.tasks")
def process_mail_accounts():
total_new_documents = 0
for account in MailAccount.objects.all():
@@ -11,7 +14,7 @@ def process_mail_accounts():
total_new_documents += MailAccountHandler().handle_mail_account(
account)
except MailError as e:
logging.getLogger(__name__).error(
logger.error(
f"Error while processing mail account {account}: {e}",
exc_info=True
)
@@ -27,4 +30,4 @@ def process_mail_account(name):
account = MailAccount.objects.get(name=name)
MailAccountHandler().handle_mail_account(account)
except MailAccount.DoesNotExist:
logging.getLogger(__name__).error(f"Unknown mail acccount: {name}")
logger.error(f"Unknown mail acccount: {name}")

View File

@@ -19,6 +19,8 @@ class RasterisedDocumentParser(DocumentParser):
image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
"""
logging_name = "paperless.parsing.tesseract"
def extract_metadata(self, document_path, mime_type):
namespace_pattern = re.compile(r"\{(.*)\}(.*)")

View File

@@ -11,6 +11,8 @@ class TextDocumentParser(DocumentParser):
This parser directly parses a text document (.txt, .md, or .csv)
"""
logging_name = "paperless.parsing.text"
def get_thumbnail(self, document_path, mime_type):
def read_text():

View File

@@ -14,6 +14,8 @@ class TikaDocumentParser(DocumentParser):
This parser sends documents to a local tika server
"""
logging_name = "paperless.parsing.tika"
def get_thumbnail(self, document_path, mime_type):
if not self.archive_path:
self.archive_path = self.convert_to_pdf(document_path)