mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
rework most of the logging
This commit is contained in:
parent
0d3ab3aaf7
commit
431d4fd8e4
@ -13,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.classifier")
|
||||
|
||||
|
||||
def preprocess_content(content):
|
||||
@ -115,7 +115,7 @@ class DocumentClassifier(object):
|
||||
labels_document_type = list()
|
||||
|
||||
# Step 1: Extract and preprocess training data from the database.
|
||||
logging.getLogger(__name__).debug("Gathering data from database...")
|
||||
logger.debug("Gathering data from database...")
|
||||
m = hashlib.sha1()
|
||||
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
|
||||
preprocessed_content = preprocess_content(doc.content)
|
||||
@ -162,7 +162,7 @@ class DocumentClassifier(object):
|
||||
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
|
||||
num_document_types = len(set(labels_document_type) | {-1}) - 1
|
||||
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"{} documents, {} tag(s), {} correspondent(s), "
|
||||
"{} document type(s).".format(
|
||||
len(data),
|
||||
@ -173,7 +173,7 @@ class DocumentClassifier(object):
|
||||
)
|
||||
|
||||
# Step 2: vectorize data
|
||||
logging.getLogger(__name__).debug("Vectorizing data...")
|
||||
logger.debug("Vectorizing data...")
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
analyzer="word",
|
||||
ngram_range=(1, 2),
|
||||
@ -183,7 +183,7 @@ class DocumentClassifier(object):
|
||||
|
||||
# Step 3: train the classifiers
|
||||
if num_tags > 0:
|
||||
logging.getLogger(__name__).debug("Training tags classifier...")
|
||||
logger.debug("Training tags classifier...")
|
||||
|
||||
if num_tags == 1:
|
||||
# Special case where only one tag has auto:
|
||||
@ -202,12 +202,12 @@ class DocumentClassifier(object):
|
||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||
else:
|
||||
self.tags_classifier = None
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"There are no tags. Not training tags classifier."
|
||||
)
|
||||
|
||||
if num_correspondents > 0:
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"Training correspondent classifier..."
|
||||
)
|
||||
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
||||
@ -217,13 +217,13 @@ class DocumentClassifier(object):
|
||||
)
|
||||
else:
|
||||
self.correspondent_classifier = None
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"There are no correspondents. Not training correspondent "
|
||||
"classifier."
|
||||
)
|
||||
|
||||
if num_document_types > 0:
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"Training document type classifier..."
|
||||
)
|
||||
self.document_type_classifier = MLPClassifier(tol=0.01)
|
||||
@ -233,7 +233,7 @@ class DocumentClassifier(object):
|
||||
)
|
||||
else:
|
||||
self.document_type_classifier = None
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"There are no document types. Not training document type "
|
||||
"classifier."
|
||||
)
|
||||
|
@ -47,6 +47,8 @@ MESSAGE_FINISHED = "finished"
|
||||
|
||||
class Consumer(LoggingMixin):
|
||||
|
||||
logging_name = "paperless.consumer"
|
||||
|
||||
def _send_progress(self, current_progress, max_progress, status,
|
||||
message=None, document_id=None):
|
||||
payload = {
|
||||
|
@ -8,6 +8,9 @@ from django.conf import settings
|
||||
from django.template.defaultfilters import slugify
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.filehandling")
|
||||
|
||||
|
||||
class defaultdictNoStr(defaultdict):
|
||||
|
||||
def __str__(self):
|
||||
@ -140,7 +143,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
|
||||
path = path.strip(os.sep)
|
||||
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logging.getLogger(__name__).warning(
|
||||
logger.warning(
|
||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
|
||||
|
@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.index")
|
||||
|
||||
|
||||
class JsonFormatter(Formatter):
|
||||
|
@ -4,33 +4,21 @@ import uuid
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class PaperlessHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
if settings.DISABLE_DBHANDLER:
|
||||
return
|
||||
|
||||
# We have to do the import here or Django will barf when it tries to
|
||||
# load this because the apps aren't loaded at that point
|
||||
from .models import Log
|
||||
|
||||
kwargs = {"message": record.msg, "level": record.levelno}
|
||||
|
||||
if hasattr(record, "group"):
|
||||
kwargs["group"] = record.group
|
||||
|
||||
Log.objects.create(**kwargs)
|
||||
|
||||
|
||||
class LoggingMixin:
|
||||
|
||||
logging_group = None
|
||||
|
||||
logging_name = None
|
||||
|
||||
def renew_logging_group(self):
|
||||
self.logging_group = uuid.uuid4()
|
||||
|
||||
def log(self, level, message, **kwargs):
|
||||
target = ".".join([self.__class__.__module__, self.__class__.__name__])
|
||||
logger = logging.getLogger(target)
|
||||
if self.logging_name:
|
||||
logger = logging.getLogger(self.logging_name)
|
||||
else:
|
||||
name = ".".join([self.__class__.__module__, self.__class__.__name__])
|
||||
logger = logging.getLogger(name)
|
||||
|
||||
getattr(logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
|
@ -20,7 +20,7 @@ from ...file_handling import create_source_path_directory
|
||||
from ...parsers import get_parser_class_for_mime_type
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.management.archiver")
|
||||
|
||||
|
||||
def handle_document(document_id):
|
||||
|
@ -17,7 +17,7 @@ try:
|
||||
except ImportError:
|
||||
INotify = flags = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.management.consumer")
|
||||
|
||||
|
||||
def _tags_from_path(filepath):
|
||||
@ -108,12 +108,7 @@ class Command(BaseCommand):
|
||||
# This is here primarily for the tests and is irrelevant in production.
|
||||
stop_flag = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
self.observer = None
|
||||
observer = None
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
@ -161,7 +156,7 @@ class Command(BaseCommand):
|
||||
logger.debug("Consumer exiting.")
|
||||
|
||||
def handle_polling(self, directory, recursive):
|
||||
logging.getLogger(__name__).info(
|
||||
logger.info(
|
||||
f"Polling directory for changes: {directory}")
|
||||
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
|
||||
self.observer.schedule(Handler(), directory, recursive=recursive)
|
||||
@ -176,7 +171,7 @@ class Command(BaseCommand):
|
||||
self.observer.join()
|
||||
|
||||
def handle_inotify(self, directory, recursive):
|
||||
logging.getLogger(__name__).info(
|
||||
logger.info(
|
||||
f"Using inotify to watch directory for changes: {directory}")
|
||||
|
||||
inotify = INotify()
|
||||
|
@ -7,6 +7,9 @@ from documents.models import Document
|
||||
from ...signals.handlers import set_correspondent, set_document_type, set_tags
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
||||
help = """
|
||||
@ -65,7 +68,7 @@ class Command(BaseCommand):
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in documents:
|
||||
logging.getLogger(__name__).info(
|
||||
logger.info(
|
||||
f"Processing document {document.title}")
|
||||
|
||||
if options['correspondent']:
|
||||
|
@ -6,7 +6,7 @@ from fuzzywuzzy import fuzz
|
||||
from documents.models import MatchingModel, Correspondent, DocumentType, Tag
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.matching")
|
||||
|
||||
|
||||
def log_reason(matching_model, document, reason):
|
||||
|
@ -36,7 +36,7 @@ DATE_REGEX = re.compile(
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger("paperless.parsing")
|
||||
|
||||
|
||||
def is_mime_type_supported(mime_type):
|
||||
@ -261,6 +261,8 @@ class DocumentParser(LoggingMixin):
|
||||
`paperless_tesseract.parsers` for inspiration.
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing"
|
||||
|
||||
def __init__(self, logging_group, progress_callback=None):
|
||||
super().__init__()
|
||||
self.logging_group = logging_group
|
||||
@ -316,5 +318,5 @@ class DocumentParser(LoggingMixin):
|
||||
return self.date
|
||||
|
||||
def cleanup(self):
|
||||
self.log("debug", "Deleting directory {}".format(self.tempdir))
|
||||
self.log("debug", f"Deleting directory {self.tempdir}")
|
||||
shutil.rmtree(self.tempdir)
|
||||
|
@ -19,8 +19,7 @@ from ..file_handling import delete_empty_directories, \
|
||||
from ..models import Document, Tag
|
||||
|
||||
|
||||
def logger(message, group):
|
||||
logging.getLogger(__name__).debug(message, extra={"group": group})
|
||||
logger = logging.getLogger("paperless.handlers")
|
||||
|
||||
|
||||
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
|
||||
@ -48,23 +47,23 @@ def set_correspondent(sender,
|
||||
selected = None
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential correspondents, "
|
||||
f"so we've opted for {selected}",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
else:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential correspondents, "
|
||||
f"not assigning any correspondent",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
return
|
||||
|
||||
if selected or replace:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
|
||||
document.correspondent = selected
|
||||
@ -92,23 +91,23 @@ def set_document_type(sender,
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential document types, "
|
||||
f"so we've opted for {selected}",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
else:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential document types, "
|
||||
f"not assigning any document type",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
return
|
||||
|
||||
if selected or replace:
|
||||
logger(
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
|
||||
document.document_type = selected
|
||||
@ -138,9 +137,9 @@ def set_tags(sender,
|
||||
return
|
||||
|
||||
message = 'Tagging "{}" with "{}"'
|
||||
logger(
|
||||
logger.info(
|
||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
||||
logging_group
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
|
||||
document.tags.add(*relevant_tags)
|
||||
@ -155,10 +154,10 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
if os.path.isfile(f):
|
||||
try:
|
||||
os.unlink(f)
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
f"Deleted file {f}.")
|
||||
except OSError as e:
|
||||
logging.getLogger(__name__).warning(
|
||||
logger.warning(
|
||||
f"While deleting document {str(instance)}, the file "
|
||||
f"{f} could not be deleted: {e}"
|
||||
)
|
||||
@ -177,13 +176,13 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
def validate_move(instance, old_path, new_path):
|
||||
if not os.path.isfile(old_path):
|
||||
# Can't do anything if the old file does not exist anymore.
|
||||
logging.getLogger(__name__).fatal(
|
||||
logger.fatal(
|
||||
f"Document {str(instance)}: File {old_path} has gone.")
|
||||
return False
|
||||
|
||||
if os.path.isfile(new_path):
|
||||
# Can't do anything if the new file already exists. Skip updating file.
|
||||
logging.getLogger(__name__).warning(
|
||||
logger.warning(
|
||||
f"Document {str(instance)}: Cannot rename file "
|
||||
f"since target path {new_path} already exists.")
|
||||
return False
|
||||
|
@ -12,6 +12,9 @@ from documents.models import Document, Tag, DocumentType, Correspondent
|
||||
from documents.sanity_checker import SanityFailedError
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.tasks")
|
||||
|
||||
|
||||
def index_optimize():
|
||||
ix = index.open_index()
|
||||
writer = AsyncWriter(ix)
|
||||
@ -45,18 +48,18 @@ def train_classifier():
|
||||
|
||||
try:
|
||||
if classifier.train():
|
||||
logging.getLogger(__name__).info(
|
||||
logger.info(
|
||||
"Saving updated classifier model to {}...".format(
|
||||
settings.MODEL_FILE)
|
||||
)
|
||||
classifier.save_classifier()
|
||||
else:
|
||||
logging.getLogger(__name__).debug(
|
||||
logger.debug(
|
||||
"Training data unchanged."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(
|
||||
logger.warning(
|
||||
"Classifier error: " + str(e)
|
||||
)
|
||||
|
||||
|
@ -58,6 +58,9 @@ from .serialisers import (
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.api")
|
||||
|
||||
|
||||
class IndexView(TemplateView):
|
||||
template_name = "index.html"
|
||||
|
||||
@ -488,7 +491,7 @@ class SearchView(APIView):
|
||||
try:
|
||||
doc = Document.objects.get(id=r['id'])
|
||||
except Document.DoesNotExist:
|
||||
logging.getLogger(__name__).warning(
|
||||
logger.warning(
|
||||
f"Search index returned a non-existing document: "
|
||||
f"id: {r['id']}, title: {r['title']}. "
|
||||
f"Search index needs reindex."
|
||||
|
@ -102,6 +102,8 @@ def get_mailbox(server, port, security):
|
||||
|
||||
class MailAccountHandler(LoggingMixin):
|
||||
|
||||
logging_name = "paperless.mail"
|
||||
|
||||
def _correspondent_from_name(self, name):
|
||||
try:
|
||||
return Correspondent.objects.get_or_create(name=name)[0]
|
||||
|
@ -4,6 +4,9 @@ from paperless_mail.mail import MailAccountHandler, MailError
|
||||
from paperless_mail.models import MailAccount
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.mail.tasks")
|
||||
|
||||
|
||||
def process_mail_accounts():
|
||||
total_new_documents = 0
|
||||
for account in MailAccount.objects.all():
|
||||
@ -11,7 +14,7 @@ def process_mail_accounts():
|
||||
total_new_documents += MailAccountHandler().handle_mail_account(
|
||||
account)
|
||||
except MailError as e:
|
||||
logging.getLogger(__name__).error(
|
||||
logger.error(
|
||||
f"Error while processing mail account {account}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
@ -27,4 +30,4 @@ def process_mail_account(name):
|
||||
account = MailAccount.objects.get(name=name)
|
||||
MailAccountHandler().handle_mail_account(account)
|
||||
except MailAccount.DoesNotExist:
|
||||
logging.getLogger(__name__).error(f"Unknown mail acccount: {name}")
|
||||
logger.error(f"Unknown mail acccount: {name}")
|
||||
|
@ -19,6 +19,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing.tesseract"
|
||||
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
namespace_pattern = re.compile(r"\{(.*)\}(.*)")
|
||||
|
||||
|
@ -11,6 +11,8 @@ class TextDocumentParser(DocumentParser):
|
||||
This parser directly parses a text document (.txt, .md, or .csv)
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing.text"
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
|
||||
def read_text():
|
||||
|
@ -14,6 +14,8 @@ class TikaDocumentParser(DocumentParser):
|
||||
This parser sends documents to a local tika server
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing.tika"
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
if not self.archive_path:
|
||||
self.archive_path = self.convert_to_pdf(document_path)
|
||||
|
Loading…
x
Reference in New Issue
Block a user