mirror of https://github.com/paperless-ngx/paperless-ngx.git
	rework most of the logging
@@ -13,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception):
     pass
 
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.classifier")
 
 
 def preprocess_content(content):
@@ -115,7 +115,7 @@ class DocumentClassifier(object):
         labels_document_type = list()
 
         # Step 1: Extract and preprocess training data from the database.
-        logging.getLogger(__name__).debug("Gathering data from database...")
+        logger.debug("Gathering data from database...")
         m = hashlib.sha1()
         for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True):  # NOQA: E501
             preprocessed_content = preprocess_content(doc.content)
@@ -162,7 +162,7 @@ class DocumentClassifier(object):
         num_correspondents = len(set(labels_correspondent) | {-1}) - 1
         num_document_types = len(set(labels_document_type) | {-1}) - 1
 
-        logging.getLogger(__name__).debug(
+        logger.debug(
             "{} documents, {} tag(s), {} correspondent(s), "
             "{} document type(s).".format(
                 len(data),
@@ -173,7 +173,7 @@ class DocumentClassifier(object):
         )
 
         # Step 2: vectorize data
-        logging.getLogger(__name__).debug("Vectorizing data...")
+        logger.debug("Vectorizing data...")
         self.data_vectorizer = CountVectorizer(
             analyzer="word",
             ngram_range=(1, 2),
@@ -183,7 +183,7 @@ class DocumentClassifier(object):
 
         # Step 3: train the classifiers
         if num_tags > 0:
-            logging.getLogger(__name__).debug("Training tags classifier...")
+            logger.debug("Training tags classifier...")
 
             if num_tags == 1:
                 # Special case where only one tag has auto:
@@ -202,12 +202,12 @@ class DocumentClassifier(object):
             self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
         else:
             self.tags_classifier = None
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "There are no tags. Not training tags classifier."
             )
 
         if num_correspondents > 0:
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "Training correspondent classifier..."
             )
             self.correspondent_classifier = MLPClassifier(tol=0.01)
@@ -217,13 +217,13 @@ class DocumentClassifier(object):
             )
         else:
             self.correspondent_classifier = None
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "There are no correspondents. Not training correspondent "
                 "classifier."
             )
 
         if num_document_types > 0:
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "Training document type classifier..."
             )
             self.document_type_classifier = MLPClassifier(tol=0.01)
@@ -233,7 +233,7 @@ class DocumentClassifier(object):
             )
         else:
             self.document_type_classifier = None
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "There are no document types. Not training document type "
                 "classifier."
             )
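This is the pattern the commit applies across the codebase: instead of calling `logging.getLogger(__name__)` at every log site, each module creates one named logger under a shared `paperless.` namespace and reuses it. A minimal sketch of the idea (the `train()` wrapper here is only illustrative, not code from the repository):

```python
import logging

# One module-level logger with an explicit, stable name instead of __name__.
logger = logging.getLogger("paperless.classifier")


def train():
    # Every log call in the module goes through the shared logger object,
    # so all messages land under the "paperless.classifier" name.
    logger.debug("Gathering data from database...")
```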
@@ -47,6 +47,8 @@ MESSAGE_FINISHED = "finished"
 
 class Consumer(LoggingMixin):
 
+    logging_name = "paperless.consumer"
+
     def _send_progress(self, current_progress, max_progress, status,
                        message=None, document_id=None):
         payload = {
@@ -8,6 +8,9 @@ from django.conf import settings
 from django.template.defaultfilters import slugify
 
 
+logger = logging.getLogger("paperless.filehandling")
+
+
 class defaultdictNoStr(defaultdict):
 
     def __str__(self):
@@ -140,7 +143,7 @@ def generate_filename(doc, counter=0, append_gpg=True):
             path = path.strip(os.sep)
 
     except (ValueError, KeyError, IndexError):
-        logging.getLogger(__name__).warning(
+        logger.warning(
             f"Invalid PAPERLESS_FILENAME_FORMAT: "
             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
 
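For context, the hunk above logs a warning and falls back to a default name when the configured filename template is invalid. A standalone sketch of that behaviour, with a made-up template value and fallback name (not taken from the repository):

```python
import logging

logger = logging.getLogger("paperless.filehandling")

# Hypothetical stand-in for settings.PAPERLESS_FILENAME_FORMAT with a typo
# in one of the placeholders ("titel" instead of "title").
filename_format = "{correspondent}/{titel}"

try:
    path = filename_format.format(correspondent="ACME", title="Invoice")
except (ValueError, KeyError, IndexError):
    # Same strategy as generate_filename() above: warn, then use a default.
    logger.warning(
        f"Invalid PAPERLESS_FILENAME_FORMAT: {filename_format}, "
        f"falling back to default")
    path = "Invoice"
```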
@@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin
 from whoosh.writing import AsyncWriter
 
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.index")
 
 
 class JsonFormatter(Formatter):
@@ -4,33 +4,21 @@ import uuid
 from django.conf import settings
 
 
-class PaperlessHandler(logging.Handler):
-    def emit(self, record):
-        if settings.DISABLE_DBHANDLER:
-            return
-
-        # We have to do the import here or Django will barf when it tries to
-        # load this because the apps aren't loaded at that point
-        from .models import Log
-
-        kwargs = {"message": record.msg, "level": record.levelno}
-
-        if hasattr(record, "group"):
-            kwargs["group"] = record.group
-
-        Log.objects.create(**kwargs)
-
-
 class LoggingMixin:
 
     logging_group = None
 
+    logging_name = None
+
     def renew_logging_group(self):
         self.logging_group = uuid.uuid4()
 
     def log(self, level, message, **kwargs):
-        target = ".".join([self.__class__.__module__, self.__class__.__name__])
-        logger = logging.getLogger(target)
+        if self.logging_name:
+            logger = logging.getLogger(self.logging_name)
+        else:
+            name = ".".join([self.__class__.__module__, self.__class__.__name__])
+            logger = logging.getLogger(name)
 
         getattr(logger, level)(message, extra={
             "group": self.logging_group
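This is the core of the rework: the database log handler (`PaperlessHandler`) is removed, and `LoggingMixin` now prefers an explicit `logging_name` over the class's module path, while still attaching the per-run `logging_group` UUID via `extra`. A condensed, self-contained sketch of how a consumer-style class plugs into the reworked mixin (simplified for illustration, not the verbatim file):

```python
import logging
import uuid


class LoggingMixin:
    logging_group = None
    logging_name = None

    def renew_logging_group(self):
        # One UUID per processing run so related records can be grouped later.
        self.logging_group = uuid.uuid4()

    def log(self, level, message):
        if self.logging_name:
            logger = logging.getLogger(self.logging_name)
        else:
            name = ".".join([self.__class__.__module__,
                             self.__class__.__name__])
            logger = logging.getLogger(name)
        # The group travels on the record as record.group.
        getattr(logger, level)(message, extra={"group": self.logging_group})


class Consumer(LoggingMixin):
    # As in the diff above: all consumer messages use "paperless.consumer".
    logging_name = "paperless.consumer"


logging.basicConfig(level=logging.INFO)
consumer = Consumer()
consumer.renew_logging_group()
consumer.log("info", "Consuming document.pdf")
```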
@@ -20,7 +20,7 @@ from ...file_handling import create_source_path_directory
 from ...parsers import get_parser_class_for_mime_type
 
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.management.archiver")
 
 
 def handle_document(document_id):
@@ -17,7 +17,7 @@ try:
 except ImportError:
     INotify = flags = None
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.management.consumer")
 
 
 def _tags_from_path(filepath):
@@ -108,12 +108,7 @@ class Command(BaseCommand):
     # This is here primarily for the tests and is irrelevant in production.
     stop_flag = False
 
-    def __init__(self, *args, **kwargs):
-
-        self.logger = logging.getLogger(__name__)
-
-        BaseCommand.__init__(self, *args, **kwargs)
-        self.observer = None
+    observer = None
 
     def add_arguments(self, parser):
         parser.add_argument(
@@ -161,7 +156,7 @@ class Command(BaseCommand):
         logger.debug("Consumer exiting.")
 
     def handle_polling(self, directory, recursive):
-        logging.getLogger(__name__).info(
+        logger.info(
             f"Polling directory for changes: {directory}")
         self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
         self.observer.schedule(Handler(), directory, recursive=recursive)
@@ -176,7 +171,7 @@ class Command(BaseCommand):
         self.observer.join()
 
     def handle_inotify(self, directory, recursive):
-        logging.getLogger(__name__).info(
+        logger.info(
             f"Using inotify to watch directory for changes: {directory}")
 
         inotify = INotify()
@@ -7,6 +7,9 @@ from documents.models import Document
 from ...signals.handlers import set_correspondent, set_document_type, set_tags
 
 
+logger = logging.getLogger("paperless.management.retagger")
+
+
 class Command(BaseCommand):
 
     help = """
@@ -65,7 +68,7 @@ class Command(BaseCommand):
         classifier = load_classifier()
 
         for document in documents:
-            logging.getLogger(__name__).info(
+            logger.info(
                 f"Processing document {document.title}")
 
             if options['correspondent']:
@@ -6,7 +6,7 @@ from fuzzywuzzy import fuzz
 from documents.models import MatchingModel, Correspondent, DocumentType, Tag
 
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.matching")
 
 
 def log_reason(matching_model, document, reason):
@@ -36,7 +36,7 @@ DATE_REGEX = re.compile(
 )
 
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("paperless.parsing")
 
 
 def is_mime_type_supported(mime_type):
@@ -261,6 +261,8 @@ class DocumentParser(LoggingMixin):
     `paperless_tesseract.parsers` for inspiration.
     """
 
+    logging_name = "paperless.parsing"
+
     def __init__(self, logging_group, progress_callback=None):
         super().__init__()
         self.logging_group = logging_group
@@ -316,5 +318,5 @@ class DocumentParser(LoggingMixin):
         return self.date
 
     def cleanup(self):
-        self.log("debug", "Deleting directory {}".format(self.tempdir))
+        self.log("debug", f"Deleting directory {self.tempdir}")
         shutil.rmtree(self.tempdir)
@@ -19,8 +19,7 @@ from ..file_handling import delete_empty_directories, \
 from ..models import Document, Tag
 
 
-def logger(message, group):
-    logging.getLogger(__name__).debug(message, extra={"group": group})
+logger = logging.getLogger("paperless.handlers")
 
 
 def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
@@ -48,23 +47,23 @@ def set_correspondent(sender,
         selected = None
     if potential_count > 1:
         if use_first:
-            logger(
+            logger.info(
                 f"Detected {potential_count} potential correspondents, "
                 f"so we've opted for {selected}",
-                logging_group
+                extra={'group': logging_group}
             )
         else:
-            logger(
+            logger.info(
                 f"Detected {potential_count} potential correspondents, "
                 f"not assigning any correspondent",
-                logging_group
+                extra={'group': logging_group}
             )
             return
 
     if selected or replace:
-        logger(
+        logger.info(
             f"Assigning correspondent {selected} to {document}",
-            logging_group
+            extra={'group': logging_group}
         )
 
         document.correspondent = selected
@@ -92,23 +91,23 @@ def set_document_type(sender,
 
     if potential_count > 1:
         if use_first:
-            logger(
+            logger.info(
                 f"Detected {potential_count} potential document types, "
                 f"so we've opted for {selected}",
-                logging_group
+                extra={'group': logging_group}
             )
         else:
-            logger(
+            logger.info(
                 f"Detected {potential_count} potential document types, "
                 f"not assigning any document type",
-                logging_group
+                extra={'group': logging_group}
             )
             return
 
     if selected or replace:
-        logger(
+        logger.info(
             f"Assigning document type {selected} to {document}",
-            logging_group
+            extra={'group': logging_group}
         )
 
         document.document_type = selected
@@ -138,9 +137,9 @@ def set_tags(sender,
         return
 
     message = 'Tagging "{}" with "{}"'
-    logger(
+    logger.info(
         message.format(document, ", ".join([t.name for t in relevant_tags])),
-        logging_group
+        extra={'group': logging_group}
     )
 
     document.tags.add(*relevant_tags)
@@ -155,10 +154,10 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
             if os.path.isfile(f):
                 try:
                     os.unlink(f)
-                    logging.getLogger(__name__).debug(
+                    logger.debug(
                         f"Deleted file {f}.")
                 except OSError as e:
-                    logging.getLogger(__name__).warning(
+                    logger.warning(
                         f"While deleting document {str(instance)}, the file "
                         f"{f} could not be deleted: {e}"
                     )
@@ -177,13 +176,13 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
 def validate_move(instance, old_path, new_path):
     if not os.path.isfile(old_path):
         # Can't do anything if the old file does not exist anymore.
-        logging.getLogger(__name__).fatal(
+        logger.fatal(
             f"Document {str(instance)}: File {old_path} has gone.")
         return False
 
     if os.path.isfile(new_path):
         # Can't do anything if the new file already exists. Skip updating file.
-        logging.getLogger(__name__).warning(
+        logger.warning(
             f"Document {str(instance)}: Cannot rename file "
             f"since target path {new_path} already exists.")
         return False
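The signal handlers above now pass the processing run's UUID through the standard `extra` mechanism instead of a home-grown `logger(message, group)` helper. A hypothetical sketch, not part of this commit, of how a filter can make the `group` attribute safe to rely on downstream, for example in a formatter that reads `record.group`:

```python
import logging
import uuid


class GroupFilter(logging.Filter):
    """Hypothetical filter: guarantee every record has a 'group' attribute,
    even for messages logged without extra={'group': ...}."""

    def filter(self, record):
        if not hasattr(record, "group"):
            record.group = None
        return True


logging.basicConfig(level=logging.INFO,
                    format="[%(name)s] [group=%(group)s] %(message)s")

logger = logging.getLogger("paperless.handlers")
logger.addFilter(GroupFilter())

# As in set_correspondent()/set_tags() above: the run's UUID rides on the record.
logging_group = uuid.uuid4()
logger.info("Assigning correspondent Example Corp to Example Document",
            extra={"group": logging_group})
```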
@@ -12,6 +12,9 @@ from documents.models import Document, Tag, DocumentType, Correspondent
 from documents.sanity_checker import SanityFailedError
 
 
+logger = logging.getLogger("paperless.tasks")
+
+
 def index_optimize():
     ix = index.open_index()
     writer = AsyncWriter(ix)
@@ -45,18 +48,18 @@ def train_classifier():
 
     try:
         if classifier.train():
-            logging.getLogger(__name__).info(
+            logger.info(
                 "Saving updated classifier model to {}...".format(
                     settings.MODEL_FILE)
             )
             classifier.save_classifier()
         else:
-            logging.getLogger(__name__).debug(
+            logger.debug(
                 "Training data unchanged."
             )
 
     except Exception as e:
-        logging.getLogger(__name__).warning(
+        logger.warning(
             "Classifier error: " + str(e)
         )
 
@@ -58,6 +58,9 @@ from .serialisers import (
 )
 
 
+logger = logging.getLogger("paperless.api")
+
+
 class IndexView(TemplateView):
     template_name = "index.html"
 
@@ -488,7 +491,7 @@ class SearchView(APIView):
         try:
             doc = Document.objects.get(id=r['id'])
         except Document.DoesNotExist:
-            logging.getLogger(__name__).warning(
+            logger.warning(
                 f"Search index returned a non-existing document: "
                 f"id: {r['id']}, title: {r['title']}. "
                 f"Search index needs reindex."
@@ -102,6 +102,8 @@ def get_mailbox(server, port, security):
 
 class MailAccountHandler(LoggingMixin):
 
+    logging_name = "paperless.mail"
+
     def _correspondent_from_name(self, name):
         try:
             return Correspondent.objects.get_or_create(name=name)[0]
@@ -4,6 +4,9 @@ from paperless_mail.mail import MailAccountHandler, MailError
 from paperless_mail.models import MailAccount
 
 
+logger = logging.getLogger("paperless.mail.tasks")
+
+
 def process_mail_accounts():
     total_new_documents = 0
     for account in MailAccount.objects.all():
@@ -11,7 +14,7 @@ def process_mail_accounts():
             total_new_documents += MailAccountHandler().handle_mail_account(
                 account)
         except MailError as e:
-            logging.getLogger(__name__).error(
+            logger.error(
                 f"Error while processing mail account {account}: {e}",
                 exc_info=True
             )
@@ -27,4 +30,4 @@ def process_mail_account(name):
         account = MailAccount.objects.get(name=name)
         MailAccountHandler().handle_mail_account(account)
     except MailAccount.DoesNotExist:
-        logging.getLogger(__name__).error(f"Unknown mail acccount: {name}")
+        logger.error(f"Unknown mail acccount: {name}")
@@ -19,6 +19,8 @@ class RasterisedDocumentParser(DocumentParser):
     image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
     """
 
+    logging_name = "paperless.parsing.tesseract"
+
     def extract_metadata(self, document_path, mime_type):
         namespace_pattern = re.compile(r"\{(.*)\}(.*)")
 
@@ -11,6 +11,8 @@ class TextDocumentParser(DocumentParser):
     This parser directly parses a text document (.txt, .md, or .csv)
     """
 
+    logging_name = "paperless.parsing.text"
+
     def get_thumbnail(self, document_path, mime_type):
 
         def read_text():
@@ -14,6 +14,8 @@ class TikaDocumentParser(DocumentParser):
     This parser sends documents to a local tika server
     """
 
+    logging_name = "paperless.parsing.tika"
+
     def get_thumbnail(self, document_path, mime_type):
         if not self.archive_path:
             self.archive_path = self.convert_to_pdf(document_path)
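With every module and mixin subclass now logging under a shared `paperless.` prefix (`paperless.consumer`, `paperless.mail`, `paperless.parsing.tesseract`, and so on), a single parent logger can route and filter everything at once. A hypothetical Django LOGGING setting illustrating this, assuming the names used in the diff above; it is not part of this commit:

```python
# Hypothetical Django settings sketch, not taken from this commit.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "verbose": {
            "format": "[{asctime}] [{levelname}] [{name}] {message}",
            "style": "{",
        },
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
    },
    "loggers": {
        # Dotted logger names inherit from their parents, so configuring
        # "paperless" covers paperless.consumer, paperless.parsing.tika, etc.
        "paperless": {"handlers": ["console"], "level": "DEBUG"},
    },
}
```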
jonaswinkler