diff --git a/src/documents/consumer.py b/src/documents/consumer.py index b5ec58483..7c4ddf003 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -92,7 +92,7 @@ class Consumer(LoggingMixin): exception: Optional[Exception] = None, ): self._send_progress(100, 100, "FAILED", message) - self.log("error", log_message or message, exc_info=exc_info) + self.log.error(log_message or message, exc_info=exc_info) raise ConsumerError(f"{self.filename}: {log_message or message}") from exception def __init__(self): @@ -175,7 +175,7 @@ class Consumer(LoggingMixin): f"{settings.PRE_CONSUME_SCRIPT} does not exist.", ) - self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") + self.log.info(f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}") working_file_path = str(self.path) original_file_path = str(self.original_path) @@ -218,8 +218,7 @@ class Consumer(LoggingMixin): f"{settings.POST_CONSUME_SCRIPT} does not exist.", ) - self.log( - "info", + self.log.info( f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}", ) @@ -311,11 +310,6 @@ class Consumer(LoggingMixin): self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE) - # this is for grouping logging entries for this particular file - # together. - - self.renew_logging_group() - # Make sure that preconditions for consuming the file are met. self.pre_check_file_exists() @@ -323,7 +317,7 @@ class Consumer(LoggingMixin): self.pre_check_duplicate() self.pre_check_asn_value() - self.log("info", f"Consuming {self.filename}") + self.log.info(f"Consuming {self.filename}") # For the actual work, copy the file into a tempdir self.original_path = self.path @@ -338,7 +332,7 @@ class Consumer(LoggingMixin): mime_type = magic.from_file(self.path, mime=True) - self.log("debug", f"Detected mime type: {mime_type}") + self.log.debug(f"Detected mime type: {mime_type}") # Based on the mime type, get the parser for that type parser_class: Optional[Type[DocumentParser]] = get_parser_class_for_mime_type( @@ -370,7 +364,7 @@ class Consumer(LoggingMixin): progress_callback, ) - self.log("debug", f"Parser: {type(document_parser).__name__}") + self.log.debug(f"Parser: {type(document_parser).__name__}") # However, this already created working directories which we have to # clean up. @@ -384,10 +378,10 @@ class Consumer(LoggingMixin): try: self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT) - self.log("debug", f"Parsing {self.filename}...") + self.log.debug(f"Parsing {self.filename}...") document_parser.parse(self.path, mime_type, self.filename) - self.log("debug", f"Generating thumbnail for {self.filename}...") + self.log.debug(f"Generating thumbnail for {self.filename}...") self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL) thumbnail = document_parser.get_thumbnail( self.path, @@ -475,7 +469,7 @@ class Consumer(LoggingMixin): document.save() # Delete the file only if it was successfully consumed - self.log("debug", f"Deleting file {self.path}") + self.log.debug(f"Deleting file {self.path}") os.unlink(self.path) self.original_path.unlink() @@ -486,7 +480,7 @@ class Consumer(LoggingMixin): ) if os.path.isfile(shadow_file): - self.log("debug", f"Deleting file {shadow_file}") + self.log.debug(f"Deleting file {shadow_file}") os.unlink(shadow_file) except Exception as e: @@ -503,7 +497,7 @@ class Consumer(LoggingMixin): self.run_post_consume_script(document) - self.log("info", f"Document {document} consumption finished") + self.log.info(f"Document {document} consumption finished") self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id) @@ -522,26 +516,25 @@ class Consumer(LoggingMixin): file_info = FileInfo.from_filename(self.filename) - self.log("debug", "Saving record to database") + self.log.debug("Saving record to database") if self.override_created is not None: create_date = self.override_created - self.log( - "debug", + self.log.debug( f"Creation date from post_documents parameter: {create_date}", ) elif file_info.created is not None: create_date = file_info.created - self.log("debug", f"Creation date from FileInfo: {create_date}") + self.log.debug(f"Creation date from FileInfo: {create_date}") elif date is not None: create_date = date - self.log("debug", f"Creation date from parse_date: {create_date}") + self.log.debug(f"Creation date from parse_date: {create_date}") else: stats = os.stat(self.path) create_date = timezone.make_aware( datetime.datetime.fromtimestamp(stats.st_mtime), ) - self.log("debug", f"Creation date from st_mtime: {create_date}") + self.log.debug(f"Creation date from st_mtime: {create_date}") storage_type = Document.STORAGE_TYPE_UNENCRYPTED @@ -595,8 +588,7 @@ class Consumer(LoggingMixin): Decodes a process stdout and stderr streams and logs them to the main log """ # Log what the script exited as - self.log( - "info", + self.log.info( f"{completed_process.args[0]} exited {completed_process.returncode}", ) @@ -609,9 +601,9 @@ class Consumer(LoggingMixin): "\n", ) ) - self.log("info", "Script stdout:") + self.log.info("Script stdout:") for line in stdout_str: - self.log("info", line) + self.log.info(line) if len(completed_process.stderr): stderr_str = ( @@ -622,6 +614,6 @@ class Consumer(LoggingMixin): ) ) - self.log("warning", "Script stderr:") + self.log.warning("Script stderr:") for line in stderr_str: - self.log("warning", line) + self.log.warning(line) diff --git a/src/documents/loggers.py b/src/documents/loggers.py index 0f03135d5..0fc5cc162 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -3,18 +3,16 @@ import uuid class LoggingMixin: - logging_group = None - - logging_name = None + def __init__(self) -> None: + self.renew_logging_group() def renew_logging_group(self): + """ + Creates a new UUID to group subsequent log calls together with + the extra data named group + """ self.logging_group = uuid.uuid4() - - def log(self, level, message, **kwargs): - if self.logging_name: - logger = logging.getLogger(self.logging_name) - else: - name = ".".join([self.__class__.__module__, self.__class__.__name__]) - logger = logging.getLogger(name) - - getattr(logger, level)(message, extra={"group": self.logging_group}, **kwargs) + self.log = logging.LoggerAdapter( + logging.getLogger(self.logging_name), + extra={"group": self.logging_group}, + ) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 93d78832e..265219412 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -328,7 +328,7 @@ class DocumentParser(LoggingMixin): try: text = filepath.read_text(encoding="utf-8") except UnicodeDecodeError as e: - self.log("warning", f"Unicode error during text reading, continuing: {e}") + self.log.warning(f"Unicode error during text reading, continuing: {e}") text = filepath.read_bytes().decode("utf-8", errors="replace") return text @@ -354,5 +354,5 @@ class DocumentParser(LoggingMixin): return self.date def cleanup(self): - self.log("debug", f"Deleting directory {self.tempdir}") + self.log.debug(f"Deleting directory {self.tempdir}") shutil.rmtree(self.tempdir) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 8ab40f5ac..441cffddf 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -832,8 +832,7 @@ class PreConsumeTestCase(TestCase): } self.assertDictEqual(environment, {**environment, **subset}) - @mock.patch("documents.consumer.Consumer.log") - def test_script_with_output(self, mocked_log): + def test_script_with_output(self): """ GIVEN: - A script which outputs to stdout and stderr @@ -854,14 +853,19 @@ class PreConsumeTestCase(TestCase): os.chmod(script.name, st.st_mode | stat.S_IEXEC) with override_settings(PRE_CONSUME_SCRIPT=script.name): - c = Consumer() - c.path = "path-to-file" - c.run_pre_consume_script() + with self.assertLogs("paperless.consumer", level="INFO") as cm: + c = Consumer() + c.path = "path-to-file" - mocked_log.assert_called() - - mocked_log.assert_any_call("info", "This message goes to stdout") - mocked_log.assert_any_call("warning", "This message goes to stderr") + c.run_pre_consume_script() + self.assertIn( + "INFO:paperless.consumer:This message goes to stdout", + cm.output, + ) + self.assertIn( + "WARNING:paperless.consumer:This message goes to stderr", + cm.output, + ) def test_script_exit_non_zero(self): """ diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index de03a4861..b525ef91d 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -422,7 +422,7 @@ class MailAccountHandler(LoggingMixin): try: return Correspondent.objects.get_or_create(name=name)[0] except DatabaseError as e: - self.log("error", f"Error while retrieving correspondent {name}: {e}") + self.log.error(f"Error while retrieving correspondent {name}: {e}") return None def _get_title(self, message, att, rule): @@ -468,7 +468,7 @@ class MailAccountHandler(LoggingMixin): self.renew_logging_group() - self.log("debug", f"Processing mail account {account}") + self.log.debug(f"Processing mail account {account}") total_processed_files = 0 try: @@ -480,13 +480,12 @@ class MailAccountHandler(LoggingMixin): supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities - self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}") - self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}") + self.log.debug(f"GMAIL Label Support: {supports_gmail_labels}") + self.log.debug(f"AUTH=PLAIN Support: {supports_auth_plain}") mailbox_login(M, account) - self.log( - "debug", + self.log.debug( f"Account {account}: Processing " f"{account.rules.count()} rule(s)", ) @@ -499,16 +498,13 @@ class MailAccountHandler(LoggingMixin): supports_gmail_labels, ) except Exception as e: - self.log( - "error", + self.log.exception( f"Rule {rule}: Error while processing rule: {e}", - exc_info=True, ) except MailError: raise except Exception as e: - self.log( - "error", + self.log.error( f"Error while retrieving mailbox {account}: {e}", exc_info=False, ) @@ -521,21 +517,19 @@ class MailAccountHandler(LoggingMixin): rule: MailRule, supports_gmail_labels: bool, ): - self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}") + self.log.debug(f"Rule {rule}: Selecting folder {rule.folder}") try: M.folder.set(rule.folder) except MailboxFolderSelectError as err: - self.log( - "error", + self.log.error( f"Unable to access folder {rule.folder}, attempting folder listing", ) try: for folder_info in M.folder.list(): - self.log("info", f"Located folder: {folder_info.name}") + self.log.info(f"Located folder: {folder_info.name}") except Exception as e: - self.log( - "error", + self.log.error( "Exception during folder listing, unable to provide list folders: " + str(e), ) @@ -547,8 +541,7 @@ class MailAccountHandler(LoggingMixin): criterias = make_criterias(rule, supports_gmail_labels) - self.log( - "debug", + self.log.debug( f"Rule {rule}: Searching folder with criteria {str(criterias)}", ) @@ -572,7 +565,7 @@ class MailAccountHandler(LoggingMixin): uid=message.uid, folder=rule.folder, ).exists(): - self.log("debug", f"Skipping mail {message}, already processed.") + self.log.debug(f"Skipping mail {message}, already processed.") continue try: @@ -581,13 +574,11 @@ class MailAccountHandler(LoggingMixin): total_processed_files += processed_files mails_processed += 1 except Exception as e: - self.log( - "error", + self.log.exception( f"Rule {rule}: Error while processing mail {message.uid}: {e}", - exc_info=True, ) - self.log("debug", f"Rule {rule}: Processed {mails_processed} matching mail(s)") + self.log.debug(f"Rule {rule}: Processed {mails_processed} matching mail(s)") return total_processed_files @@ -602,8 +593,7 @@ class MailAccountHandler(LoggingMixin): ): return processed_elements - self.log( - "debug", + self.log.debug( f"Rule {rule}: " f"Processing mail {message.subject} from {message.from_} with " f"{len(message.attachments)} attachment(s)", @@ -657,8 +647,7 @@ class MailAccountHandler(LoggingMixin): and rule.attachment_type == MailRule.AttachmentProcessing.ATTACHMENTS_ONLY ): - self.log( - "debug", + self.log.debug( f"Rule {rule}: " f"Skipping attachment {att.filename} " f"with content disposition {att.content_disposition}", @@ -688,8 +677,7 @@ class MailAccountHandler(LoggingMixin): with open(temp_filename, "wb") as f: f.write(att.payload) - self.log( - "info", + self.log.info( f"Rule {rule}: " f"Consuming attachment {att.filename} from mail " f"{message.subject} from {message.from_}", @@ -717,8 +705,7 @@ class MailAccountHandler(LoggingMixin): processed_attachments += 1 else: - self.log( - "debug", + self.log.debug( f"Rule {rule}: " f"Skipping attachment {att.filename} " f"since guessed mime type {mime_type} is not supported " @@ -784,8 +771,7 @@ class MailAccountHandler(LoggingMixin): f.write(message.obj.as_bytes()) - self.log( - "info", + self.log.info( f"Rule {rule}: " f"Consuming eml from mail " f"{message.subject} from {message.from_}", diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index b838290cb..ded4d6a9b 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -64,8 +64,7 @@ class MailDocumentParser(DocumentParser): try: mail = self.get_parsed(document_path) except ParseError as e: - self.log( - "warning", + self.log.warning( f"Error while fetching document metadata for {document_path}: {e}", ) return result @@ -146,7 +145,7 @@ class MailDocumentParser(DocumentParser): self.archive_path = self.generate_pdf(document_path) def tika_parse(self, html: str): - self.log("info", "Sending content to Tika server") + self.log.info("Sending content to Tika server") try: parsed = parser.from_buffer(html, self.tika_server) @@ -251,7 +250,7 @@ class MailDocumentParser(DocumentParser): def generate_pdf_from_mail(self, mail): url = self.gotenberg_server + "/forms/chromium/convert/html" - self.log("info", "Converting mail to PDF") + self.log.info("Converting mail to PDF") css_file = os.path.join(os.path.dirname(__file__), "templates/output.css") @@ -320,7 +319,7 @@ class MailDocumentParser(DocumentParser): def generate_pdf_from_html(self, orig_html, attachments): url = self.gotenberg_server + "/forms/chromium/convert/html" - self.log("info", "Converting html to PDF") + self.log.info("Converting html to PDF") files = {} for name, file in self.transform_inline_html(orig_html, attachments): diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py index e6cae1121..4288bfbae 100644 --- a/src/paperless_mail/tests/test_parsers.py +++ b/src/paperless_mail/tests/test_parsers.py @@ -129,8 +129,7 @@ class TestParser(FileSystemAssertsMixin, TestCase): ) self.assertEqual(mocked_return, thumb) - @mock.patch("documents.loggers.LoggingMixin.log") - def test_extract_metadata_fail(self, m: mock.MagicMock): + def test_extract_metadata_fail(self): """ GIVEN: - Fresh start @@ -140,8 +139,12 @@ class TestParser(FileSystemAssertsMixin, TestCase): - A log warning should be generated """ # Validate if warning is logged when parsing fails - self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822")) - self.assertEqual("warning", m.call_args[0][0]) + with self.assertLogs("paperless.parsing.mail", level="WARNING") as cm: + self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822")) + self.assertIn( + "WARNING:paperless.parsing.mail:Error while fetching document metadata for na", + cm.output[0], + ) def test_extract_metadata(self): """ diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 151af97dc..b6a721b2f 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -54,8 +54,7 @@ class RasterisedDocumentParser(DocumentParser): }, ) except Exception as e: - self.log( - "warning", + self.log.warning( f"Error while reading metadata {key}: {value}. Error: {e}", ) return result @@ -98,7 +97,7 @@ class RasterisedDocumentParser(DocumentParser): x, y = im.info["dpi"] return round(x) except Exception as e: - self.log("warning", f"Error while getting DPI from image {image}: {e}") + self.log.warning(f"Error while getting DPI from image {image}: {e}") return None def calculate_a4_dpi(self, image): @@ -107,11 +106,11 @@ class RasterisedDocumentParser(DocumentParser): width, height = im.size # divide image width by A4 width (210mm) in inches. dpi = int(width / (21 / 2.54)) - self.log("debug", f"Estimated DPI {dpi} based on image width {width}") + self.log.debug(f"Estimated DPI {dpi} based on image width {width}") return dpi except Exception as e: - self.log("warning", f"Error while calculating DPI for image {image}: {e}") + self.log.warning(f"Error while calculating DPI for image {image}: {e}") return None def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path): @@ -127,10 +126,10 @@ class RasterisedDocumentParser(DocumentParser): if "[OCR skipped on page" not in text: # This happens when there's already text in the input file. # The sidecar file will only contain text for OCR'ed pages. - self.log("debug", "Using text from sidecar file") + self.log.debug("Using text from sidecar file") return post_process_text(text) else: - self.log("debug", "Incomplete sidecar file: discarding.") + self.log.debug("Incomplete sidecar file: discarding.") # no success with the sidecar file, try PDF @@ -160,8 +159,7 @@ class RasterisedDocumentParser(DocumentParser): except Exception: # If pdftotext fails, fall back to OCR. - self.log( - "warning", + self.log.warning( "Error while getting text from PDF document with pdftotext", exc_info=True, ) @@ -227,15 +225,14 @@ class RasterisedDocumentParser(DocumentParser): a4_dpi = self.calculate_a4_dpi(input_file) if self.has_alpha(input_file): - self.log( - "info", + self.log.info( f"Removing alpha layer from {input_file} " "for compatibility with img2pdf", ) self.remove_alpha(input_file) if dpi: - self.log("debug", f"Detected DPI for image {input_file}: {dpi}") + self.log.debug(f"Detected DPI for image {input_file}: {dpi}") ocrmypdf_args["image_dpi"] = dpi elif settings.OCR_IMAGE_DPI: ocrmypdf_args["image_dpi"] = settings.OCR_IMAGE_DPI @@ -253,8 +250,7 @@ class RasterisedDocumentParser(DocumentParser): user_args = json.loads(settings.OCR_USER_ARGS) ocrmypdf_args = {**ocrmypdf_args, **user_args} except Exception as e: - self.log( - "warning", + self.log.warning( f"There is an issue with PAPERLESS_OCR_USER_ARGS, so " f"they will not be used. Error: {e}", ) @@ -263,15 +259,13 @@ class RasterisedDocumentParser(DocumentParser): # Convert pixels to mega-pixels and provide to ocrmypdf max_pixels_mpixels = settings.OCR_MAX_IMAGE_PIXELS / 1_000_000.0 if max_pixels_mpixels > 0: - self.log( - "debug", + self.log.debug( f"Calculated {max_pixels_mpixels} megapixels for OCR", ) ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels else: - self.log( - "warning", + self.log.warning( "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, " "this value must be at least 1 megapixel if set", ) @@ -299,7 +293,7 @@ class RasterisedDocumentParser(DocumentParser): or settings.OCR_SKIP_ARCHIVE_FILE in ["with_text", "always"] ) if skip_archive_for_text and original_has_text: - self.log("debug", "Document has text, skipping OCRmyPDF entirely.") + self.log.debug("Document has text, skipping OCRmyPDF entirely.") self.text = text_original return @@ -322,7 +316,7 @@ class RasterisedDocumentParser(DocumentParser): ) try: - self.log("debug", f"Calling OCRmyPDF with args: {args}") + self.log.debug(f"Calling OCRmyPDF with args: {args}") ocrmypdf.ocr(**args) if settings.OCR_SKIP_ARCHIVE_FILE != "always": @@ -333,16 +327,14 @@ class RasterisedDocumentParser(DocumentParser): if not self.text: raise NoTextFoundException("No text was found in the original document") except EncryptedPdfError: - self.log( - "warning", + self.log.warning( "This file is encrypted, OCR is impossible. Using " "any text present in the original file.", ) if original_has_text: self.text = text_original except (NoTextFoundException, InputFileError) as e: - self.log( - "warning", + self.log.warning( f"Encountered an error while running OCR: {str(e)}. " f"Attempting force OCR to get the text.", ) @@ -365,7 +357,7 @@ class RasterisedDocumentParser(DocumentParser): ) try: - self.log("debug", f"Fallback: Calling OCRmyPDF with args: {args}") + self.log.debug(f"Fallback: Calling OCRmyPDF with args: {args}") ocrmypdf.ocr(**args) # Don't return the archived file here, since this file @@ -390,8 +382,7 @@ class RasterisedDocumentParser(DocumentParser): if original_has_text: self.text = text_original else: - self.log( - "warning", + self.log.warning( f"No text was found in {document_path}, the content will " f"be empty.", ) diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 779abbe59..d71f038a3 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -38,8 +38,7 @@ class TikaDocumentParser(DocumentParser): try: parsed = parser.from_file(document_path, tika_server) except Exception as e: - self.log( - "warning", + self.log.warning( f"Error while fetching document metadata for {document_path}: {e}", ) return [] @@ -55,7 +54,7 @@ class TikaDocumentParser(DocumentParser): ] def parse(self, document_path: Path, mime_type, file_name=None): - self.log("info", f"Sending {document_path} to Tika server") + self.log.info(f"Sending {document_path} to Tika server") tika_server = settings.TIKA_ENDPOINT # tika does not support a PathLike, only strings @@ -75,8 +74,7 @@ class TikaDocumentParser(DocumentParser): try: self.date = dateutil.parser.isoparse(parsed["metadata"]["Creation-Date"]) except Exception as e: - self.log( - "warning", + self.log.warning( f"Unable to extract date for document {document_path}: {e}", ) @@ -87,7 +85,7 @@ class TikaDocumentParser(DocumentParser): gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT url = gotenberg_server + "/forms/libreoffice/convert" - self.log("info", f"Converting {document_path} to PDF as {pdf_path}") + self.log.info(f"Converting {document_path} to PDF as {pdf_path}") with open(document_path, "rb") as document_handle: files = { "files": (