diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 25dee5daf..ca8e2d378 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -149,7 +149,12 @@ class ConsumerPlugin( self._send_progress(100, 100, ProgressStatusOptions.FAILED, message) self.log.error(log_message or message, exc_info=exc_info) # Move the file to the failed directory - if self.input_doc.original_file.exists(): + if ( + self.input_doc.original_file.exists() + and not Path( + settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name, + ).exists() + ): copy_file_with_basic_stats( self.input_doc.original_file, settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name, @@ -809,9 +814,23 @@ class CleanPDFPlugin( NoCleanupPluginMixin, NoSetupPluginMixin, AlwaysRunPluginMixin, + LoggingMixin, ConsumeTaskPlugin, ): NAME: str = "CleanPDFPlugin" + logging_name = "paperless.consumer" + + def __init__( + self, + input_doc: ConsumableDocument, + metadata: DocumentMetadataOverrides, + status_mgr: ProgressManager, + base_tmp_dir: Path, + task_id: str, + ) -> None: + super().__init__(input_doc, metadata, status_mgr, base_tmp_dir, task_id) + + self.renew_logging_group() def run(self) -> str | None: """ @@ -819,15 +838,19 @@ class CleanPDFPlugin( """ msg = None try: - run_subprocess( + result = run_subprocess( [ "qpdf", "--replace-input", - self.working_copy, + self.input_doc.original_file, ], logger=self.log, ) - msg = "PDF successfully cleaned" + msg = ( + f"Error while cleaning PDF: {result.stderr}" + if result.returncode != 0 + else "PDF cleaned successfully" + ) except Exception as e: msg = "Error while cleaning PDF" self.log.error(e) diff --git a/src/documents/tasks.py b/src/documents/tasks.py index c02c363ae..7799abe2a 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -48,6 +48,7 @@ from documents.plugins.helpers import ProgressStatusOptions from documents.sanity_checker import SanityCheckFailedException from documents.signals import document_updated from documents.signals.handlers import cleanup_document_deletion +from documents.utils import copy_file_with_basic_stats if settings.AUDIT_LOG_ENABLED: from auditlog.models import LogEntry @@ -185,10 +186,13 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False) if not failed_file.exists(): logger.error(f"Failed file {failed_file} not found") return + working_copy = settings.SCRATCH_DIR / failed_file.name + copy_file_with_basic_stats(failed_file, working_copy) + consume_file( ConsumableDocument( source=DocumentSource.ConsumeFolder, - original_file=failed_file, + original_file=working_copy, ), clean=clean, # skip_ocr=skip_ocr, diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index a305924c9..8dc1edc39 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -204,8 +204,7 @@ class TestRetryConsumeTask( TestCase, ): @override_settings(CONSUMPTION_FAILED_DIR=Path(__file__).parent / "samples") - @mock.patch("documents.consumer.run_subprocess") - def test_retry_consume(self, m): + def test_retry_consume(self): test_file = self.SAMPLE_DIR / "corrupted.pdf" temp_copy = self.dirs.scratch_dir / test_file.name shutil.copy(test_file, temp_copy) @@ -248,13 +247,7 @@ class TestRetryConsumeTask( # Ensure the file is moved to the failed dir self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name) - tasks.retry_failed_file(task_id=task.task_id) - - m.assert_called_once() - - args, _ = m.call_args - - command = args[0] - - self.assertEqual(command[0], "qpdf") - self.assertEqual(command[1], "--replace-input") + with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): + with self.assertLogs("documents.tasks", level="INFO") as cm: + tasks.retry_failed_file(task_id=task.task_id, clean=True) + self.assertIn("PDF cleaned successfully", cm.output[0])