This commit is contained in:
shamoon 2024-11-07 12:28:20 -08:00
parent 85c661dff2
commit af1c64e969
No known key found for this signature in database
3 changed files with 37 additions and 17 deletions

View File

@@ -149,7 +149,12 @@ class ConsumerPlugin(
self._send_progress(100, 100, ProgressStatusOptions.FAILED, message)
self.log.error(log_message or message, exc_info=exc_info)
# Move the file to the failed directory
if self.input_doc.original_file.exists():
if (
self.input_doc.original_file.exists()
and not Path(
settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name,
).exists()
):
copy_file_with_basic_stats(
self.input_doc.original_file,
settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name,
@@ -809,9 +814,23 @@ class CleanPDFPlugin(
NoCleanupPluginMixin,
NoSetupPluginMixin,
AlwaysRunPluginMixin,
LoggingMixin,
ConsumeTaskPlugin,
):
NAME: str = "CleanPDFPlugin"
logging_name = "paperless.consumer"
# Constructor for the plugin: forwards all five arguments, unchanged and in
# the same order, to the parent constructor, then renews the logging group.
def __init__(
self,
input_doc: ConsumableDocument,
metadata: DocumentMetadataOverrides,
status_mgr: ProgressManager,
base_tmp_dir: Path,
task_id: str,
) -> None:
super().__init__(input_doc, metadata, status_mgr, base_tmp_dir, task_id)
# renew_logging_group() is not defined in this view; presumably it is
# provided by LoggingMixin (listed among the base classes) and sets up the
# logger later used as self.log -- TODO confirm against the mixin.
self.renew_logging_group()
def run(self) -> str | None:
"""
@@ -819,15 +838,19 @@ class CleanPDFPlugin(
"""
msg = None
try:
run_subprocess(
result = run_subprocess(
[
"qpdf",
"--replace-input",
self.working_copy,
self.input_doc.original_file,
],
logger=self.log,
)
msg = "PDF successfully cleaned"
msg = (
f"Error while cleaning PDF: {result.stderr}"
if result.returncode != 0
else "PDF cleaned successfully"
)
except Exception as e:
msg = "Error while cleaning PDF"
self.log.error(e)

View File

@@ -48,6 +48,7 @@ from documents.plugins.helpers import ProgressStatusOptions
from documents.sanity_checker import SanityCheckFailedException
from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.utils import copy_file_with_basic_stats
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
@@ -185,10 +186,13 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
if not failed_file.exists():
logger.error(f"Failed file {failed_file} not found")
return
working_copy = settings.SCRATCH_DIR / failed_file.name
copy_file_with_basic_stats(failed_file, working_copy)
consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=failed_file,
original_file=working_copy,
),
clean=clean,
# skip_ocr=skip_ocr,

View File

@@ -204,8 +204,7 @@ class TestRetryConsumeTask(
TestCase,
):
@override_settings(CONSUMPTION_FAILED_DIR=Path(__file__).parent / "samples")
@mock.patch("documents.consumer.run_subprocess")
def test_retry_consume(self, m):
def test_retry_consume(self):
test_file = self.SAMPLE_DIR / "corrupted.pdf"
temp_copy = self.dirs.scratch_dir / test_file.name
shutil.copy(test_file, temp_copy)
@@ -248,13 +247,7 @@ class TestRetryConsumeTask(
# Ensure the file is moved to the failed dir
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
tasks.retry_failed_file(task_id=task.task_id)
m.assert_called_once()
args, _ = m.call_args
command = args[0]
self.assertEqual(command[0], "qpdf")
self.assertEqual(command[1], "--replace-input")
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
with self.assertLogs("documents.tasks", level="INFO") as cm:
tasks.retry_failed_file(task_id=task.task_id, clean=True)
self.assertIn("PDF cleaned successfully", cm.output[0])