mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Move it out of consumer
This commit is contained in:
parent
af1c64e969
commit
0fcd69b739
@ -808,50 +808,3 @@ class ConsumerPlugin(
|
||||
copy_basic_file_stats(source, target)
|
||||
except Exception: # pragma: no cover
|
||||
pass
|
||||
|
||||
|
||||
class CleanPDFPlugin(
    NoCleanupPluginMixin,
    NoSetupPluginMixin,
    AlwaysRunPluginMixin,
    LoggingMixin,
    ConsumeTaskPlugin,
):
    """
    Consume-task plugin which tries to clean the input PDF with qpdf.

    The file at ``input_doc.original_file`` is handed to
    ``qpdf --replace-input``, so the cleaned result overwrites the input.
    """

    NAME: str = "CleanPDFPlugin"
    logging_name = "paperless.consumer"

    def __init__(
        self,
        input_doc: ConsumableDocument,
        metadata: DocumentMetadataOverrides,
        status_mgr: ProgressManager,
        base_tmp_dir: Path,
        task_id: str,
    ) -> None:
        super().__init__(input_doc, metadata, status_mgr, base_tmp_dir, task_id)

        self.renew_logging_group()

    def run(self) -> str | None:
        """
        Tries to clean a PDF file with qpdf

        Returns a human-readable status message: a success message when qpdf
        exits with status 0, otherwise an error message. Failures are reported
        through the returned message (and the log), never raised.
        """
        try:
            result = run_subprocess(
                [
                    "qpdf",
                    "--replace-input",
                    # qpdf exits with status 3 when it only emitted warnings
                    # but still wrote the file; treat that as success, the
                    # same way the retry task's qpdf invocation does.
                    "--warning-exit-0",
                    self.input_doc.original_file,
                ],
                logger=self.log,
            )
        except Exception as e:
            # Best-effort step: log with context and report via the message
            # instead of propagating the exception.
            self.log.error(f"Error while cleaning PDF: {e}")
            return "Error while cleaning PDF"

        if result.returncode != 0:
            return f"Error while cleaning PDF: {result.stderr}"
        return "PDF cleaned successfully"
|
||||
|
@ -24,7 +24,6 @@ from documents.barcodes import BarcodePlugin
|
||||
from documents.caching import clear_document_caches
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.classifier import load_classifier
|
||||
from documents.consumer import CleanPDFPlugin
|
||||
from documents.consumer import ConsumerPlugin
|
||||
from documents.consumer import WorkflowTriggerPlugin
|
||||
from documents.data_models import ConsumableDocument
|
||||
@ -49,6 +48,7 @@ from documents.sanity_checker import SanityCheckFailedException
|
||||
from documents.signals import document_updated
|
||||
from documents.signals.handlers import cleanup_document_deletion
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.models import LogEntry
|
||||
@ -111,7 +111,6 @@ def consume_file(
|
||||
self: Task,
|
||||
input_doc: ConsumableDocument,
|
||||
overrides: DocumentMetadataOverrides | None = None,
|
||||
clean: bool = False,
|
||||
):
|
||||
# Default no overrides
|
||||
if overrides is None:
|
||||
@ -124,9 +123,6 @@ def consume_file(
|
||||
ConsumerPlugin,
|
||||
]
|
||||
|
||||
if clean:
|
||||
plugins.insert(0, CleanPDFPlugin)
|
||||
|
||||
with (
|
||||
ProgressManager(
|
||||
overrides.filename or input_doc.original_file.name,
|
||||
@ -189,13 +185,32 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
|
||||
working_copy = settings.SCRATCH_DIR / failed_file.name
|
||||
copy_file_with_basic_stats(failed_file, working_copy)
|
||||
|
||||
if clean:
|
||||
try:
|
||||
result = run_subprocess(
|
||||
[
|
||||
"qpdf",
|
||||
"--replace-input",
|
||||
"--warning-exit-0",
|
||||
working_copy,
|
||||
],
|
||||
logger=logger,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise Exception(
|
||||
f"qpdf failed with exit code {result.returncode}, error: {result.stderr}",
|
||||
)
|
||||
else:
|
||||
logger.debug("PDF cleaned successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error while cleaning PDF: {e}")
|
||||
return
|
||||
|
||||
consume_file(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=working_copy,
|
||||
),
|
||||
clean=clean,
|
||||
# skip_ocr=skip_ocr,
|
||||
)
|
||||
|
||||
|
||||
|
Binary file not shown.
@ -248,6 +248,6 @@ class TestRetryConsumeTask(
|
||||
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
|
||||
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("documents.tasks", level="INFO") as cm:
|
||||
with self.assertLogs() as cm:
|
||||
tasks.retry_failed_file(task_id=task.task_id, clean=True)
|
||||
self.assertIn("PDF cleaned successfully", cm.output[0])
|
||||
self.assertIn("New document id 1 created", cm.output[-1])
|
||||
|
Loading…
x
Reference in New Issue
Block a user