mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Move it out of consumer
This commit is contained in:
parent
af1c64e969
commit
0fcd69b739
@ -808,50 +808,3 @@ class ConsumerPlugin(
|
|||||||
copy_basic_file_stats(source, target)
|
copy_basic_file_stats(source, target)
|
||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class CleanPDFPlugin(
|
|
||||||
NoCleanupPluginMixin,
|
|
||||||
NoSetupPluginMixin,
|
|
||||||
AlwaysRunPluginMixin,
|
|
||||||
LoggingMixin,
|
|
||||||
ConsumeTaskPlugin,
|
|
||||||
):
|
|
||||||
NAME: str = "CleanPDFPlugin"
|
|
||||||
logging_name = "paperless.consumer"
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
input_doc: ConsumableDocument,
|
|
||||||
metadata: DocumentMetadataOverrides,
|
|
||||||
status_mgr: ProgressManager,
|
|
||||||
base_tmp_dir: Path,
|
|
||||||
task_id: str,
|
|
||||||
) -> None:
|
|
||||||
super().__init__(input_doc, metadata, status_mgr, base_tmp_dir, task_id)
|
|
||||||
|
|
||||||
self.renew_logging_group()
|
|
||||||
|
|
||||||
def run(self) -> str | None:
|
|
||||||
"""
|
|
||||||
Tries to clean a PDF file with qpdf
|
|
||||||
"""
|
|
||||||
msg = None
|
|
||||||
try:
|
|
||||||
result = run_subprocess(
|
|
||||||
[
|
|
||||||
"qpdf",
|
|
||||||
"--replace-input",
|
|
||||||
self.input_doc.original_file,
|
|
||||||
],
|
|
||||||
logger=self.log,
|
|
||||||
)
|
|
||||||
msg = (
|
|
||||||
f"Error while cleaning PDF: {result.stderr}"
|
|
||||||
if result.returncode != 0
|
|
||||||
else "PDF cleaned successfully"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
msg = "Error while cleaning PDF"
|
|
||||||
self.log.error(e)
|
|
||||||
return msg
|
|
||||||
|
@ -24,7 +24,6 @@ from documents.barcodes import BarcodePlugin
|
|||||||
from documents.caching import clear_document_caches
|
from documents.caching import clear_document_caches
|
||||||
from documents.classifier import DocumentClassifier
|
from documents.classifier import DocumentClassifier
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
from documents.consumer import CleanPDFPlugin
|
|
||||||
from documents.consumer import ConsumerPlugin
|
from documents.consumer import ConsumerPlugin
|
||||||
from documents.consumer import WorkflowTriggerPlugin
|
from documents.consumer import WorkflowTriggerPlugin
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
@ -49,6 +48,7 @@ from documents.sanity_checker import SanityCheckFailedException
|
|||||||
from documents.signals import document_updated
|
from documents.signals import document_updated
|
||||||
from documents.signals.handlers import cleanup_document_deletion
|
from documents.signals.handlers import cleanup_document_deletion
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
from documents.utils import run_subprocess
|
||||||
|
|
||||||
if settings.AUDIT_LOG_ENABLED:
|
if settings.AUDIT_LOG_ENABLED:
|
||||||
from auditlog.models import LogEntry
|
from auditlog.models import LogEntry
|
||||||
@ -111,7 +111,6 @@ def consume_file(
|
|||||||
self: Task,
|
self: Task,
|
||||||
input_doc: ConsumableDocument,
|
input_doc: ConsumableDocument,
|
||||||
overrides: DocumentMetadataOverrides | None = None,
|
overrides: DocumentMetadataOverrides | None = None,
|
||||||
clean: bool = False,
|
|
||||||
):
|
):
|
||||||
# Default no overrides
|
# Default no overrides
|
||||||
if overrides is None:
|
if overrides is None:
|
||||||
@ -124,9 +123,6 @@ def consume_file(
|
|||||||
ConsumerPlugin,
|
ConsumerPlugin,
|
||||||
]
|
]
|
||||||
|
|
||||||
if clean:
|
|
||||||
plugins.insert(0, CleanPDFPlugin)
|
|
||||||
|
|
||||||
with (
|
with (
|
||||||
ProgressManager(
|
ProgressManager(
|
||||||
overrides.filename or input_doc.original_file.name,
|
overrides.filename or input_doc.original_file.name,
|
||||||
@ -189,13 +185,32 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
|
|||||||
working_copy = settings.SCRATCH_DIR / failed_file.name
|
working_copy = settings.SCRATCH_DIR / failed_file.name
|
||||||
copy_file_with_basic_stats(failed_file, working_copy)
|
copy_file_with_basic_stats(failed_file, working_copy)
|
||||||
|
|
||||||
|
if clean:
|
||||||
|
try:
|
||||||
|
result = run_subprocess(
|
||||||
|
[
|
||||||
|
"qpdf",
|
||||||
|
"--replace-input",
|
||||||
|
"--warning-exit-0",
|
||||||
|
working_copy,
|
||||||
|
],
|
||||||
|
logger=logger,
|
||||||
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise Exception(
|
||||||
|
f"qpdf failed with exit code {result.returncode}, error: {result.stderr}",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug("PDF cleaned successfully")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error while cleaning PDF: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
consume_file(
|
consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
source=DocumentSource.ConsumeFolder,
|
source=DocumentSource.ConsumeFolder,
|
||||||
original_file=working_copy,
|
original_file=working_copy,
|
||||||
),
|
),
|
||||||
clean=clean,
|
|
||||||
# skip_ocr=skip_ocr,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Binary file not shown.
@ -248,6 +248,6 @@ class TestRetryConsumeTask(
|
|||||||
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
|
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
|
||||||
|
|
||||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("documents.tasks", level="INFO") as cm:
|
with self.assertLogs() as cm:
|
||||||
tasks.retry_failed_file(task_id=task.task_id, clean=True)
|
tasks.retry_failed_file(task_id=task.task_id, clean=True)
|
||||||
self.assertIn("PDF cleaned successfully", cm.output[0])
|
self.assertIn("New document id 1 created", cm.output[-1])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user