mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Changes the consumer to work on a temporary copy and provides that copy to the pre-consume script for modifications
This commit is contained in:
parent
9784ea4a60
commit
7dd9a4e089
@ -1,7 +1,10 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
from subprocess import CompletedProcess
|
from subprocess import CompletedProcess
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -94,7 +97,8 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path = None
|
self.path: Optional[Path] = None
|
||||||
|
self.original_path: Optional[Path] = None
|
||||||
self.filename = None
|
self.filename = None
|
||||||
self.override_title = None
|
self.override_title = None
|
||||||
self.override_correspondent_id = None
|
self.override_correspondent_id = None
|
||||||
@ -167,16 +171,18 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||||
|
|
||||||
filepath_arg = os.path.normpath(self.path)
|
working_file_path = str(self.path)
|
||||||
|
original_file_path = str(self.original_path)
|
||||||
|
|
||||||
script_env = os.environ.copy()
|
script_env = os.environ.copy()
|
||||||
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
|
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
|
||||||
|
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
completed_proc = run(
|
completed_proc = run(
|
||||||
args=[
|
args=[
|
||||||
settings.PRE_CONSUME_SCRIPT,
|
settings.PRE_CONSUME_SCRIPT,
|
||||||
filepath_arg,
|
original_file_path,
|
||||||
],
|
],
|
||||||
env=script_env,
|
env=script_env,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@ -195,7 +201,7 @@ class Consumer(LoggingMixin):
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_post_consume_script(self, document):
|
def run_post_consume_script(self, document: Document):
|
||||||
if not settings.POST_CONSUME_SCRIPT:
|
if not settings.POST_CONSUME_SCRIPT:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -285,8 +291,8 @@ class Consumer(LoggingMixin):
|
|||||||
Return the document object if it was successfully created.
|
Return the document object if it was successfully created.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.path = path
|
self.path = Path(path).resolve()
|
||||||
self.filename = override_filename or os.path.basename(path)
|
self.filename = override_filename or self.path.name
|
||||||
self.override_title = override_title
|
self.override_title = override_title
|
||||||
self.override_correspondent_id = override_correspondent_id
|
self.override_correspondent_id = override_correspondent_id
|
||||||
self.override_document_type_id = override_document_type_id
|
self.override_document_type_id = override_document_type_id
|
||||||
@ -311,6 +317,15 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("info", f"Consuming {self.filename}")
|
self.log("info", f"Consuming {self.filename}")
|
||||||
|
|
||||||
|
# For the actual work, copy the file into a tempdir
|
||||||
|
self.original_path = self.path
|
||||||
|
tempdir = tempfile.TemporaryDirectory(
|
||||||
|
prefix="paperless-ngx",
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
)
|
||||||
|
self.path = Path(tempdir.name) / Path(self.filename)
|
||||||
|
shutil.copy(self.original_path, self.path)
|
||||||
|
|
||||||
# Determine the parser class.
|
# Determine the parser class.
|
||||||
|
|
||||||
mime_type = magic.from_file(self.path, mime=True)
|
mime_type = magic.from_file(self.path, mime=True)
|
||||||
@ -453,11 +468,12 @@ class Consumer(LoggingMixin):
|
|||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log("debug", f"Deleting file {self.path}")
|
self.log("debug", f"Deleting file {self.path}")
|
||||||
os.unlink(self.path)
|
os.unlink(self.path)
|
||||||
|
self.original_path.unlink()
|
||||||
|
|
||||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||||
shadow_file = os.path.join(
|
shadow_file = os.path.join(
|
||||||
os.path.dirname(self.path),
|
os.path.dirname(self.original_path),
|
||||||
"._" + os.path.basename(self.path),
|
"._" + os.path.basename(self.original_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
if os.path.isfile(shadow_file):
|
if os.path.isfile(shadow_file):
|
||||||
@ -474,6 +490,7 @@ class Consumer(LoggingMixin):
|
|||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|
||||||
|
@ -833,7 +833,8 @@ class PreConsumeTestCase(TestCase):
|
|||||||
with tempfile.NamedTemporaryFile() as script:
|
with tempfile.NamedTemporaryFile() as script:
|
||||||
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
with override_settings(PRE_CONSUME_SCRIPT=script.name):
|
||||||
c = Consumer()
|
c = Consumer()
|
||||||
c.path = "path-to-file"
|
c.original_path = "path-to-file"
|
||||||
|
c.path = "/tmp/somewhere/path-to-file"
|
||||||
c.run_pre_consume_script()
|
c.run_pre_consume_script()
|
||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
@ -841,10 +842,19 @@ class PreConsumeTestCase(TestCase):
|
|||||||
args, kwargs = m.call_args
|
args, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], "path-to-file")
|
self.assertEqual(command[1], "path-to-file")
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||||
|
"DOCUMENT_WORKING_PATH": c.path,
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.log")
|
@mock.patch("documents.consumer.Consumer.log")
|
||||||
def test_script_with_output(self, mocked_log):
|
def test_script_with_output(self, mocked_log):
|
||||||
"""
|
"""
|
||||||
@ -961,9 +971,10 @@ class PostConsumeTestCase(TestCase):
|
|||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
args, kwargs = m.call_args
|
_, kwargs = m.call_args
|
||||||
|
|
||||||
command = kwargs["args"]
|
command = kwargs["args"]
|
||||||
|
environment = kwargs["env"]
|
||||||
|
|
||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], str(doc.pk))
|
self.assertEqual(command[1], str(doc.pk))
|
||||||
@ -972,6 +983,17 @@ class PostConsumeTestCase(TestCase):
|
|||||||
self.assertEqual(command[7], "my_bank")
|
self.assertEqual(command[7], "my_bank")
|
||||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||||
|
|
||||||
|
self.assertDictContainsSubset(
|
||||||
|
{
|
||||||
|
"DOCUMENT_ID": str(doc.pk),
|
||||||
|
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||||
|
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||||
|
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||||
|
"DOCUMENT_TAGS": "a,b",
|
||||||
|
},
|
||||||
|
environment,
|
||||||
|
)
|
||||||
|
|
||||||
def test_script_exit_non_zero(self):
|
def test_script_exit_non_zero(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user