mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-01 01:46:16 +00:00
Changes the consumer to work on a temporary copy and provies that copy to the pre-consume script for modifications
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from subprocess import CompletedProcess
|
||||
from subprocess import run
|
||||
from typing import Optional
|
||||
@@ -94,7 +97,8 @@ class Consumer(LoggingMixin):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.path = None
|
||||
self.path: Optional[Path] = None
|
||||
self.original_path: Optional[Path] = None
|
||||
self.filename = None
|
||||
self.override_title = None
|
||||
self.override_correspondent_id = None
|
||||
@@ -167,16 +171,18 @@ class Consumer(LoggingMixin):
|
||||
|
||||
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||
|
||||
filepath_arg = os.path.normpath(self.path)
|
||||
working_file_path = str(self.path)
|
||||
original_file_path = str(self.original_path)
|
||||
|
||||
script_env = os.environ.copy()
|
||||
script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg
|
||||
script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
|
||||
script_env["DOCUMENT_WORKING_PATH"] = working_file_path
|
||||
|
||||
try:
|
||||
completed_proc = run(
|
||||
args=[
|
||||
settings.PRE_CONSUME_SCRIPT,
|
||||
filepath_arg,
|
||||
original_file_path,
|
||||
],
|
||||
env=script_env,
|
||||
capture_output=True,
|
||||
@@ -195,7 +201,7 @@ class Consumer(LoggingMixin):
|
||||
exception=e,
|
||||
)
|
||||
|
||||
def run_post_consume_script(self, document):
|
||||
def run_post_consume_script(self, document: Document):
|
||||
if not settings.POST_CONSUME_SCRIPT:
|
||||
return
|
||||
|
||||
@@ -285,8 +291,8 @@ class Consumer(LoggingMixin):
|
||||
Return the document object if it was successfully created.
|
||||
"""
|
||||
|
||||
self.path = path
|
||||
self.filename = override_filename or os.path.basename(path)
|
||||
self.path = Path(path).resolve()
|
||||
self.filename = override_filename or self.path.name
|
||||
self.override_title = override_title
|
||||
self.override_correspondent_id = override_correspondent_id
|
||||
self.override_document_type_id = override_document_type_id
|
||||
@@ -311,6 +317,15 @@ class Consumer(LoggingMixin):
|
||||
|
||||
self.log("info", f"Consuming {self.filename}")
|
||||
|
||||
# For the actual work, copy the file into a tempdir
|
||||
self.original_path = self.path
|
||||
tempdir = tempfile.TemporaryDirectory(
|
||||
prefix="paperless-ngx",
|
||||
dir=settings.SCRATCH_DIR,
|
||||
)
|
||||
self.path = Path(tempdir.name) / Path(self.filename)
|
||||
shutil.copy(self.original_path, self.path)
|
||||
|
||||
# Determine the parser class.
|
||||
|
||||
mime_type = magic.from_file(self.path, mime=True)
|
||||
@@ -453,11 +468,12 @@ class Consumer(LoggingMixin):
|
||||
# Delete the file only if it was successfully consumed
|
||||
self.log("debug", f"Deleting file {self.path}")
|
||||
os.unlink(self.path)
|
||||
self.original_path.unlink()
|
||||
|
||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||
shadow_file = os.path.join(
|
||||
os.path.dirname(self.path),
|
||||
"._" + os.path.basename(self.path),
|
||||
os.path.dirname(self.original_path),
|
||||
"._" + os.path.basename(self.original_path),
|
||||
)
|
||||
|
||||
if os.path.isfile(shadow_file):
|
||||
@@ -474,6 +490,7 @@ class Consumer(LoggingMixin):
|
||||
)
|
||||
finally:
|
||||
document_parser.cleanup()
|
||||
tempdir.cleanup()
|
||||
|
||||
self.run_post_consume_script(document)
|
||||
|
||||
|
Reference in New Issue
Block a user