Format Python code with black
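The changes below are mechanical: black normalizes string literals to double quotes, rewraps long statements to its default 88-column limit, adds a trailing comma to exploded call sites and literals, and collapses short multi-line constructs onto one line. A minimal sketch of the same transformation through black's Python API (assuming black is installed; the sample source line is illustrative, not from this codebase):

    import black

    # format_str applies the rules visible throughout this diff: double
    # quotes, normalized spacing, and wrapping at mode.line_length (88).
    src = "x = {'task_id': task_id,'status':'WORKING'}"
    print(black.format_str(src, mode=black.Mode()))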
@@ -15,15 +15,11 @@ from filelock import FileLock
 from rest_framework.reverse import reverse

 from .classifier import load_classifier
-from .file_handling import create_source_path_directory, \
-    generate_unique_filename
+from .file_handling import create_source_path_directory, generate_unique_filename
 from .loggers import LoggingMixin
 from .models import Document, FileInfo, Correspondent, DocumentType, Tag
 from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
-from .signals import (
-    document_consumption_finished,
-    document_consumption_started
-)
+from .signals import document_consumption_finished, document_consumption_started


 class ConsumerError(Exception):
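Both import rewrites above are the same rule: once the joined line fits within 88 columns, black removes backslash continuations and parenthesized wrappers alike. A small illustration with standard-library names (not from this codebase):

    # Before: a pycodestyle-era backslash continuation.
    from os.path import basename, \
        dirname

    # After black: joined, since the result fits in 88 columns.
    from os.path import basename, dirname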
@@ -49,23 +45,26 @@ class Consumer(LoggingMixin):

     logging_name = "paperless.consumer"

-    def _send_progress(self, current_progress, max_progress, status,
-                       message=None, document_id=None):
+    def _send_progress(
+        self, current_progress, max_progress, status, message=None, document_id=None
+    ):
         payload = {
-            'filename': os.path.basename(self.filename) if self.filename else None,  # NOQA: E501
-            'task_id': self.task_id,
-            'current_progress': current_progress,
-            'max_progress': max_progress,
-            'status': status,
-            'message': message,
-            'document_id': document_id
+            "filename": os.path.basename(self.filename)
+            if self.filename
+            else None,  # NOQA: E501
+            "task_id": self.task_id,
+            "current_progress": current_progress,
+            "max_progress": max_progress,
+            "status": status,
+            "message": message,
+            "document_id": document_id,
         }
-        async_to_sync(self.channel_layer.group_send)("status_updates",
-                                                     {'type': 'status_update',
-                                                      'data': payload})
+        async_to_sync(self.channel_layer.group_send)(
+            "status_updates", {"type": "status_update", "data": payload}
+        )

     def _fail(self, message, log_message=None, exc_info=None):
-        self._send_progress(100, 100, 'FAILED', message)
+        self._send_progress(100, 100, "FAILED", message)
         self.log("error", log_message or message, exc_info=exc_info)
         raise ConsumerError(f"{self.filename}: {log_message or message}")
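The payload rewrite shows three rules at once: quote normalization, one element per line for a dict too long to collapse, and the "magic trailing comma" after the last element, which keeps the literal exploded on future runs. A freestanding sketch of the same shape (names are placeholders):

    filename = None

    payload = {
        # black splits a conditional expression across lines when it is
        # too long to fit, as with the filename entry above...
        "filename": filename.upper()
        if filename
        else None,
        # ...and appends a trailing comma to the final entry.
        "document_id": None,
    }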
@@ -84,19 +83,20 @@ class Consumer(LoggingMixin):
     def pre_check_file_exists(self):
         if not os.path.isfile(self.path):
             self._fail(
-                MESSAGE_FILE_NOT_FOUND,
-                f"Cannot consume {self.path}: File not found."
+                MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
             )

     def pre_check_duplicate(self):
         with open(self.path, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
-        if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists():  # NOQA: E501
+        if Document.objects.filter(
+            Q(checksum=checksum) | Q(archive_checksum=checksum)
+        ).exists():  # NOQA: E501
             if settings.CONSUMER_DELETE_DUPLICATES:
                 os.unlink(self.path)
             self._fail(
                 MESSAGE_DOCUMENT_ALREADY_EXISTS,
-                f"Not consuming {self.filename}: It is a duplicate."
+                f"Not consuming {self.filename}: It is a duplicate.",
             )

     def pre_check_directories(self):
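pre_check_duplicate hashes the entire file and rejects the document if either the original or the archived version of any existing document carries the same checksum. The hashing side of that check, detached from the Django query (a sketch, not the project's API):

    import hashlib

    def file_checksum(path: str) -> str:
        # Same scheme as pre_check_duplicate: MD5 over the raw file bytes.
        with open(path, "rb") as f:
            return hashlib.md5(f.read()).hexdigest()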
@@ -113,10 +113,10 @@ class Consumer(LoggingMixin):
             self._fail(
                 MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
                 f"Configured pre-consume script "
-                f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
+                f"{settings.PRE_CONSUME_SCRIPT} does not exist.",
+            )

-        self.log("info",
-                 f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
+        self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")

         try:
             Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
@@ -124,7 +124,7 @@ class Consumer(LoggingMixin):
             self._fail(
                 MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
                 f"Error while executing pre-consume script: {e}",
-                exc_info=True
+                exc_info=True,
             )

     def run_post_consume_script(self, document):
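The pre-consume hook is a plain subprocess: the consumer spawns the configured script with the document path as its single argument and blocks until it exits; any exception is routed through _fail. The call pattern in isolation (a sketch; error handling omitted):

    from subprocess import Popen

    def run_hook(script: str, document_path: str) -> int:
        # Mirrors run_pre_consume_script above: spawn the hook, then wait
        # for its exit code before consumption continues.
        return Popen((script, document_path)).wait()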
@@ -135,42 +135,44 @@ class Consumer(LoggingMixin):
             self._fail(
                 MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
                 f"Configured post-consume script "
-                f"{settings.POST_CONSUME_SCRIPT} does not exist."
+                f"{settings.POST_CONSUME_SCRIPT} does not exist.",
             )

         self.log(
-            "info",
-            f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
+            "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
         )

         try:
-            Popen((
-                settings.POST_CONSUME_SCRIPT,
-                str(document.pk),
-                document.get_public_filename(),
-                os.path.normpath(document.source_path),
-                os.path.normpath(document.thumbnail_path),
-                reverse("document-download", kwargs={"pk": document.pk}),
-                reverse("document-thumb", kwargs={"pk": document.pk}),
-                str(document.correspondent),
-                str(",".join(document.tags.all().values_list(
-                    "name", flat=True)))
-            )).wait()
+            Popen(
+                (
+                    settings.POST_CONSUME_SCRIPT,
+                    str(document.pk),
+                    document.get_public_filename(),
+                    os.path.normpath(document.source_path),
+                    os.path.normpath(document.thumbnail_path),
+                    reverse("document-download", kwargs={"pk": document.pk}),
+                    reverse("document-thumb", kwargs={"pk": document.pk}),
+                    str(document.correspondent),
+                    str(",".join(document.tags.all().values_list("name", flat=True))),
+                )
+            ).wait()
         except Exception as e:
             self._fail(
                 MESSAGE_POST_CONSUME_SCRIPT_ERROR,
                 f"Error while executing post-consume script: {e}",
-                exc_info=True
+                exc_info=True,
             )

-    def try_consume_file(self,
-                         path,
-                         override_filename=None,
-                         override_title=None,
-                         override_correspondent_id=None,
-                         override_document_type_id=None,
-                         override_tag_ids=None,
-                         task_id=None):
+    def try_consume_file(
+        self,
+        path,
+        override_filename=None,
+        override_title=None,
+        override_correspondent_id=None,
+        override_document_type_id=None,
+        override_tag_ids=None,
+        task_id=None,
+    ):
         """
         Return the document object if it was successfully created.
         """
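try_consume_file shows black's treatment of signatures that cannot fit on one line: one parameter per line, a trailing comma after the last, and the closing parenthesis dedented to the def. The trailing comma is "magic": black keeps the signature exploded even if it later shrinks to fit. A compact illustration (hypothetical function):

    def try_consume(
        path,
        override_filename=None,
        task_id=None,
    ):
        # One parameter per line plus a trailing comma, as in the diff above.
        return path, override_filename, task_id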
@@ -183,7 +185,7 @@ class Consumer(LoggingMixin):
         self.override_tag_ids = override_tag_ids
         self.task_id = task_id or str(uuid.uuid4())

-        self._send_progress(0, 100, 'STARTING', MESSAGE_NEW_FILE)
+        self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)

         # this is for grouping logging entries for this particular file
         # together.
@@ -206,17 +208,12 @@ class Consumer(LoggingMixin):

         parser_class = get_parser_class_for_mime_type(mime_type)
         if not parser_class:
-            self._fail(
-                MESSAGE_UNSUPPORTED_TYPE,
-                f"Unsupported mime type {mime_type}"
-            )
+            self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")

         # Notify all listeners that we're going to do some work.

         document_consumption_started.send(
-            sender=self.__class__,
-            filename=self.path,
-            logging_group=self.logging_group
+            sender=self.__class__, filename=self.path, logging_group=self.logging_group
         )

         self.run_pre_consume_script()
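The inverse rule appears here: a call that was manually wrapped but fits within 88 columns, like the _fail and document_consumption_started.send calls, is pulled back onto as few lines as possible. Illustration (stand-in function, not the Consumer API):

    def send_progress(current, maximum, status, message):
        # Placeholder for Consumer._send_progress, used only to show layout.
        return (current, maximum, status, message)

    # Short enough for one line, so black leaves it unsplit:
    send_progress(20, 100, "WORKING", "Parsing document...")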
@@ -243,21 +240,20 @@ class Consumer(LoggingMixin):
         archive_path = None

         try:
-            self._send_progress(20, 100, 'WORKING', MESSAGE_PARSING_DOCUMENT)
+            self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
             self.log("debug", "Parsing {}...".format(self.filename))
             document_parser.parse(self.path, mime_type, self.filename)

             self.log("debug", f"Generating thumbnail for {self.filename}...")
-            self._send_progress(70, 100, 'WORKING',
-                                MESSAGE_GENERATING_THUMBNAIL)
+            self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
             thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type, self.filename)
+                self.path, mime_type, self.filename
+            )

             text = document_parser.get_text()
             date = document_parser.get_date()
             if not date:
-                self._send_progress(90, 100, 'WORKING',
-                                    MESSAGE_PARSE_DATE)
+                self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
                 date = parse_date(self.filename, text)
             archive_path = document_parser.get_archive_path()
@@ -266,7 +262,7 @@ class Consumer(LoggingMixin):
             self._fail(
                 str(e),
                 f"Error while consuming document {self.filename}: {e}",
-                exc_info=True
+                exc_info=True,
             )

         # Prepare the document classifier.
@@ -277,18 +273,14 @@ class Consumer(LoggingMixin):

         classifier = load_classifier()

-        self._send_progress(95, 100, 'WORKING', MESSAGE_SAVE_DOCUMENT)
+        self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
         # now that everything is done, we can start to store the document
         # in the system. This will be a transaction and reasonably fast.
         try:
             with transaction.atomic():

                 # store the document.
-                document = self._store(
-                    text=text,
-                    date=date,
-                    mime_type=mime_type
-                )
+                document = self._store(text=text, date=date, mime_type=mime_type)

                 # If we get here, it was successful. Proceed with post-consume
                 # hooks. If they fail, nothing will get changed.
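Storing the document runs inside transaction.atomic(), so the database row, its relations, and the signal handlers fired inside the block either all commit or all roll back; the file copies happen afterwards, once the record is safe. The shape of that guarantee, reduced to a sketch (Django assumed available; the callable is a placeholder):

    from django.db import transaction

    def store_atomically(build_document):
        # Everything inside the block commits or rolls back as one unit.
        with transaction.atomic():
            return build_document()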
@@ -297,7 +289,7 @@ class Consumer(LoggingMixin):
                     sender=self.__class__,
                     document=document,
                     logging_group=self.logging_group,
-                    classifier=classifier
+                    classifier=classifier,
                 )

                 # After everything is in the database, copy the files into
@@ -306,24 +298,25 @@ class Consumer(LoggingMixin):
                 document.filename = generate_unique_filename(document)
                 create_source_path_directory(document.source_path)

-                self._write(document.storage_type,
-                            self.path, document.source_path)
+                self._write(document.storage_type, self.path, document.source_path)

-                self._write(document.storage_type,
-                            thumbnail, document.thumbnail_path)
+                self._write(
+                    document.storage_type, thumbnail, document.thumbnail_path
+                )

                 if archive_path and os.path.isfile(archive_path):
                     document.archive_filename = generate_unique_filename(
-                        document,
-                        archive_filename=True
+                        document, archive_filename=True
                     )
                     create_source_path_directory(document.archive_path)
-                    self._write(document.storage_type,
-                                archive_path, document.archive_path)
+                    self._write(
+                        document.storage_type, archive_path, document.archive_path
+                    )

-                    with open(archive_path, 'rb') as f:
+                    with open(archive_path, "rb") as f:
                         document.archive_checksum = hashlib.md5(
-                            f.read()).hexdigest()
+                            f.read()
+                        ).hexdigest()

                 # Don't save with the lock active. Saving will cause the file
                 # renaming logic to aquire the lock as well.
@@ -335,8 +328,8 @@ class Consumer(LoggingMixin):

                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037
                 shadow_file = os.path.join(
-                    os.path.dirname(self.path),
-                    "._" + os.path.basename(self.path))
+                    os.path.dirname(self.path), "._" + os.path.basename(self.path)
+                )

                 if os.path.isfile(shadow_file):
                     self.log("debug", "Deleting file {}".format(shadow_file))
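The shadow-file cleanup targets AppleDouble companions: macOS can drop a "._<name>" file next to the consumed document, and the consumer deletes it alongside the original. The path construction in isolation (a sketch):

    import os

    def shadow_path(path: str) -> str:
        # "._<name>" sibling of the consumed file, as built in the diff above.
        return os.path.join(os.path.dirname(path), "._" + os.path.basename(path))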
@@ -345,21 +338,17 @@ class Consumer(LoggingMixin):
         except Exception as e:
             self._fail(
                 str(e),
-                f"The following error occured while consuming "
-                f"{self.filename}: {e}",
-                exc_info=True
+                f"The following error occured while consuming " f"{self.filename}: {e}",
+                exc_info=True,
             )
         finally:
             document_parser.cleanup()

         self.run_post_consume_script(document)

-        self.log(
-            "info",
-            "Document {} consumption finished".format(document)
-        )
+        self.log("info", "Document {} consumption finished".format(document))

-        self._send_progress(100, 100, 'SUCCESS', MESSAGE_FINISHED, document.id)
+        self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)

         return document
@@ -373,8 +362,11 @@ class Consumer(LoggingMixin):

         self.log("debug", "Saving record to database")

-        created = file_info.created or date or timezone.make_aware(
-            datetime.datetime.fromtimestamp(stats.st_mtime))
+        created = (
+            file_info.created
+            or date
+            or timezone.make_aware(datetime.datetime.fromtimestamp(stats.st_mtime))
+        )

         storage_type = Document.STORAGE_TYPE_UNENCRYPTED
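The created timestamp is resolved through a fallback chain that black now wraps in parentheses, one operand per line: metadata parsed from the filename wins, then a date found in the content, then the file's modification time. The same chain without the Django helpers (a sketch; timezone.make_aware is dropped):

    import datetime

    def first_known_date(file_info_created, parsed_date, mtime):
        # Order matters: explicit metadata, then parsed date, then mtime.
        return (
            file_info_created
            or parsed_date
            or datetime.datetime.fromtimestamp(mtime)
        )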
@@ -386,7 +378,7 @@ class Consumer(LoggingMixin):
             checksum=hashlib.md5(f.read()).hexdigest(),
             created=created,
             modified=created,
-            storage_type=storage_type
+            storage_type=storage_type,
         )

         self.apply_overrides(document)
@@ -398,11 +390,13 @@ class Consumer(LoggingMixin):
     def apply_overrides(self, document):
         if self.override_correspondent_id:
             document.correspondent = Correspondent.objects.get(
-                pk=self.override_correspondent_id)
+                pk=self.override_correspondent_id
+            )

         if self.override_document_type_id:
             document.document_type = DocumentType.objects.get(
-                pk=self.override_document_type_id)
+                pk=self.override_document_type_id
+            )

         if self.override_tag_ids:
             for tag_id in self.override_tag_ids:
Reference in New Issue
Block a user