Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-08-12 00:19:48 +00:00)
Feature: PDF actions - merge, split & rotate (#6094)
@@ -1,15 +1,27 @@
import hashlib
import itertools
import logging
import os
from typing import Optional

from celery import chord
from django.conf import settings
from django.db.models import Q

from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.permissions import set_permissions_for_object
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_archive_file

logger = logging.getLogger("paperless.bulk_edit")


def set_correspondent(doc_ids, correspondent):
    if correspondent:
@@ -146,3 +158,137 @@ def set_permissions(doc_ids, set_permissions, owner=None, merge=False):
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"


def rotate(doc_ids: list[int], degrees: int):
    logger.info(
        f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    rotate_tasks = []
    for doc in qs:
        if doc.mime_type != "application/pdf":
            logger.warning(
                f"Document {doc.id} is not a PDF, skipping rotation.",
            )
            continue
        try:
            with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
                for page in pdf.pages:
                    page.rotate(degrees, relative=True)
                pdf.save()
                doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
                doc.save()
                rotate_tasks.append(
                    update_document_archive_file.s(
                        document_id=doc.id,
                    ),
                )
                logger.info(
                    f"Rotated document {doc.id} by {degrees} degrees",
                )
                affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(f"Error rotating document {doc.id}: {e}")

    if len(affected_docs) > 0:
        bulk_update_task = bulk_update_documents.s(document_ids=affected_docs)
        chord(header=rotate_tasks, body=bulk_update_task).delay()

    return "OK"
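
The rotate task above fans out one update_document_archive_file task per rotated document and uses a Celery chord so that bulk_update_documents only runs once every archive regeneration has finished. A minimal, self-contained sketch of that pattern follows; the task names and the in-memory broker are illustrative, not part of paperless-ngx:

    # sketch_chord.py — illustrates chord(header, body): the body task runs once
    # all header tasks complete and receives their results as its first argument.
    from celery import Celery, chord

    app = Celery("sketch", broker="memory://", backend="cache+memory://")

    @app.task
    def refresh_archive(document_id):
        # stand-in for update_document_archive_file
        return document_id

    @app.task
    def reindex(results, document_ids):
        # stand-in for bulk_update_documents; `results` holds the header outputs
        return f"reindexed {document_ids} after {results}"

    def queue_refresh(document_ids):
        header = [refresh_archive.s(doc_id) for doc_id in document_ids]
        body = reindex.s(document_ids=document_ids)
        chord(header=header, body=body).delay()

Because the body is built with .s() here, as bulk_update_documents.s(...) is in the diff, the header results are prepended to the body's arguments when it runs.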

def merge(doc_ids: list[int], metadata_document_id: Optional[int] = None):
    logger.info(
        f"Attempting to merge {len(doc_ids)} documents into a single document.",
    )
    qs = Document.objects.filter(id__in=doc_ids)
    affected_docs = []
    import pikepdf

    merged_pdf = pikepdf.new()
    version = merged_pdf.pdf_version
    # use doc_ids to preserve order
    for doc_id in doc_ids:
        doc = qs.get(id=doc_id)
        try:
            with pikepdf.open(str(doc.source_path)) as pdf:
                version = max(version, pdf.pdf_version)
                merged_pdf.pages.extend(pdf.pages)
                affected_docs.append(doc.id)
        except Exception as e:
            logger.exception(
                f"Error merging document {doc.id}, it will not be included in the merge: {e}",
            )
    if len(affected_docs) == 0:
        logger.warning("No documents were merged")
        return "OK"

    filepath = os.path.join(
        settings.SCRATCH_DIR,
        f"{'_'.join([str(doc_id) for doc_id in doc_ids])[:100]}_merged.pdf",
    )
    merged_pdf.remove_unreferenced_resources()
    merged_pdf.save(filepath, min_version=version)
    merged_pdf.close()

    if metadata_document_id:
        metadata_document = qs.get(id=metadata_document_id)
        if metadata_document is not None:
            overrides = DocumentMetadataOverrides.from_document(metadata_document)
            overrides.title = metadata_document.title + " (merged)"
    else:
        overrides = DocumentMetadataOverrides()

    logger.info("Adding merged document to the task queue.")
    consume_file.delay(
        ConsumableDocument(
            source=DocumentSource.ConsumeFolder,
            original_file=filepath,
        ),
        overrides,
    )

    return "OK"
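
The merged PDF is written to the scratch directory and then handed back to the regular consumption pipeline via consume_file, optionally copying metadata from one of the source documents. The pikepdf part of that flow, reduced to a standalone sketch (file names are placeholders):

    # sketch_merge.py — concatenate several PDFs into one with pikepdf
    import pikepdf

    def merge_pdfs(paths, out_path):
        merged = pikepdf.new()
        version = merged.pdf_version
        for path in paths:
            with pikepdf.open(path) as pdf:
                # keep the highest PDF version seen among the inputs
                version = max(version, pdf.pdf_version)
                merged.pages.extend(pdf.pages)
        merged.remove_unreferenced_resources()
        merged.save(out_path, min_version=version)
        merged.close()

    merge_pdfs(["a.pdf", "b.pdf"], "merged.pdf")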

def split(doc_ids: list[int], pages: list[list[int]]):
    logger.info(
        f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
    )
    doc = Document.objects.get(id=doc_ids[0])
    import pikepdf

    try:
        with pikepdf.open(doc.source_path) as pdf:
            for idx, split_doc in enumerate(pages):
                dst = pikepdf.new()
                for page in split_doc:
                    dst.pages.append(pdf.pages[page - 1])
                filepath = os.path.join(
                    settings.SCRATCH_DIR,
                    f"{doc.id}_{split_doc[0]}-{split_doc[-1]}.pdf",
                )
                dst.remove_unreferenced_resources()
                dst.save(filepath)
                dst.close()

                overrides = DocumentMetadataOverrides().from_document(doc)
                overrides.title = f"{doc.title} (split {idx + 1})"
                logger.info(
                    f"Adding split document with pages {split_doc} to the task queue.",
                )
                consume_file.delay(
                    ConsumableDocument(
                        source=DocumentSource.ConsumeFolder,
                        original_file=filepath,
                    ),
                    overrides,
                )
    except Exception as e:
        logger.exception(f"Error splitting document {doc.id}: {e}")

    return "OK"
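
split takes a single source document plus a list of 1-based page-number lists, one per output document; each chunk is saved to the scratch directory and queued for consumption with a "(split N)" title suffix. The same page extraction, reduced to a standalone pikepdf sketch (paths are placeholders):

    # sketch_split.py — copy selected pages of one PDF into new files
    import pikepdf

    def split_pdf(src_path, pages, out_prefix):
        with pikepdf.open(src_path) as pdf:
            for idx, page_numbers in enumerate(pages, start=1):
                dst = pikepdf.new()
                for page_number in page_numbers:
                    # pikepdf pages are 0-indexed, hence the -1
                    dst.pages.append(pdf.pages[page_number - 1])
                dst.remove_unreferenced_resources()
                dst.save(f"{out_prefix}_{idx}.pdf")
                dst.close()

    # e.g. pages 1-2 into one file and pages 3-5 into another
    split_pdf("scan.pdf", [[1, 2], [3, 4, 5]], "scan_split")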