paperless-ngx/src/documents/converters.py
Dennis Brakhane 8c7554e081
Feature: collate two single-sided multipage scans (#3784)
* Feature: collate two single-sided scans

Some ADF only support single-sided scans, making scanning
double-sided documents a bit annoying.

This new feature enables Paperless to do most of the work,
by merging two separate scans into a single one, collating
the even and odd numbered pages.

* Documentation: clarify that collation is disabled by default

* Apply suggestions from code review

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>

* Address code review remarks

* Grammar fixes

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2023-07-24 00:29:04 -07:00

47 lines
1.3 KiB
Python

from pathlib import Path
from subprocess import run
import img2pdf
from django.conf import settings
from PIL import Image
from documents.utils import copy_basic_file_stats
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
    """
    Converts a TIFF file into a PDF file.

    The PDF is created in the given target_directory and shares the name of
    the original TIFF file (with a ".pdf" suffix). File stats of the TIFF are
    carried over to the PDF via copy_basic_file_stats.

    If the TIFF has an alpha channel (mode "RGBA" or "LA"), a copy with the
    alpha channel turned off is first written into target_directory using the
    configured convert binary, and that copy is what gets converted.

    Args:
        tiff_path: Path of the TIFF file to convert.
        target_directory: Directory that receives the PDF (and the
            intermediate alpha-free image, when one is needed).

    Returns:
        The path of the PDF created.

    Raises:
        subprocess.CalledProcessError: If the convert binary exits with a
            non-zero status while removing the alpha channel.
    """
    with Image.open(tiff_path) as im:
        has_alpha_layer = im.mode in ("RGBA", "LA")

    if has_alpha_layer:
        # Note the save into the temp folder, so as not to trigger a new
        # consume
        scratch_image = target_directory / tiff_path.name
        # check=True: fail right here if the alpha removal did not work,
        # instead of failing later on a missing or broken scratch image.
        run(
            [
                settings.CONVERT_BINARY,
                "-alpha",
                "off",
                tiff_path,
                scratch_image,
            ],
            check=True,
        )
    else:
        # Not modifying the original, safe to use in place
        scratch_image = tiff_path

    pdf_path = (target_directory / tiff_path.name).with_suffix(".pdf")

    # Convert fully in memory before touching pdf_path, so a failed
    # conversion does not leave an empty PDF behind in target_directory.
    with scratch_image.open("rb") as img_file:
        pdf_bytes = img2pdf.convert(img_file)
    pdf_path.write_bytes(pdf_bytes)

    # Copy what file stat is possible
    copy_basic_file_stats(tiff_path, pdf_path)
    return pdf_path