Feature: collate two single-sided multipage scans (#3784)

* Feature: collate two single-sided scans

Some ADFs (automatic document feeders) only support single-sided scans,
making scanning double-sided documents a bit annoying.

This new feature enables Paperless to do most of the work,
by merging two separate scans into a single one, collating
the even and odd numbered pages.

* Documentation: clarify that collation is disabled by default

* Apply suggestions from code review

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>

* Address code review remarks

* Grammar fixes

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Dennis Brakhane
2023-07-24 09:29:04 +02:00
committed by GitHub
parent 9f5d47c320
commit 8c7554e081
11 changed files with 584 additions and 48 deletions

View File

@@ -0,0 +1,46 @@
from pathlib import Path
from subprocess import run
import img2pdf
from django.conf import settings
from PIL import Image
from documents.utils import copy_basic_file_stats
def convert_from_tiff_to_pdf(tiff_path: Path, target_directory: Path) -> Path:
    """
    Write a PDF rendition of the TIFF at tiff_path into target_directory.

    The PDF keeps the TIFF's file name, with the suffix replaced by ".pdf",
    and receives the TIFF's basic file stats (mtime etc.) via
    copy_basic_file_stats.

    If the image mode is "RGBA" or "LA", the configured convert binary is
    first run with "-alpha off" to write a copy into target_directory, and
    that copy is what gets converted to PDF.

    Returns the path of the PDF created.
    """
    with Image.open(tiff_path) as tiff_image:
        needs_alpha_removal = tiff_image.mode in ("RGBA", "LA")

    # Both the optional scratch copy and the PDF are named after the TIFF
    # and live in the target directory.
    image_in_target = target_directory / tiff_path.name

    # By default the original is only read, never modified, so it is safe
    # to use in place.
    source_image = tiff_path
    if needs_alpha_removal:
        # The alpha-free copy is saved into the target (temp) folder rather
        # than next to the original, so as not to trigger a new consume.
        source_image = image_in_target
        strip_alpha_command = [
            settings.CONVERT_BINARY,
            "-alpha",
            "off",
            tiff_path,
            source_image,
        ]
        run(strip_alpha_command)

    pdf_path = image_in_target.with_suffix(".pdf")

    with source_image.open("rb") as img_file, pdf_path.open("wb") as pdf_file:
        pdf_bytes = img2pdf.convert(img_file)
        pdf_file.write(pdf_bytes)

    # Carry over whatever file stats can be copied from the TIFF.
    copy_basic_file_stats(tiff_path, pdf_path)
    return pdf_path