Feature: collate two single-sided multipage scans (#3784)

* Feature: collate two single-sided scans

Some ADF only support single-sided scans, making scanning
double-sided documents a bit annoying.

This new feature enables Paperless to do most of the work,
by merging two seperate scans into a single one, collating
the even and odd numbered pages.

* Documentation: clarify that collation is disabled by default

* Apply suggestions from code review

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>

* Address code review remarks

* Grammar fixes

---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Dennis Brakhane
2023-07-24 09:29:04 +02:00
committed by GitHub
parent e160580c8b
commit ef749f9a29
11 changed files with 584 additions and 48 deletions

View File

@@ -2,13 +2,11 @@ import logging
import tempfile
from dataclasses import dataclass
from pathlib import Path
from subprocess import run
from typing import Dict
from typing import Final
from typing import List
from typing import Optional
import img2pdf
from django.conf import settings
from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
@@ -16,6 +14,7 @@ from pikepdf import Page
from pikepdf import Pdf
from PIL import Image
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import DocumentSource
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
@@ -55,7 +54,7 @@ class BarcodeReader:
self.mime: Final[str] = mime_type
self.pdf_file: Path = self.file
self.barcodes: List[Barcode] = []
self.temp_dir: Optional[Path] = None
self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
@@ -155,34 +154,7 @@ class BarcodeReader:
if self.mime != "image/tiff":
return
with Image.open(self.file) as im:
has_alpha_layer = im.mode in ("RGBA", "LA")
if has_alpha_layer:
# Note the save into the temp folder, so as not to trigger a new
# consume
scratch_image = Path(self.temp_dir.name) / Path(self.file.name)
run(
[
settings.CONVERT_BINARY,
"-alpha",
"off",
self.file,
scratch_image,
],
)
else:
# Not modifying the original, safe to use in place
scratch_image = self.file
self.pdf_file = Path(self.temp_dir.name) / Path(self.file.name).with_suffix(
".pdf",
)
with scratch_image.open("rb") as img_file, self.pdf_file.open("wb") as pdf_file:
pdf_file.write(img2pdf.convert(img_file))
# Copy what file stat is possible
copy_basic_file_stats(self.file, self.pdf_file)
self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
def detect(self) -> None:
"""