Feature: collate two single-sided multipage scans (#3784)

* Feature: collate two single-sided scans Some ADFs only support single-sided scans, making scanning double-sided documents a bit annoying. This new feature enables Paperless to do most of the work by merging two separate scans into a single one, collating the even- and odd-numbered pages. * Documentation: clarify that collation is disabled by default * Apply suggestions from code review Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com> * Address code review remarks * Grammar fixes --------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
2025-12-20 01:45:58 -06:00 · 2023-07-24 09:29:04 +02:00
parent e160580c8b
commit ef749f9a29
11 changed files with 584 additions and 48 deletions
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -2,13 +2,11 @@ import logging
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
-from subprocess import run
 from typing import Dict
 from typing import Final
 from typing import List
 from typing import Optional

-import img2pdf
 from django.conf import settings
 from pdf2image import convert_from_path
 from pdf2image.exceptions import PDFPageCountError
@@ -16,6 +14,7 @@ from pikepdf import Page
 from pikepdf import Pdf
 from PIL import Image

+from documents.converters import convert_from_tiff_to_pdf
 from documents.data_models import DocumentSource
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
@@ -55,7 +54,7 @@ class BarcodeReader:
        self.mime: Final[str] = mime_type
        self.pdf_file: Path = self.file
        self.barcodes: List[Barcode] = []
-        self.temp_dir: Optional[Path] = None
+        self.temp_dir: Optional[tempfile.TemporaryDirectory] = None

        if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
            self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
@@ -155,34 +154,7 @@ class BarcodeReader:
        if self.mime != "image/tiff":
            return

-        with Image.open(self.file) as im:
-            has_alpha_layer = im.mode in ("RGBA", "LA")
-        if has_alpha_layer:
-            # Note the save into the temp folder, so as not to trigger a new
-            # consume
-            scratch_image = Path(self.temp_dir.name) / Path(self.file.name)
-            run(
-                [
-                    settings.CONVERT_BINARY,
-                    "-alpha",
-                    "off",
-                    self.file,
-                    scratch_image,
-                ],
-            )
-        else:
-            # Not modifying the original, safe to use in place
-            scratch_image = self.file
-
-        self.pdf_file = Path(self.temp_dir.name) / Path(self.file.name).with_suffix(
-            ".pdf",
-        )
-
-        with scratch_image.open("rb") as img_file, self.pdf_file.open("wb") as pdf_file:
-            pdf_file.write(img2pdf.convert(img_file))
-
-        # Copy what file stat is possible
-        copy_basic_file_stats(self.file, self.pdf_file)
+        self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))

    def detect(self) -> None:
        """