Refactor: performance and storage optimization of barcode scanning (#7646)

---------

Co-authored-by: Lukas Metzger <1814751+loewexy@users.noreply.github.com>
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
Lukas Metzger
2024-09-08 01:11:36 +02:00
committed by GitHub
parent e98d52830f
commit cc25cbc026
3 changed files with 53 additions and 7 deletions

View File

@@ -7,8 +7,8 @@ from typing import Optional
from django.conf import settings
from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
from pikepdf import Page
from pikepdf import PasswordError
from pikepdf import Pdf
from PIL import Image
@@ -231,13 +231,41 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.debug("Scanning for barcodes using ZXING")
try:
pages_from_path = convert_from_path(
self.pdf_file,
dpi=settings.CONSUMER_BARCODE_DPI,
output_folder=self.temp_dir.name,
# Read number of pages from pdf
with Pdf.open(self.pdf_file) as pdf:
num_of_pages = len(pdf.pages)
logger.debug(f"PDF has {num_of_pages} pages")
# Get limit from configuration
barcode_max_pages = (
num_of_pages
if settings.CONSUMER_BARCODE_MAX_PAGES == 0
else settings.CONSUMER_BARCODE_MAX_PAGES
)
for current_page_number, page in enumerate(pages_from_path):
if barcode_max_pages < num_of_pages: # pragma: no cover
logger.debug(
f"Barcodes detection will be limited to the first {barcode_max_pages} pages",
)
# Loop al page
for current_page_number in range(min(num_of_pages, barcode_max_pages)):
logger.debug(f"Processing page {current_page_number}")
# Convert page to image
page = convert_from_path(
self.pdf_file,
dpi=settings.CONSUMER_BARCODE_DPI,
output_folder=self.temp_dir.name,
first_page=current_page_number + 1,
last_page=current_page_number + 1,
)[0]
# Remember filename, since it is lost by upscaling
page_filepath = Path(page.filename)
logger.debug(f"Image is at {page_filepath}")
# Upscale image if configured
factor = settings.CONSUMER_BARCODE_UPSCALE
if factor > 1.0:
logger.debug(
@@ -248,14 +276,18 @@ class BarcodePlugin(ConsumeTaskPlugin):
(int(round(x * factor)), (int(round(y * factor)))),
)
# Detect barcodes
for barcode_value in reader(page):
self.barcodes.append(
Barcode(current_page_number, barcode_value),
)
# Delete temporary image file
page_filepath.unlink()
# Password protected files can't be checked
# This is the exception raised for those
except PDFPageCountError as e:
except PasswordError as e:
logger.warning(
f"File is likely password protected, not checking for barcodes: {e}",
)

View File

@@ -925,6 +925,11 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
0,
)
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)