mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Refactor: performance and storage optimization of barcode scanning (#7646)
--------- Co-authored-by: Lukas Metzger <1814751+loewexy@users.noreply.github.com> Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
parent
e98d52830f
commit
cc25cbc026
@ -1289,6 +1289,15 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
|
||||
|
||||
Defaults to "300"
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_BARCODE_MAX_PAGES=<int>`](#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES) {#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES}
|
||||
|
||||
: Because barcode detection is a computationally-intensive operation, this setting
|
||||
limits barcode detection to the configured number of leading pages. If your scanner has
|
||||
a limit on the number of pages that can be scanned, it would be sensible to set this
|
||||
as the limit here.
|
||||
|
||||
Defaults to "0", allowing all pages to be checked for barcodes.
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
||||
|
||||
: Enables the detection of barcodes in the scanned document and
|
||||
|
@ -7,8 +7,8 @@ from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from pdf2image import convert_from_path
|
||||
from pdf2image.exceptions import PDFPageCountError
|
||||
from pikepdf import Page
|
||||
from pikepdf import PasswordError
|
||||
from pikepdf import Pdf
|
||||
from PIL import Image
|
||||
|
||||
@ -231,13 +231,41 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
logger.debug("Scanning for barcodes using ZXING")
|
||||
|
||||
try:
|
||||
pages_from_path = convert_from_path(
|
||||
self.pdf_file,
|
||||
dpi=settings.CONSUMER_BARCODE_DPI,
|
||||
output_folder=self.temp_dir.name,
|
||||
# Read number of pages from pdf
|
||||
with Pdf.open(self.pdf_file) as pdf:
|
||||
num_of_pages = len(pdf.pages)
|
||||
logger.debug(f"PDF has {num_of_pages} pages")
|
||||
|
||||
# Get limit from configuration
|
||||
barcode_max_pages = (
|
||||
num_of_pages
|
||||
if settings.CONSUMER_BARCODE_MAX_PAGES == 0
|
||||
else settings.CONSUMER_BARCODE_MAX_PAGES
|
||||
)
|
||||
|
||||
for current_page_number, page in enumerate(pages_from_path):
|
||||
if barcode_max_pages < num_of_pages: # pragma: no cover
|
||||
logger.debug(
|
||||
f"Barcodes detection will be limited to the first {barcode_max_pages} pages",
|
||||
)
|
||||
|
||||
# Loop over all pages
|
||||
for current_page_number in range(min(num_of_pages, barcode_max_pages)):
|
||||
logger.debug(f"Processing page {current_page_number}")
|
||||
|
||||
# Convert page to image
|
||||
page = convert_from_path(
|
||||
self.pdf_file,
|
||||
dpi=settings.CONSUMER_BARCODE_DPI,
|
||||
output_folder=self.temp_dir.name,
|
||||
first_page=current_page_number + 1,
|
||||
last_page=current_page_number + 1,
|
||||
)[0]
|
||||
|
||||
# Remember filename, since it is lost by upscaling
|
||||
page_filepath = Path(page.filename)
|
||||
logger.debug(f"Image is at {page_filepath}")
|
||||
|
||||
# Upscale image if configured
|
||||
factor = settings.CONSUMER_BARCODE_UPSCALE
|
||||
if factor > 1.0:
|
||||
logger.debug(
|
||||
@ -248,14 +276,18 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
(int(round(x * factor)), (int(round(y * factor)))),
|
||||
)
|
||||
|
||||
# Detect barcodes
|
||||
for barcode_value in reader(page):
|
||||
self.barcodes.append(
|
||||
Barcode(current_page_number, barcode_value),
|
||||
)
|
||||
|
||||
# Delete temporary image file
|
||||
page_filepath.unlink()
|
||||
|
||||
# Password protected files can't be checked
|
||||
# This is the exception raised for those
|
||||
except PDFPageCountError as e:
|
||||
except PasswordError as e:
|
||||
logger.warning(
|
||||
f"File is likely password protected, not checking for barcodes: {e}",
|
||||
)
|
||||
|
@ -925,6 +925,11 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
||||
|
||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
||||
|
||||
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
|
||||
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
|
||||
0,
|
||||
)
|
||||
|
||||
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
||||
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user