mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Rescales images extracted from PDFs so zbar can better find the barcodes in them
This commit is contained in:
parent
a0c1c48dca
commit
4195d5746f
@ -4,6 +4,7 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from math import ceil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -172,6 +173,24 @@ def scan_file_for_barcodes(
|
|||||||
# raise an exception, triggering fallback
|
# raise an exception, triggering fallback
|
||||||
pillow_img = pdfimage.as_pil_image()
|
pillow_img = pdfimage.as_pil_image()
|
||||||
|
|
||||||
|
# Scale the image down
|
||||||
|
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
|
||||||
|
# TL;DR: zbar has trouble detecting barcodes in larger images
|
||||||
|
width, height = pillow_img.size
|
||||||
|
if width > 512:
|
||||||
|
scaler = ceil(width / 512)
|
||||||
|
new_width = int(width / scaler)
|
||||||
|
new_height = int(height / scaler)
|
||||||
|
pillow_img = pillow_img.resize((new_width, new_height))
|
||||||
|
|
||||||
|
width, height = pillow_img.size
|
||||||
|
|
||||||
|
if height > 1024:
|
||||||
|
scaler = ceil(height / 1024)
|
||||||
|
new_width = int(width / scaler)
|
||||||
|
new_height = int(height / scaler)
|
||||||
|
pillow_img = pillow_img.resize((new_width, new_height))
|
||||||
|
|
||||||
for barcode_value in barcode_reader(pillow_img):
|
for barcode_value in barcode_reader(pillow_img):
|
||||||
detected_barcodes.append(Barcode(page_num, barcode_value))
|
detected_barcodes.append(Barcode(page_num, barcode_value))
|
||||||
|
|
||||||
@ -234,12 +253,12 @@ def get_separating_barcodes(barcodes: List[Barcode]) -> List[int]:
|
|||||||
"""
|
"""
|
||||||
Search the parsed barcodes for separators
|
Search the parsed barcodes for separators
|
||||||
and returns a list of page numbers, which
|
and returns a list of page numbers, which
|
||||||
separate the file into new files
|
separate the file into new files.
|
||||||
"""
|
"""
|
||||||
# filter all barcodes for the separator string
|
# filter all barcodes for the separator string
|
||||||
# get the page numbers of the separating barcodes
|
# get the page numbers of the separating barcodes
|
||||||
|
|
||||||
return [bc.page for bc in barcodes if bc.is_separator]
|
return list({bc.page for bc in barcodes if bc.is_separator})
|
||||||
|
|
||||||
|
|
||||||
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
||||||
@ -266,7 +285,7 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
|
|||||||
try:
|
try:
|
||||||
asn = int(asn_text)
|
asn = int(asn_text)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logger.warn(f"Failed to parse ASN number because: {e}")
|
logger.warning(f"Failed to parse ASN number because: {e}")
|
||||||
|
|
||||||
return asn
|
return asn
|
||||||
|
|
||||||
|
BIN
src/documents/tests/samples/barcodes/many-qr-codes.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/many-qr-codes.pdf
Normal file
Binary file not shown.
@ -447,6 +447,31 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||||
self.assertListEqual(separator_page_numbers, [])
|
self.assertListEqual(separator_page_numbers, [])
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
|
||||||
|
def test_scan_file_for_separating_qr_barcodes(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Input PDF with certain QR codes that aren't detected at current size
|
||||||
|
WHEN:
|
||||||
|
- The input file is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- QR codes are detected
|
||||||
|
"""
|
||||||
|
test_file = os.path.join(
|
||||||
|
self.BARCODE_SAMPLE_DIR,
|
||||||
|
"many-qr-codes.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
doc_barcode_info = barcodes.scan_file_for_barcodes(
|
||||||
|
test_file,
|
||||||
|
)
|
||||||
|
separator_page_numbers = barcodes.get_separating_barcodes(
|
||||||
|
doc_barcode_info.barcodes,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertGreater(len(doc_barcode_info.barcodes), 0)
|
||||||
|
self.assertListEqual(separator_page_numbers, [1])
|
||||||
|
|
||||||
def test_separate_pages(self):
|
def test_separate_pages(self):
|
||||||
test_file = os.path.join(
|
test_file = os.path.join(
|
||||||
self.BARCODE_SAMPLE_DIR,
|
self.BARCODE_SAMPLE_DIR,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user