mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: support barcode upscaling for better detection of small barcodes (#3655)
This commit is contained in:
parent
b5d04e575e
commit
931f5f9c27
@ -1095,6 +1095,27 @@ barcode.
|
|||||||
|
|
||||||
Defaults to "ASN"
|
Defaults to "ASN"
|
||||||
|
|
||||||
|
`PAPERLESS_CONSUMER_BARCODE_UPSCALE=<float>`
|
||||||
|
|
||||||
|
: Defines the upscale factor used in barcode detection.
|
||||||
|
Improves the detection of small barcodes, e.g. with a value of 1.5, by
|
||||||
|
upscaling the document before the detection process. Upscaling will
|
||||||
|
only take place if the value is greater than 1.0. Otherwise upscaling will
|
||||||
|
not be performed to save resources. Try using in combination with
|
||||||
|
PAPERLESS_CONSUMER_BARCODE_DPI set to a value higher than default.
|
||||||
|
|
||||||
|
Defaults to 0.0
|
||||||
|
|
||||||
|
`PAPERLESS_CONSUMER_BARCODE_DPI=<int>`
|
||||||
|
|
||||||
|
: During barcode detection every page from a PDF document needs
|
||||||
|
to be converted to an image. A dpi value can be specified in the
|
||||||
|
conversion process. Default is 300. If the detection of small barcodes
|
||||||
|
fails, a higher dpi value, e.g. 600, can fix the issue. Try using in
|
||||||
|
combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE set to a value greater than 1.0.
|
||||||
|
|
||||||
|
Defaults to "300"
|
||||||
|
|
||||||
## Binaries
|
## Binaries
|
||||||
|
|
||||||
There are a few external software packages that Paperless expects to
|
There are a few external software packages that Paperless expects to
|
||||||
|
@ -66,6 +66,8 @@
|
|||||||
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
|
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
|
||||||
#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
|
#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
|
||||||
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||||
|
#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
|
||||||
|
#PAPERLESS_CONSUMER_BARCODE_DPI=300
|
||||||
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||||
#PAPERLESS_FILENAME_DATE_ORDER=YMD
|
#PAPERLESS_FILENAME_DATE_ORDER=YMD
|
||||||
|
@ -203,11 +203,21 @@ class BarcodeReader:
|
|||||||
try:
|
try:
|
||||||
pages_from_path = convert_from_path(
|
pages_from_path = convert_from_path(
|
||||||
self.pdf_file,
|
self.pdf_file,
|
||||||
dpi=300,
|
dpi=settings.CONSUMER_BARCODE_DPI,
|
||||||
output_folder=self.temp_dir.name,
|
output_folder=self.temp_dir.name,
|
||||||
)
|
)
|
||||||
|
|
||||||
for current_page_number, page in enumerate(pages_from_path):
|
for current_page_number, page in enumerate(pages_from_path):
|
||||||
|
factor = settings.CONSUMER_BARCODE_UPSCALE
|
||||||
|
if factor > 1.0:
|
||||||
|
logger.debug(
|
||||||
|
f"Upscaling image by {factor} for better barcode detection",
|
||||||
|
)
|
||||||
|
x, y = page.size
|
||||||
|
page = page.resize(
|
||||||
|
(int(round(x * factor)), (int(round(y * factor)))),
|
||||||
|
)
|
||||||
|
|
||||||
for barcode_value in reader(page):
|
for barcode_value in reader(page):
|
||||||
self.barcodes.append(
|
self.barcodes.append(
|
||||||
Barcode(current_page_number, barcode_value),
|
Barcode(current_page_number, barcode_value),
|
||||||
|
Binary file not shown.
@ -906,6 +906,47 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
|
|||||||
input_doc,
|
input_doc,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
|
def test_scan_file_for_qrcode_without_upscale(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A printed and scanned PDF document with a rather small QR code
|
||||||
|
WHEN:
|
||||||
|
- ASN barcode detection is run with default settings
|
||||||
|
- pyzbar is used for detection, as zxing would behave differently, and detect the QR code
|
||||||
|
THEN:
|
||||||
|
- ASN is not detected
|
||||||
|
"""
|
||||||
|
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||||
|
|
||||||
|
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||||
|
reader.detect()
|
||||||
|
self.assertEqual(len(reader.barcodes), 0)
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
|
@override_settings(CONSUMER_BARCODE_DPI=600)
|
||||||
|
@override_settings(CONSUMER_BARCODE_UPSCALE=1.5)
|
||||||
|
def test_scan_file_for_qrcode_with_upscale(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A printed and scanned PDF document with a rather small QR code
|
||||||
|
WHEN:
|
||||||
|
- ASN barcode detection is run with 600dpi and an upscale factor of 1.5 and pyzbar
|
||||||
|
- pyzbar is used for detection, as zxing would behave differently.
|
||||||
|
Upscaling is a workaround for detection problems with pyzbar,
|
||||||
|
when you cannot switch to zxing (aarch64 build problems of zxing)
|
||||||
|
THEN:
|
||||||
|
- ASN 123 is detected
|
||||||
|
"""
|
||||||
|
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||||
|
|
||||||
|
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||||
|
reader.detect()
|
||||||
|
self.assertEqual(len(reader.barcodes), 1)
|
||||||
|
self.assertEqual(reader.asn, 123)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not HAS_ZXING_LIB,
|
not HAS_ZXING_LIB,
|
||||||
|
@ -781,6 +781,16 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
CONSUMER_BARCODE_UPSCALE: Final[float] = float(
|
||||||
|
os.getenv("PAPERLESS_CONSUMER_BARCODE_UPSCALE", 0.0),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
CONSUMER_BARCODE_DPI: Final[str] = int(
|
||||||
|
os.getenv("PAPERLESS_CONSUMER_BARCODE_DPI", 300),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
||||||
|
|
||||||
# The default language that tesseract will attempt to use when parsing
|
# The default language that tesseract will attempt to use when parsing
|
||||||
|
Loading…
x
Reference in New Issue
Block a user