diff --git a/Pipfile b/Pipfile index 7058b8ff1..b89eff575 100644 --- a/Pipfile +++ b/Pipfile @@ -58,6 +58,7 @@ nltk = "*" pdf2image = "*" flower = "*" bleach = "*" +zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"} # # Packages locked due to issues (try to check if these are fixed in a release every so often) # diff --git a/Pipfile.lock b/Pipfile.lock index b3bfc3ba8..5ab143b34 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "01320f2ef2a561c37d17aaad61a7871b5a379dd1ac97fdaab586936b60dec92e" + "sha256": "8395f25f876a71a7307a55dd542e69a4cdcb3be3be38c4e89ed06ce3d52a5345" }, "pipfile-spec": 6, "requires": {}, @@ -48,7 +48,7 @@ "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15", "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c" ], - "markers": "python_version < '3.11'", + "markers": "python_full_version <= '3.11.2'", "version": "==4.0.2" }, "attrs": { @@ -1877,7 +1877,7 @@ "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" ], - "markers": "python_version < '3.10'", + "markers": "python_version >= '3.7'", "version": "==4.5.0" }, "tzdata": { @@ -2220,6 +2220,29 @@ ], "markers": "python_version >= '3.6'", "version": "==0.20.0" + }, + "zxing-cpp": { + "hashes": [ + "sha256:1b67b221aae15aad9b5609d99c38d57875bc0a4fef864142d7ca37e9ee7880b0", + "sha256:1d665c45029346c70ae3df5dbc36f6335ffe4f275e98dc43772fa32a65844196", + "sha256:214a6a0e49b92fda8d2761c74f5bfd24a677b9bf1d0ef0e083412486af97faa9", + "sha256:54282d0e5c573754049113a0cdbf14cc1c6b986432a367d8a788112afa92a1d5", + "sha256:5ce391f21763f00d5be3431e16d075e263e4b9205c2cf55d708625cb234b1f15", + "sha256:5fd89065f620d6b78281308c6abfb760d95760a1c9b88eb7ac612b52b331bd41", + "sha256:631a0c783ad233c85295e0cf4cd7740f1fe2853124c61b1ef6bcf7eb5d2fa5e6", + "sha256:76caafb8fc1e12c2e5ec33ce4f340a0e15e9a2aabfbfeaec170e8a2b405b8a77", + 
"sha256:8da9c912cca5829eedb2800ce3eaa1b1e52742f536aa9e798be69bf09639f399", + "sha256:95dd06dc559f53c1ca0eb59dbaebd802ebc839937baaf2f8d2b3def3e814c07f", + "sha256:97919f07c62edf1c8e0722fd64893057ce636b7067cf47bd593e98cc7e404d74", + "sha256:9f0c2c03f5df470ef71a7590be5042161e7590da767d4260a6d0d61a3fa80b88", + "sha256:a788551ddf3a6ba1152ff9a0b81d57018a3cc586544087c39d881428745faf1f", + "sha256:ea54fd242f93eea7bf039a68287e5e57fdf77d78e3bd5b4cbb2d289bb3380d63", + "sha256:f0eefdfad91e15e3f5b7ed16d83806a36f96ca482f4b042baa6297784a58b0b3", + "sha256:f70eefa5dc1fd9238087c024ef22f3d99ba79cb932a2c5bc5b0f1e152037722e" + ], + "index": "pypi", + "markers": "platform_machine == 'x86_64'", + "version": "==2.0.0" } }, "develop": { @@ -3112,7 +3135,7 @@ "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" ], - "markers": "python_version < '3.10'", + "markers": "python_version >= '3.7'", "version": "==4.5.0" }, "urllib3": { @@ -3676,7 +3699,7 @@ "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" ], - "markers": "python_version < '3.10'", + "markers": "python_version >= '3.7'", "version": "==4.5.0" }, "urllib3": { diff --git a/docs/configuration.md b/docs/configuration.md index 61b510305..e07071b05 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -902,6 +902,16 @@ file, which are separated by one or multiple barcode pages. Defaults to false. +`PAPERLESS_CONSUMER_BARCODE_SCANNER=` + +: Sets the barcode scanner used for barcode functionality. + + Currently, "PYZBAR" (the default) or "ZXING" can be selected. + If your barcodes/QR codes are not detected + (especially with poor scan quality and/or small codes), try the other scanner. + + ZXING is not available on all platforms. 
+ `PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=` : Whether TIFF image files should be scanned for barcodes. This will diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 3ecf6f96a..1f520c546 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -5,10 +5,12 @@ import tempfile from dataclasses import dataclass from functools import lru_cache from pathlib import Path +from subprocess import run from typing import Dict from typing import List from typing import Optional +import img2pdf import magic from django.conf import settings from pdf2image import convert_from_path @@ -16,8 +18,6 @@ from pdf2image.exceptions import PDFPageCountError from pikepdf import Page from pikepdf import Pdf from PIL import Image -from PIL import ImageSequence -from pyzbar import pyzbar logger = logging.getLogger("paperless.barcodes") @@ -83,18 +83,35 @@ def barcode_reader(image: Image) -> List[str]: Returns a list containing all found barcodes """ barcodes = [] - # Decode the barcode image - detected_barcodes = pyzbar.decode(image) - if detected_barcodes: - # Traverse through all the detected barcodes in image + if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR": + logger.debug("Scanning for barcodes using PYZBAR") + from pyzbar import pyzbar + + # Decode the barcode image + detected_barcodes = pyzbar.decode(image) + + if detected_barcodes: + # Traverse through all the detected barcodes in image + for barcode in detected_barcodes: + if barcode.data: + decoded_barcode = barcode.data.decode("utf-8") + barcodes.append(decoded_barcode) + logger.debug( + f"Barcode of type {str(barcode.type)} found: {decoded_barcode}", + ) + elif settings.CONSUMER_BARCODE_SCANNER == "ZXING": + logger.debug("Scanning for barcodes using ZXING") + import zxingcpp + + detected_barcodes = zxingcpp.read_barcodes(image) for barcode in detected_barcodes: - if barcode.data: - decoded_barcode = barcode.data.decode("utf-8") - barcodes.append(decoded_barcode) + if barcode.text: + 
barcodes.append(barcode.text) logger.debug( - f"Barcode of type {str(barcode.type)} found: {decoded_barcode}", + f"Barcode of type {str(barcode.format)} found: {barcode.text}", ) + return barcodes @@ -125,21 +142,21 @@ def convert_from_tiff_to_pdf(filepath: Path) -> Path: f"Cannot convert mime type {mime_type} from {filepath} to pdf.", ) return None - with Image.open(filepath) as image: - images = [] - for i, page in enumerate(ImageSequence.Iterator(image)): - page = page.convert("RGB") - images.append(page) - try: - if len(images) == 1: - images[0].save(newpath) - else: - images[0].save(newpath, save_all=True, append_images=images[1:]) - except OSError as e: # pragma: no cover - logger.warning( - f"Could not save the file as pdf. Error: {str(e)}", - ) - return None + with Image.open(filepath) as im: + has_alpha_layer = im.mode in ("RGBA", "LA") + if has_alpha_layer: + run( + [ + settings.CONVERT_BINARY, + "-alpha", + "off", + filepath, + filepath, + ], + ) + with filepath.open("rb") as img_file: + with newpath.open("wb") as pdf_file: + pdf_file.write(img2pdf.convert(img_file)) return newpath diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index 02ed26308..a1e08c5cf 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -3,6 +3,7 @@ import shutil from pathlib import Path from unittest import mock +import pytest from django.conf import settings from django.test import override_settings from django.test import TestCase @@ -13,7 +14,15 @@ from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin from PIL import Image +try: + import zxingcpp + ZXING_AVAILIBLE = True +except ImportError: + ZXING_AVAILIBLE = False + + +@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): SAMPLE_DIR = Path(__file__).parent / "samples" @@ -1030,3 +1039,21 @@ class 
TestAsnBarcodes(DirectoriesMixin, TestCase): tasks.consume_file, dst, ) + + +@pytest.mark.skipif( + not ZXING_AVAILIBLE, + reason="No zxingcpp", +) +@override_settings(CONSUMER_BARCODE_SCANNER="ZXING") +class TestBarcodeZxing(TestBarcode): + pass + + +@pytest.mark.skipif( + not ZXING_AVAILIBLE, + reason="No zxingcpp", +) +@override_settings(CONSUMER_BARCODE_SCANNER="ZXING") +class TestAsnBarcodesZxing(TestAsnBarcodes): + pass diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 658ec9d31..3da37e264 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -166,4 +166,17 @@ def settings_values_check(app_configs, **kwargs): ) return msgs - return _ocrmypdf_settings_check() + _timezone_validate() + def _barcode_scanner_validate(): + """ + Validates the barcode scanner type + """ + msgs = [] + if settings.CONSUMER_BARCODE_SCANNER not in ["PYZBAR", "ZXING"]: + msgs.append( + Error(f'Invalid Barcode Scanner "{settings.CONSUMER_BARCODE_SCANNER}"'), + ) + return msgs + + return ( + _ocrmypdf_settings_check() + _timezone_validate() + _barcode_scanner_validate() + ) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index d052b4cac..b6ee75fda 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -732,6 +732,12 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv( "PATCHT", ) +consumer_barcode_scanner_tmp: Final[str] = os.getenv( + "PAPERLESS_CONSUMER_BARCODE_SCANNER", + "PYZBAR", +) +CONSUMER_BARCODE_SCANNER = consumer_barcode_scanner_tmp.upper() + CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean( "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE", ) diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py index 3740d2f8a..7c233de23 100644 --- a/src/paperless/tests/test_checks.py +++ b/src/paperless/tests/test_checks.py @@ -176,3 +176,26 @@ class TestSettingsChecks(DirectoriesMixin, TestCase): msg = msgs[0] self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg) + + 
@override_settings(CONSUMER_BARCODE_SCANNER="Invalid") + def test_barcode_scanner_invalid(self): + msgs = settings_values_check(None) + self.assertEqual(len(msgs), 1) + + msg = msgs[0] + + self.assertIn('Invalid Barcode Scanner "Invalid"', msg.msg) + + @override_settings(CONSUMER_BARCODE_SCANNER="") + def test_barcode_scanner_empty(self): + msgs = settings_values_check(None) + self.assertEqual(len(msgs), 1) + + msg = msgs[0] + + self.assertIn('Invalid Barcode Scanner ""', msg.msg) + + @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") + def test_barcode_scanner_valid(self): + msgs = settings_values_check(None) + self.assertEqual(len(msgs), 0)