Feature: Allow tagging by putting barcode stickers on documents (#5580)

This commit is contained in:
pkrahmer
2024-02-05 18:38:19 +01:00
committed by GitHub
parent c7e0c32226
commit fb82aa0ee1
5 changed files with 248 additions and 1 deletions

View File

@@ -14,6 +14,7 @@ from PIL import Image
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
from documents.models import Tag
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressStatusOptions
@@ -65,7 +66,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
supported_mimes = {"application/pdf"}
return (
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
settings.CONSUMER_ENABLE_ASN_BARCODE
or settings.CONSUMER_ENABLE_BARCODES
or settings.CONSUMER_ENABLE_TAG_BARCODE
) and self.input_doc.mime_type in supported_mimes
def setup(self):
@@ -90,6 +93,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
# try reading tags from barcodes
if settings.CONSUMER_ENABLE_TAG_BARCODE:
tags = self.tags
if tags is not None and len(tags) > 0:
if self.metadata.tag_ids:
self.metadata.tag_ids += tags
else:
self.metadata.tag_ids = tags
logger.info(f"Found tags in barcode: {tags}")
separator_pages = self.get_separation_pages()
if not separator_pages:
return "No pages to split on!"
@@ -279,6 +292,53 @@ class BarcodePlugin(ConsumeTaskPlugin):
return asn
@property
def tags(self) -> list[int]:
    """
    Search the parsed barcodes for any tags.

    Every barcode value is split on "," and each piece is matched
    (case-insensitively, anchored at the start of the piece) against the
    regular expressions that are the keys of
    settings.CONSUMER_TAG_BARCODE_MAPPING, in the mapping's iteration order.
    The first matching expression wins. Its mapped value is used as the
    re.sub replacement to build the tag name; when the mapped value is
    empty, the raw piece itself is the tag name.

    The tag is looked up by name, ignoring case, and created when it does
    not exist yet.

    Returns the detected tag ids (or empty list). Each id is listed once,
    in the order it was first detected.
    """
    tag_ids: list[int] = []

    # Ensure the barcodes have been read
    self.detect()

    mapping = settings.CONSUMER_TAG_BARCODE_MAPPING

    for barcode in self.barcodes:
        for raw in barcode.value.split(","):
            # Deliberately broad: a broken mapping (e.g. an invalid group
            # reference in the replacement) or a database problem for one
            # barcode must not break the whole import. The failure is logged
            # and the remaining barcodes are still processed.
            try:
                tag_name = None
                for regex, sub in mapping.items():
                    if re.match(regex, raw, flags=re.IGNORECASE):
                        tag_name = (
                            re.sub(regex, sub, raw, flags=re.IGNORECASE)
                            if sub
                            else raw
                        )
                        # Only the first matching expression is applied
                        break

                if not tag_name:
                    # No expression matched, or the substitution left
                    # nothing to use as a name
                    continue

                tag, _ = Tag.objects.get_or_create(
                    name__iexact=tag_name,
                    defaults={"name": tag_name},
                )

                logger.debug(
                    f"Found Tag Barcode '{raw}', substituted "
                    f"to '{tag_name}' and mapped to "
                    f"tag #{tag.pk}.",
                )

                # The same sticker may appear on several pages; report each
                # tag only once
                if tag.pk not in tag_ids:
                    tag_ids.append(tag.pk)

            except Exception as e:
                logger.error(
                    f"Failed to find or create TAG '{raw}' because: {e}",
                )

    return tag_ids
def get_separation_pages(self) -> dict[int, bool]:
"""
Search the parsed barcodes for separators and returns a dict of page

View File

@@ -14,6 +14,7 @@ from documents.barcodes import BarcodePlugin
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DocumentConsumeDelayMixin
from documents.tests.utils import DummyProgressManager
@@ -741,3 +742,125 @@ class TestBarcodeZxing(TestBarcode):
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
class TestAsnBarcodesZxing(TestAsnBarcode):
pass
class TestTagBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
    """
    Tests for assigning tags to a document from the barcodes found on its pages.
    """

    @contextmanager
    def get_reader(self, filepath: Path) -> BarcodePlugin:
        """
        Build a BarcodePlugin for the given file, set it up, hand it to the
        with-block and clean it up again once the block is left.
        """
        reader = BarcodePlugin(
            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
            DocumentMetadataOverrides(),
            DummyProgressManager(filepath.name, None),
            self.dirs.scratch_dir,
            "task-id",
        )
        reader.setup()
        try:
            yield reader
        finally:
            # Clean up even when an assertion inside the with-block fails
            reader.cleanup()

    @override_settings(CONSUMER_ENABLE_TAG_BARCODE=True)
    def test_scan_file_without_matching_barcodes(self):
        """
        GIVEN:
            - PDF containing tag barcodes but none with matching prefix (default "TAG:")
        WHEN:
            - File is scanned for barcodes
        THEN:
            - No TAG has been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=False,
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
    )
    def test_scan_file_with_matching_barcode_but_function_disabled(self):
        """
        GIVEN:
            - PDF containing a tag barcode with matching custom prefix
            - The tag barcode functionality is disabled
        WHEN:
            - File is scanned for barcodes
        THEN:
            - No TAG has been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
    )
    def test_scan_file_for_tag_custom_prefix(self):
        """
        GIVEN:
            - PDF containing a tag barcode with custom prefix
            - The barcode mapping accepts this prefix and removes it from the mapped tag value
            - The created tag is the non-prefixed value
            - A tag id is already present in the metadata overrides
        WHEN:
            - File is scanned for barcodes
        THEN:
            - The TAG is located
            - One TAG has been created
            - The new tag id is appended after the already present one
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.metadata.tag_ids = [99]
            reader.run()
            self.assertEqual(reader.pdf_file, test_file)
            tags = reader.metadata.tag_ids
            self.assertEqual(len(tags), 2)
            self.assertEqual(tags[0], 99)
            self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[1])

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        CONSUMER_TAG_BARCODE_MAPPING={"ASN(.*)": "\\g<1>"},
    )
    def test_scan_file_for_many_custom_tags(self):
        """
        GIVEN:
            - PDF containing multiple tag barcodes with custom prefix
            - The barcode mapping accepts this prefix and removes it from the mapped tag value
            - The created tags are the non-prefixed values
        WHEN:
            - File is scanned for barcodes
        THEN:
            - The TAGs are located
            - Five tags have been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertEqual(len(tags), 5)
            self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[0])
            self.assertEqual(Tag.objects.get(name__iexact="00124").pk, tags[1])
            self.assertEqual(Tag.objects.get(name__iexact="00125").pk, tags[2])
            self.assertEqual(Tag.objects.get(name__iexact="00126").pk, tags[3])
            self.assertEqual(Tag.objects.get(name__iexact="00127").pk, tags[4])

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        # The replacement references group 3 although the pattern only
        # defines one group, which makes the substitution fail
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<3>"},
    )
    def test_scan_file_for_tag_raises_value_error(self):
        """
        GIVEN:
            - Any error occurs during tag barcode processing
        THEN:
            - The processing should be skipped and not break the import
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            # expect error to be caught and logged only
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)

View File

@@ -853,6 +853,19 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
# Switch for reading tags from barcodes found on a document; read from the
# PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE environment variable.
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)
# Mapping of regular expression -> replacement used to turn a barcode value
# into a tag name. Read as a JSON object from
# PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING. The default accepts barcodes
# starting with "TAG:" and keeps only the text after that prefix ("\g<1>"
# once the Python and JSON escaping is resolved).
CONSUMER_TAG_BARCODE_MAPPING: Final[dict[str, str]] = dict(
    json.loads(
        os.getenv(
            "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
            '{"TAG:(.*)": "\\\\g<1>"}',
        ),
    ),
)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)