mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Allow tagging by putting barcode stickers on documents (#5580)
This commit is contained in:
parent
c7e0c32226
commit
fb82aa0ee1
@ -1173,6 +1173,55 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
|
||||
|
||||
Defaults to "300"
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
||||
|
||||
: Enables the detection of barcodes in the scanned document and
|
||||
assigns or creates tags if a properly formatted barcode is detected.
|
||||
|
||||
The barcode must match one of the (configurable) regular expressions.
|
||||
If the barcode text contains ',' (comma), it is split into multiple
|
||||
barcodes which are individually processed for tagging.
|
||||
|
||||
Matching is case insensitive.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING=<json dict>`](#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING) {#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING}
|
||||
|
||||
: Defines a dictionary of filter regex and substitute expressions.
|
||||
|
||||
Syntax: {"<regex>": "<substitute>" [,...]}
|
||||
|
||||
A barcode is considered for tagging if the barcode text matches
|
||||
at least one of the provided <regex> patterns.
|
||||
|
||||
If a match is found, the <substitute> rule is applied. This allows very
|
||||
versatile reformatting and mapping of barcode patterns to tag values.
|
||||
|
||||
If a tag is not found it will be created.
|
||||
|
||||
Defaults to:
|
||||
|
||||
{"TAG:(.*)": "\\g<1>"} which defines
|
||||
- a regex TAG:(.*) which includes barcodes beginning with TAG:
|
||||
followed by any text that gets stored into match group #1 and
|
||||
- a substitute \\g<1> that replaces the original barcode text
|
||||
by the content in match group #1.
|
||||
Consequently, the tag is the barcode text without its TAG: prefix.
|
||||
|
||||
More examples:
|
||||
|
||||
{"ASN12.*": "JOHN", "ASN13.*": "SMITH"} for example maps
|
||||
- ASN12nnnn barcodes to the tag JOHN and
|
||||
- ASN13nnnn barcodes to the tag SMITH.
|
||||
|
||||
{"T-J": "JOHN", "T-S": "SMITH", "T-D": "DOE"} directly maps
|
||||
- T-J barcodes to the tag JOHN,
|
||||
- T-S barcodes to the tag SMITH and
|
||||
- T-D barcodes to the tag DOE.
|
||||
|
||||
Please refer to the Python regex documentation for more information.
|
||||
|
||||
## Audit Trail
|
||||
|
||||
#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED}
|
||||
|
@ -68,6 +68,8 @@
|
||||
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||
#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
|
||||
#PAPERLESS_CONSUMER_BARCODE_DPI=300
|
||||
#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=false
|
||||
#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING={"TAG:(.*)": "\\g<1>"}
|
||||
#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED=false
|
||||
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME=double-sided
|
||||
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT=false
|
||||
|
@ -14,6 +14,7 @@ from PIL import Image
|
||||
|
||||
from documents.converters import convert_from_tiff_to_pdf
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.models import Tag
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
@ -65,7 +66,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
supported_mimes = {"application/pdf"}
|
||||
|
||||
return (
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE
|
||||
or settings.CONSUMER_ENABLE_BARCODES
|
||||
or settings.CONSUMER_ENABLE_TAG_BARCODE
|
||||
) and self.input_doc.mime_type in supported_mimes
|
||||
|
||||
def setup(self):
|
||||
@ -90,6 +93,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||
self.metadata.asn = located_asn
|
||||
|
||||
# try reading tags from barcodes
|
||||
if settings.CONSUMER_ENABLE_TAG_BARCODE:
|
||||
tags = self.tags
|
||||
if tags is not None and len(tags) > 0:
|
||||
if self.metadata.tag_ids:
|
||||
self.metadata.tag_ids += tags
|
||||
else:
|
||||
self.metadata.tag_ids = tags
|
||||
logger.info(f"Found tags in barcode: {tags}")
|
||||
|
||||
separator_pages = self.get_separation_pages()
|
||||
if not separator_pages:
|
||||
return "No pages to split on!"
|
||||
@ -279,6 +292,53 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
|
||||
return asn
|
||||
|
||||
@property
def tags(self) -> Optional[list[int]]:
    """
    Search the parsed barcodes for any tags.

    Each barcode value is split on "," and every piece is checked against
    the regular expressions in settings.CONSUMER_TAG_BARCODE_MAPPING
    (case-insensitively, anchored at the start of the piece). The first
    matching expression wins; its substitute is applied to build the tag
    name, or the raw piece is used if the substitute is empty. The tag is
    then looked up by case-insensitive name and created if missing.

    Returns the detected tag ids (or empty list). Each id is listed only
    once, in order of first appearance, even if several barcodes map to
    the same tag.

    Any error while handling a single piece is logged and that piece is
    skipped, so a bad barcode or mapping never aborts the consumption.
    """
    tag_ids: list[int] = []

    # Ensure the barcodes have been read
    self.detect()

    mapping = settings.CONSUMER_TAG_BARCODE_MAPPING

    for barcode in self.barcodes:
        for raw in barcode.value.split(","):
            try:
                tag_name = None
                for regex, sub in mapping.items():
                    if re.match(regex, raw, flags=re.IGNORECASE):
                        tag_name = (
                            re.sub(regex, sub, raw, flags=re.IGNORECASE)
                            if sub
                            else raw
                        )
                        # Only the first matching expression is applied
                        break

                if not tag_name:
                    # No expression matched, or the substitution was empty
                    continue

                tag, _ = Tag.objects.get_or_create(
                    name__iexact=tag_name,
                    defaults={"name": tag_name},
                )

                logger.debug(
                    f"Found Tag Barcode '{raw}', substituted "
                    f"to '{tag}' and mapped to "
                    f"tag #{tag.pk}.",
                )

                # The same sticker may appear on several pages; keep each
                # tag id only once while preserving discovery order
                if tag.pk not in tag_ids:
                    tag_ids.append(tag.pk)

            except Exception as e:
                # Deliberately broad: tag detection is best-effort only
                logger.error(
                    f"Failed to find or create TAG '{raw}' because: {e}",
                )

    return tag_ids
|
||||
|
||||
def get_separation_pages(self) -> dict[int, bool]:
|
||||
"""
|
||||
Search the parsed barcodes for separators and returns a dict of page
|
||||
|
@ -14,6 +14,7 @@ from documents.barcodes import BarcodePlugin
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.models import Tag
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||
from documents.tests.utils import DummyProgressManager
|
||||
@ -741,3 +742,125 @@ class TestBarcodeZxing(TestBarcode):
|
||||
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
|
||||
class TestAsnBarcodesZxing(TestAsnBarcode):
|
||||
pass
|
||||
|
||||
|
||||
class TestTagBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
    """
    Tests for assigning (and creating) tags from barcodes found in a document.
    """

    @contextmanager
    def get_reader(self, filepath: Path) -> BarcodePlugin:
        """
        Yield a set-up BarcodePlugin for the given file and clean it up
        once the with-block is left.
        """
        reader = BarcodePlugin(
            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
            DocumentMetadataOverrides(),
            DummyProgressManager(filepath.name, None),
            self.dirs.scratch_dir,
            "task-id",
        )
        reader.setup()
        try:
            yield reader
        finally:
            # Clean up even if the test body raised (e.g. a failed assertion)
            reader.cleanup()

    @override_settings(CONSUMER_ENABLE_TAG_BARCODE=True)
    def test_scan_file_without_matching_barcodes(self):
        """
        GIVEN:
            - PDF containing tag barcodes but none with matching prefix (default "TAG:")
        WHEN:
            - File is scanned for barcodes
        THEN:
            - No TAG has been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=False,
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
    )
    def test_scan_file_with_matching_barcode_but_function_disabled(self):
        """
        GIVEN:
            - PDF containing a tag barcode with matching custom prefix
            - The tag barcode functionality is disabled
        WHEN:
            - File is scanned for barcodes
        THEN:
            - No TAG has been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
    )
    def test_scan_file_for_tag_custom_prefix(self):
        """
        GIVEN:
            - PDF containing a tag barcode with custom prefix
            - The barcode mapping accepts this prefix and removes it from the mapped tag value
            - The created tag is the non-prefixed value
            - The metadata overrides already contain one tag id
        WHEN:
            - File is scanned for barcodes
        THEN:
            - The TAG is located
            - One TAG has been created
            - The new tag id is appended after the existing tag id
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.metadata.tag_ids = [99]
            reader.run()
            self.assertEqual(reader.pdf_file, test_file)
            tags = reader.metadata.tag_ids
            self.assertEqual(len(tags), 2)
            self.assertEqual(tags[0], 99)
            self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[1])

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        CONSUMER_TAG_BARCODE_MAPPING={"ASN(.*)": "\\g<1>"},
    )
    def test_scan_file_for_many_custom_tags(self):
        """
        GIVEN:
            - PDF containing multiple tag barcodes with custom prefix
            - The barcode mapping accepts this prefix and removes it from the mapped tag value
            - The created tags are the non-prefixed values
        WHEN:
            - File is scanned for barcodes
        THEN:
            - The TAGs are located
            - Five tags have been created
        """
        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            tags = reader.metadata.tag_ids
            self.assertEqual(len(tags), 5)
            self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[0])
            self.assertEqual(Tag.objects.get(name__iexact="00124").pk, tags[1])
            self.assertEqual(Tag.objects.get(name__iexact="00125").pk, tags[2])
            self.assertEqual(Tag.objects.get(name__iexact="00126").pk, tags[3])
            self.assertEqual(Tag.objects.get(name__iexact="00127").pk, tags[4])

    @override_settings(
        CONSUMER_ENABLE_TAG_BARCODE=True,
        CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<3>"},
    )
    def test_scan_file_for_tag_raises_value_error(self):
        """
        GIVEN:
            - Any error occurs during tag barcode processing
              (here: the substitute refers to match group 3, but the
              regex only defines one group)
        THEN:
            - The processing should be skipped and not break the import
        """
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
        with self.get_reader(test_file) as reader:
            reader.run()
            # expect error to be caught and logged only
            tags = reader.metadata.tag_ids
            self.assertIsNone(tags)
|
||||
|
@ -853,6 +853,19 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
||||
|
||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
||||
|
||||
# Toggles tag detection from barcodes; read from the environment variable
# PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE.
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)

# Mapping of barcode regex -> substitute expression, given as a JSON object
# in PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING. The default accepts barcodes
# starting with "TAG:" and uses the remainder (match group 1) as the tag.
# NOTE: the backslash is escaped twice below, once for the Python string
# literal and once for JSON, so the parsed substitute is \g<1>.
CONSUMER_TAG_BARCODE_MAPPING: Final[dict[str, str]] = dict(
    json.loads(
        os.getenv(
            "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
            '{"TAG:(.*)": "\\\\g<1>"}',
        ),
    ),
)
|
||||
|
||||
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
|
||||
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user