mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Allow tagging by putting barcode stickers on documents (#5580)
This commit is contained in:
parent
c7e0c32226
commit
fb82aa0ee1
@ -1173,6 +1173,55 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
|
|||||||
|
|
||||||
Defaults to "300"
|
Defaults to "300"
|
||||||
|
|
||||||
|
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
||||||
|
|
||||||
|
: Enables the detection of barcodes in the scanned document and
|
||||||
|
assigns or creates tags if a properly formatted barcode is detected.
|
||||||
|
|
||||||
|
The barcode must match one of the (configurable) regular expressions.
|
||||||
|
If the barcode text contains ',' (comma), it is split into multiple
|
||||||
|
barcodes which are individually processed for tagging.
|
||||||
|
|
||||||
|
Matching is case-insensitive.
|
||||||
|
|
||||||
|
Defaults to false.
|
||||||
|
|
||||||
|
#### [`PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING=<json dict>`](#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING) {#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING}
|
||||||
|
|
||||||
|
: Defines a dictionary of filter regex and substitute expressions.
|
||||||
|
|
||||||
|
Syntax: {"<regex>": "<substitute>" [,...]}
|
||||||
|
|
||||||
|
A barcode is considered for tagging if the barcode text matches
|
||||||
|
at least one of the provided <regex> patterns.
|
||||||
|
|
||||||
|
If a match is found, the <substitute> rule is applied. This allows very
|
||||||
|
versatile reformatting and mapping of barcode patterns to tag values.
|
||||||
|
|
||||||
|
If a tag is not found it will be created.
|
||||||
|
|
||||||
|
Defaults to:
|
||||||
|
|
||||||
|
{"TAG:(.*)": "\\g<1>"} which defines
|
||||||
|
- a regex TAG:(.*) which matches barcodes beginning with TAG:
|
||||||
|
followed by any text that gets stored into match group #1 and
|
||||||
|
- a substitute \\g<1> that replaces the original barcode text
|
||||||
|
with the content of match group #1.
|
||||||
|
Consequently, the tag is the barcode text without its TAG: prefix.
|
||||||
|
|
||||||
|
More examples:
|
||||||
|
|
||||||
|
{"ASN12.*": "JOHN", "ASN13.*": "SMITH"} for example maps
|
||||||
|
- ASN12nnnn barcodes to the tag JOHN and
|
||||||
|
- ASN13nnnn barcodes to the tag SMITH.
|
||||||
|
|
||||||
|
{"T-J": "JOHN", "T-S": "SMITH", "T-D": "DOE"} directly maps
|
||||||
|
- T-J barcodes to the tag JOHN,
|
||||||
|
- T-S barcodes to the tag SMITH and
|
||||||
|
- T-D barcodes to the tag DOE.
|
||||||
|
|
||||||
|
Please refer to the Python regex documentation for more information.
|
||||||
|
|
||||||
## Audit Trail
|
## Audit Trail
|
||||||
|
|
||||||
#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED}
|
#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED}
|
||||||
|
@ -68,6 +68,8 @@
|
|||||||
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||||
#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
|
#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
|
||||||
#PAPERLESS_CONSUMER_BARCODE_DPI=300
|
#PAPERLESS_CONSUMER_BARCODE_DPI=300
|
||||||
|
#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=false
|
||||||
|
#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING={"TAG:(.*)": "\\g<1>"}
|
||||||
#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED=false
|
#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED=false
|
||||||
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME=double-sided
|
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME=double-sided
|
||||||
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT=false
|
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT=false
|
||||||
|
@ -14,6 +14,7 @@ from PIL import Image
|
|||||||
|
|
||||||
from documents.converters import convert_from_tiff_to_pdf
|
from documents.converters import convert_from_tiff_to_pdf
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
|
from documents.models import Tag
|
||||||
from documents.plugins.base import ConsumeTaskPlugin
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
from documents.plugins.base import StopConsumeTaskError
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
from documents.plugins.helpers import ProgressStatusOptions
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
@ -65,7 +66,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
|||||||
supported_mimes = {"application/pdf"}
|
supported_mimes = {"application/pdf"}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
|
settings.CONSUMER_ENABLE_ASN_BARCODE
|
||||||
|
or settings.CONSUMER_ENABLE_BARCODES
|
||||||
|
or settings.CONSUMER_ENABLE_TAG_BARCODE
|
||||||
) and self.input_doc.mime_type in supported_mimes
|
) and self.input_doc.mime_type in supported_mimes
|
||||||
|
|
||||||
def setup(self):
|
def setup(self):
|
||||||
@ -90,6 +93,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
|||||||
logger.info(f"Found ASN in barcode: {located_asn}")
|
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||||
self.metadata.asn = located_asn
|
self.metadata.asn = located_asn
|
||||||
|
|
||||||
|
# try reading tags from barcodes
|
||||||
|
if settings.CONSUMER_ENABLE_TAG_BARCODE:
|
||||||
|
tags = self.tags
|
||||||
|
if tags is not None and len(tags) > 0:
|
||||||
|
if self.metadata.tag_ids:
|
||||||
|
self.metadata.tag_ids += tags
|
||||||
|
else:
|
||||||
|
self.metadata.tag_ids = tags
|
||||||
|
logger.info(f"Found tags in barcode: {tags}")
|
||||||
|
|
||||||
separator_pages = self.get_separation_pages()
|
separator_pages = self.get_separation_pages()
|
||||||
if not separator_pages:
|
if not separator_pages:
|
||||||
return "No pages to split on!"
|
return "No pages to split on!"
|
||||||
@ -279,6 +292,53 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
|||||||
|
|
||||||
return asn
|
return asn
|
||||||
|
|
||||||
|
@property
|
||||||
|
def tags(self) -> Optional[list[int]]:
|
||||||
|
"""
|
||||||
|
Search the parsed barcodes for any tags.
|
||||||
|
Returns the detected tag ids (or empty list)
|
||||||
|
"""
|
||||||
|
tags = []
|
||||||
|
|
||||||
|
# Ensure the barcodes have been read
|
||||||
|
self.detect()
|
||||||
|
|
||||||
|
for x in self.barcodes:
|
||||||
|
tag_texts = x.value
|
||||||
|
|
||||||
|
for raw in tag_texts.split(","):
|
||||||
|
try:
|
||||||
|
tag = None
|
||||||
|
for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
|
||||||
|
if re.match(regex, raw, flags=re.IGNORECASE):
|
||||||
|
sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
|
||||||
|
tag = (
|
||||||
|
re.sub(regex, sub, raw, flags=re.IGNORECASE)
|
||||||
|
if sub
|
||||||
|
else raw
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
if tag:
|
||||||
|
tag = Tag.objects.get_or_create(
|
||||||
|
name__iexact=tag,
|
||||||
|
defaults={"name": tag},
|
||||||
|
)[0]
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Found Tag Barcode '{raw}', substituted "
|
||||||
|
f"to '{tag}' and mapped to "
|
||||||
|
f"tag #{tag.pk}.",
|
||||||
|
)
|
||||||
|
tags.append(tag.pk)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"Failed to find or create TAG '{raw}' because: {e}",
|
||||||
|
)
|
||||||
|
|
||||||
|
return tags
|
||||||
|
|
||||||
def get_separation_pages(self) -> dict[int, bool]:
|
def get_separation_pages(self) -> dict[int, bool]:
|
||||||
"""
|
"""
|
||||||
Search the parsed barcodes for separators and returns a dict of page
|
Search the parsed barcodes for separators and returns a dict of page
|
||||||
|
@ -14,6 +14,7 @@ from documents.barcodes import BarcodePlugin
|
|||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
|
from documents.models import Tag
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from documents.tests.utils import DocumentConsumeDelayMixin
|
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||||
from documents.tests.utils import DummyProgressManager
|
from documents.tests.utils import DummyProgressManager
|
||||||
@ -741,3 +742,125 @@ class TestBarcodeZxing(TestBarcode):
|
|||||||
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
|
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
|
||||||
class TestAsnBarcodesZxing(TestAsnBarcode):
|
class TestAsnBarcodesZxing(TestAsnBarcode):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestTagBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
|
||||||
|
@contextmanager
|
||||||
|
def get_reader(self, filepath: Path) -> BarcodePlugin:
|
||||||
|
reader = BarcodePlugin(
|
||||||
|
ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
|
||||||
|
DocumentMetadataOverrides(),
|
||||||
|
DummyProgressManager(filepath.name, None),
|
||||||
|
self.dirs.scratch_dir,
|
||||||
|
"task-id",
|
||||||
|
)
|
||||||
|
reader.setup()
|
||||||
|
yield reader
|
||||||
|
reader.cleanup()
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_ENABLE_TAG_BARCODE=True)
|
||||||
|
def test_scan_file_without_matching_barcodes(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- PDF containing tag barcodes but none with matching prefix (default "TAG:")
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- No TAG has been created
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||||
|
with self.get_reader(test_file) as reader:
|
||||||
|
reader.run()
|
||||||
|
tags = reader.metadata.tag_ids
|
||||||
|
self.assertEqual(tags, None)
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_ENABLE_TAG_BARCODE=False,
|
||||||
|
CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
|
||||||
|
)
|
||||||
|
def test_scan_file_with_matching_barcode_but_function_disabled(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- PDF containing a tag barcode with matching custom prefix
|
||||||
|
- The tag barcode functionality is disabled
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- No TAG has been created
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||||
|
with self.get_reader(test_file) as reader:
|
||||||
|
reader.run()
|
||||||
|
tags = reader.metadata.tag_ids
|
||||||
|
self.assertEqual(tags, None)
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_ENABLE_TAG_BARCODE=True,
|
||||||
|
CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
|
||||||
|
)
|
||||||
|
def test_scan_file_for_tag_custom_prefix(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- PDF containing a tag barcode with custom prefix
|
||||||
|
- The barcode mapping accepts this prefix and removes it from the mapped tag value
|
||||||
|
- The created tag is the non-prefixed value
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- The TAG is located
|
||||||
|
- One TAG has been created
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||||
|
with self.get_reader(test_file) as reader:
|
||||||
|
reader.metadata.tag_ids = [99]
|
||||||
|
reader.run()
|
||||||
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
|
tags = reader.metadata.tag_ids
|
||||||
|
self.assertEqual(len(tags), 2)
|
||||||
|
self.assertEqual(tags[0], 99)
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[1])
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_ENABLE_TAG_BARCODE=True,
|
||||||
|
CONSUMER_TAG_BARCODE_MAPPING={"ASN(.*)": "\\g<1>"},
|
||||||
|
)
|
||||||
|
def test_scan_file_for_many_custom_tags(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- PDF containing multiple tag barcodes with custom prefix
|
||||||
|
- The barcode mapping accepts this prefix and removes it from the mapped tag value
|
||||||
|
- The created tags are the non-prefixed values
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcodes
|
||||||
|
THEN:
|
||||||
|
- The TAG is located
|
||||||
|
- File Tags have been created
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
|
||||||
|
with self.get_reader(test_file) as reader:
|
||||||
|
reader.run()
|
||||||
|
tags = reader.metadata.tag_ids
|
||||||
|
self.assertEqual(len(tags), 5)
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[0])
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00124").pk, tags[1])
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00125").pk, tags[2])
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00126").pk, tags[3])
|
||||||
|
self.assertEqual(Tag.objects.get(name__iexact="00127").pk, tags[4])
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_ENABLE_TAG_BARCODE=True,
|
||||||
|
CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<3>"},
|
||||||
|
)
|
||||||
|
def test_scan_file_for_tag_raises_value_error(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Any error occurs during tag barcode processing
|
||||||
|
THEN:
|
||||||
|
- The processing should be skipped and not break the import
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||||
|
with self.get_reader(test_file) as reader:
|
||||||
|
reader.run()
|
||||||
|
# expect error to be caught and logged only
|
||||||
|
tags = reader.metadata.tag_ids
|
||||||
|
self.assertEqual(tags, None)
|
||||||
|
@ -853,6 +853,19 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
|||||||
|
|
||||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
||||||
|
|
||||||
|
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
||||||
|
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
||||||
|
)
|
||||||
|
|
||||||
|
CONSUMER_TAG_BARCODE_MAPPING = dict(
|
||||||
|
json.loads(
|
||||||
|
os.getenv(
|
||||||
|
"PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
|
||||||
|
'{"TAG:(.*)": "\\\\g<1>"}',
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user