Feature: Parse ASN from barcode

ASN-Barcodes are identified by a configurable prefix
This commit is contained in:
Peter Kappelt 2023-01-15 15:55:00 +01:00 committed by Trenton H
parent 585cc24dd5
commit 92b9fc1ba9
4 changed files with 72 additions and 0 deletions

View File

@ -293,3 +293,36 @@ def save_to_dir(
os.rename(dst, dst_new)
else:
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Scan the provided pdf file for a barcode that contains the ASN
    for this document.

    The first barcode whose text starts with CONSUMER_ASN_BARCODE_PREFIX
    is considered the ASN to be used. The prefix and any surrounding
    whitespace are removed before the remainder is parsed as an integer.

    Returns a tuple of:
      - the filepath handed back by scan_file_for_barcodes
      - the detected ASN, or None if no barcode carries the prefix or
        the text after the prefix is not a valid integer
    """
    asn = None
    prefix = settings.CONSUMER_ASN_BARCODE_PREFIX

    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)

    # Only the barcode text is important here -> discard the page number.
    # The explicit None default matters: without it, next() raises
    # StopIteration whenever no barcode starts with the prefix.
    asn_text = next(
        (text for _, text in barcodes if text.startswith(prefix)),
        None,
    )

    if not asn_text:
        # No ASN barcode on this document; that is not an error
        return pdf_filepath, asn

    logger.debug(f"Found ASN Barcode: {asn_text}")

    # remove the prefix and remove whitespace
    asn_text = asn_text[len(prefix) :].strip()

    # now, try parsing the ASN number
    try:
        asn = int(asn_text)
    except ValueError as e:
        # Best effort: an unparsable ASN must not abort the consumption
        logger.warning(f"Failed to parse ASN number because: {e}")

    return pdf_filepath, asn

View File

@ -98,6 +98,7 @@ class Consumer(LoggingMixin):
self.override_correspondent_id = None
self.override_tag_ids = None
self.override_document_type_id = None
self.override_asn = None
self.task_id = None
self.channel_layer = get_channel_layer()
@ -130,6 +131,20 @@ class Consumer(LoggingMixin):
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
def pre_check_asn_unique(self):
    """
    Drop the requested ASN override when it is already in use.

    Does nothing if override_asn is unset (falsy). Otherwise, if a
    Document with the same archive_serial_number already exists, a
    warning is logged and override_asn is reset to None so that no ASN
    is applied to the document being consumed.
    """
    requested_asn = self.override_asn

    if requested_asn:
        already_taken = Document.objects.filter(
            archive_serial_number=requested_asn,
        ).exists()

        if already_taken:
            self.log(
                "warning",
                f"A document with ASN {self.override_asn} already exists. No ASN will be set!",
            )
            self.override_asn = None
def run_pre_consume_script(self):
if not settings.PRE_CONSUME_SCRIPT:
return
@ -255,6 +270,7 @@ class Consumer(LoggingMixin):
override_tag_ids=None,
task_id=None,
override_created=None,
override_asn=None,
) -> Document:
"""
Return the document object if it was successfully created.
@ -268,6 +284,7 @@ class Consumer(LoggingMixin):
self.override_tag_ids = override_tag_ids
self.task_id = task_id or str(uuid.uuid4())
self.override_created = override_created
self.override_asn = override_asn
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
@ -281,6 +298,7 @@ class Consumer(LoggingMixin):
self.pre_check_file_exists()
self.pre_check_directories()
self.pre_check_duplicate()
self.pre_check_asn_unique()
self.log("info", f"Consuming {self.filename}")
@ -526,6 +544,9 @@ class Consumer(LoggingMixin):
for tag_id in self.override_tag_ids:
document.tags.add(Tag.objects.get(pk=tag_id))
if self.override_asn:
document.archive_serial_number = self.override_asn
def _write(self, storage_type, source, target):
with open(source, "rb") as read_file:
with open(target, "wb") as write_file:

View File

@ -175,6 +175,13 @@ def consume_file(
# the barcodes has been split and will be consumed separately
return "File successfully split"
# try reading ASN barcodes
asn = None
if settings.CONSUMER_ENABLE_ASN_BARCODE:
_, asn = barcodes.scan_file_for_asn_barcode(path)
if asn:
logger.info(f"Using ASN {asn} from barcode")
# continue with consumption if no barcode was found
document = Consumer().try_consume_file(
path,
@ -185,6 +192,7 @@ def consume_file(
override_tag_ids=override_tag_ids,
task_id=task_id,
override_created=override_created,
override_asn=asn
)
if document:

View File

@ -657,6 +657,16 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PATCHT",
)
# Feature switch for reading the archive serial number (ASN) from a barcode
# during consumption; read from PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE.
# NOTE(review): no explicit default is passed here — presumably __get_boolean
# falls back to "disabled" when the variable is unset; confirm against it.
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)
# Text a barcode must start with to be treated as an ASN barcode; the rest
# of the barcode text is parsed as the ASN number. Defaults to "ASN".
CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
"PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",
"ASN",
)
# Integer read from PAPERLESS_OCR_PAGES; 0 when the variable is unset.
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
# The default language that tesseract will attempt to use when parsing