mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Parse ASN from barcode
ASN-Barcodes are identified by a configurable prefix
This commit is contained in:
parent
585cc24dd5
commit
92b9fc1ba9
@ -293,3 +293,36 @@ def save_to_dir(
|
||||
os.rename(dst, dst_new)
|
||||
else:
|
||||
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
||||
|
||||
|
||||
def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Scan the provided pdf file for barcodes that contain the ASN
    for this document.

    The first barcode whose text starts with CONSUMER_ASN_BARCODE_PREFIX
    is considered the ASN to be used; later matches are ignored.

    Returns the PDF filepath reported by scan_file_for_barcodes and the
    detected ASN. The ASN is None when no barcode carries the prefix or
    when the text following the prefix cannot be parsed as an integer.
    """
    asn = None
    prefix = settings.CONSUMER_ASN_BARCODE_PREFIX

    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)

    # only the barcode text is important here -> discard the page number
    barcode_texts = [text for _, text in barcodes]

    # get the first barcode that starts with the prefix; the explicit
    # default keeps next() from raising StopIteration when nothing matches
    asn_text = next(
        (text for text in barcode_texts if text.startswith(prefix)),
        None,
    )

    if asn_text:
        logger.debug(f"Found ASN Barcode: {asn_text}")

        # remove the prefix and remove whitespace
        asn_text = asn_text[len(prefix) :].strip()

        # now, try parsing the ASN number
        try:
            asn = int(asn_text)
        except ValueError as e:
            # an unparsable ASN is not fatal: report it and carry on without one
            logger.warning(f"Failed to parse ASN number because: {e}")

    return pdf_filepath, asn
|
||||
|
@ -98,6 +98,7 @@ class Consumer(LoggingMixin):
|
||||
self.override_correspondent_id = None
|
||||
self.override_tag_ids = None
|
||||
self.override_document_type_id = None
|
||||
self.override_asn = None
|
||||
self.task_id = None
|
||||
|
||||
self.channel_layer = get_channel_layer()
|
||||
@ -130,6 +131,20 @@ class Consumer(LoggingMixin):
|
||||
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
|
||||
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
|
||||
|
||||
def pre_check_asn_unique(self):
    """
    Ensure a requested override_asn is not already used by a document.

    Does nothing when override_asn is unset (or otherwise falsy). When a
    Document with the same archive_serial_number already exists, a
    warning is logged and override_asn is reset to None.
    """
    requested_asn = self.override_asn
    if requested_asn:
        asn_taken = Document.objects.filter(
            archive_serial_number=requested_asn,
        ).exists()
        if asn_taken:
            self.log(
                "warning",
                f"A document with ASN {requested_asn} already exists. No ASN will be set!",
            )
            # drop the conflicting ASN
            self.override_asn = None
|
||||
|
||||
def run_pre_consume_script(self):
|
||||
if not settings.PRE_CONSUME_SCRIPT:
|
||||
return
|
||||
@ -255,6 +270,7 @@ class Consumer(LoggingMixin):
|
||||
override_tag_ids=None,
|
||||
task_id=None,
|
||||
override_created=None,
|
||||
override_asn=None,
|
||||
) -> Document:
|
||||
"""
|
||||
Return the document object if it was successfully created.
|
||||
@ -268,6 +284,7 @@ class Consumer(LoggingMixin):
|
||||
self.override_tag_ids = override_tag_ids
|
||||
self.task_id = task_id or str(uuid.uuid4())
|
||||
self.override_created = override_created
|
||||
self.override_asn = override_asn
|
||||
|
||||
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
|
||||
|
||||
@ -281,6 +298,7 @@ class Consumer(LoggingMixin):
|
||||
self.pre_check_file_exists()
|
||||
self.pre_check_directories()
|
||||
self.pre_check_duplicate()
|
||||
self.pre_check_asn_unique()
|
||||
|
||||
self.log("info", f"Consuming {self.filename}")
|
||||
|
||||
@ -526,6 +544,9 @@ class Consumer(LoggingMixin):
|
||||
for tag_id in self.override_tag_ids:
|
||||
document.tags.add(Tag.objects.get(pk=tag_id))
|
||||
|
||||
if self.override_asn:
|
||||
document.archive_serial_number = self.override_asn
|
||||
|
||||
def _write(self, storage_type, source, target):
|
||||
with open(source, "rb") as read_file:
|
||||
with open(target, "wb") as write_file:
|
||||
|
@ -175,6 +175,13 @@ def consume_file(
|
||||
# the barcodes has been split and will be consumed separately
|
||||
return "File successfully split"
|
||||
|
||||
# try reading ASN barcodes
|
||||
asn = None
|
||||
if settings.CONSUMER_ENABLE_ASN_BARCODE:
|
||||
_, asn = barcodes.scan_file_for_asn_barcode(path)
|
||||
if asn:
|
||||
logger.info(f"Using ASN {asn} from barcode")
|
||||
|
||||
# continue with consumption if no barcode was found
|
||||
document = Consumer().try_consume_file(
|
||||
path,
|
||||
@ -185,6 +192,7 @@ def consume_file(
|
||||
override_tag_ids=override_tag_ids,
|
||||
task_id=task_id,
|
||||
override_created=override_created,
|
||||
override_asn=asn
|
||||
)
|
||||
|
||||
if document:
|
||||
|
@ -657,6 +657,16 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
|
||||
"PATCHT",
|
||||
)
|
||||
|
||||
# Whether ASN barcodes are read at all; parsed from the environment
# by __get_boolean.
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean("PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE")

# Prefix a barcode's text must start with to be treated as an ASN barcode
# ("ASN" unless overridden through the environment).
CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.environ.get("PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX", "ASN")
|
||||
|
||||
|
||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
||||
|
||||
# The default language that tesseract will attempt to use when parsing
|
||||
|
Loading…
x
Reference in New Issue
Block a user