mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Feature: Parse ASN from barcode
ASN-Barcodes are identified by a configurable prefix
This commit is contained in:
parent
585cc24dd5
commit
92b9fc1ba9
@ -293,3 +293,36 @@ def save_to_dir(
|
|||||||
os.rename(dst, dst_new)
|
os.rename(dst, dst_new)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
||||||
|
|
||||||
|
|
||||||
|
def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Scan the provided pdf file for barcodes that contain the ASN
    for this document.

    The first barcode whose text starts with
    settings.CONSUMER_ASN_BARCODE_PREFIX is considered the ASN to be used.
    The prefix and surrounding whitespace are removed and the remainder
    is parsed as an integer.

    Returns a tuple of the PDF filepath reported by scan_file_for_barcodes
    and the detected ASN. The ASN is None when no barcode carries the
    prefix or when the text after the prefix is not a valid integer.
    """
    asn = None

    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)
    prefix = settings.CONSUMER_ASN_BARCODE_PREFIX

    # Only the barcode text matters here, so the page number is discarded.
    # The explicit None default is required: without it next() raises
    # StopIteration for every document that has no ASN barcode.
    asn_text = next(
        (text for _, text in barcodes if text.startswith(prefix)),
        None,
    )

    if asn_text:
        logger.debug(f"Found ASN Barcode: {asn_text}")

        # remove the prefix and remove whitespace
        asn_text = asn_text[len(prefix) :].strip()

        # now, try parsing the ASN number
        try:
            asn = int(asn_text)
        except ValueError as e:
            # A malformed ASN barcode is reported but is not fatal;
            # the caller simply receives no ASN.
            logger.warning(f"Failed to parse ASN number because: {e}")

    return pdf_filepath, asn
|
||||||
|
@ -98,6 +98,7 @@ class Consumer(LoggingMixin):
|
|||||||
self.override_correspondent_id = None
|
self.override_correspondent_id = None
|
||||||
self.override_tag_ids = None
|
self.override_tag_ids = None
|
||||||
self.override_document_type_id = None
|
self.override_document_type_id = None
|
||||||
|
self.override_asn = None
|
||||||
self.task_id = None
|
self.task_id = None
|
||||||
|
|
||||||
self.channel_layer = get_channel_layer()
|
self.channel_layer = get_channel_layer()
|
||||||
@ -130,6 +131,20 @@ class Consumer(LoggingMixin):
|
|||||||
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
|
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
|
||||||
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
|
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
def pre_check_asn_unique(self):
    """
    Make sure a requested override_asn is not already in use.

    Nothing happens when override_asn is unset (falsy). When a Document
    with the same archive_serial_number already exists, a warning is
    logged and override_asn is reset to None.
    """
    requested_asn = self.override_asn
    if requested_asn:
        asn_taken = Document.objects.filter(
            archive_serial_number=requested_asn,
        ).exists()
        if asn_taken:
            self.log(
                "warning",
                f"A document with ASN {self.override_asn} already exists. No ASN will be set!",
            )
            # Drop the clashing ASN
            self.override_asn = None
|
||||||
|
|
||||||
def run_pre_consume_script(self):
|
def run_pre_consume_script(self):
|
||||||
if not settings.PRE_CONSUME_SCRIPT:
|
if not settings.PRE_CONSUME_SCRIPT:
|
||||||
return
|
return
|
||||||
@ -255,6 +270,7 @@ class Consumer(LoggingMixin):
|
|||||||
override_tag_ids=None,
|
override_tag_ids=None,
|
||||||
task_id=None,
|
task_id=None,
|
||||||
override_created=None,
|
override_created=None,
|
||||||
|
override_asn=None,
|
||||||
) -> Document:
|
) -> Document:
|
||||||
"""
|
"""
|
||||||
Return the document object if it was successfully created.
|
Return the document object if it was successfully created.
|
||||||
@ -268,6 +284,7 @@ class Consumer(LoggingMixin):
|
|||||||
self.override_tag_ids = override_tag_ids
|
self.override_tag_ids = override_tag_ids
|
||||||
self.task_id = task_id or str(uuid.uuid4())
|
self.task_id = task_id or str(uuid.uuid4())
|
||||||
self.override_created = override_created
|
self.override_created = override_created
|
||||||
|
self.override_asn = override_asn
|
||||||
|
|
||||||
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
|
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
|
||||||
|
|
||||||
@ -281,6 +298,7 @@ class Consumer(LoggingMixin):
|
|||||||
self.pre_check_file_exists()
|
self.pre_check_file_exists()
|
||||||
self.pre_check_directories()
|
self.pre_check_directories()
|
||||||
self.pre_check_duplicate()
|
self.pre_check_duplicate()
|
||||||
|
self.pre_check_asn_unique()
|
||||||
|
|
||||||
self.log("info", f"Consuming {self.filename}")
|
self.log("info", f"Consuming {self.filename}")
|
||||||
|
|
||||||
@ -526,6 +544,9 @@ class Consumer(LoggingMixin):
|
|||||||
for tag_id in self.override_tag_ids:
|
for tag_id in self.override_tag_ids:
|
||||||
document.tags.add(Tag.objects.get(pk=tag_id))
|
document.tags.add(Tag.objects.get(pk=tag_id))
|
||||||
|
|
||||||
|
if self.override_asn:
|
||||||
|
document.archive_serial_number = self.override_asn
|
||||||
|
|
||||||
def _write(self, storage_type, source, target):
|
def _write(self, storage_type, source, target):
|
||||||
with open(source, "rb") as read_file:
|
with open(source, "rb") as read_file:
|
||||||
with open(target, "wb") as write_file:
|
with open(target, "wb") as write_file:
|
||||||
|
@ -175,6 +175,13 @@ def consume_file(
|
|||||||
# the barcodes has been split and will be consumed separately
|
# the barcodes has been split and will be consumed separately
|
||||||
return "File successfully split"
|
return "File successfully split"
|
||||||
|
|
||||||
|
# try reading ASN barcodes
|
||||||
|
asn = None
|
||||||
|
if settings.CONSUMER_ENABLE_ASN_BARCODE:
|
||||||
|
_, asn = barcodes.scan_file_for_asn_barcode(path)
|
||||||
|
if asn:
|
||||||
|
logger.info(f"Using ASN {asn} from barcode")
|
||||||
|
|
||||||
# continue with consumption if no barcode was found
|
# continue with consumption if no barcode was found
|
||||||
document = Consumer().try_consume_file(
|
document = Consumer().try_consume_file(
|
||||||
path,
|
path,
|
||||||
@ -185,6 +192,7 @@ def consume_file(
|
|||||||
override_tag_ids=override_tag_ids,
|
override_tag_ids=override_tag_ids,
|
||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
override_created=override_created,
|
override_created=override_created,
|
||||||
|
override_asn=asn
|
||||||
)
|
)
|
||||||
|
|
||||||
if document:
|
if document:
|
||||||
|
@ -657,6 +657,16 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
|
|||||||
"PATCHT",
|
"PATCHT",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
|
||||||
|
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
|
||||||
|
)
|
||||||
|
|
||||||
|
CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
|
||||||
|
"PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",
|
||||||
|
"ASN",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
|
||||||
|
|
||||||
# The default language that tesseract will attempt to use when parsing
|
# The default language that tesseract will attempt to use when parsing
|
||||||
|
Loading…
x
Reference in New Issue
Block a user