diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py
index 43c48046a..638dfed6e 100644
--- a/src/documents/barcodes.py
+++ b/src/documents/barcodes.py
@@ -293,3 +293,39 @@ def save_to_dir(
             os.rename(dst, dst_new)
     else:
         logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
+
+
+def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
+    """
+    Scan the provided pdf file for barcodes that contain the ASN
+    for this document.
+    The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+    is considered the ASN to be used.
+    Returns a PDF filepath and the detected ASN (or None if no ASN
+    barcode was found or its value could not be parsed as an integer)
+    """
+    asn = None
+
+    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)
+    # only the barcode text is important here -> discard the page number
+    barcodes = [text for _, text in barcodes]
+    # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX;
+    # default to None, otherwise next() raises StopIteration when nothing matches
+    asn_text = next(
+        (x for x in barcodes if x.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)),
+        None,
+    )
+
+    if asn_text:
+        logger.debug(f"Found ASN Barcode: {asn_text}")
+
+        # remove the prefix and remove whitespace
+        asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
+
+        # now, try parsing the ASN number
+        try:
+            asn = int(asn_text)
+        except ValueError as e:
+            logger.warning(f"Failed to parse ASN number because: {e}")
+
+    return pdf_filepath, asn
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index b46b3a683..6f42a692c 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -98,6 +98,7 @@ class Consumer(LoggingMixin):
         self.override_correspondent_id = None
         self.override_tag_ids = None
         self.override_document_type_id = None
+        self.override_asn = None
         self.task_id = None
 
         self.channel_layer = get_channel_layer()
@@ -130,6 +131,21 @@ class Consumer(LoggingMixin):
         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
         os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
 
+    def pre_check_asn_unique(self):
+        """
+        Check that if override_asn is given, it is unique.
+        If another document already uses it, log a warning and drop the override.
+        """
+        if not self.override_asn:
+            # check not necessary in case no ASN gets set
+            return
+        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
+            self.log(
+                "warning",
+                f"A document with ASN {self.override_asn} already exists. No ASN will be set!",
+            )
+            self.override_asn = None
+
     def run_pre_consume_script(self):
         if not settings.PRE_CONSUME_SCRIPT:
             return
@@ -255,6 +271,7 @@ class Consumer(LoggingMixin):
         override_tag_ids=None,
         task_id=None,
         override_created=None,
+        override_asn=None,
     ) -> Document:
         """
         Return the document object if it was successfully created.
@@ -268,6 +285,7 @@ class Consumer(LoggingMixin):
         self.override_tag_ids = override_tag_ids
         self.task_id = task_id or str(uuid.uuid4())
         self.override_created = override_created
+        self.override_asn = override_asn
 
         self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
 
@@ -281,6 +299,7 @@ class Consumer(LoggingMixin):
         self.pre_check_file_exists()
         self.pre_check_directories()
         self.pre_check_duplicate()
+        self.pre_check_asn_unique()
 
         self.log("info", f"Consuming {self.filename}")
 
@@ -526,6 +545,9 @@ class Consumer(LoggingMixin):
             for tag_id in self.override_tag_ids:
                 document.tags.add(Tag.objects.get(pk=tag_id))
 
+        if self.override_asn:
+            document.archive_serial_number = self.override_asn
+
     def _write(self, storage_type, source, target):
         with open(source, "rb") as read_file:
             with open(target, "wb") as write_file:
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 0168b42ba..1b7f15d5a 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -175,6 +175,13 @@ def consume_file(
                 # the barcodes has been split and will be consumed separately
                 return "File successfully split"
 
+    # try reading ASN barcodes
+    asn = None
+    if settings.CONSUMER_ENABLE_ASN_BARCODE:
+        _, asn = barcodes.scan_file_for_asn_barcode(path)
+        if asn:
+            logger.info(f"Using ASN {asn} from barcode")
+
     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(
         path,
@@ -185,6 +192,7 @@ def consume_file(
         override_tag_ids=override_tag_ids,
         task_id=task_id,
         override_created=override_created,
+        override_asn=asn,
     )
 
     if document:
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index c5bb4801c..cf119ea8a 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -657,6 +657,16 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
     "PATCHT",
 )
 
+CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
+    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
+)
+
+CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
+    "PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",
+    "ASN",
+)
+
+
 OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 
 # The default language that tesseract will attempt to use when parsing