Merge branch 'dev' into feature-permissions
src/documents/barcodes.py

@@ -2,10 +2,12 @@ import logging
 import os
 import shutil
 import tempfile
+from dataclasses import dataclass
 from functools import lru_cache
+from math import ceil
 from pathlib import Path
 from typing import List
 from typing import Optional
 from typing import Tuple

 import magic
 from django.conf import settings

@@ -25,6 +27,42 @@ class BarcodeImageFormatError(Exception):
     pass


+@dataclass(frozen=True)
+class Barcode:
+    """
+    Holds the information about a single barcode and its location
+    """
+
+    page: int
+    value: str
+
+    @property
+    def is_separator(self) -> bool:
+        """
+        Returns True if the barcode value equals the configured separation value,
+        False otherwise
+        """
+        return self.value == settings.CONSUMER_BARCODE_STRING
+
+    @property
+    def is_asn(self) -> bool:
+        """
+        Returns True if the barcode value matches the configured ASN prefix,
+        False otherwise
+        """
+        return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
+
+
+@dataclass
+class DocumentBarcodeInfo:
+    """
+    Describes a single document's barcode status
+    """
+
+    pdf_path: Path
+    barcodes: List[Barcode]
+
+
 @lru_cache(maxsize=8)
 def supported_file_type(mime_type) -> bool:
     """

@@ -107,14 +145,17 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
     return newpath


-def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
+def scan_file_for_barcodes(
+    filepath: str,
+) -> DocumentBarcodeInfo:
     """
-    Scan the provided pdf file for page separating barcodes
-    Returns a PDF filepath and a list of pagenumbers,
-    which separate the file into new files
+    Scan the provided pdf file for any barcodes
+    Returns a PDF filepath and a list of
+    (page_number, barcode_text) tuples
     """

-    def _pikepdf_barcode_scan(pdf_filepath: str):
+    def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
+        detected_barcodes = []
         with Pdf.open(pdf_filepath) as pdf:
             for page_num, page in enumerate(pdf.pages):
                 for image_key in page.images:

@@ -132,24 +173,43 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
                     # raise an exception, triggering fallback
                     pillow_img = pdfimage.as_pil_image()

-                    detected_barcodes = barcode_reader(pillow_img)
-
-                    if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
-                        separator_page_numbers.append(page_num)
+                    # Scale the image down
+                    # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
+                    # TLDR: zbar has issues with larger images
+                    width, height = pillow_img.size
+                    if width > 1024:
+                        scaler = ceil(width / 1024)
+                        new_width = int(width / scaler)
+                        new_height = int(height / scaler)
+                        pillow_img = pillow_img.resize((new_width, new_height))
+
+                    width, height = pillow_img.size
+                    if height > 2048:
+                        scaler = ceil(height / 2048)
+                        new_width = int(width / scaler)
+                        new_height = int(height / scaler)
+                        pillow_img = pillow_img.resize((new_width, new_height))
+
+                    for barcode_value in barcode_reader(pillow_img):
+                        detected_barcodes.append(Barcode(page_num, barcode_value))
+
+        return detected_barcodes

-    def _pdf2image_barcode_scan(pdf_filepath: str):
+    def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
+        detected_barcodes = []
         # use a temporary directory in case the file is too big to handle in memory
         with tempfile.TemporaryDirectory() as path:
             pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
             for current_page_number, page in enumerate(pages_from_path):
-                current_barcodes = barcode_reader(page)
-                if settings.CONSUMER_BARCODE_STRING in current_barcodes:
-                    separator_page_numbers.append(current_page_number)
+                for barcode_value in barcode_reader(page):
+                    detected_barcodes.append(
+                        Barcode(current_page_number, barcode_value),
+                    )
+        return detected_barcodes

-    separator_page_numbers = []
     pdf_filepath = None

     mime_type = get_file_mime_type(filepath)
+    barcodes = []

     if supported_file_type(mime_type):
         pdf_filepath = filepath

@@ -159,7 +219,7 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
         # Always try pikepdf first, it's usually fine, faster and
         # uses less memory
         try:
-            _pikepdf_barcode_scan(pdf_filepath)
+            barcodes = _pikepdf_barcode_scan(pdf_filepath)
         # Password protected files can't be checked
         except PasswordError as e:
             logger.warning(

@@ -172,9 +232,7 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
                 f"Falling back to pdf2image because: {e}",
             )
             try:
-                # Clear the list in case some processing worked
-                separator_page_numbers = []
-                _pdf2image_barcode_scan(pdf_filepath)
+                barcodes = _pdf2image_barcode_scan(pdf_filepath)
             # This file is really borked, allow the consumption to continue
             # but it may fail further on
             except Exception as e:  # pragma: no cover

@@ -186,7 +244,49 @@ def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
         logger.warning(
             f"Unsupported file format for barcode reader: {str(mime_type)}",
         )
-    return pdf_filepath, separator_page_numbers
+
+    return DocumentBarcodeInfo(pdf_filepath, barcodes)
+
+
+def get_separating_barcodes(barcodes: List[Barcode]) -> List[int]:
+    """
+    Search the parsed barcodes for separators
+    and returns a list of page numbers, which
+    separate the file into new files.
+    """
+    # filter all barcodes for the separator string
+    # get the page numbers of the separating barcodes
+
+    return list({bc.page for bc in barcodes if bc.is_separator})
+
+
+def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
+    """
+    Search the parsed barcodes for any ASNs.
+    The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+    is considered the ASN to be used.
+    Returns the detected ASN (or None)
+    """
+    asn = None
+
+    # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+    asn_text = next(
+        (x.value for x in barcodes if x.is_asn),
+        None,
+    )
+
+    if asn_text:
+        logger.debug(f"Found ASN Barcode: {asn_text}")
+        # remove the prefix and remove whitespace
+        asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
+
+        # now, try parsing the ASN number
+        try:
+            asn = int(asn_text)
+        except ValueError as e:
+            logger.warning(f"Failed to parse ASN number because: {e}")
+
+    return asn


 def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
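The net effect of this refactor: a single scan now collects every barcode, and separator detection and ASN extraction become cheap post-processing steps over the same result. A minimal sketch of how a caller composes the new helpers (the input path is a hypothetical example):

    from documents import barcodes

    # One pass: collects Barcode(page, value) entries for the whole PDF.
    info = barcodes.scan_file_for_barcodes("/tmp/incoming/scan.pdf")  # hypothetical path

    # Pages whose barcode equals settings.CONSUMER_BARCODE_STRING.
    separator_pages = barcodes.get_separating_barcodes(info.barcodes)

    # First barcode starting with settings.CONSUMER_ASN_BARCODE_PREFIX, parsed to int.
    asn = barcodes.get_asn_from_barcodes(info.barcodes)

    if separator_pages:
        new_files = barcodes.separate_pages(info.pdf_path, separator_pages)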
src/documents/consumer.py

@@ -40,6 +40,8 @@ class ConsumerError(Exception):


 MESSAGE_DOCUMENT_ALREADY_EXISTS = "document_already_exists"
+MESSAGE_ASN_ALREADY_EXISTS = "asn_already_exists"
+MESSAGE_ASN_RANGE = "asn_value_out_of_range"
 MESSAGE_FILE_NOT_FOUND = "file_not_found"
 MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND = "pre_consume_script_not_found"
 MESSAGE_PRE_CONSUME_SCRIPT_ERROR = "pre_consume_script_error"

@@ -99,6 +101,7 @@ class Consumer(LoggingMixin):
         self.override_correspondent_id = None
         self.override_tag_ids = None
         self.override_document_type_id = None
+        self.override_asn = None
         self.task_id = None
         self.owner_id = None

@@ -132,6 +135,27 @@ class Consumer(LoggingMixin):
         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
         os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)

+    def pre_check_asn_value(self):
+        """
+        Check that if override_asn is given, it is unique and within a valid range
+        """
+        if not self.override_asn:
+            # check not necessary in case no ASN gets set
+            return
+        # Validate the range is above zero and less than uint32_t max
+        # otherwise, Whoosh can't handle it in the index
+        if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
+            self._fail(
+                MESSAGE_ASN_RANGE,
+                f"Not consuming {self.filename}: "
+                f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
+            )
+        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
+            self._fail(
+                MESSAGE_ASN_ALREADY_EXISTS,
+                f"Not consuming {self.filename}: Given ASN already exists!",
+            )
+
     def run_pre_consume_script(self):
         if not settings.PRE_CONSUME_SCRIPT:
             return

@@ -257,6 +281,7 @@ class Consumer(LoggingMixin):
         override_tag_ids=None,
         task_id=None,
         override_created=None,
+        override_asn=None,
         override_owner_id=None,
     ) -> Document:
         """

@@ -271,6 +296,7 @@ class Consumer(LoggingMixin):
         self.override_tag_ids = override_tag_ids
         self.task_id = task_id or str(uuid.uuid4())
         self.override_created = override_created
+        self.override_asn = override_asn
         self.override_owner_id = override_owner_id

         self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)

@@ -285,6 +311,7 @@ class Consumer(LoggingMixin):
         self.pre_check_file_exists()
         self.pre_check_directories()
         self.pre_check_duplicate()
+        self.pre_check_asn_value()

         self.log("info", f"Consuming {self.filename}")

@@ -530,6 +557,9 @@ class Consumer(LoggingMixin):
             for tag_id in self.override_tag_ids:
                 document.tags.add(Tag.objects.get(pk=tag_id))

+        if self.override_asn:
+            document.archive_serial_number = self.override_asn
+
         if self.override_owner_id:
             document.owner = User.objects.get(
                 pk=self.override_owner_id,
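The range bound in pre_check_asn_value() is the unsigned 32-bit maximum: 0xFF_FF_FF_FF == 4_294_967_295, the literal quoted in the error message and asserted in test_asn_too_large further down. A standalone sketch of just that rule:

    ASN_MAX = 0xFF_FF_FF_FF  # 4_294_967_295, uint32_t max; larger values break the Whoosh index

    def asn_in_range(asn: int) -> bool:
        return 0 <= asn <= ASN_MAX

    assert asn_in_range(123)
    assert not asn_in_range(ASN_MAX + 1)  # 4294967296, the value the too-large test uses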
src/documents/index.py

@@ -35,7 +35,7 @@ def get_schema():
         id=NUMERIC(stored=True, unique=True),
         title=TEXT(sortable=True),
         content=TEXT(),
-        asn=NUMERIC(sortable=True),
+        asn=NUMERIC(sortable=True, signed=False),
         correspondent=TEXT(sortable=True),
         correspondent_id=NUMERIC(),
         has_correspondent=BOOLEAN(),
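The signed=False flag matters because Whoosh NUMERIC fields are signed 32-bit by default, so values above 2**31 - 1 would not fit even though the model now allows ASNs up to 2**32 - 1. A sketch of the schema change in isolation, assuming Whoosh's default bits=32 for NUMERIC:

    from whoosh.fields import NUMERIC, Schema

    # signed=True (the default) stores -2**31 .. 2**31 - 1;
    # signed=False shifts the usable range to 0 .. 2**32 - 1.
    schema = Schema(asn=NUMERIC(sortable=True, signed=False))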
src/documents/management/commands/document_exporter.py

@@ -63,15 +63,6 @@ class Command(BaseCommand):
             "modified is used instead.",
         )

-        parser.add_argument(
-            "-f",
-            "--use-filename-format",
-            default=False,
-            action="store_true",
-            help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
-            "export directory, if configured.",
-        )
-
         parser.add_argument(
             "-d",
             "--delete",

@@ -83,10 +74,45 @@ class Command(BaseCommand):
         )

         parser.add_argument(
-            "--no-progress-bar",
+            "-f",
+            "--use-filename-format",
             default=False,
             action="store_true",
-            help="If set, the progress bar will not be shown",
+            help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
+            "export directory, if configured.",
+        )
+
+        parser.add_argument(
+            "-na",
+            "--no-archive",
+            default=False,
+            action="store_true",
+            help="Avoid exporting archive files",
+        )
+
+        parser.add_argument(
+            "-nt",
+            "--no-thumbnail",
+            default=False,
+            action="store_true",
+            help="Avoid exporting thumbnail files",
+        )
+
+        parser.add_argument(
+            "-p",
+            "--use-folder-prefix",
+            default=False,
+            action="store_true",
+            help="Export files in dedicated folders according to their nature: "
+            "archive, originals or thumbnails",
+        )
+
+        parser.add_argument(
+            "-sm",
+            "--split-manifest",
+            default=False,
+            action="store_true",
+            help="Export document information in individual manifest json files.",
+        )

         parser.add_argument(

@@ -97,21 +123,36 @@ class Command(BaseCommand):
             help="Export the documents to a zip file in the given directory",
         )

+        parser.add_argument(
+            "--no-progress-bar",
+            default=False,
+            action="store_true",
+            help="If set, the progress bar will not be shown",
+        )
+
     def __init__(self, *args, **kwargs):
         BaseCommand.__init__(self, *args, **kwargs)
         self.target: Path = None
+        self.split_manifest = False
         self.files_in_export_dir: Set[Path] = set()
         self.exported_files: List[Path] = []
         self.compare_checksums = False
         self.use_filename_format = False
+        self.use_folder_prefix = False
         self.delete = False
+        self.no_archive = False
+        self.no_thumbnail = False

     def handle(self, *args, **options):

         self.target = Path(options["target"]).resolve()
+        self.split_manifest = options["split_manifest"]
         self.compare_checksums = options["compare_checksums"]
         self.use_filename_format = options["use_filename_format"]
+        self.use_folder_prefix = options["use_folder_prefix"]
         self.delete = options["delete"]
+        self.no_archive = options["no_archive"]
+        self.no_thumbnail = options["no_thumbnail"]
         zip_export: bool = options["zip"]

         # If zipping, save the original target for later and

@@ -179,14 +220,17 @@ class Command(BaseCommand):
             serializers.serialize("json", StoragePath.objects.all()),
         )

-        manifest += json.loads(
+        comments = json.loads(
             serializers.serialize("json", Comment.objects.all()),
         )
+        if not self.split_manifest:
+            manifest += comments

         documents = Document.objects.order_by("id")
         document_map = {d.pk: d for d in documents}
         document_manifest = json.loads(serializers.serialize("json", documents))
-        manifest += document_manifest
+        if not self.split_manifest:
+            manifest += document_manifest

         manifest += json.loads(
             serializers.serialize("json", MailAccount.objects.all()),

@@ -243,15 +287,24 @@ class Command(BaseCommand):

             # 3.3. write filenames into manifest
             original_name = base_name
+            if self.use_folder_prefix:
+                original_name = os.path.join("originals", original_name)
             original_target = (self.target / Path(original_name)).resolve()
             document_dict[EXPORTER_FILE_NAME] = original_name

-            thumbnail_name = base_name + "-thumbnail.webp"
-            thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
-            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
+            if not self.no_thumbnail:
+                thumbnail_name = base_name + "-thumbnail.webp"
+                if self.use_folder_prefix:
+                    thumbnail_name = os.path.join("thumbnails", thumbnail_name)
+                thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
+                document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
+            else:
+                thumbnail_target = None

-            if document.has_archive_version:
+            if not self.no_archive and document.has_archive_version:
                 archive_name = base_name + "-archive.pdf"
+                if self.use_folder_prefix:
+                    archive_name = os.path.join("archive", archive_name)
                 archive_target = (self.target / Path(archive_name)).resolve()
                 document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
             else:

@@ -266,10 +319,11 @@ class Command(BaseCommand):
                     original_target.write_bytes(GnuPG.decrypted(out_file))
                     os.utime(original_target, times=(t, t))

-                thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
-                with document.thumbnail_file as out_file:
-                    thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
-                    os.utime(thumbnail_target, times=(t, t))
+                if thumbnail_target:
+                    thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
+                    with document.thumbnail_file as out_file:
+                        thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
+                        os.utime(thumbnail_target, times=(t, t))

                 if archive_target:
                     archive_target.parent.mkdir(parents=True, exist_ok=True)

@@ -283,7 +337,8 @@ class Command(BaseCommand):
                     original_target,
                 )

-                self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
+                if thumbnail_target:
+                    self.check_and_copy(document.thumbnail_path, None, thumbnail_target)

                 if archive_target:
                     self.check_and_copy(

@@ -292,22 +347,40 @@ class Command(BaseCommand):
                         archive_target,
                     )

+            if self.split_manifest:
+                manifest_name = base_name + "-manifest.json"
+                if self.use_folder_prefix:
+                    manifest_name = os.path.join("json", manifest_name)
+                manifest_name = (self.target / Path(manifest_name)).resolve()
+                manifest_name.parent.mkdir(parents=True, exist_ok=True)
+                content = [document_manifest[index]]
+                content += list(
+                    filter(
+                        lambda d: d["fields"]["document"] == document_dict["pk"],
+                        comments,
+                    ),
+                )
+                manifest_name.write_text(json.dumps(content, indent=2))
+                if manifest_name in self.files_in_export_dir:
+                    self.files_in_export_dir.remove(manifest_name)
+
         # 4.1 write manifest to target folder
         manifest_path = (self.target / Path("manifest.json")).resolve()
         manifest_path.write_text(json.dumps(manifest, indent=2))
+        if manifest_path in self.files_in_export_dir:
+            self.files_in_export_dir.remove(manifest_path)

         # 4.2 write version information to target folder
         version_path = (self.target / Path("version.json")).resolve()
         version_path.write_text(
             json.dumps({"version": version.__full_version_str__}, indent=2),
         )
+        if version_path in self.files_in_export_dir:
+            self.files_in_export_dir.remove(version_path)

         if self.delete:
             # 5. Remove files which we did not explicitly export in this run

-            if manifest_path in self.files_in_export_dir:
-                self.files_in_export_dir.remove(manifest_path)
-
             for f in self.files_in_export_dir:
                 f.unlink()
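The tests further down exercise these options through Django's call_command, which is also the easiest way to try them out. An illustrative invocation combining the new flags (the target directory is hypothetical):

    from django.core.management import call_command

    # Skip archive PDFs and thumbnails, write one manifest per document,
    # and sort files into archive/, originals/ and thumbnails/ subfolders.
    call_command(
        "document_exporter",
        "/path/to/export",  # hypothetical target directory
        "--no-archive",
        "--no-thumbnail",
        "--split-manifest",
        "--use-folder-prefix",
    )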
src/documents/management/commands/document_importer.py

@@ -72,11 +72,21 @@ class Command(BaseCommand):
         if not os.access(self.source, os.R_OK):
             raise CommandError("That path doesn't appear to be readable")

-        manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json"))
-        self._check_manifest_exists(manifest_path)
+        manifest_paths = []

-        with open(manifest_path) as f:
+        main_manifest_path = os.path.normpath(
+            os.path.join(self.source, "manifest.json"),
+        )
+        self._check_manifest_exists(main_manifest_path)
+
+        with open(main_manifest_path) as f:
             self.manifest = json.load(f)
+        manifest_paths.append(main_manifest_path)
+
+        for file in Path(self.source).glob("**/*-manifest.json"):
+            with open(file) as f:
+                self.manifest += json.load(f)
+            manifest_paths.append(file)

         version_path = os.path.normpath(os.path.join(self.source, "version.json"))
         if os.path.exists(version_path):

@@ -109,7 +119,8 @@ class Command(BaseCommand):
         ):
             # Fill up the database with whatever is in the manifest
             try:
-                call_command("loaddata", manifest_path)
+                for manifest_path in manifest_paths:
+                    call_command("loaddata", manifest_path)
             except (FieldDoesNotExist, DeserializationError) as e:
                 self.stdout.write(self.style.ERROR("Database import failed"))
                 if (

@@ -193,8 +204,11 @@ class Command(BaseCommand):
             doc_file = record[EXPORTER_FILE_NAME]
             document_path = os.path.join(self.source, doc_file)

-            thumb_file = record[EXPORTER_THUMBNAIL_NAME]
-            thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
+            if EXPORTER_THUMBNAIL_NAME in record:
+                thumb_file = record[EXPORTER_THUMBNAIL_NAME]
+                thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()
+            else:
+                thumbnail_path = None

             if EXPORTER_ARCHIVE_NAME in record:
                 archive_file = record[EXPORTER_ARCHIVE_NAME]

@@ -212,19 +226,21 @@ class Command(BaseCommand):

             shutil.copy2(document_path, document.source_path)

-            if thumbnail_path.suffix in {".png", ".PNG"}:
-                run_convert(
-                    density=300,
-                    scale="500x5000>",
-                    alpha="remove",
-                    strip=True,
-                    trim=False,
-                    auto_orient=True,
-                    input_file=f"{thumbnail_path}[0]",
-                    output_file=str(document.thumbnail_path),
-                )
-            else:
-                shutil.copy2(thumbnail_path, document.thumbnail_path)
+            if thumbnail_path:
+                if thumbnail_path.suffix in {".png", ".PNG"}:
+                    run_convert(
+                        density=300,
+                        scale="500x5000>",
+                        alpha="remove",
+                        strip=True,
+                        trim=False,
+                        auto_orient=True,
+                        input_file=f"{thumbnail_path}[0]",
+                        output_file=str(document.thumbnail_path),
+                    )
+                else:
+                    shutil.copy2(thumbnail_path, document.thumbnail_path)

             if archive_path:
                 create_source_path_directory(document.archive_path)
                 # TODO: this assumes that the export is valid and
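With this change the importer accepts both layouts: a single manifest.json, or a main manifest plus the per-document *-manifest.json files written by --split-manifest. A sketch of the discovery logic in isolation (the helper name is made up for illustration):

    import json
    from pathlib import Path

    def collect_manifests(source: str) -> list:
        # manifest.json first, then every split manifest anywhere below source.
        manifest = json.loads((Path(source) / "manifest.json").read_text())
        for part in Path(source).glob("**/*-manifest.json"):
            manifest += json.loads(part.read_text())
        return manifest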
(documents migration defining PaperlessTask – file name not shown in this view)

@@ -24,7 +24,7 @@ class Migration(migrations.Migration):
                 ),
             ),
             ("task_id", models.CharField(max_length=128)),
-            ("name", models.CharField(max_length=256)),
+            ("name", models.CharField(max_length=256, null=True)),
             (
                 "created",
                 models.DateTimeField(auto_now=True, verbose_name="created"),
(new documents migration – file name not shown in this view)

@@ -0,0 +1,30 @@
+# Generated by Django 4.1.4 on 2023-01-24 17:56
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("documents", "1028_remove_paperlesstask_task_args_and_more"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="document",
+            name="archive_serial_number",
+            field=models.PositiveIntegerField(
+                blank=True,
+                db_index=True,
+                help_text="The position of this document in your physical document archive.",
+                null=True,
+                unique=True,
+                validators=[
+                    django.core.validators.MaxValueValidator(4294967295),
+                    django.core.validators.MinValueValidator(0),
+                ],
+                verbose_name="archive serial number",
+            ),
+        ),
+    ]
src/documents/models.py

@@ -10,6 +10,8 @@ import pathvalidate
 from celery import states
 from django.conf import settings
 from django.contrib.auth.models import User
+from django.core.validators import MaxValueValidator
+from django.core.validators import MinValueValidator
 from django.db import models
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _

@@ -240,12 +242,16 @@ class Document(ModelWithOwner):
         help_text=_("The original name of the file when it was uploaded"),
     )

-    archive_serial_number = models.IntegerField(
+    archive_serial_number = models.PositiveIntegerField(
         _("archive serial number"),
         blank=True,
         null=True,
         unique=True,
         db_index=True,
+        validators=[
+            MaxValueValidator(0xFF_FF_FF_FF),
+            MinValueValidator(0),
+        ],
         help_text=_(
             "The position of this document in your physical document " "archive.",
         ),
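The validator pair on the model enforces the same uint32 window as the consumer's pre-check, but at Django validation time. A small sketch showing the validators on their own (Django validators are callables that raise ValidationError when a value is out of range):

    from django.core.exceptions import ValidationError
    from django.core.validators import MaxValueValidator, MinValueValidator

    asn_validators = [MaxValueValidator(0xFF_FF_FF_FF), MinValueValidator(0)]

    def validate_asn(value: int) -> None:
        for validator in asn_validators:
            validator(value)  # raises ValidationError when out of range

    validate_asn(123)        # passes
    try:
        validate_asn(2**32)  # 4294967296: rejected, matching the migration above
    except ValidationError:
        pass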
src/documents/tasks.py

@@ -100,6 +100,7 @@ def consume_file(
 ):

     path = Path(path).resolve()
+    asn = None

     # Celery converts this to a string, but everything expects a datetime
     # Long term solution is to not use JSON for the serializer but pickle instead

@@ -111,71 +112,83 @@ def consume_file(
     except Exception:
         pass

-    # check for separators in current document
-    if settings.CONSUMER_ENABLE_BARCODES:
-
-        pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)
-
-        if separators:
-            logger.debug(
-                f"Pages with separators found in: {str(path)}",
-            )
-            document_list = barcodes.separate_pages(pdf_filepath, separators)
-
-        if document_list:
-            for n, document in enumerate(document_list):
-                # save to consumption dir
-                # rename it to the original filename with number prefix
-                if override_filename:
-                    newname = f"{str(n)}_" + override_filename
-                else:
-                    newname = None
-
-                # If the file is an upload, it's in the scratch directory
-                # Move it to consume directory to be picked up
-                # Otherwise, use the current parent to keep possible tags
-                # from subdirectories
-                try:
-                    # is_relative_to would be nicer, but new in 3.9
-                    _ = path.relative_to(settings.SCRATCH_DIR)
-                    save_to_dir = settings.CONSUMPTION_DIR
-                except ValueError:
-                    save_to_dir = path.parent
-
-                barcodes.save_to_dir(
-                    document,
-                    newname=newname,
-                    target_dir=save_to_dir,
-                )
-
-            # Delete the PDF file which was split
-            os.remove(pdf_filepath)
-
-            # If the original was a TIFF, remove the original file as well
-            if str(pdf_filepath) != str(path):
-                logger.debug(f"Deleting file {path}")
-                os.unlink(path)
-
-            # notify the sender, otherwise the progress bar
-            # in the UI stays stuck
-            payload = {
-                "filename": override_filename,
-                "task_id": task_id,
-                "current_progress": 100,
-                "max_progress": 100,
-                "status": "SUCCESS",
-                "message": "finished",
-            }
-            try:
-                async_to_sync(get_channel_layer().group_send)(
-                    "status_updates",
-                    {"type": "status_update", "data": payload},
-                )
-            except ConnectionError as e:
-                logger.warning(f"ConnectionError on status send: {str(e)}")
-            # consuming stops here, since the original document with
-            # the barcodes has been split and will be consumed separately
-            return "File successfully split"
+    # read all barcodes in the current document
+    if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
+        doc_barcode_info = barcodes.scan_file_for_barcodes(path)
+
+        # split document by separator pages, if enabled
+        if settings.CONSUMER_ENABLE_BARCODES:
+            separators = barcodes.get_separating_barcodes(doc_barcode_info.barcodes)
+
+            if len(separators) > 0:
+                logger.debug(
+                    f"Pages with separators found in: {str(path)}",
+                )
+                document_list = barcodes.separate_pages(
+                    doc_barcode_info.pdf_path,
+                    separators,
+                )
+
+            if document_list:
+                for n, document in enumerate(document_list):
+                    # save to consumption dir
+                    # rename it to the original filename with number prefix
+                    if override_filename:
+                        newname = f"{str(n)}_" + override_filename
+                    else:
+                        newname = None
+
+                    # If the file is an upload, it's in the scratch directory
+                    # Move it to consume directory to be picked up
+                    # Otherwise, use the current parent to keep possible tags
+                    # from subdirectories
+                    try:
+                        # is_relative_to would be nicer, but new in 3.9
+                        _ = path.relative_to(settings.SCRATCH_DIR)
+                        save_to_dir = settings.CONSUMPTION_DIR
+                    except ValueError:
+                        save_to_dir = path.parent
+
+                    barcodes.save_to_dir(
+                        document,
+                        newname=newname,
+                        target_dir=save_to_dir,
+                    )
+
+                # Delete the PDF file which was split
+                os.remove(doc_barcode_info.pdf_path)
+
+                # If the original was a TIFF, remove the original file as well
+                if str(doc_barcode_info.pdf_path) != str(path):
+                    logger.debug(f"Deleting file {path}")
+                    os.unlink(path)
+
+                # notify the sender, otherwise the progress bar
+                # in the UI stays stuck
+                payload = {
+                    "filename": override_filename,
+                    "task_id": task_id,
+                    "current_progress": 100,
+                    "max_progress": 100,
+                    "status": "SUCCESS",
+                    "message": "finished",
+                }
+                try:
+                    async_to_sync(get_channel_layer().group_send)(
+                        "status_updates",
+                        {"type": "status_update", "data": payload},
+                    )
+                except ConnectionError as e:
+                    logger.warning(f"ConnectionError on status send: {str(e)}")
+                # consuming stops here, since the original document with
+                # the barcodes has been split and will be consumed separately
+                return "File successfully split"
+
+        # try reading the ASN from barcode
+        if settings.CONSUMER_ENABLE_ASN_BARCODE:
+            asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+            if asn:
+                logger.info(f"Found ASN in barcode: {asn}")

     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(

@@ -187,6 +200,7 @@ def consume_file(
         override_tag_ids=override_tag_ids,
         task_id=task_id,
         override_created=override_created,
+        override_asn=asn,
         override_owner_id=override_owner_id,
     )
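Note that scanning now runs if either feature is enabled, while splitting and ASN extraction stay independently gated. The settings involved, as referenced in this diff (in a deployment these are usually set through the matching PAPERLESS_-prefixed environment variables; the values below are illustrative, and "PATCHT" is the assumed default separator string):

    # Django settings consulted by the new consume_file() flow
    CONSUMER_ENABLE_BARCODES = True      # split documents on separator pages
    CONSUMER_BARCODE_STRING = "PATCHT"   # barcode text treated as a separator
    CONSUMER_ENABLE_ASN_BARCODE = True   # read ASN barcodes
    CONSUMER_ASN_BARCODE_PREFIX = "ASN"  # prefix stripped before int() parsing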
New binary test samples (file contents not shown):

 BIN src/documents/tests/samples/barcodes/barcode-39-asn-123.pdf (new file)
 BIN src/documents/tests/samples/barcodes/barcode-39-asn-123.png (new file, image, 12 KiB)
 BIN (further new sample files, names not shown in this view; one is a 21 KiB image)
 BIN src/documents/tests/samples/barcodes/barcode-39-asn-invalid.pdf (new file)
 BIN src/documents/tests/samples/barcodes/barcode-39-asn-invalid.png (new file, image, 11 KiB)
 BIN src/documents/tests/samples/barcodes/many-qr-codes.pdf (new file)
src/documents/tests/test_barcodes.py

@@ -9,6 +9,7 @@
 from django.test import override_settings
 from django.test import TestCase
 from documents import barcodes
 from documents import tasks
+from documents.consumer import ConsumerError
 from documents.tests.utils import DirectoriesMixin
 from PIL import Image

@@ -110,6 +111,58 @@ class TestBarcode(DirectoriesMixin, TestCase):
         img = Image.open(test_file)
         self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])

+    def test_barcode_reader_asn_normal(self):
+        """
+        GIVEN:
+            - Image containing standard ASNxxxxx barcode
+        WHEN:
+            - Image is scanned for barcodes
+        THEN:
+            - The barcode is located
+            - The barcode value is correct
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-123.png",
+        )
+        img = Image.open(test_file)
+        self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"])
+
+    def test_barcode_reader_asn_invalid(self):
+        """
+        GIVEN:
+            - Image containing invalid ASNxxxxx barcode
+            - The number portion of the ASN is not a number
+        WHEN:
+            - Image is scanned for barcodes
+        THEN:
+            - The barcode is located
+            - The barcode value is correct
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-invalid.png",
+        )
+        img = Image.open(test_file)
+        self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"])
+
+    def test_barcode_reader_asn_custom_prefix(self):
+        """
+        GIVEN:
+            - Image containing custom prefix barcode
+        WHEN:
+            - Image is scanned for barcodes
+        THEN:
+            - The barcode is located
+            - The barcode value is correct
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-custom-prefix.png",
+        )
+        img = Image.open(test_file)
+        self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
+
     def test_get_mime_type(self):
         tiff_file = os.path.join(
             self.SAMPLE_DIR,

@@ -167,20 +220,26 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [0])

     def test_scan_file_for_separating_barcodes_none_present(self):
         test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
            test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [])

     def test_scan_file_for_separating_barcodes3(self):

@@ -188,11 +247,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [1])

     def test_scan_file_for_separating_barcodes4(self):

@@ -200,11 +262,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "several-patcht-codes.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [2, 5])

     def test_scan_file_for_separating_barcodes_upsidedown(self):

@@ -212,14 +277,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle_reverse.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [1])

-    def test_scan_file_for_separating_barcodes_pillow_transcode_error(self):
+    def test_scan_file_for_barcodes_pillow_transcode_error(self):
         """
         GIVEN:
             - A PDF containing an image which cannot be transcoded to a PIL image

@@ -273,7 +341,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
         with mock.patch("documents.barcodes.barcode_reader") as reader:
             reader.return_value = list()

-            _, _ = barcodes.scan_file_for_separating_barcodes(
+            _ = barcodes.scan_file_for_barcodes(
                 str(device_n_pdf.name),
             )

@@ -292,11 +360,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "barcode-fax-image.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [1])

     def test_scan_file_for_separating_qr_barcodes(self):

@@ -304,11 +375,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "patch-code-t-qr.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [0])

     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")

@@ -317,11 +391,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "barcode-39-custom.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [0])

     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")

@@ -330,11 +407,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "barcode-qr-custom.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [0])

     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")

@@ -343,11 +423,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "barcode-128-custom.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [0])

     def test_scan_file_for_separating_wrong_qr_barcodes(self):

@@ -355,13 +438,41 @@ class TestBarcode(DirectoriesMixin, TestCase):
             self.BARCODE_SAMPLE_DIR,
             "barcode-39-custom.pdf",
         )
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
         self.assertListEqual(separator_page_numbers, [])

+    @override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
+    def test_scan_file_for_separating_qr_barcodes(self):
+        """
+        GIVEN:
+            - Input PDF with certain QR codes that aren't detected at current size
+        WHEN:
+            - The input file is scanned for barcodes
+        THEN:
+            - QR codes are detected
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "many-qr-codes.pdf",
+        )
+
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
+            test_file,
+        )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )
+
+        self.assertGreater(len(doc_barcode_info.barcodes), 0)
+        self.assertListEqual(separator_page_numbers, [1])
+
     def test_separate_pages(self):
         test_file = os.path.join(
             self.BARCODE_SAMPLE_DIR,

@@ -450,11 +561,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
         )
         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)

-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(test_file, pdf_file)
+        self.assertEqual(test_file, doc_barcode_info.pdf_path)
         self.assertTrue(len(separator_page_numbers) > 0)

         document_list = barcodes.separate_pages(test_file, separator_page_numbers)

@@ -559,12 +673,155 @@ class TestBarcode(DirectoriesMixin, TestCase):
         WHEN:
             - File is scanned for barcode
         THEN:
-            - Scanning handle the exception without exception
+            - Scanning handles the exception without exception
         """
         test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf")
-        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
             test_file,
         )
+        separator_page_numbers = barcodes.get_separating_barcodes(
+            doc_barcode_info.barcodes,
+        )

-        self.assertEqual(pdf_file, test_file)
-        self.assertListEqual(separator_page_numbers, [])
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
+        self.assertListEqual(separator_page_numbers, [])

+    def test_scan_file_for_asn_barcode(self):
+        """
+        GIVEN:
+            - PDF containing an ASN barcode
+            - The ASN value is 123
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - The ASN is located
+            - The ASN integer value is correct
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-123.pdf",
+        )
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
+            test_file,
+        )
+        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
+        self.assertEqual(asn, 123)
+
+    def test_scan_file_for_asn_not_existing(self):
+        """
+        GIVEN:
+            - PDF without an ASN barcode
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - No ASN is retrieved from the document
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "patch-code-t.pdf",
+        )
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
+            test_file,
+        )
+        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
+        self.assertEqual(asn, None)
+
+    def test_scan_file_for_asn_barcode_invalid(self):
+        """
+        GIVEN:
+            - PDF containing an ASN barcode
+            - The ASN value is XYZXYZ
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - The ASN is located
+            - The ASN value is not used
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-invalid.pdf",
+        )
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
+            test_file,
+        )
+
+        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
+        self.assertEqual(asn, None)
+
+    @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
+    def test_scan_file_for_asn_custom_prefix(self):
+        """
+        GIVEN:
+            - PDF containing an ASN barcode with custom prefix
+            - The ASN value is 123
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - The ASN is located
+            - The ASN integer value is correct
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-custom-prefix.pdf",
+        )
+        doc_barcode_info = barcodes.scan_file_for_barcodes(
+            test_file,
+        )
+        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+
+        self.assertEqual(doc_barcode_info.pdf_path, test_file)
+        self.assertEqual(asn, 123)
+
+    @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
+    def test_consume_barcode_file_asn_assignment(self):
+        """
+        GIVEN:
+            - PDF containing an ASN barcode
+            - The ASN value is 123
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - The ASN is located
+            - The ASN integer value is correct
+            - The ASN is provided as the override value to the consumer
+        """
+        test_file = os.path.join(
+            self.BARCODE_SAMPLE_DIR,
+            "barcode-39-asn-123.pdf",
+        )
+
+        dst = os.path.join(settings.SCRATCH_DIR, "barcode-39-asn-123.pdf")
+        shutil.copy(test_file, dst)
+
+        with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
+            tasks.consume_file(dst)
+
+            args, kwargs = mocked_call.call_args
+
+            self.assertEqual(kwargs["override_asn"], 123)
+
+    @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
+    def test_asn_too_large(self):
+
+        src = os.path.join(
+            os.path.dirname(__file__),
+            "samples",
+            "barcodes",
+            "barcode-128-asn-too-large.pdf",
+        )
+        dst = os.path.join(self.dirs.scratch_dir, "barcode-128-asn-too-large.pdf")
+        shutil.copy(src, dst)
+
+        with mock.patch("documents.consumer.Consumer._send_progress"):
+            self.assertRaisesMessage(
+                ConsumerError,
+                "Given ASN 4294967296 is out of range [0, 4,294,967,295]",
+                tasks.consume_file,
+                dst,
+            )
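The fixtures encode the barcode text that the assertions check verbatim; the ASN path then strips the configured prefix and parses the remainder. A worked example mirroring get_asn_from_barcodes():

    prefix = "ASN"      # settings.CONSUMER_ASN_BARCODE_PREFIX default
    value = "ASN00123"  # what barcode_reader() returns for the sample image
    assert value.startswith(prefix)
    asn = int(value[len(prefix):].strip())  # -> 123
    # "ASNXYZXYZ" raises ValueError in int(), so no ASN is used for that sample.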
src/documents/tests/test_management_exporter.py

@@ -102,6 +102,10 @@ class TestExportImport(DirectoriesMixin, TestCase):
         use_filename_format=False,
         compare_checksums=False,
         delete=False,
+        no_archive=False,
+        no_thumbnail=False,
+        split_manifest=False,
+        use_folder_prefix=False,
     ):
         args = ["document_exporter", self.target]
         if use_filename_format:

@@ -110,6 +114,14 @@ class TestExportImport(DirectoriesMixin, TestCase):
             args += ["--compare-checksums"]
         if delete:
             args += ["--delete"]
+        if no_archive:
+            args += ["--no-archive"]
+        if no_thumbnail:
+            args += ["--no-thumbnail"]
+        if split_manifest:
+            args += ["--split-manifest"]
+        if use_folder_prefix:
+            args += ["--use-folder-prefix"]

         call_command(*args)

@@ -497,3 +509,140 @@ class TestExportImport(DirectoriesMixin, TestCase):
             call_command(*args)

         self.assertEqual("That path doesn't appear to be writable", str(e))
+
+    def test_no_archive(self):
+        """
+        GIVEN:
+            - Request to export documents to directory
+        WHEN:
+            - Option no-archive is used
+        THEN:
+            - Manifest.json doesn't contain information about archive files
+            - Documents can be imported again
+        """
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(
+            os.path.join(os.path.dirname(__file__), "samples", "documents"),
+            os.path.join(self.dirs.media_dir, "documents"),
+        )
+
+        manifest = self._do_export()
+        has_archive = False
+        for element in manifest:
+            if element["model"] == "documents.document":
+                has_archive = (
+                    has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element
+                )
+        self.assertTrue(has_archive)
+
+        has_archive = False
+        manifest = self._do_export(no_archive=True)
+        for element in manifest:
+            if element["model"] == "documents.document":
+                has_archive = (
+                    has_archive or document_exporter.EXPORTER_ARCHIVE_NAME in element
+                )
+        self.assertFalse(has_archive)
+
+        with paperless_environment() as dirs:
+            self.assertEqual(Document.objects.count(), 4)
+            Document.objects.all().delete()
+            self.assertEqual(Document.objects.count(), 0)
+            call_command("document_importer", self.target)
+            self.assertEqual(Document.objects.count(), 4)
+
+    def test_no_thumbnail(self):
+        """
+        GIVEN:
+            - Request to export documents to directory
+        WHEN:
+            - Option no-thumbnails is used
+        THEN:
+            - Manifest.json doesn't contain information about thumbnails
+            - Documents can be imported again
+        """
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(
+            os.path.join(os.path.dirname(__file__), "samples", "documents"),
+            os.path.join(self.dirs.media_dir, "documents"),
+        )
+
+        manifest = self._do_export()
+        has_thumbnail = False
+        for element in manifest:
+            if element["model"] == "documents.document":
+                has_thumbnail = (
+                    has_thumbnail
+                    or document_exporter.EXPORTER_THUMBNAIL_NAME in element
+                )
+        self.assertTrue(has_thumbnail)
+
+        has_thumbnail = False
+        manifest = self._do_export(no_thumbnail=True)
+        for element in manifest:
+            if element["model"] == "documents.document":
+                has_thumbnail = (
+                    has_thumbnail
+                    or document_exporter.EXPORTER_THUMBNAIL_NAME in element
+                )
+        self.assertFalse(has_thumbnail)
+
+        with paperless_environment() as dirs:
+            self.assertEqual(Document.objects.count(), 4)
+            Document.objects.all().delete()
+            self.assertEqual(Document.objects.count(), 0)
+            call_command("document_importer", self.target)
+            self.assertEqual(Document.objects.count(), 4)
+
+    def test_split_manifest(self):
+        """
+        GIVEN:
+            - Request to export documents to directory
+        WHEN:
+            - Option split_manifest is used
+        THEN:
+            - Main manifest.json file doesn't contain information about documents
+            - Documents can be imported again
+        """
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(
+            os.path.join(os.path.dirname(__file__), "samples", "documents"),
+            os.path.join(self.dirs.media_dir, "documents"),
+        )
+
+        manifest = self._do_export(split_manifest=True)
+        has_document = False
+        for element in manifest:
+            has_document = has_document or element["model"] == "documents.document"
+        self.assertFalse(has_document)
+
+        with paperless_environment() as dirs:
+            self.assertEqual(Document.objects.count(), 4)
+            Document.objects.all().delete()
+            self.assertEqual(Document.objects.count(), 0)
+            call_command("document_importer", self.target)
+            self.assertEqual(Document.objects.count(), 4)
+
+    def test_folder_prefix(self):
+        """
+        GIVEN:
+            - Request to export documents to directory
+        WHEN:
+            - Option use_folder_prefix is used
+        THEN:
+            - Documents can be imported again
+        """
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(
+            os.path.join(os.path.dirname(__file__), "samples", "documents"),
+            os.path.join(self.dirs.media_dir, "documents"),
+        )
+
+        manifest = self._do_export(use_folder_prefix=True)
+
+        with paperless_environment() as dirs:
+            self.assertEqual(Document.objects.count(), 4)
+            Document.objects.all().delete()
+            self.assertEqual(Document.objects.count(), 0)
+            call_command("document_importer", self.target)
+            self.assertEqual(Document.objects.count(), 4)
src/documents/views.py

@@ -201,7 +201,7 @@ class TagViewSet(ModelViewSet):
         ObjectOwnedOrGrandtedPermissionsFilter,
     )
     filterset_class = TagFilterSet
-    ordering_fields = ("name", "matching_algorithm", "match", "document_count")
+    ordering_fields = ("color", "name", "matching_algorithm", "match", "document_count")


 class DocumentTypeViewSet(ModelViewSet, PassUserMixin):
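With color added to ordering_fields, API clients can sort tags by color through DRF's standard ordering query parameter. An illustrative request (host and token are placeholders):

    import requests  # any HTTP client works; this is just for illustration

    resp = requests.get(
        "http://localhost:8000/api/tags/?ordering=color",  # or -color for descending
        headers={"Authorization": "Token <token>"},
    )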