Changes the settings and a decent amount of test code to be pathlib compatible

This commit is contained in:
Trenton H
2023-02-07 14:05:18 -08:00
parent 7cb14374cf
commit 41bcfcaffe
9 changed files with 192 additions and 309 deletions

View File

@@ -98,7 +98,7 @@ def barcode_reader(image: Image) -> List[str]:
return barcodes
def get_file_mime_type(path: str) -> str:
def get_file_mime_type(path: Path) -> str:
"""
Determines the file type, based on MIME type.
@@ -109,21 +109,20 @@ def get_file_mime_type(path: str) -> str:
return mime_type
def convert_from_tiff_to_pdf(filepath: str) -> str:
def convert_from_tiff_to_pdf(filepath: Path) -> Path:
"""
converts a given TIFF image file to pdf into a temporary directory.
Returns the new pdf file.
"""
file_name = os.path.splitext(os.path.basename(filepath))[0]
mime_type = get_file_mime_type(filepath)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
# use old file name with pdf extension
if mime_type == "image/tiff":
newpath = os.path.join(tempdir, file_name + ".pdf")
newpath = Path(tempdir) / Path(filepath.name).with_suffix(".pdf")
else:
logger.warning(
f"Cannot convert mime type {str(mime_type)} from {str(filepath)} to pdf.",
f"Cannot convert mime type {mime_type} from {filepath} to pdf.",
)
return None
with Image.open(filepath) as image:
@@ -145,7 +144,7 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
def scan_file_for_barcodes(
filepath: str,
filepath: Path,
) -> DocumentBarcodeInfo:
"""
Scan the provided pdf file for any barcodes
@@ -252,7 +251,7 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
return asn
def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[Path]:
"""
Separate the provided pdf file on the pages_to_split_on.
The pages which are defined by the keys in page_numbers
@@ -268,8 +267,8 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
return document_paths
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
fname = os.path.splitext(os.path.basename(filepath))[0]
tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
fname = filepath.with_suffix("").name
pdf = Pdf.open(filepath)
# Start with an empty document
@@ -307,7 +306,7 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
output_filename = f"{fname}_document_{doc_idx}.pdf"
logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
savepath = os.path.join(tempdir, output_filename)
savepath = tempdir / output_filename
with open(savepath, "wb") as out:
dst.save(out)
document_paths.append(savepath)
@@ -316,18 +315,18 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
def save_to_dir(
filepath: str,
filepath: Path,
newname: str = None,
target_dir: str = settings.CONSUMPTION_DIR,
target_dir: Path = settings.CONSUMPTION_DIR,
):
"""
Copies filepath to target_dir.
Optionally rename the file.
"""
if os.path.isfile(filepath) and os.path.isdir(target_dir):
if filepath.is_file() and target_dir.is_dir():
dest = target_dir
if newname is not None:
dest = os.path.join(dest, newname)
dest = dest / newname
shutil.copy(filepath, dest)
logging.debug(f"saved {str(filepath)} to {str(dest)}")
else:

View File

@@ -2,7 +2,6 @@ import logging
import os
import pickle
import re
import shutil
import warnings
from datetime import datetime
from hashlib import sha256
@@ -122,7 +121,7 @@ class DocumentClassifier:
def save(self):
target_file = settings.MODEL_FILE
target_file_temp = settings.MODEL_FILE + ".part"
target_file_temp = settings.MODEL_FILE.with_suffix(".pickle.part")
with open(target_file_temp, "wb") as f:
pickle.dump(self.FORMAT_VERSION, f)
@@ -138,9 +137,7 @@ class DocumentClassifier:
pickle.dump(self.document_type_classifier, f)
pickle.dump(self.storage_path_classifier, f)
if os.path.isfile(target_file):
os.unlink(target_file)
shutil.move(target_file_temp, target_file)
target_file_temp.rename(target_file)
def train(self):

View File

@@ -3,6 +3,7 @@ import logging
import os
import re
from collections import OrderedDict
from pathlib import Path
from typing import Final
from typing import Optional
@@ -282,7 +283,7 @@ class Document(ModelWithOwner):
return res
@property
def source_path(self) -> str:
def source_path(self) -> Path:
if self.filename:
fname = str(self.filename)
else:
@@ -290,7 +291,7 @@ class Document(ModelWithOwner):
if self.storage_type == self.STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(settings.ORIGINALS_DIR, fname)
return (settings.ORIGINALS_DIR / Path(fname)).resolve()
@property
def source_file(self):
@@ -301,9 +302,9 @@ class Document(ModelWithOwner):
return self.archive_filename is not None
@property
def archive_path(self) -> Optional[str]:
def archive_path(self) -> Optional[Path]:
if self.has_archive_version:
return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()
else:
return None
@@ -335,14 +336,14 @@ class Document(ModelWithOwner):
return get_default_file_extension(self.mime_type)
@property
def thumbnail_path(self) -> str:
def thumbnail_path(self) -> Path:
webp_file_name = f"{self.pk:07}.webp"
if self.storage_type == self.STORAGE_TYPE_GPG:
webp_file_name += ".gpg"
webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
webp_file_path = settings.THUMBNAIL_DIR / Path(webp_file_name)
return os.path.normpath(webp_file_path)
return webp_file_path.resolve()
@property
def thumbnail_file(self):

View File

@@ -1,5 +1,6 @@
import os
import shutil
from pathlib import Path
from unittest import mock
from django.conf import settings
@@ -15,12 +16,9 @@ from PIL import Image
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
SAMPLE_DIR = Path(__file__).parent / "samples"
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_png(self):
"""
@@ -31,7 +29,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT.png"
img = Image.open(test_file)
separator_barcode = settings.CONSUMER_BARCODE_STRING
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -45,10 +43,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pbm",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pbm"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -62,10 +58,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distortion.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -79,10 +72,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distortion2.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion2.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -96,10 +86,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-unreadable.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-unreadable.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
@@ -112,10 +99,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"qr-code-PATCHT.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "qr-code-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -129,10 +113,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-PATCHT.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@@ -146,7 +128,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No barcode is detected
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
test_file = self.SAMPLE_DIR / "simple.png"
img = Image.open(test_file)
self.assertListEqual(barcodes.barcode_reader(img), [])
@@ -159,10 +141,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@@ -175,10 +155,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@@ -191,10 +169,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@@ -207,20 +183,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
tiff_file = os.path.join(
self.SAMPLE_DIR,
"simple.tiff",
)
pdf_file = os.path.join(
self.SAMPLE_DIR,
"simple.pdf",
)
png_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
pdf_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile2")
tiff_file = self.SAMPLE_DIR / "simple.tiff"
pdf_file = self.SAMPLE_DIR / "simple.pdf"
png_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
tiff_file_no_extension = settings.SCRATCH_DIR / "testfile1"
pdf_file_no_extension = settings.SCRATCH_DIR / "testfile2"
shutil.copy(tiff_file, tiff_file_no_extension)
shutil.copy(pdf_file, pdf_file_no_extension)
@@ -245,17 +215,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"simple.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.tiff")
test_file = self.SAMPLE_DIR / "simple.tiff"
dst = settings.SCRATCH_DIR / "simple.tiff"
shutil.copy(test_file, dst)
target_file = barcodes.convert_from_tiff_to_pdf(dst)
file_extension = os.path.splitext(os.path.basename(target_file))[1]
self.assertIsFile(target_file)
self.assertEqual(file_extension, ".pdf")
self.assertEqual(target_file.suffix, ".pdf")
def test_convert_error_from_pdf_to_pdf(self):
"""
@@ -266,11 +233,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
self.SAMPLE_DIR,
"simple.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
test_file = self.SAMPLE_DIR / "simple.pdf"
dst = settings.SCRATCH_DIR / "simple.pdf"
shutil.copy(test_file, dst)
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
@@ -283,10 +248,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -306,7 +269,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
test_file = self.SAMPLE_DIR / "simple.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -326,10 +289,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -349,10 +310,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on pages 2 and 5 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"several-patcht-codes.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -373,10 +332,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle_reverse.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle_reverse.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -396,10 +353,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is still detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-fax-image.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -420,10 +375,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-qr.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -445,10 +398,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -471,10 +422,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -497,10 +446,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -522,10 +469,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No split pages are detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -546,10 +491,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- QR codes are detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"many-qr-codes.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@@ -570,10 +512,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Two new documents are produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
documents = barcodes.separate_pages(test_file, {1: False})
self.assertEqual(len(documents), 2)
@@ -587,11 +527,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Only two files are output
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"patch-code-t-double.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
pages = barcodes.separate_pages(test_file, {1: False, 2: False})
self.assertEqual(len(pages), 2)
@@ -606,10 +543,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- No new documents are produced
- A warning is logged
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
pages = barcodes.separate_pages(test_file, {})
self.assertEqual(pages, [])
@@ -629,12 +564,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
target_file = os.path.join(settings.SCRATCH_DIR, "patch-code-t.pdf")
target_file = settings.SCRATCH_DIR / "patch-code-t.pdf"
self.assertIsFile(target_file)
def test_save_to_dir_not_existing(self):
@@ -647,11 +580,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
nonexistingdir = "/nowhere"
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
nonexistingdir = Path("/nowhere")
self.assertIsNotDir(nonexistingdir)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -673,16 +604,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
barcodes.save_to_dir(
test_file,
newname="newname.pdf",
target_dir=settings.SCRATCH_DIR,
)
target_file = os.path.join(settings.SCRATCH_DIR, "newname.pdf")
target_file = settings.SCRATCH_DIR / "newname.pdf"
self.assertIsFile(target_file)
def test_barcode_splitter(self):
@@ -694,10 +623,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@@ -715,14 +641,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
for document in document_list:
barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR)
target_file1 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_0.pdf",
)
target_file2 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_1.pdf",
)
target_file1 = settings.SCRATCH_DIR / "patch-code-t-middle_document_0.pdf"
target_file2 = settings.SCRATCH_DIR / "patch-code-t-middle_document_1.pdf"
self.assertIsFile(target_file1)
self.assertIsFile(target_file2)
@@ -737,12 +658,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
dst = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@@ -761,11 +679,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@@ -786,11 +702,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- Barcode reader reported warning
- Consumption continued with the file
"""
test_file = os.path.join(
self.SAMPLE_DIR,
"simple.jpg",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
test_file = self.SAMPLE_DIR / "simple.jpg"
dst = settings.SCRATCH_DIR / "simple.jpg"
shutil.copy(test_file, dst)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -825,11 +739,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
dst = settings.SCRATCH_DIR / "patch-code-t-middle"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@@ -844,7 +756,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Scanning handles the exception without crashing
"""
test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf")
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@@ -873,11 +785,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced, split correctly by correct pages
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"split-by-asn-2.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@@ -914,11 +822,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced, split correctly by correct pages
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"split-by-asn-1.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@@ -944,12 +848,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
class TestAsnBarcodes(DirectoriesMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
SAMPLE_DIR = Path(__file__).parent / "samples"
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_asn_normal(self):
"""
@@ -961,10 +862,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"])
@@ -979,10 +878,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-invalid.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"])
@@ -996,10 +893,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-custom-prefix.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
@@ -1015,10 +910,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN integer value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-custom-prefix.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -1038,10 +931,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN value is not used
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-invalid.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -1064,12 +955,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN integer value is correct
- The ASN is provided as the override value to the consumer
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
dst = os.path.join(settings.SCRATCH_DIR, "barcode-39-asn-123.pdf")
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
shutil.copy(test_file, dst)
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
@@ -1090,10 +978,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN integer value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -1111,10 +997,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
THEN:
- No ASN is retrieved from the document
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@@ -1134,13 +1018,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
THEN:
- Exception is raised regarding size limits
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-128-asn-too-large.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "barcode-128-asn-too-large.pdf")
src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf"
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
shutil.copy(src, dst)
with mock.patch("documents.consumer.Consumer._send_progress"):

View File

@@ -19,6 +19,7 @@ from ..models import Document
from ..models import DocumentType
from ..models import StoragePath
from .utils import DirectoriesMixin
from .utils import FileSystemAssertsMixin
class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@@ -47,7 +48,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Test default source_path
self.assertEqual(
document.source_path,
os.path.join(settings.ORIGINALS_DIR, f"{document.pk:07d}.pdf"),
settings.ORIGINALS_DIR / f"{document.pk:07d}.pdf",
)
document.filename = generate_filename(document)
@@ -72,10 +73,14 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.save()
# Check proper handling of files
self.assertIsDir(os.path.join(settings.ORIGINALS_DIR, "test"))
self.assertIsNotDir(os.path.join(settings.ORIGINALS_DIR, "none"))
self.assertIsDir(
settings.ORIGINALS_DIR / "test",
)
self.assertIsNotDir(
settings.ORIGINALS_DIR / "none",
)
self.assertIsFile(
os.path.join(settings.ORIGINALS_DIR, "test/test.pdf.gpg"),
settings.ORIGINALS_DIR / "test" / "test.pdf.gpg",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -89,12 +94,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
document.source_path.touch()
# Test source_path
self.assertEqual(
document.source_path,
os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"),
settings.ORIGINALS_DIR / "none" / "none.pdf",
)
# Make the folder read- and execute-only (no writing and no renaming)
@@ -106,7 +111,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Check proper handling of files
self.assertIsFile(
os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"),
settings.ORIGINALS_DIR / "none" / "none.pdf",
)
self.assertEqual(document.filename, "none/none.pdf")
@@ -232,9 +237,9 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
important_file = document.source_path + "test"
Path(important_file).touch()
document.source_path.touch()
important_file = document.source_path.with_suffix(".test")
important_file.touch()
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
@@ -379,7 +384,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"),
settings.ORIGINALS_DIR / "etc" / "something" / "doc1.pdf",
)
@override_settings(
@@ -599,11 +604,11 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
self.assertIsFile(doc.archive_path)
self.assertEqual(
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
settings.ORIGINALS_DIR / "none" / "my_doc.pdf",
)
self.assertEqual(
doc.archive_path,
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
settings.ARCHIVE_DIR / "none" / "my_doc.pdf",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@@ -698,7 +703,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
@mock.patch("documents.signals.handlers.os.rename")
def test_move_archive_error(self, m):
def fake_rename(src, dst):
if "archive" in src:
if "archive" in str(src):
raise OSError()
else:
os.remove(src)
@@ -749,7 +754,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
@mock.patch("documents.signals.handlers.os.rename")
def test_move_file_error(self, m):
def fake_rename(src, dst):
if "original" in src:
if "original" in str(src):
raise OSError()
else:
os.remove(src)

View File

@@ -359,7 +359,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(os.path.join(self.target, "manifest.json"))
self.assertIsFile(os.path.join(self.target, "wow2", "none.pdf"))
self.assertIsFile(
(os.path.join(self.target, "wow2", "none_01.pdf")),
os.path.join(self.target, "wow2", "none_01.pdf"),
)
def test_export_missing_files(self):

View File

@@ -58,8 +58,8 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsNotFile(self.d1.thumbnail_path)
self.assertIsNotFile(self.d2.thumbnail_path)
call_command("document_thumbnails")
self.assertTrue(self.d1.thumbnail_path)
self.assertTrue(self.d2.thumbnail_path)
self.assertIsFile(self.d1.thumbnail_path)
self.assertIsFile(self.d2.thumbnail_path)
def test_command_documentid(self):
self.assertIsNotFile(self.d1.thumbnail_path)

View File

@@ -1,4 +1,3 @@
import os
import shutil
import tempfile
from collections import namedtuple
@@ -19,23 +18,22 @@ def setup_directories():
dirs = namedtuple("Dirs", ())
dirs.data_dir = tempfile.mkdtemp()
dirs.scratch_dir = tempfile.mkdtemp()
dirs.media_dir = tempfile.mkdtemp()
dirs.consumption_dir = tempfile.mkdtemp()
dirs.static_dir = tempfile.mkdtemp()
dirs.index_dir = os.path.join(dirs.data_dir, "index")
dirs.originals_dir = os.path.join(dirs.media_dir, "documents", "originals")
dirs.thumbnail_dir = os.path.join(dirs.media_dir, "documents", "thumbnails")
dirs.archive_dir = os.path.join(dirs.media_dir, "documents", "archive")
dirs.logging_dir = os.path.join(dirs.data_dir, "log")
dirs.data_dir = Path(tempfile.mkdtemp())
dirs.scratch_dir = Path(tempfile.mkdtemp())
dirs.media_dir = Path(tempfile.mkdtemp())
dirs.consumption_dir = Path(tempfile.mkdtemp())
dirs.static_dir = Path(tempfile.mkdtemp())
dirs.index_dir = dirs.data_dir / "index"
dirs.originals_dir = dirs.media_dir / "documents" / "originals"
dirs.thumbnail_dir = dirs.media_dir / "documents" / "thumbnails"
dirs.archive_dir = dirs.media_dir / "documents" / "archive"
dirs.logging_dir = dirs.data_dir / "log"
os.makedirs(dirs.index_dir, exist_ok=True)
os.makedirs(dirs.originals_dir, exist_ok=True)
os.makedirs(dirs.thumbnail_dir, exist_ok=True)
os.makedirs(dirs.archive_dir, exist_ok=True)
os.makedirs(dirs.logging_dir, exist_ok=True)
dirs.index_dir.mkdir(parents=True, exist_ok=True)
dirs.originals_dir.mkdir(parents=True, exist_ok=True)
dirs.thumbnail_dir.mkdir(parents=True, exist_ok=True)
dirs.archive_dir.mkdir(parents=True, exist_ok=True)
dirs.logging_dir.mkdir(parents=True, exist_ok=True)
dirs.settings_override = override_settings(
DATA_DIR=dirs.data_dir,
@@ -48,8 +46,8 @@ def setup_directories():
LOGGING_DIR=dirs.logging_dir,
INDEX_DIR=dirs.index_dir,
STATIC_ROOT=dirs.static_dir,
MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle"),
MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock"),
MODEL_FILE=dirs.data_dir / "classification_model.pickle",
MEDIA_LOCK=dirs.media_dir / "media.lock",
)
dirs.settings_override.enable()