Changes the settings and a decent amount of test code to be pathlib-compatible

This commit is contained in:
Trenton H 2023-02-07 14:05:18 -08:00
parent 7cb14374cf
commit 41bcfcaffe
9 changed files with 192 additions and 309 deletions

View File

@ -98,7 +98,7 @@ def barcode_reader(image: Image) -> List[str]:
return barcodes
def get_file_mime_type(path: str) -> str:
def get_file_mime_type(path: Path) -> str:
"""
Determines the file type, based on MIME type.
@ -109,21 +109,20 @@ def get_file_mime_type(path: str) -> str:
return mime_type
def convert_from_tiff_to_pdf(filepath: str) -> str:
def convert_from_tiff_to_pdf(filepath: Path) -> Path:
"""
converts a given TIFF image file to pdf into a temporary directory.
Returns the new pdf file.
"""
file_name = os.path.splitext(os.path.basename(filepath))[0]
mime_type = get_file_mime_type(filepath)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
# use old file name with pdf extension
if mime_type == "image/tiff":
newpath = os.path.join(tempdir, file_name + ".pdf")
newpath = Path(tempdir) / Path(filepath.name).with_suffix(".pdf")
else:
logger.warning(
f"Cannot convert mime type {str(mime_type)} from {str(filepath)} to pdf.",
f"Cannot convert mime type {mime_type} from {filepath} to pdf.",
)
return None
with Image.open(filepath) as image:
@ -145,7 +144,7 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
def scan_file_for_barcodes(
filepath: str,
filepath: Path,
) -> DocumentBarcodeInfo:
"""
Scan the provided pdf file for any barcodes
@ -252,7 +251,7 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
return asn
def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]:
def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[Path]:
"""
Separate the provided pdf file on the pages_to_split_on.
The pages which are defined by the keys in page_numbers
@ -268,8 +267,8 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
return document_paths
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
fname = os.path.splitext(os.path.basename(filepath))[0]
tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
fname = filepath.with_suffix("").name
pdf = Pdf.open(filepath)
# Start with an empty document
@ -307,7 +306,7 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
output_filename = f"{fname}_document_{doc_idx}.pdf"
logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
savepath = os.path.join(tempdir, output_filename)
savepath = tempdir / output_filename
with open(savepath, "wb") as out:
dst.save(out)
document_paths.append(savepath)
@ -316,18 +315,18 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st
def save_to_dir(
filepath: str,
filepath: Path,
newname: str = None,
target_dir: str = settings.CONSUMPTION_DIR,
target_dir: Path = settings.CONSUMPTION_DIR,
):
"""
Copies filepath to target_dir.
Optionally rename the file.
"""
if os.path.isfile(filepath) and os.path.isdir(target_dir):
if filepath.is_file() and target_dir.is_dir():
dest = target_dir
if newname is not None:
dest = os.path.join(dest, newname)
dest = dest / newname
shutil.copy(filepath, dest)
logging.debug(f"saved {str(filepath)} to {str(dest)}")
else:

View File

@ -2,7 +2,6 @@ import logging
import os
import pickle
import re
import shutil
import warnings
from datetime import datetime
from hashlib import sha256
@ -122,7 +121,7 @@ class DocumentClassifier:
def save(self):
target_file = settings.MODEL_FILE
target_file_temp = settings.MODEL_FILE + ".part"
target_file_temp = settings.MODEL_FILE.with_suffix(".pickle.part")
with open(target_file_temp, "wb") as f:
pickle.dump(self.FORMAT_VERSION, f)
@ -138,9 +137,7 @@ class DocumentClassifier:
pickle.dump(self.document_type_classifier, f)
pickle.dump(self.storage_path_classifier, f)
if os.path.isfile(target_file):
os.unlink(target_file)
shutil.move(target_file_temp, target_file)
target_file_temp.rename(target_file)
def train(self):

View File

@ -3,6 +3,7 @@ import logging
import os
import re
from collections import OrderedDict
from pathlib import Path
from typing import Final
from typing import Optional
@ -282,7 +283,7 @@ class Document(ModelWithOwner):
return res
@property
def source_path(self) -> str:
def source_path(self) -> Path:
if self.filename:
fname = str(self.filename)
else:
@ -290,7 +291,7 @@ class Document(ModelWithOwner):
if self.storage_type == self.STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(settings.ORIGINALS_DIR, fname)
return (settings.ORIGINALS_DIR / Path(fname)).resolve()
@property
def source_file(self):
@ -301,9 +302,9 @@ class Document(ModelWithOwner):
return self.archive_filename is not None
@property
def archive_path(self) -> Optional[str]:
def archive_path(self) -> Optional[Path]:
if self.has_archive_version:
return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve()
else:
return None
@ -335,14 +336,14 @@ class Document(ModelWithOwner):
return get_default_file_extension(self.mime_type)
@property
def thumbnail_path(self) -> str:
def thumbnail_path(self) -> Path:
webp_file_name = f"{self.pk:07}.webp"
if self.storage_type == self.STORAGE_TYPE_GPG:
webp_file_name += ".gpg"
webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name)
webp_file_path = settings.THUMBNAIL_DIR / Path(webp_file_name)
return os.path.normpath(webp_file_path)
return webp_file_path.resolve()
@property
def thumbnail_file(self):

View File

@ -1,5 +1,6 @@
import os
import shutil
from pathlib import Path
from unittest import mock
from django.conf import settings
@ -15,12 +16,9 @@ from PIL import Image
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
SAMPLE_DIR = Path(__file__).parent / "samples"
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_png(self):
"""
@ -31,7 +29,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT.png"
img = Image.open(test_file)
separator_barcode = settings.CONSUMER_BARCODE_STRING
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -45,10 +43,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pbm",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pbm"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -62,10 +58,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distortion.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -79,10 +72,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distortion2.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion2.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -96,10 +86,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-unreadable.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-unreadable.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
@ -112,10 +99,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"qr-code-PATCHT.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "qr-code-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -129,10 +113,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-PATCHT.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
@ -146,7 +128,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No barcode is detected
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
test_file = self.SAMPLE_DIR / "simple.png"
img = Image.open(test_file)
self.assertListEqual(barcodes.barcode_reader(img), [])
@ -159,10 +141,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@ -175,10 +155,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@ -191,10 +169,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
@ -207,20 +183,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
tiff_file = os.path.join(
self.SAMPLE_DIR,
"simple.tiff",
)
pdf_file = os.path.join(
self.SAMPLE_DIR,
"simple.pdf",
)
png_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
pdf_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile2")
tiff_file = self.SAMPLE_DIR / "simple.tiff"
pdf_file = self.SAMPLE_DIR / "simple.pdf"
png_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
tiff_file_no_extension = settings.SCRATCH_DIR / "testfile1"
pdf_file_no_extension = settings.SCRATCH_DIR / "testfile2"
shutil.copy(tiff_file, tiff_file_no_extension)
shutil.copy(pdf_file, pdf_file_no_extension)
@ -245,17 +215,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"simple.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.tiff")
test_file = self.SAMPLE_DIR / "simple.tiff"
dst = settings.SCRATCH_DIR / "simple.tiff"
shutil.copy(test_file, dst)
target_file = barcodes.convert_from_tiff_to_pdf(dst)
file_extension = os.path.splitext(os.path.basename(target_file))[1]
self.assertIsFile(target_file)
self.assertEqual(file_extension, ".pdf")
self.assertEqual(target_file.suffix, ".pdf")
def test_convert_error_from_pdf_to_pdf(self):
"""
@ -266,11 +233,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
self.SAMPLE_DIR,
"simple.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
test_file = self.SAMPLE_DIR / "simple.pdf"
dst = settings.SCRATCH_DIR / "simple.pdf"
shutil.copy(test_file, dst)
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
@ -283,10 +248,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -306,7 +269,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
-
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
test_file = self.SAMPLE_DIR / "simple.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -326,10 +289,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -349,10 +310,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on pages 2 and 5 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"several-patcht-codes.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -373,10 +332,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle_reverse.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle_reverse.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -396,10 +353,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The barcode is still detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-fax-image.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -420,10 +375,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-qr.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -445,10 +398,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -471,10 +422,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -497,10 +446,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -522,10 +469,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- No split pages are detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -546,10 +491,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- QR codes are detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"many-qr-codes.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -570,10 +512,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Two new documents are produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
documents = barcodes.separate_pages(test_file, {1: False})
self.assertEqual(len(documents), 2)
@ -587,11 +527,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Only two files are output
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"patch-code-t-double.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
pages = barcodes.separate_pages(test_file, {1: False, 2: False})
self.assertEqual(len(pages), 2)
@ -606,10 +543,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- No new documents are produced
- A warning is logged
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
pages = barcodes.separate_pages(test_file, {})
self.assertEqual(pages, [])
@ -629,12 +564,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
target_file = os.path.join(settings.SCRATCH_DIR, "patch-code-t.pdf")
target_file = settings.SCRATCH_DIR / "patch-code-t.pdf"
self.assertIsFile(target_file)
def test_save_to_dir_not_existing(self):
@ -647,11 +580,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
nonexistingdir = "/nowhere"
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
nonexistingdir = Path("/nowhere")
self.assertIsNotDir(nonexistingdir)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@ -673,16 +604,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
barcodes.save_to_dir(
test_file,
newname="newname.pdf",
target_dir=settings.SCRATCH_DIR,
)
target_file = os.path.join(settings.SCRATCH_DIR, "newname.pdf")
target_file = settings.SCRATCH_DIR / "newname.pdf"
self.assertIsFile(target_file)
def test_barcode_splitter(self):
@ -694,10 +623,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -715,14 +641,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
for document in document_list:
barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR)
target_file1 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_0.pdf",
)
target_file2 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_1.pdf",
)
target_file1 = settings.SCRATCH_DIR / "patch-code-t-middle_document_0.pdf"
target_file2 = settings.SCRATCH_DIR / "patch-code-t-middle_document_1.pdf"
self.assertIsFile(target_file1)
self.assertIsFile(target_file2)
@ -737,12 +658,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
dst = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@ -761,11 +679,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@ -786,11 +702,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- Barcode reader reported warning
- Consumption continued with the file
"""
test_file = os.path.join(
self.SAMPLE_DIR,
"simple.jpg",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
test_file = self.SAMPLE_DIR / "simple.jpg"
dst = settings.SCRATCH_DIR / "simple.jpg"
shutil.copy(test_file, dst)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@ -825,11 +739,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
dst = settings.SCRATCH_DIR / "patch-code-t-middle"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
@ -844,7 +756,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Scanning handles the exception without crashing
"""
test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf")
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -873,11 +785,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced, split correctly by correct pages
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"split-by-asn-2.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -914,11 +822,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN:
- Correct number of files produced, split correctly by correct pages
"""
test_file = os.path.join(
os.path.dirname(__file__),
self.BARCODE_SAMPLE_DIR,
"split-by-asn-1.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -944,12 +848,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
class TestAsnBarcodes(DirectoriesMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
SAMPLE_DIR = Path(__file__).parent / "samples"
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_asn_normal(self):
"""
@ -961,10 +862,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"])
@ -979,10 +878,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-invalid.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"])
@ -996,10 +893,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The barcode is located
- The barcode value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-custom-prefix.png",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
@ -1015,10 +910,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN integer value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-custom-prefix.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -1038,10 +931,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN value is not used
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-invalid.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -1064,12 +955,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN integer value is correct
- The ASN is provided as the override value to the consumer
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
dst = os.path.join(settings.SCRATCH_DIR, "barcode-39-asn-123.pdf")
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
shutil.copy(test_file, dst)
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
@ -1090,10 +978,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
- The ASN is located
- The ASN integer value is correct
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-asn-123.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -1111,10 +997,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
THEN:
- No ASN is retrieved from the document
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
)
@ -1134,13 +1018,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
THEN:
- Exception is raised regarding size limits
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-128-asn-too-large.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "barcode-128-asn-too-large.pdf")
src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf"
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
shutil.copy(src, dst)
with mock.patch("documents.consumer.Consumer._send_progress"):

View File

@ -19,6 +19,7 @@ from ..models import Document
from ..models import DocumentType
from ..models import StoragePath
from .utils import DirectoriesMixin
from .utils import FileSystemAssertsMixin
class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@ -47,7 +48,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Test default source_path
self.assertEqual(
document.source_path,
os.path.join(settings.ORIGINALS_DIR, f"{document.pk:07d}.pdf"),
settings.ORIGINALS_DIR / f"{document.pk:07d}.pdf",
)
document.filename = generate_filename(document)
@ -72,10 +73,14 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.save()
# Check proper handling of files
self.assertIsDir(os.path.join(settings.ORIGINALS_DIR, "test"))
self.assertIsNotDir(os.path.join(settings.ORIGINALS_DIR, "none"))
self.assertIsDir(
settings.ORIGINALS_DIR / "test",
)
self.assertIsNotDir(
settings.ORIGINALS_DIR / "none",
)
self.assertIsFile(
os.path.join(settings.ORIGINALS_DIR, "test/test.pdf.gpg"),
settings.ORIGINALS_DIR / "test" / "test.pdf.gpg",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -89,12 +94,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
document.source_path.touch()
# Test source_path
self.assertEqual(
document.source_path,
os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"),
settings.ORIGINALS_DIR / "none" / "none.pdf",
)
# Make the folder read- and execute-only (no writing and no renaming)
@ -106,7 +111,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Check proper handling of files
self.assertIsFile(
os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"),
settings.ORIGINALS_DIR / "none" / "none.pdf",
)
self.assertEqual(document.filename, "none/none.pdf")
@ -232,9 +237,9 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
important_file = document.source_path + "test"
Path(important_file).touch()
document.source_path.touch()
important_file = document.source_path.with_suffix(".test")
important_file.touch()
# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
@ -379,7 +384,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"),
settings.ORIGINALS_DIR / "etc" / "something" / "doc1.pdf",
)
@override_settings(
@ -599,11 +604,11 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
self.assertIsFile(doc.archive_path)
self.assertEqual(
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
settings.ORIGINALS_DIR / "none" / "my_doc.pdf",
)
self.assertEqual(
doc.archive_path,
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
settings.ARCHIVE_DIR / "none" / "my_doc.pdf",
)
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@ -698,7 +703,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
@mock.patch("documents.signals.handlers.os.rename")
def test_move_archive_error(self, m):
def fake_rename(src, dst):
if "archive" in src:
if "archive" in str(src):
raise OSError()
else:
os.remove(src)
@ -749,7 +754,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test
@mock.patch("documents.signals.handlers.os.rename")
def test_move_file_error(self, m):
def fake_rename(src, dst):
if "original" in src:
if "original" in str(src):
raise OSError()
else:
os.remove(src)

View File

@ -359,7 +359,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(os.path.join(self.target, "manifest.json"))
self.assertIsFile(os.path.join(self.target, "wow2", "none.pdf"))
self.assertIsFile(
(os.path.join(self.target, "wow2", "none_01.pdf")),
os.path.join(self.target, "wow2", "none_01.pdf"),
)
def test_export_missing_files(self):

View File

@ -58,8 +58,8 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsNotFile(self.d1.thumbnail_path)
self.assertIsNotFile(self.d2.thumbnail_path)
call_command("document_thumbnails")
self.assertTrue(self.d1.thumbnail_path)
self.assertTrue(self.d2.thumbnail_path)
self.assertIsFile(self.d1.thumbnail_path)
self.assertIsFile(self.d2.thumbnail_path)
def test_command_documentid(self):
self.assertIsNotFile(self.d1.thumbnail_path)

View File

@ -1,4 +1,3 @@
import os
import shutil
import tempfile
from collections import namedtuple
@ -19,23 +18,22 @@ def setup_directories():
dirs = namedtuple("Dirs", ())
dirs.data_dir = tempfile.mkdtemp()
dirs.scratch_dir = tempfile.mkdtemp()
dirs.media_dir = tempfile.mkdtemp()
dirs.consumption_dir = tempfile.mkdtemp()
dirs.static_dir = tempfile.mkdtemp()
dirs.index_dir = os.path.join(dirs.data_dir, "index")
dirs.originals_dir = os.path.join(dirs.media_dir, "documents", "originals")
dirs.thumbnail_dir = os.path.join(dirs.media_dir, "documents", "thumbnails")
dirs.archive_dir = os.path.join(dirs.media_dir, "documents", "archive")
dirs.logging_dir = os.path.join(dirs.data_dir, "log")
dirs.data_dir = Path(tempfile.mkdtemp())
dirs.scratch_dir = Path(tempfile.mkdtemp())
dirs.media_dir = Path(tempfile.mkdtemp())
dirs.consumption_dir = Path(tempfile.mkdtemp())
dirs.static_dir = Path(tempfile.mkdtemp())
dirs.index_dir = dirs.data_dir / "index"
dirs.originals_dir = dirs.media_dir / "documents" / "originals"
dirs.thumbnail_dir = dirs.media_dir / "documents" / "thumbnails"
dirs.archive_dir = dirs.media_dir / "documents" / "archive"
dirs.logging_dir = dirs.data_dir / "log"
os.makedirs(dirs.index_dir, exist_ok=True)
os.makedirs(dirs.originals_dir, exist_ok=True)
os.makedirs(dirs.thumbnail_dir, exist_ok=True)
os.makedirs(dirs.archive_dir, exist_ok=True)
os.makedirs(dirs.logging_dir, exist_ok=True)
dirs.index_dir.mkdir(parents=True, exist_ok=True)
dirs.originals_dir.mkdir(parents=True, exist_ok=True)
dirs.thumbnail_dir.mkdir(parents=True, exist_ok=True)
dirs.archive_dir.mkdir(parents=True, exist_ok=True)
dirs.logging_dir.mkdir(parents=True, exist_ok=True)
dirs.settings_override = override_settings(
DATA_DIR=dirs.data_dir,
@ -48,8 +46,8 @@ def setup_directories():
LOGGING_DIR=dirs.logging_dir,
INDEX_DIR=dirs.index_dir,
STATIC_ROOT=dirs.static_dir,
MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle"),
MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock"),
MODEL_FILE=dirs.data_dir / "classification_model.pickle",
MEDIA_LOCK=dirs.media_dir / "media.lock",
)
dirs.settings_override.enable()

View File

@ -5,11 +5,14 @@ import multiprocessing
import os
import re
import tempfile
from os import PathLike
from pathlib import Path
from typing import Dict
from typing import Final
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Union
from urllib.parse import urlparse
from celery.schedules import crontab
@ -63,11 +66,11 @@ def __get_float(key: str, default: float) -> float:
return float(os.getenv(key, default))
def __get_path(key: str, default: str) -> str:
def __get_path(key: str, default: Union[PathLike, str]) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default
"""
return os.path.abspath(os.path.normpath(os.environ.get(key, default)))
return Path(os.environ.get(key, default)).resolve()
def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
@ -201,16 +204,16 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
# Directories #
###############################################################################
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BASE_DIR: Path = Path(__file__).resolve().parent.parent
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media"))
ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals")
ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
@ -218,21 +221,21 @@ TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
INDEX_DIR = DATA_DIR / "index"
MODEL_FILE = DATA_DIR / "classification_model.pickle"
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
CONSUMPTION_DIR = __get_path(
"PAPERLESS_CONSUMPTION_DIR",
os.path.join(BASE_DIR, "..", "consume"),
BASE_DIR.parent / "consume",
)
# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
"PAPERLESS_SCRATCH_DIR",
os.path.join(tempfile.gettempdir(), "paperless"),
Path(tempfile.gettempdir()) / "paperless",
)
###############################################################################