From 41bcfcaffe9ea6cb35e7c20803118a017247bb62 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:05:18 -0800 Subject: [PATCH] Changes out the settings and a decent amount of test code to be pathlib compatible --- src/documents/barcodes.py | 27 +- src/documents/classifier.py | 7 +- src/documents/models.py | 15 +- src/documents/tests/test_barcodes.py | 342 ++++++------------ src/documents/tests/test_file_handling.py | 35 +- .../tests/test_management_exporter.py | 2 +- .../tests/test_management_thumbnails.py | 4 +- src/documents/tests/utils.py | 36 +- src/paperless/settings.py | 33 +- 9 files changed, 192 insertions(+), 309 deletions(-) diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 416cf6b2d..3ecf6f96a 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -98,7 +98,7 @@ def barcode_reader(image: Image) -> List[str]: return barcodes -def get_file_mime_type(path: str) -> str: +def get_file_mime_type(path: Path) -> str: """ Determines the file type, based on MIME type. @@ -109,21 +109,20 @@ def get_file_mime_type(path: str) -> str: return mime_type -def convert_from_tiff_to_pdf(filepath: str) -> str: +def convert_from_tiff_to_pdf(filepath: Path) -> Path: """ converts a given TIFF image file to pdf into a temporary directory. Returns the new pdf file. """ - file_name = os.path.splitext(os.path.basename(filepath))[0] mime_type = get_file_mime_type(filepath) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) # use old file name with pdf extension if mime_type == "image/tiff": - newpath = os.path.join(tempdir, file_name + ".pdf") + newpath = Path(tempdir) / Path(filepath.name).with_suffix(".pdf") else: logger.warning( - f"Cannot convert mime type {str(mime_type)} from {str(filepath)} to pdf.", + f"Cannot convert mime type {mime_type} from {filepath} to pdf.", ) return None with Image.open(filepath) as image: @@ -145,7 +144,7 @@ def convert_from_tiff_to_pdf(filepath: str) -> str: def scan_file_for_barcodes( - filepath: str, + filepath: Path, ) -> DocumentBarcodeInfo: """ Scan the provided pdf file for any barcodes @@ -252,7 +251,7 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]: return asn -def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[str]: +def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[Path]: """ Separate the provided pdf file on the pages_to_split_on. The pages which are defined by the keys in page_numbers @@ -268,8 +267,8 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st return document_paths os.makedirs(settings.SCRATCH_DIR, exist_ok=True) - tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) - fname = os.path.splitext(os.path.basename(filepath))[0] + tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)) + fname = filepath.with_suffix("").name pdf = Pdf.open(filepath) # Start with an empty document @@ -307,7 +306,7 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st output_filename = f"{fname}_document_{doc_idx}.pdf" logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages") - savepath = os.path.join(tempdir, output_filename) + savepath = tempdir / output_filename with open(savepath, "wb") as out: dst.save(out) document_paths.append(savepath) @@ -316,18 +315,18 @@ def separate_pages(filepath: str, pages_to_split_on: Dict[int, bool]) -> List[st def save_to_dir( - filepath: str, + filepath: Path, newname: str = None, - target_dir: str = settings.CONSUMPTION_DIR, + target_dir: Path = settings.CONSUMPTION_DIR, ): """ Copies filepath to target_dir. Optionally rename the file. """ - if os.path.isfile(filepath) and os.path.isdir(target_dir): + if filepath.is_file() and target_dir.is_dir(): dest = target_dir if newname is not None: - dest = os.path.join(dest, newname) + dest = dest / newname shutil.copy(filepath, dest) logging.debug(f"saved {str(filepath)} to {str(dest)}") else: diff --git a/src/documents/classifier.py b/src/documents/classifier.py index ce2441f84..d2f5ed060 100644 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -2,7 +2,6 @@ import logging import os import pickle import re -import shutil import warnings from datetime import datetime from hashlib import sha256 @@ -122,7 +121,7 @@ class DocumentClassifier: def save(self): target_file = settings.MODEL_FILE - target_file_temp = settings.MODEL_FILE + ".part" + target_file_temp = settings.MODEL_FILE.with_suffix(".pickle.part") with open(target_file_temp, "wb") as f: pickle.dump(self.FORMAT_VERSION, f) @@ -138,9 +137,7 @@ class DocumentClassifier: pickle.dump(self.document_type_classifier, f) pickle.dump(self.storage_path_classifier, f) - if os.path.isfile(target_file): - os.unlink(target_file) - shutil.move(target_file_temp, target_file) + target_file_temp.rename(target_file) def train(self): diff --git a/src/documents/models.py b/src/documents/models.py index 177885de0..68fba37b9 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -3,6 +3,7 @@ import logging import os import re from collections import OrderedDict +from pathlib import Path from typing import Final from typing import Optional @@ -282,7 +283,7 @@ class Document(ModelWithOwner): return res @property - def source_path(self) -> str: + def source_path(self) -> Path: if self.filename: fname = str(self.filename) else: @@ -290,7 +291,7 @@ class Document(ModelWithOwner): if self.storage_type == self.STORAGE_TYPE_GPG: fname += ".gpg" # pragma: no cover - return os.path.join(settings.ORIGINALS_DIR, fname) + return (settings.ORIGINALS_DIR / Path(fname)).resolve() @property def source_file(self): @@ -301,9 +302,9 @@ class Document(ModelWithOwner): return self.archive_filename is not None @property - def archive_path(self) -> Optional[str]: + def archive_path(self) -> Optional[Path]: if self.has_archive_version: - return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename)) + return (settings.ARCHIVE_DIR / Path(str(self.archive_filename))).resolve() else: return None @@ -335,14 +336,14 @@ class Document(ModelWithOwner): return get_default_file_extension(self.mime_type) @property - def thumbnail_path(self) -> str: + def thumbnail_path(self) -> Path: webp_file_name = f"{self.pk:07}.webp" if self.storage_type == self.STORAGE_TYPE_GPG: webp_file_name += ".gpg" - webp_file_path = os.path.join(settings.THUMBNAIL_DIR, webp_file_name) + webp_file_path = settings.THUMBNAIL_DIR / Path(webp_file_name) - return os.path.normpath(webp_file_path) + return webp_file_path.resolve() @property def thumbnail_file(self): diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index 7019c07b3..02ed26308 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -1,5 +1,6 @@ import os import shutil +from pathlib import Path from unittest import mock from django.conf import settings @@ -15,12 +16,9 @@ from PIL import Image class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): - SAMPLE_DIR = os.path.join( - os.path.dirname(__file__), - "samples", - ) + SAMPLE_DIR = Path(__file__).parent / "samples" - BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes") + BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" def test_barcode_reader_png(self): """ @@ -31,7 +29,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png") + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT.png" img = Image.open(test_file) separator_barcode = settings.CONSUMER_BARCODE_STRING self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -45,10 +43,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pbm", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pbm" + img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -62,10 +58,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-PATCHT-distortion.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion.png" img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -79,10 +72,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-PATCHT-distortion2.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion2.png" img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -96,10 +86,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - No barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-PATCHT-unreadable.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-unreadable.png" img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), []) @@ -112,10 +99,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "qr-code-PATCHT.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "qr-code-PATCHT.png" img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -129,10 +113,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-128-PATCHT.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-PATCHT.png" + img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) @@ -146,7 +128,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - No barcode is detected """ - test_file = os.path.join(self.SAMPLE_DIR, "simple.png") + test_file = self.SAMPLE_DIR / "simple.png" img = Image.open(test_file) self.assertListEqual(barcodes.barcode_reader(img), []) @@ -159,10 +141,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-custom.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"]) @@ -175,10 +155,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-qr-custom.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"]) @@ -191,10 +169,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-128-custom.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"]) @@ -207,20 +183,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - """ - tiff_file = os.path.join( - self.SAMPLE_DIR, - "simple.tiff", - ) - pdf_file = os.path.join( - self.SAMPLE_DIR, - "simple.pdf", - ) - png_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-128-custom.png", - ) - tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1") - pdf_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile2") + tiff_file = self.SAMPLE_DIR / "simple.tiff" + + pdf_file = self.SAMPLE_DIR / "simple.pdf" + + png_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png" + + tiff_file_no_extension = settings.SCRATCH_DIR / "testfile1" + pdf_file_no_extension = settings.SCRATCH_DIR / "testfile2" shutil.copy(tiff_file, tiff_file_no_extension) shutil.copy(pdf_file, pdf_file_no_extension) @@ -245,17 +215,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - """ - test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "simple.tiff", - ) - dst = os.path.join(settings.SCRATCH_DIR, "simple.tiff") + test_file = self.SAMPLE_DIR / "simple.tiff" + + dst = settings.SCRATCH_DIR / "simple.tiff" shutil.copy(test_file, dst) target_file = barcodes.convert_from_tiff_to_pdf(dst) - file_extension = os.path.splitext(os.path.basename(target_file))[1] + self.assertIsFile(target_file) - self.assertEqual(file_extension, ".pdf") + self.assertEqual(target_file.suffix, ".pdf") def test_convert_error_from_pdf_to_pdf(self): """ @@ -266,11 +233,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - """ - test_file = os.path.join( - self.SAMPLE_DIR, - "simple.pdf", - ) - dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf") + test_file = self.SAMPLE_DIR / "simple.pdf" + + dst = settings.SCRATCH_DIR / "simple.pdf" shutil.copy(test_file, dst) self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst)) @@ -283,10 +248,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -306,7 +269,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - """ - test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf") + test_file = self.SAMPLE_DIR / "simple.pdf" doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -326,10 +289,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 1 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -349,10 +310,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on pages 2 and 5 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "several-patcht-codes.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -373,10 +332,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 1 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle_reverse.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle_reverse.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -396,10 +353,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The barcode is still detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-fax-image.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -420,10 +375,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 0 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-qr.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -445,10 +398,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 0 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-custom.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -471,10 +422,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 0 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-qr-custom.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -497,10 +446,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Barcode is detected on page 0 (zero indexed) """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-128-custom.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -522,10 +469,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - No split pages are detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-custom.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -546,10 +491,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - QR codes are detected """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "many-qr-codes.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf" doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, @@ -570,10 +512,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Two new documents are produced """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" + documents = barcodes.separate_pages(test_file, {1: False}) self.assertEqual(len(documents), 2) @@ -587,11 +527,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Only two files are output """ - test_file = os.path.join( - os.path.dirname(__file__), - self.BARCODE_SAMPLE_DIR, - "patch-code-t-double.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf" + pages = barcodes.separate_pages(test_file, {1: False, 2: False}) self.assertEqual(len(pages), 2) @@ -606,10 +543,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): - No new documents are produced - A warning is logged """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" + with self.assertLogs("paperless.barcodes", level="WARNING") as cm: pages = barcodes.separate_pages(test_file, {}) self.assertEqual(pages, []) @@ -629,12 +564,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file exists """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" + barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR) - target_file = os.path.join(settings.SCRATCH_DIR, "patch-code-t.pdf") + target_file = settings.SCRATCH_DIR / "patch-code-t.pdf" self.assertIsFile(target_file) def test_save_to_dir_not_existing(self): @@ -647,11 +580,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file exists """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pdf", - ) - nonexistingdir = "/nowhere" + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" + + nonexistingdir = Path("/nowhere") self.assertIsNotDir(nonexistingdir) with self.assertLogs("paperless.barcodes", level="WARNING") as cm: @@ -673,16 +604,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file exists """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" + barcodes.save_to_dir( test_file, newname="newname.pdf", target_dir=settings.SCRATCH_DIR, ) - target_file = os.path.join(settings.SCRATCH_DIR, "newname.pdf") + target_file = settings.SCRATCH_DIR / "newname.pdf" self.assertIsFile(target_file) def test_barcode_splitter(self): @@ -694,10 +623,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Correct number of files produced """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, @@ -715,14 +641,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): for document in document_list: barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR) - target_file1 = os.path.join( - settings.SCRATCH_DIR, - "patch-code-t-middle_document_0.pdf", - ) - target_file2 = os.path.join( - settings.SCRATCH_DIR, - "patch-code-t-middle_document_1.pdf", - ) + target_file1 = settings.SCRATCH_DIR / "patch-code-t-middle_document_0.pdf" + + target_file2 = settings.SCRATCH_DIR / "patch-code-t-middle_document_1.pdf" self.assertIsFile(target_file1) self.assertIsFile(target_file2) @@ -737,12 +658,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file was split """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" - dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf") + dst = settings.SCRATCH_DIR / "patch-code-t-middle.pdf" shutil.copy(test_file, dst) with mock.patch("documents.tasks.async_to_sync"): @@ -761,11 +679,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file was split """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.tiff", - ) - dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff") + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff" + + dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff" shutil.copy(test_file, dst) with mock.patch("documents.tasks.async_to_sync"): @@ -786,11 +702,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): - Barcode reader reported warning - Consumption continued with the file """ - test_file = os.path.join( - self.SAMPLE_DIR, - "simple.jpg", - ) - dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg") + test_file = self.SAMPLE_DIR / "simple.jpg" + + dst = settings.SCRATCH_DIR / "simple.jpg" shutil.copy(test_file, dst) with self.assertLogs("paperless.barcodes", level="WARNING") as cm: @@ -825,11 +739,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - The file was split """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t-middle.tiff", - ) - dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle") + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff" + + dst = settings.SCRATCH_DIR / "patch-code-t-middle" shutil.copy(test_file, dst) with mock.patch("documents.tasks.async_to_sync"): @@ -844,7 +756,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Scanning handles the exception without crashing """ - test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf") + test_file = self.SAMPLE_DIR / "password-is-test.pdf" with self.assertLogs("paperless.barcodes", level="WARNING") as cm: doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, @@ -873,11 +785,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Correct number of files produced, split correctly by correct pages """ - test_file = os.path.join( - os.path.dirname(__file__), - self.BARCODE_SAMPLE_DIR, - "split-by-asn-2.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, @@ -914,11 +822,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): THEN: - Correct number of files produced, split correctly by correct pages """ - test_file = os.path.join( - os.path.dirname(__file__), - self.BARCODE_SAMPLE_DIR, - "split-by-asn-1.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf" doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, @@ -944,12 +848,9 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase): class TestAsnBarcodes(DirectoriesMixin, TestCase): - SAMPLE_DIR = os.path.join( - os.path.dirname(__file__), - "samples", - ) + SAMPLE_DIR = Path(__file__).parent / "samples" - BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes") + BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" def test_barcode_reader_asn_normal(self): """ @@ -961,10 +862,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The barcode is located - The barcode value is correct """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-123.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"]) @@ -979,10 +878,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The barcode is located - The barcode value is correct """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-invalid.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"]) @@ -996,10 +893,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The barcode is located - The barcode value is correct """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-custom-prefix.png", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.png" + img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"]) @@ -1015,10 +910,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The ASN is located - The ASN integer value is correct """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-custom-prefix.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -1038,10 +931,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The ASN is located - The ASN value is not used """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-invalid.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -1064,12 +955,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The ASN integer value is correct - The ASN is provided as the override value to the consumer """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-123.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" - dst = os.path.join(settings.SCRATCH_DIR, "barcode-39-asn-123.pdf") + dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" shutil.copy(test_file, dst) with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call: @@ -1090,10 +978,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): - The ASN is located - The ASN integer value is correct """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "barcode-39-asn-123.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -1111,10 +997,8 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): THEN: - No ASN is retrieved from the document """ - test_file = os.path.join( - self.BARCODE_SAMPLE_DIR, - "patch-code-t.pdf", - ) + test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" + doc_barcode_info = barcodes.scan_file_for_barcodes( test_file, ) @@ -1134,13 +1018,9 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase): THEN: - Exception is raised regarding size limits """ - src = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", - "barcode-128-asn-too-large.pdf", - ) - dst = os.path.join(self.dirs.scratch_dir, "barcode-128-asn-too-large.pdf") + src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf" + + dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf" shutil.copy(src, dst) with mock.patch("documents.consumer.Consumer._send_progress"): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 3f1dabdcb..0d548264c 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -19,6 +19,7 @@ from ..models import Document from ..models import DocumentType from ..models import StoragePath from .utils import DirectoriesMixin +from .utils import FileSystemAssertsMixin class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): @@ -47,7 +48,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): # Test default source_path self.assertEqual( document.source_path, - os.path.join(settings.ORIGINALS_DIR, f"{document.pk:07d}.pdf"), + settings.ORIGINALS_DIR / f"{document.pk:07d}.pdf", ) document.filename = generate_filename(document) @@ -72,10 +73,14 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): document.save() # Check proper handling of files - self.assertIsDir(os.path.join(settings.ORIGINALS_DIR, "test")) - self.assertIsNotDir(os.path.join(settings.ORIGINALS_DIR, "none")) + self.assertIsDir( + settings.ORIGINALS_DIR / "test", + ) + self.assertIsNotDir( + settings.ORIGINALS_DIR / "none", + ) self.assertIsFile( - os.path.join(settings.ORIGINALS_DIR, "test/test.pdf.gpg"), + settings.ORIGINALS_DIR / "test" / "test.pdf.gpg", ) @override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}") @@ -89,12 +94,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): document.filename = generate_filename(document) self.assertEqual(document.filename, "none/none.pdf") create_source_path_directory(document.source_path) - Path(document.source_path).touch() + document.source_path.touch() # Test source_path self.assertEqual( document.source_path, - os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"), + settings.ORIGINALS_DIR / "none" / "none.pdf", ) # Make the folder read- and execute-only (no writing and no renaming) @@ -106,7 +111,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): # Check proper handling of files self.assertIsFile( - os.path.join(settings.ORIGINALS_DIR, "none/none.pdf"), + settings.ORIGINALS_DIR / "none" / "none.pdf", ) self.assertEqual(document.filename, "none/none.pdf") @@ -232,9 +237,9 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): create_source_path_directory(document.source_path) - Path(document.source_path).touch() - important_file = document.source_path + "test" - Path(important_file).touch() + document.source_path.touch() + important_file = document.source_path.with_suffix(".test") + important_file.touch() # Set a correspondent and save the document document.correspondent = Correspondent.objects.get_or_create(name="test")[0] @@ -379,7 +384,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual( doc.source_path, - os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"), + settings.ORIGINALS_DIR / "etc" / "something" / "doc1.pdf", ) @override_settings( @@ -599,11 +604,11 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test self.assertIsFile(doc.archive_path) self.assertEqual( doc.source_path, - os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"), + settings.ORIGINALS_DIR / "none" / "my_doc.pdf", ) self.assertEqual( doc.archive_path, - os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"), + settings.ARCHIVE_DIR / "none" / "my_doc.pdf", ) @override_settings(FILENAME_FORMAT="{correspondent}/{title}") @@ -698,7 +703,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test @mock.patch("documents.signals.handlers.os.rename") def test_move_archive_error(self, m): def fake_rename(src, dst): - if "archive" in src: + if "archive" in str(src): raise OSError() else: os.remove(src) @@ -749,7 +754,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, FileSystemAssertsMixin, Test @mock.patch("documents.signals.handlers.os.rename") def test_move_file_error(self, m): def fake_rename(src, dst): - if "original" in src: + if "original" in str(src): raise OSError() else: os.remove(src) diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 42f544f55..18fb6d662 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -359,7 +359,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertIsFile(os.path.join(self.target, "manifest.json")) self.assertIsFile(os.path.join(self.target, "wow2", "none.pdf")) self.assertIsFile( - (os.path.join(self.target, "wow2", "none_01.pdf")), + os.path.join(self.target, "wow2", "none_01.pdf"), ) def test_export_missing_files(self): diff --git a/src/documents/tests/test_management_thumbnails.py b/src/documents/tests/test_management_thumbnails.py index c66e0aa0a..0767e4e37 100644 --- a/src/documents/tests/test_management_thumbnails.py +++ b/src/documents/tests/test_management_thumbnails.py @@ -58,8 +58,8 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertIsNotFile(self.d1.thumbnail_path) self.assertIsNotFile(self.d2.thumbnail_path) call_command("document_thumbnails") - self.assertTrue(self.d1.thumbnail_path) - self.assertTrue(self.d2.thumbnail_path) + self.assertIsFile(self.d1.thumbnail_path) + self.assertIsFile(self.d2.thumbnail_path) def test_command_documentid(self): self.assertIsNotFile(self.d1.thumbnail_path) diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index 9362d378b..0a8da9ef9 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -1,4 +1,3 @@ -import os import shutil import tempfile from collections import namedtuple @@ -19,23 +18,22 @@ def setup_directories(): dirs = namedtuple("Dirs", ()) - dirs.data_dir = tempfile.mkdtemp() - dirs.scratch_dir = tempfile.mkdtemp() - dirs.media_dir = tempfile.mkdtemp() - dirs.consumption_dir = tempfile.mkdtemp() - dirs.static_dir = tempfile.mkdtemp() - dirs.index_dir = os.path.join(dirs.data_dir, "index") - dirs.originals_dir = os.path.join(dirs.media_dir, "documents", "originals") - dirs.thumbnail_dir = os.path.join(dirs.media_dir, "documents", "thumbnails") - dirs.archive_dir = os.path.join(dirs.media_dir, "documents", "archive") - dirs.logging_dir = os.path.join(dirs.data_dir, "log") + dirs.data_dir = Path(tempfile.mkdtemp()) + dirs.scratch_dir = Path(tempfile.mkdtemp()) + dirs.media_dir = Path(tempfile.mkdtemp()) + dirs.consumption_dir = Path(tempfile.mkdtemp()) + dirs.static_dir = Path(tempfile.mkdtemp()) + dirs.index_dir = dirs.data_dir / "index" + dirs.originals_dir = dirs.media_dir / "documents" / "originals" + dirs.thumbnail_dir = dirs.media_dir / "documents" / "thumbnails" + dirs.archive_dir = dirs.media_dir / "documents" / "archive" + dirs.logging_dir = dirs.data_dir / "log" - os.makedirs(dirs.index_dir, exist_ok=True) - os.makedirs(dirs.originals_dir, exist_ok=True) - os.makedirs(dirs.thumbnail_dir, exist_ok=True) - os.makedirs(dirs.archive_dir, exist_ok=True) - - os.makedirs(dirs.logging_dir, exist_ok=True) + dirs.index_dir.mkdir(parents=True, exist_ok=True) + dirs.originals_dir.mkdir(parents=True, exist_ok=True) + dirs.thumbnail_dir.mkdir(parents=True, exist_ok=True) + dirs.archive_dir.mkdir(parents=True, exist_ok=True) + dirs.logging_dir.mkdir(parents=True, exist_ok=True) dirs.settings_override = override_settings( DATA_DIR=dirs.data_dir, @@ -48,8 +46,8 @@ def setup_directories(): LOGGING_DIR=dirs.logging_dir, INDEX_DIR=dirs.index_dir, STATIC_ROOT=dirs.static_dir, - MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle"), - MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock"), + MODEL_FILE=dirs.data_dir / "classification_model.pickle", + MEDIA_LOCK=dirs.media_dir / "media.lock", ) dirs.settings_override.enable() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 44e843a9c..205842893 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -5,11 +5,14 @@ import multiprocessing import os import re import tempfile +from os import PathLike +from pathlib import Path from typing import Dict from typing import Final from typing import Optional from typing import Set from typing import Tuple +from typing import Union from urllib.parse import urlparse from celery.schedules import crontab @@ -63,11 +66,11 @@ def __get_float(key: str, default: float) -> float: return float(os.getenv(key, default)) -def __get_path(key: str, default: str) -> str: +def __get_path(key: str, default: Union[PathLike, str]) -> Path: """ Return a normalized, absolute path based on the environment variable or a default """ - return os.path.abspath(os.path.normpath(os.environ.get(key, default))) + return Path(os.environ.get(key, default)).resolve() def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]: @@ -201,16 +204,16 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") # Directories # ############################################################################### -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +BASE_DIR: Path = Path(__file__).resolve().parent.parent -STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static")) +STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static") -MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media")) -ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals") -ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive") -THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") +MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media") +ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals" +ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive" +THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails" -DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data")) +DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data") NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data") @@ -218,21 +221,21 @@ TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR") # Lock file for synchronizing changes to the MEDIA directory across multiple # threads. -MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock") -INDEX_DIR = os.path.join(DATA_DIR, "index") -MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") +MEDIA_LOCK = MEDIA_ROOT / "media.lock" +INDEX_DIR = DATA_DIR / "index" +MODEL_FILE = DATA_DIR / "classification_model.pickle" -LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log")) +LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log") CONSUMPTION_DIR = __get_path( "PAPERLESS_CONSUMPTION_DIR", - os.path.join(BASE_DIR, "..", "consume"), + BASE_DIR.parent / "consume", ) # This will be created if it doesn't exist SCRATCH_DIR = __get_path( "PAPERLESS_SCRATCH_DIR", - os.path.join(tempfile.gettempdir(), "paperless"), + Path(tempfile.gettempdir()) / "paperless", ) ###############################################################################