mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Chore: switch from os.path to pathlib.Path (#9060)
This commit is contained in:
parent
aaaa6c1393
commit
76d363f22d
@ -209,37 +209,18 @@ lint.per-file-ignores."src/documents/management/commands/document_consumer.py" =
|
|||||||
lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
|
lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/migrations/0012_auto_20160305_0040.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/migrations/0014_document_checksum.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/migrations/1003_mime_types.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
|
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/models.py" = [
|
lint.per-file-ignores."src/documents/models.py" = [
|
||||||
"PTH",
|
|
||||||
"SIM115",
|
"SIM115",
|
||||||
] # TODO PTH Enable & remove
|
]
|
||||||
lint.per-file-ignores."src/documents/parsers.py" = [
|
lint.per-file-ignores."src/documents/parsers.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/signals/handlers.py" = [
|
lint.per-file-ignores."src/documents/signals/handlers.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/tasks.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/tests/test_api_app_config.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/tests/test_classifier.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/tests/test_consumer.py" = [
|
lint.per-file-ignores."src/documents/tests/test_consumer.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
@ -255,9 +236,6 @@ lint.per-file-ignores."src/documents/tests/test_management_consumer.py" = [
|
|||||||
lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [
|
lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/tests/test_management_thumbnails.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [
|
lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
@ -270,12 +248,6 @@ lint.per-file-ignores."src/documents/tests/test_migration_mime_type.py" = [
|
|||||||
lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [
|
lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/documents/tests/test_tasks.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/tests/test_views.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/views.py" = [
|
lint.per-file-ignores."src/documents/views.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
@ -285,34 +257,16 @@ lint.per-file-ignores."src/paperless/checks.py" = [
|
|||||||
lint.per-file-ignores."src/paperless/settings.py" = [
|
lint.per-file-ignores."src/paperless/settings.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/paperless/tests/test_checks.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless/urls.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless/views.py" = [
|
lint.per-file-ignores."src/paperless/views.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/paperless_mail/mail.py" = [
|
lint.per-file-ignores."src/paperless_mail/mail.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
] # TODO Enable & remove
|
] # TODO Enable & remove
|
||||||
lint.per-file-ignores."src/paperless_mail/preprocessor.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless_tesseract/parsers.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
||||||
"PTH",
|
"PTH",
|
||||||
"RUF001",
|
"RUF001",
|
||||||
] # TODO PTH Enable & remove
|
] # TODO PTH Enable & remove
|
||||||
lint.per-file-ignores."src/paperless_tika/tests/test_live_tika.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless_tika/tests/test_tika_parser.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.isort.force-single-line = true
|
lint.isort.force-single-line = true
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import gnupg
|
import gnupg
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -34,16 +35,16 @@ class GnuPG:
|
|||||||
|
|
||||||
|
|
||||||
def move_documents_and_create_thumbnails(apps, schema_editor):
|
def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||||
os.makedirs(
|
(Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
|
||||||
os.path.join(settings.MEDIA_ROOT, "documents", "originals"),
|
parents=True,
|
||||||
exist_ok=True,
|
exist_ok=True,
|
||||||
)
|
)
|
||||||
os.makedirs(
|
(Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
|
||||||
os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"),
|
parents=True,
|
||||||
exist_ok=True,
|
exist_ok=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
|
documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents")
|
||||||
|
|
||||||
if set(documents) == {"originals", "thumbnails"}:
|
if set(documents) == {"originals", "thumbnails"}:
|
||||||
return
|
return
|
||||||
@ -60,10 +61,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(settings.SCRATCH_DIR)
|
|
||||||
except FileExistsError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
for f in sorted(documents):
|
for f in sorted(documents):
|
||||||
if not f.endswith("gpg"):
|
if not f.endswith("gpg"):
|
||||||
@ -77,14 +75,13 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||||
orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||||
|
|
||||||
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
|
orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
|
||||||
orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
|
orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
|
||||||
|
|
||||||
with open(orig_source, "rb") as encrypted:
|
with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
|
||||||
with open(orig_target, "wb") as unencrypted:
|
|
||||||
unencrypted.write(GnuPG.decrypted(encrypted))
|
unencrypted.write(GnuPG.decrypted(encrypted))
|
||||||
|
|
||||||
subprocess.Popen(
|
subprocess.Popen(
|
||||||
@ -95,27 +92,29 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
"-alpha",
|
"-alpha",
|
||||||
"remove",
|
"remove",
|
||||||
orig_target,
|
orig_target,
|
||||||
os.path.join(thumb_temp, "convert-%04d.png"),
|
Path(thumb_temp) / "convert-%04d.png",
|
||||||
),
|
),
|
||||||
).wait()
|
).wait()
|
||||||
|
|
||||||
thumb_source = os.path.join(thumb_temp, "convert-0000.png")
|
thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
|
||||||
thumb_target = os.path.join(
|
thumb_target: Path = (
|
||||||
settings.MEDIA_ROOT,
|
Path(settings.MEDIA_ROOT)
|
||||||
"documents",
|
/ "documents"
|
||||||
"thumbnails",
|
/ "thumbnails"
|
||||||
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f),
|
/ re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
|
||||||
)
|
)
|
||||||
with open(thumb_source, "rb") as unencrypted:
|
with (
|
||||||
with open(thumb_target, "wb") as encrypted:
|
thumb_source.open("rb") as unencrypted,
|
||||||
|
thumb_target.open("wb") as encrypted,
|
||||||
|
):
|
||||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||||
|
|
||||||
shutil.rmtree(thumb_temp)
|
shutil.rmtree(thumb_temp)
|
||||||
shutil.rmtree(orig_temp)
|
shutil.rmtree(orig_temp)
|
||||||
|
|
||||||
shutil.move(
|
shutil.move(
|
||||||
os.path.join(settings.MEDIA_ROOT, "documents", f),
|
Path(settings.MEDIA_ROOT) / "documents" / f,
|
||||||
os.path.join(settings.MEDIA_ROOT, "documents", "originals", f),
|
Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# Generated by Django 1.9.4 on 2016-03-28 19:09
|
# Generated by Django 1.9.4 on 2016-03-28 19:09
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
from pathlib import Path
|
||||||
|
|
||||||
import django.utils.timezone
|
import django.utils.timezone
|
||||||
import gnupg
|
import gnupg
|
||||||
@ -58,16 +58,16 @@ class Document:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def source_path(self):
|
def source_path(self):
|
||||||
return os.path.join(
|
return (
|
||||||
settings.MEDIA_ROOT,
|
Path(settings.MEDIA_ROOT)
|
||||||
"documents",
|
/ "documents"
|
||||||
"originals",
|
/ "originals"
|
||||||
f"{self.pk:07}.{self.file_type}.gpg",
|
/ f"{self.pk:07}.{self.file_type}.gpg"
|
||||||
)
|
).as_posix()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def source_file(self):
|
def source_file(self):
|
||||||
return open(self.source_path, "rb")
|
return Path(self.source_path).open("rb")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def file_name(self):
|
def file_name(self):
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# Generated by Django 3.1.3 on 2020-11-20 11:21
|
# Generated by Django 3.1.3 on 2020-11-20 11:21
|
||||||
import os
|
from pathlib import Path
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -12,15 +12,15 @@ STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
|||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
|
|
||||||
|
|
||||||
def source_path(self):
|
def source_path(self) -> Path:
|
||||||
if self.filename:
|
if self.filename:
|
||||||
fname = str(self.filename)
|
fname: str = str(self.filename)
|
||||||
else:
|
else:
|
||||||
fname = f"{self.pk:07}.{self.file_type}"
|
fname = f"{self.pk:07}.{self.file_type}"
|
||||||
if self.storage_type == STORAGE_TYPE_GPG:
|
if self.storage_type == STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg"
|
fname += ".gpg"
|
||||||
|
|
||||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
return Path(settings.ORIGINALS_DIR) / fname
|
||||||
|
|
||||||
|
|
||||||
def add_mime_types(apps, schema_editor):
|
def add_mime_types(apps, schema_editor):
|
||||||
@ -28,7 +28,7 @@ def add_mime_types(apps, schema_editor):
|
|||||||
documents = Document.objects.all()
|
documents = Document.objects.all()
|
||||||
|
|
||||||
for d in documents:
|
for d in documents:
|
||||||
f = open(source_path(d), "rb")
|
with Path(source_path(d)).open("rb") as f:
|
||||||
if d.storage_type == STORAGE_TYPE_GPG:
|
if d.storage_type == STORAGE_TYPE_GPG:
|
||||||
data = GnuPG.decrypted(f)
|
data = GnuPG.decrypted(f)
|
||||||
else:
|
else:
|
||||||
@ -37,15 +37,13 @@ def add_mime_types(apps, schema_editor):
|
|||||||
d.mime_type = magic.from_buffer(data, mime=True)
|
d.mime_type = magic.from_buffer(data, mime=True)
|
||||||
d.save()
|
d.save()
|
||||||
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
|
|
||||||
def add_file_extensions(apps, schema_editor):
|
def add_file_extensions(apps, schema_editor):
|
||||||
Document = apps.get_model("documents", "Document")
|
Document = apps.get_model("documents", "Document")
|
||||||
documents = Document.objects.all()
|
documents = Document.objects.all()
|
||||||
|
|
||||||
for d in documents:
|
for d in documents:
|
||||||
d.file_type = os.path.splitext(d.filename)[1].strip(".")
|
d.file_type = Path(d.filename).suffix.lstrip(".")
|
||||||
d.save()
|
d.save()
|
||||||
|
|
||||||
|
|
||||||
|
@ -315,7 +315,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def source_file(self):
|
def source_file(self):
|
||||||
return open(self.source_path, "rb")
|
return Path(self.source_path).open("rb")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_archive_version(self) -> bool:
|
def has_archive_version(self) -> bool:
|
||||||
@ -330,7 +330,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def archive_file(self):
|
def archive_file(self):
|
||||||
return open(self.archive_path, "rb")
|
return Path(self.archive_path).open("rb")
|
||||||
|
|
||||||
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
|
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
|
||||||
"""
|
"""
|
||||||
@ -367,7 +367,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def thumbnail_file(self):
|
def thumbnail_file(self):
|
||||||
return open(self.thumbnail_path, "rb")
|
return Path(self.thumbnail_path).open("rb")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def created_date(self):
|
def created_date(self):
|
||||||
|
@ -272,7 +272,7 @@ def update_document_content_maybe_archive_file(document_id):
|
|||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
oldDocument = Document.objects.get(pk=document.pk)
|
oldDocument = Document.objects.get(pk=document.pk)
|
||||||
if parser.get_archive_path():
|
if parser.get_archive_path():
|
||||||
with open(parser.get_archive_path(), "rb") as f:
|
with Path(parser.get_archive_path()).open("rb") as f:
|
||||||
checksum = hashlib.md5(f.read()).hexdigest()
|
checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
# I'm going to save first so that in case the file move
|
# I'm going to save first so that in case the file move
|
||||||
# fails, the database is rolled back.
|
# fails, the database is rolled back.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
from pathlib import Path
|
||||||
|
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
@ -136,10 +136,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- old app_logo file is deleted
|
- old app_logo file is deleted
|
||||||
"""
|
"""
|
||||||
with open(
|
with (Path(__file__).parent / "samples" / "simple.jpg").open("rb") as f:
|
||||||
os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"),
|
|
||||||
"rb",
|
|
||||||
) as f:
|
|
||||||
self.client.patch(
|
self.client.patch(
|
||||||
f"{self.ENDPOINT}1/",
|
f"{self.ENDPOINT}1/",
|
||||||
{
|
{
|
||||||
@ -148,15 +145,12 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
|||||||
)
|
)
|
||||||
config = ApplicationConfiguration.objects.first()
|
config = ApplicationConfiguration.objects.first()
|
||||||
old_logo = config.app_logo
|
old_logo = config.app_logo
|
||||||
self.assertTrue(os.path.exists(old_logo.path))
|
self.assertTrue(Path(old_logo.path).exists())
|
||||||
with open(
|
with (Path(__file__).parent / "samples" / "simple.png").open("rb") as f:
|
||||||
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
|
|
||||||
"rb",
|
|
||||||
) as f:
|
|
||||||
self.client.patch(
|
self.client.patch(
|
||||||
f"{self.ENDPOINT}1/",
|
f"{self.ENDPOINT}1/",
|
||||||
{
|
{
|
||||||
"app_logo": f,
|
"app_logo": f,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
self.assertFalse(os.path.exists(old_logo.path))
|
self.assertFalse(Path(old_logo.path).exists())
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -617,7 +616,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
|
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
|
||||||
|
|
||||||
def test_load_classifier_not_exists(self):
|
def test_load_classifier_not_exists(self):
|
||||||
self.assertFalse(os.path.exists(settings.MODEL_FILE))
|
self.assertFalse(Path(settings.MODEL_FILE).exists())
|
||||||
self.assertIsNone(load_classifier())
|
self.assertIsNone(load_classifier())
|
||||||
|
|
||||||
@mock.patch("documents.classifier.DocumentClassifier.load")
|
@mock.patch("documents.classifier.DocumentClassifier.load")
|
||||||
@ -632,7 +631,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
@override_settings(
|
@override_settings(
|
||||||
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
|
MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(),
|
||||||
)
|
)
|
||||||
@pytest.mark.skip(
|
@pytest.mark.skip(
|
||||||
reason="Disabled caching due to high memory usage - need to investigate.",
|
reason="Disabled caching due to high memory usage - need to investigate.",
|
||||||
@ -648,24 +647,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
@mock.patch("documents.classifier.DocumentClassifier.load")
|
@mock.patch("documents.classifier.DocumentClassifier.load")
|
||||||
def test_load_classifier_incompatible_version(self, load):
|
def test_load_classifier_incompatible_version(self, load):
|
||||||
Path(settings.MODEL_FILE).touch()
|
Path(settings.MODEL_FILE).touch()
|
||||||
self.assertTrue(os.path.exists(settings.MODEL_FILE))
|
self.assertTrue(Path(settings.MODEL_FILE).exists())
|
||||||
|
|
||||||
load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
|
load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
|
||||||
self.assertIsNone(load_classifier())
|
self.assertIsNone(load_classifier())
|
||||||
self.assertFalse(os.path.exists(settings.MODEL_FILE))
|
self.assertFalse(Path(settings.MODEL_FILE).exists())
|
||||||
|
|
||||||
@mock.patch("documents.classifier.DocumentClassifier.load")
|
@mock.patch("documents.classifier.DocumentClassifier.load")
|
||||||
def test_load_classifier_os_error(self, load):
|
def test_load_classifier_os_error(self, load):
|
||||||
Path(settings.MODEL_FILE).touch()
|
Path(settings.MODEL_FILE).touch()
|
||||||
self.assertTrue(os.path.exists(settings.MODEL_FILE))
|
self.assertTrue(Path(settings.MODEL_FILE).exists())
|
||||||
|
|
||||||
load.side_effect = OSError()
|
load.side_effect = OSError()
|
||||||
self.assertIsNone(load_classifier())
|
self.assertIsNone(load_classifier())
|
||||||
self.assertTrue(os.path.exists(settings.MODEL_FILE))
|
self.assertTrue(Path(settings.MODEL_FILE).exists())
|
||||||
|
|
||||||
def test_load_old_classifier_version(self):
|
def test_load_old_classifier_version(self):
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
os.path.join(os.path.dirname(__file__), "data", "v1.17.4.model.pickle"),
|
Path(__file__).parent / "data" / "v1.17.4.model.pickle",
|
||||||
self.dirs.scratch_dir,
|
self.dirs.scratch_dir,
|
||||||
)
|
)
|
||||||
with override_settings(
|
with override_settings(
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import os
|
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
@ -22,7 +22,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
filename="test.pdf",
|
filename="test.pdf",
|
||||||
)
|
)
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
Path(__file__).parent / "samples" / "simple.pdf",
|
||||||
self.d1.source_path,
|
self.d1.source_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
filename="test2.pdf",
|
filename="test2.pdf",
|
||||||
)
|
)
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
Path(__file__).parent / "samples" / "simple.pdf",
|
||||||
self.d2.source_path,
|
self.d2.source_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -46,7 +46,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
filename="test3.pdf",
|
filename="test3.pdf",
|
||||||
)
|
)
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
os.path.join(os.path.dirname(__file__), "samples", "password-is-test.pdf"),
|
Path(__file__).parent / "samples" / "password-is-test.pdf",
|
||||||
self.d3.source_path,
|
self.d3.source_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -88,18 +87,18 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
tasks.train_classifier()
|
tasks.train_classifier()
|
||||||
self.assertIsFile(settings.MODEL_FILE)
|
self.assertIsFile(settings.MODEL_FILE)
|
||||||
mtime = os.stat(settings.MODEL_FILE).st_mtime
|
mtime = Path(settings.MODEL_FILE).stat().st_mtime
|
||||||
|
|
||||||
tasks.train_classifier()
|
tasks.train_classifier()
|
||||||
self.assertIsFile(settings.MODEL_FILE)
|
self.assertIsFile(settings.MODEL_FILE)
|
||||||
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
|
mtime2 = Path(settings.MODEL_FILE).stat().st_mtime
|
||||||
self.assertEqual(mtime, mtime2)
|
self.assertEqual(mtime, mtime2)
|
||||||
|
|
||||||
doc.content = "test2"
|
doc.content = "test2"
|
||||||
doc.save()
|
doc.save()
|
||||||
tasks.train_classifier()
|
tasks.train_classifier()
|
||||||
self.assertIsFile(settings.MODEL_FILE)
|
self.assertIsFile(settings.MODEL_FILE)
|
||||||
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
|
mtime3 = Path(settings.MODEL_FILE).stat().st_mtime
|
||||||
self.assertNotEqual(mtime2, mtime3)
|
self.assertNotEqual(mtime2, mtime3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import Permission
|
from django.contrib.auth.models import Permission
|
||||||
@ -107,12 +107,12 @@ class TestViews(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
content = b"This is a test"
|
content = b"This is a test"
|
||||||
|
|
||||||
with open(filename, "wb") as f:
|
with Path(filename).open("wb") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
doc = Document.objects.create(
|
doc = Document.objects.create(
|
||||||
title="none",
|
title="none",
|
||||||
filename=os.path.basename(filename),
|
filename=Path(filename).name,
|
||||||
mime_type="application/pdf",
|
mime_type="application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -38,9 +38,9 @@ class TestChecks(DirectoriesMixin, TestCase):
|
|||||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||||
|
|
||||||
def test_paths_check_no_access(self):
|
def test_paths_check_no_access(self):
|
||||||
os.chmod(self.dirs.data_dir, 0o000)
|
Path(self.dirs.data_dir).chmod(0o000)
|
||||||
os.chmod(self.dirs.media_dir, 0o000)
|
Path(self.dirs.media_dir).chmod(0o000)
|
||||||
os.chmod(self.dirs.consumption_dir, 0o000)
|
Path(self.dirs.consumption_dir).chmod(0o000)
|
||||||
|
|
||||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import os
|
from pathlib import Path
|
||||||
|
|
||||||
from allauth.account import views as allauth_account_views
|
from allauth.account import views as allauth_account_views
|
||||||
from allauth.mfa.base import views as allauth_mfa_views
|
from allauth.mfa.base import views as allauth_mfa_views
|
||||||
@ -270,7 +270,7 @@ urlpatterns = [
|
|||||||
re_path(
|
re_path(
|
||||||
r"^logo(?P<path>.*)$",
|
r"^logo(?P<path>.*)$",
|
||||||
serve,
|
serve,
|
||||||
kwargs={"document_root": os.path.join(settings.MEDIA_ROOT, "logo")},
|
kwargs={"document_root": Path(settings.MEDIA_ROOT) / "logo"},
|
||||||
),
|
),
|
||||||
# allauth
|
# allauth
|
||||||
path(
|
path(
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import abc
|
import abc
|
||||||
import os
|
|
||||||
from email import message_from_bytes
|
from email import message_from_bytes
|
||||||
from email import policy
|
from email import policy
|
||||||
from email.message import Message
|
from email.message import Message
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from gnupg import GPG
|
from gnupg import GPG
|
||||||
@ -50,7 +50,7 @@ class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin):
|
|||||||
return False
|
return False
|
||||||
if settings.EMAIL_GNUPG_HOME is None:
|
if settings.EMAIL_GNUPG_HOME is None:
|
||||||
return True
|
return True
|
||||||
return os.path.isdir(settings.EMAIL_GNUPG_HOME)
|
return Path(settings.EMAIL_GNUPG_HOME).is_dir()
|
||||||
|
|
||||||
def run(self, message: MailMessage) -> MailMessage:
|
def run(self, message: MailMessage) -> MailMessage:
|
||||||
if not hasattr(message, "obj"):
|
if not hasattr(message, "obj"):
|
||||||
|
@ -159,7 +159,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
# the whole text, so do not utilize it in that case
|
# the whole text, so do not utilize it in that case
|
||||||
if (
|
if (
|
||||||
sidecar_file is not None
|
sidecar_file is not None
|
||||||
and os.path.isfile(sidecar_file)
|
and sidecar_file.is_file()
|
||||||
and self.settings.mode != "redo"
|
and self.settings.mode != "redo"
|
||||||
):
|
):
|
||||||
text = self.read_file_handle_unicode_errors(sidecar_file)
|
text = self.read_file_handle_unicode_errors(sidecar_file)
|
||||||
@ -174,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
# no success with the sidecar file, try PDF
|
# no success with the sidecar file, try PDF
|
||||||
|
|
||||||
if not os.path.isfile(pdf_file):
|
if not Path(pdf_file).is_file():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -368,8 +368,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
from ocrmypdf import SubprocessOutputError
|
from ocrmypdf import SubprocessOutputError
|
||||||
from ocrmypdf.exceptions import DigitalSignatureError
|
from ocrmypdf.exceptions import DigitalSignatureError
|
||||||
|
|
||||||
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
|
archive_path = Path(self.tempdir) / "archive.pdf"
|
||||||
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
|
sidecar_file = Path(self.tempdir) / "sidecar.txt"
|
||||||
|
|
||||||
args = self.construct_ocrmypdf_parameters(
|
args = self.construct_ocrmypdf_parameters(
|
||||||
document_path,
|
document_path,
|
||||||
@ -412,12 +412,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
f"Attempting force OCR to get the text.",
|
f"Attempting force OCR to get the text.",
|
||||||
)
|
)
|
||||||
|
|
||||||
archive_path_fallback = Path(
|
archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf"
|
||||||
os.path.join(self.tempdir, "archive-fallback.pdf"),
|
sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt"
|
||||||
)
|
|
||||||
sidecar_file_fallback = Path(
|
|
||||||
os.path.join(self.tempdir, "sidecar-fallback.txt"),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Attempt to run OCR with safe settings.
|
# Attempt to run OCR with safe settings.
|
||||||
|
|
||||||
|
@ -75,7 +75,7 @@ class TestTikaParserAgainstServer:
|
|||||||
== "This is an DOCX test document, also made September 14, 2022"
|
== "This is an DOCX test document, also made September 14, 2022"
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.archive_path is not None
|
||||||
with open(tika_parser.archive_path, "rb") as f:
|
with Path(tika_parser.archive_path).open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
||||||
@ -104,7 +104,7 @@ class TestTikaParserAgainstServer:
|
|||||||
in tika_parser.text
|
in tika_parser.text
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.archive_path is not None
|
||||||
with open(tika_parser.archive_path, "rb") as f:
|
with Path(tika_parser.archive_path).open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
def test_tika_fails_multi_part(
|
def test_tika_fails_multi_part(
|
||||||
@ -130,5 +130,5 @@ class TestTikaParserAgainstServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.archive_path is not None
|
||||||
with open(tika_parser.archive_path, "rb") as f:
|
with Path(tika_parser.archive_path).open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
@ -38,7 +38,7 @@ class TestTikaParser:
|
|||||||
|
|
||||||
assert tika_parser.text == "the content"
|
assert tika_parser.text == "the content"
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.archive_path is not None
|
||||||
with open(tika_parser.archive_path, "rb") as f:
|
with Path(tika_parser.archive_path).open("rb") as f:
|
||||||
assert f.read() == b"PDF document"
|
assert f.read() == b"PDF document"
|
||||||
|
|
||||||
assert tika_parser.date == datetime.datetime(
|
assert tika_parser.date == datetime.datetime(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user