Merge branch 'dev' into feature-permissions

This commit is contained in:
shamoon
2023-01-16 15:59:25 -08:00
committed by GitHub
29 changed files with 923 additions and 482 deletions

View File

@@ -19,7 +19,7 @@ from watchdog.observers.polling import PollingObserver
try:
from inotifyrecursive import INotify, flags
except ImportError:
except ImportError: # pragma: nocover
INotify = flags = None
logger = logging.getLogger("paperless.management.consumer")

View File

@@ -4,6 +4,9 @@ import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import List
from typing import Set
import tqdm
from django.conf import settings
@@ -96,16 +99,16 @@ class Command(BaseCommand):
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
self.target = None
self.files_in_export_dir = []
self.exported_files = []
self.target: Path = None
self.files_in_export_dir: Set[Path] = set()
self.exported_files: List[Path] = []
self.compare_checksums = False
self.use_filename_format = False
self.delete = False
def handle(self, *args, **options):
self.target = options["target"]
self.target = Path(options["target"]).resolve()
self.compare_checksums = options["compare_checksums"]
self.use_filename_format = options["use_filename_format"]
self.delete = options["delete"]
@@ -121,11 +124,14 @@ class Command(BaseCommand):
dir=settings.SCRATCH_DIR,
prefix="paperless-export",
)
self.target = temp_dir.name
self.target = Path(temp_dir.name).resolve()
if not os.path.exists(self.target):
if not self.target.exists():
raise CommandError("That path doesn't exist")
if not self.target.is_dir():
raise CommandError("That path isn't a directory")
if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable")
@@ -152,10 +158,9 @@ class Command(BaseCommand):
def dump(self, progress_bar_disable=False):
# 1. Take a snapshot of what files exist in the current export folder
for root, dirs, files in os.walk(self.target):
self.files_in_export_dir.extend(
map(lambda f: os.path.abspath(os.path.join(root, f)), files),
)
for x in self.target.glob("**/*"):
if x.is_file():
self.files_in_export_dir.add(x.resolve())
# 2. Create manifest, containing all correspondents, types, tags, storage paths
# comments, documents and ui_settings
@@ -238,16 +243,16 @@ class Command(BaseCommand):
# 3.3. write filenames into manifest
original_name = base_name
original_target = os.path.join(self.target, original_name)
original_target = (self.target / Path(original_name)).resolve()
document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.webp"
thumbnail_target = os.path.join(self.target, thumbnail_name)
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if document.has_archive_version:
archive_name = base_name + "-archive.pdf"
archive_target = os.path.join(self.target, archive_name)
archive_target = (self.target / Path(archive_name)).resolve()
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
@@ -256,24 +261,21 @@ class Command(BaseCommand):
t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG:
os.makedirs(os.path.dirname(original_target), exist_ok=True)
with open(original_target, "wb") as f:
with document.source_file as out_file:
f.write(GnuPG.decrypted(out_file))
os.utime(original_target, times=(t, t))
original_target.parent.mkdir(parents=True, exist_ok=True)
with document.source_file as out_file:
original_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(original_target, times=(t, t))
os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
with open(thumbnail_target, "wb") as f:
with document.thumbnail_file as out_file:
f.write(GnuPG.decrypted(out_file))
os.utime(thumbnail_target, times=(t, t))
thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
with document.thumbnail_file as out_file:
thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(thumbnail_target, times=(t, t))
if archive_target:
os.makedirs(os.path.dirname(archive_target), exist_ok=True)
with open(archive_target, "wb") as f:
with document.archive_path as out_file:
f.write(GnuPG.decrypted(out_file))
os.utime(archive_target, times=(t, t))
archive_target.parent.mkdir(parents=True, exist_ok=True)
with document.archive_path as out_file:
archive_target.write_bytes(GnuPG.decrypted(out_file))
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(
document.source_path,
@@ -291,16 +293,14 @@ class Command(BaseCommand):
)
# 4.1 write manifest to target folder
manifest_path = os.path.abspath(os.path.join(self.target, "manifest.json"))
with open(manifest_path, "w") as f:
json.dump(manifest, f, indent=2)
manifest_path = (self.target / Path("manifest.json")).resolve()
manifest_path.write_text(json.dumps(manifest, indent=2))
# 4.2 write version information to target folder
version_path = os.path.abspath(os.path.join(self.target, "version.json"))
with open(version_path, "w") as f:
json.dump({"version": version.__full_version_str__}, f, indent=2)
version_path = (self.target / Path("version.json")).resolve()
version_path.write_text(
json.dumps({"version": version.__full_version_str__}, indent=2),
)
if self.delete:
# 5. Remove files which we did not explicitly export in this run
@@ -309,25 +309,24 @@ class Command(BaseCommand):
self.files_in_export_dir.remove(manifest_path)
for f in self.files_in_export_dir:
os.remove(f)
f.unlink()
delete_empty_directories(
os.path.abspath(os.path.dirname(f)),
os.path.abspath(self.target),
f.parent,
self.target,
)
def check_and_copy(self, source, source_checksum, target):
if os.path.abspath(target) in self.files_in_export_dir:
self.files_in_export_dir.remove(os.path.abspath(target))
def check_and_copy(self, source, source_checksum, target: Path):
if target in self.files_in_export_dir:
self.files_in_export_dir.remove(target)
perform_copy = False
if os.path.exists(target):
if target.exists():
source_stat = os.stat(source)
target_stat = os.stat(target)
target_stat = target.stat()
if self.compare_checksums and source_checksum:
with open(target, "rb") as f:
target_checksum = hashlib.md5(f.read()).hexdigest()
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
perform_copy = target_checksum != source_checksum
elif source_stat.st_mtime != target_stat.st_mtime:
perform_copy = True
@@ -338,5 +337,5 @@ class Command(BaseCommand):
perform_copy = True
if perform_copy:
os.makedirs(os.path.dirname(target), exist_ok=True)
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, target)

View File

@@ -447,7 +447,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
)
except (OSError, DatabaseError, CannotMoveFilesException) as e:
logger.warn(f"Exception during file handling: {e}")
logger.warning(f"Exception during file handling: {e}")
# This happens when either:
# - moving the files failed due to file system errors
# - saving to the database failed due to database errors

View File

@@ -3,10 +3,10 @@ import logging
import os
import shutil
import uuid
from datetime import datetime
from pathlib import Path
from typing import Type
import dateutil.parser
import tqdm
from asgiref.sync import async_to_sync
from celery import shared_task
@@ -107,7 +107,7 @@ def consume_file(
# More types will be retained through JSON encode/decode
if override_created is not None and isinstance(override_created, str):
try:
override_created = datetime.fromisoformat(override_created)
override_created = dateutil.parser.isoparse(override_created)
except Exception:
pass

View File

@@ -7,6 +7,7 @@ import tempfile
import urllib.request
import uuid
import zipfile
from pathlib import Path
from unittest import mock
from unittest.mock import MagicMock
@@ -812,7 +813,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(kwargs["override_filename"], "simple.pdf")
file_path = Path(args[0])
self.assertEqual(file_path.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), file_path.parents)
self.assertIsNone(kwargs["override_title"])
self.assertIsNone(kwargs["override_correspondent_id"])
self.assertIsNone(kwargs["override_document_type_id"])
@@ -837,7 +840,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(kwargs["override_filename"], "simple.pdf")
file_path = Path(args[0])
self.assertEqual(file_path.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), file_path.parents)
self.assertIsNone(kwargs["override_title"])
self.assertIsNone(kwargs["override_correspondent_id"])
self.assertIsNone(kwargs["override_document_type_id"])

View File

@@ -8,6 +8,7 @@ from unittest import mock
from zipfile import ZipFile
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone
@@ -438,3 +439,61 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(len(zip.namelist()), 14)
self.assertIn("manifest.json", zip.namelist())
self.assertIn("version.json", zip.namelist())
def test_export_target_not_exists(self):
"""
GIVEN:
- Request to export documents to directory that doesn't exist
WHEN:
- Export command is called
THEN:
- Error is raised
"""
args = ["document_exporter", "/tmp/foo/bar"]
with self.assertRaises(CommandError) as e:
call_command(*args)
self.assertEqual("That path isn't a directory", str(e))
def test_export_target_exists_but_is_file(self):
"""
GIVEN:
- Request to export documents to file instead of directory
WHEN:
- Export command is called
THEN:
- Error is raised
"""
with tempfile.NamedTemporaryFile() as tmp_file:
args = ["document_exporter", tmp_file.name]
with self.assertRaises(CommandError) as e:
call_command(*args)
self.assertEqual("That path isn't a directory", str(e))
def test_export_target_not_writable(self):
"""
GIVEN:
- Request to export documents to directory that's not writeable
WHEN:
- Export command is called
THEN:
- Error is raised
"""
with tempfile.TemporaryDirectory() as tmp_dir:
os.chmod(tmp_dir, 0o000)
args = ["document_exporter", tmp_dir]
with self.assertRaises(CommandError) as e:
call_command(*args)
self.assertEqual("That path doesn't appear to be writable", str(e))

View File

@@ -8,10 +8,12 @@ import urllib
import uuid
import zipfile
from datetime import datetime
from pathlib import Path
from time import mktime
from unicodedata import normalize
from urllib.parse import quote
import pathvalidate
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Case
@@ -33,6 +35,7 @@ from documents.filters import ObjectOwnedOrGrandtedPermissionsFilter
from documents.permissions import PaperlessAdminPermissions
from documents.permissions import PaperlessObjectPermissions
from documents.tasks import consume_file
from langdetect import detect
from packaging import version as packaging_version
from paperless import version
from paperless.db import GnuPG
@@ -361,6 +364,13 @@ class DocumentViewSet(
"original_filename": doc.original_filename,
}
lang = "en"
try:
lang = detect(doc.content)
except Exception:
pass
meta["lang"] = lang
if doc.has_archive_version:
meta["archive_size"] = self.get_filesize(doc.archive_path)
meta["archive_metadata"] = self.get_metadata(
@@ -658,20 +668,19 @@ class PostDocumentView(GenericAPIView):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
with tempfile.NamedTemporaryFile(
prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False,
) as f:
f.write(doc_data)
os.utime(f.name, times=(t, t))
temp_filename = f.name
temp_file_path = Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) / Path(
pathvalidate.sanitize_filename(doc_name),
)
temp_file_path.write_bytes(doc_data)
os.utime(temp_file_path, times=(t, t))
task_id = str(uuid.uuid4())
async_task = consume_file.delay(
temp_filename,
override_filename=doc_name,
# Paths are not JSON friendly
str(temp_file_path),
override_title=title,
override_correspondent_id=correspondent_id,
override_document_type_id=document_type_id,