mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Adds an optional API field to follow the filenaming format when creating a bulk download zip
This commit is contained in:
parent
ff4a8b37bd
commit
54bb1ae27d
@ -1,18 +1,29 @@
|
||||
import os
|
||||
from zipfile import ZipFile
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
class BulkArchiveStrategy:
|
||||
def __init__(self, zipf: ZipFile):
|
||||
def __init__(self, zipf: ZipFile, follow_formatting: bool = False):
|
||||
self.zipf = zipf
|
||||
if follow_formatting:
|
||||
self.make_unique_filename = self._formatted_filepath
|
||||
else:
|
||||
self.make_unique_filename = self._filename_only
|
||||
|
||||
def make_unique_filename(
|
||||
def _filename_only(
|
||||
self,
|
||||
doc: Document,
|
||||
archive: bool = False,
|
||||
folder: str = "",
|
||||
):
|
||||
"""
|
||||
Constructs a unique name for the given document to be used inside the
|
||||
zip file.
|
||||
|
||||
The filename might not be unique enough, so a counter is appended if needed
|
||||
"""
|
||||
counter = 0
|
||||
while True:
|
||||
filename = folder + doc.get_public_filename(archive, counter)
|
||||
@ -21,6 +32,25 @@ class BulkArchiveStrategy:
|
||||
else:
|
||||
return filename
|
||||
|
||||
def _formatted_filepath(
|
||||
self,
|
||||
doc: Document,
|
||||
archive: bool = False,
|
||||
folder: str = "",
|
||||
):
|
||||
"""
|
||||
Constructs a full file path for the given document to be used inside
|
||||
the zipfile.
|
||||
|
||||
The path is already unique, as handled when a document is consumed or updated
|
||||
"""
|
||||
if archive and doc.has_archive_version:
|
||||
in_archive_path = os.path.join(folder, doc.archive_filename)
|
||||
else:
|
||||
in_archive_path = os.path.join(folder, doc.filename)
|
||||
|
||||
return in_archive_path
|
||||
|
||||
def add_document(self, doc: Document):
|
||||
raise NotImplementedError() # pragma: no cover
|
||||
|
||||
@ -31,9 +61,6 @@ class OriginalsOnlyStrategy(BulkArchiveStrategy):
|
||||
|
||||
|
||||
class ArchiveOnlyStrategy(BulkArchiveStrategy):
|
||||
def __init__(self, zipf):
|
||||
super().__init__(zipf)
|
||||
|
||||
def add_document(self, doc: Document):
|
||||
if doc.has_archive_version:
|
||||
self.zipf.write(
|
||||
|
@ -287,6 +287,9 @@ class Document(models.Model):
|
||||
return open(self.archive_path, "rb")
|
||||
|
||||
def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
|
||||
"""
|
||||
Returns a sanitized filename for the document, not including any paths.
|
||||
"""
|
||||
result = str(self)
|
||||
|
||||
if counter:
|
||||
|
@ -551,6 +551,10 @@ class BulkDownloadSerializer(DocumentListSerializer):
|
||||
default="none",
|
||||
)
|
||||
|
||||
follow_formatting = serializers.BooleanField(
|
||||
default=False,
|
||||
)
|
||||
|
||||
def validate_compression(self, compression):
|
||||
import zipfile
|
||||
|
||||
|
@ -2329,6 +2329,9 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
|
||||
|
||||
|
||||
class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
ENDPOINT = "/api/documents/bulk_download/"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@ -2379,7 +2382,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
def test_download_originals(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_download/",
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
|
||||
),
|
||||
@ -2402,7 +2405,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
def test_download_default(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_download/",
|
||||
self.ENDPOINT,
|
||||
json.dumps({"documents": [self.doc2.id, self.doc3.id]}),
|
||||
content_type="application/json",
|
||||
)
|
||||
@ -2423,7 +2426,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
def test_download_both(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_download/",
|
||||
self.ENDPOINT,
|
||||
json.dumps({"documents": [self.doc2.id, self.doc3.id], "content": "both"}),
|
||||
content_type="application/json",
|
||||
)
|
||||
@ -2457,7 +2460,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
def test_filename_clashes(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_download/",
|
||||
self.ENDPOINT,
|
||||
json.dumps({"documents": [self.doc2.id, self.doc2b.id]}),
|
||||
content_type="application/json",
|
||||
)
|
||||
@ -2479,13 +2482,145 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
|
||||
|
||||
def test_compression(self):
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_download/",
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
|
||||
def test_formatted_download_originals(self):
|
||||
|
||||
c = Correspondent.objects.create(name="test")
|
||||
c2 = Correspondent.objects.create(name="a space name")
|
||||
|
||||
self.doc2.correspondent = c
|
||||
self.doc2.title = "This is Doc 2"
|
||||
self.doc2.save()
|
||||
|
||||
self.doc3.correspondent = c2
|
||||
self.doc3.title = "Title 2 - Doc 3"
|
||||
self.doc3.save()
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"content": "originals",
|
||||
"follow_formatting": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response["Content-Type"], "application/zip")
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
|
||||
self.assertEqual(len(zipf.filelist), 2)
|
||||
self.assertIn("a space name/Title 2 - Doc 3.jpg", zipf.namelist())
|
||||
self.assertIn("test/This is Doc 2.pdf", zipf.namelist())
|
||||
|
||||
with self.doc2.source_file as f:
|
||||
self.assertEqual(f.read(), zipf.read("test/This is Doc 2.pdf"))
|
||||
|
||||
with self.doc3.source_file as f:
|
||||
self.assertEqual(
|
||||
f.read(),
|
||||
zipf.read("a space name/Title 2 - Doc 3.jpg"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="somewhere/{title}")
|
||||
def test_formatted_download_archive(self):
|
||||
|
||||
self.doc2.title = "This is Doc 2"
|
||||
self.doc2.save()
|
||||
|
||||
self.doc3.title = "Title 2 - Doc 3"
|
||||
self.doc3.save()
|
||||
print(self.doc3.archive_path)
|
||||
print(self.doc3.archive_filename)
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"follow_formatting": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response["Content-Type"], "application/zip")
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
|
||||
self.assertEqual(len(zipf.filelist), 2)
|
||||
self.assertIn("somewhere/This is Doc 2.pdf", zipf.namelist())
|
||||
self.assertIn("somewhere/Title 2 - Doc 3.pdf", zipf.namelist())
|
||||
|
||||
with self.doc2.source_file as f:
|
||||
self.assertEqual(f.read(), zipf.read("somewhere/This is Doc 2.pdf"))
|
||||
|
||||
with self.doc3.archive_file as f:
|
||||
self.assertEqual(f.read(), zipf.read("somewhere/Title 2 - Doc 3.pdf"))
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{document_type}/{title}")
|
||||
def test_formatted_download_both(self):
|
||||
|
||||
dc1 = DocumentType.objects.create(name="bill")
|
||||
dc2 = DocumentType.objects.create(name="statement")
|
||||
|
||||
self.doc2.document_type = dc1
|
||||
self.doc2.title = "This is Doc 2"
|
||||
self.doc2.save()
|
||||
|
||||
self.doc3.document_type = dc2
|
||||
self.doc3.title = "Title 2 - Doc 3"
|
||||
self.doc3.save()
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"content": "both",
|
||||
"follow_formatting": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response["Content-Type"], "application/zip")
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
|
||||
self.assertEqual(len(zipf.filelist), 3)
|
||||
self.assertIn("originals/bill/This is Doc 2.pdf", zipf.namelist())
|
||||
self.assertIn("archive/statement/Title 2 - Doc 3.pdf", zipf.namelist())
|
||||
self.assertIn("originals/statement/Title 2 - Doc 3.jpg", zipf.namelist())
|
||||
|
||||
with self.doc2.source_file as f:
|
||||
self.assertEqual(
|
||||
f.read(),
|
||||
zipf.read("originals/bill/This is Doc 2.pdf"),
|
||||
)
|
||||
|
||||
with self.doc3.archive_file as f:
|
||||
self.assertEqual(
|
||||
f.read(),
|
||||
zipf.read("archive/statement/Title 2 - Doc 3.pdf"),
|
||||
)
|
||||
|
||||
with self.doc3.source_file as f:
|
||||
self.assertEqual(
|
||||
f.read(),
|
||||
zipf.read("originals/statement/Title 2 - Doc 3.jpg"),
|
||||
)
|
||||
|
||||
|
||||
class TestApiAuth(DirectoriesMixin, APITestCase):
|
||||
def test_auth_required(self):
|
||||
|
@ -745,6 +745,7 @@ class BulkDownloadView(GenericAPIView):
|
||||
ids = serializer.validated_data.get("documents")
|
||||
compression = serializer.validated_data.get("compression")
|
||||
content = serializer.validated_data.get("content")
|
||||
follow_filename_format = serializer.validated_data.get("follow_formatting")
|
||||
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
temp = tempfile.NamedTemporaryFile(
|
||||
@ -761,7 +762,7 @@ class BulkDownloadView(GenericAPIView):
|
||||
strategy_class = ArchiveOnlyStrategy
|
||||
|
||||
with zipfile.ZipFile(temp.name, "w", compression) as zipf:
|
||||
strategy = strategy_class(zipf)
|
||||
strategy = strategy_class(zipf, follow_filename_format)
|
||||
for id in ids:
|
||||
doc = Document.objects.get(id=id)
|
||||
strategy.add_document(doc)
|
||||
|
Loading…
x
Reference in New Issue
Block a user