From 91434a5c6fee0c61dc0999f3d5c073929c0ee148 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:39:55 -0800 Subject: [PATCH] Enhancement: move and rename files when storage paths deleted, update file handling docs (#6033) --- docs/advanced_usage.md | 71 +++++++++++-------------- docs/configuration.md | 2 +- src/documents/tests/test_api_objects.py | 29 ++++++++++ src/documents/views.py | 16 ++++++ 4 files changed, 77 insertions(+), 41 deletions(-) diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index 863be639b..ec67db2a5 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -256,7 +256,8 @@ document. You will end up getting files like `0000123.pdf` in your media directory. This isn't necessarily a bad thing, because you normally don't have to access these files manually. However, if you wish to name your files differently, you can do that by adjusting the -[`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) configuration option. Paperless adds the +[`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) configuration option +or using [storage paths (see below)](#storage-paths). Paperless adds the correct file extension e.g. `.pdf`, `.jpg` automatically. This variable allows you to configure the filename (folders are allowed) @@ -289,6 +290,15 @@ will create a directory structure as follows: paperless will report your files as missing and won't be able to find them. +!!! tip + + Paperless checks the filename of a document whenever it is saved. Changing (or deleting) + a [storage path](#storage-paths) will automatically be reflected in the file system. However, + when changing `PAPERLESS_FILENAME_FORMAT` you will need to manually run the + [`document renamer`](administration.md#renamer) to move any existing documents. 
+ +#### Placeholders + Paperless provides the following placeholders within filenames: - `{asn}`: The archive serial number of the document, or "none". @@ -321,6 +331,12 @@ Paperless provides the following placeholders within filenames: - `{original_name}`: Document original filename, minus the extension, if any, or "none" - `{doc_pk}`: The paperless identifier (primary key) for the document. +!!! warning + + When using file name placeholders, in particular when using `{tag_list}`, + you may run into the limits of your operating system's maximum path lengths. + In that case, files will retain the previous path instead and the issue will be logged. + Paperless will try to conserve the information from your database as much as possible. However, some characters that you can use in document titles and correspondent names (such as `: \ /` and a couple more) are @@ -331,34 +347,12 @@ paperless will automatically append `_01`, `_02`, etc to the filename. This happens if all the placeholders in a filename evaluate to the same value. -!!! tip - - You can affect how empty placeholders are treated by changing the - following setting to `true`. - - ``` - PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True - ``` - - Doing this results in all empty placeholders resolving to "" instead - of "none" as stated above. Spaces before empty placeholders are - removed as well, empty directories are omitted. - -!!! tip - - Paperless checks the filename of a document whenever it is saved. - Therefore, you need to update the filenames of your documents and move - them after altering this setting by invoking the - [`document renamer`](administration.md#renamer). - -!!! warning - - Make absolutely sure you get the spelling of the placeholders right, or - else paperless will use the default naming scheme instead. +If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`, +paperless will fall back to using the default naming scheme instead. !!! 
caution - As of now, you could totally tell paperless to store your files anywhere + As of now, you could potentially tell paperless to store your files anywhere outside the media directory by setting ``` @@ -366,28 +360,25 @@ value. ``` However, keep in mind that inside docker, if files get stored outside of - the predefined volumes, they will be lost after a restart of paperless. + the predefined volumes, they will be lost after a restart. -!!! warning +##### Empty placeholders - When file naming handling, in particular when using `{tag_list}`, - you may run into the limits of your operating system's maximum - path lengths. Files will retain the previous path instead and - the issue logged. +You can affect how empty placeholders are treated by changing the +[`PAPERLESS_FILENAME_FORMAT_REMOVE_NONE`](configuration.md#PAPERLESS_FILENAME_FORMAT_REMOVE_NONE) setting. -## Storage paths +Enabling this results in all empty placeholders resolving to "" instead of "none" as stated above. Spaces +before empty placeholders are removed as well, empty directories are omitted. -One of the best things in Paperless is that you can not only access the -documents via the web interface, but also via the file system. +### Storage paths -When a single storage layout is not sufficient for your use case, -storage paths come to the rescue. Storage paths allow you to configure -more precisely where each document is stored in the file system. +When a single storage layout is not sufficient for your use case, storage paths allow for more complex +structure to set precisely where each document is stored in the file system. 
- Each storage path is a [`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) and follows the rules described above -- Each document is assigned a storage path using the matching - algorithms described above, but can be overwritten at any time +- Each document is assigned a storage path using the matching algorithms described above, but can be + overwritten at any time For example, you could define the following two storage paths: diff --git a/docs/configuration.md b/docs/configuration.md index 38ffcf465..5182d018b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -264,7 +264,7 @@ directory. See [File name handling](advanced_usage.md#file-name-handling) for de : Tells paperless to replace placeholders in `PAPERLESS_FILENAME_FORMAT` that would resolve to 'none' to be omitted from the resulting filename. This also holds -true for directory names. See [File name handling](advanced_usage.md#file-name-handling) for +true for directory names. See [File name handling](advanced_usage.md#empty-placeholders) for details. Defaults to `false` which disables this feature. 
diff --git a/src/documents/tests/test_api_objects.py b/src/documents/tests/test_api_objects.py index 9a0ccd598..65f379261 100644 --- a/src/documents/tests/test_api_objects.py +++ b/src/documents/tests/test_api_objects.py @@ -224,6 +224,35 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase): self.assertCountEqual([document.pk], args[0]) + @mock.patch("documents.bulk_edit.bulk_update_documents.delay") + def test_api_delete_storage_path(self, bulk_update_mock): + """ + GIVEN: + - API request to delete a storage + WHEN: + - API is called + THEN: + - Documents using the storage path are updated + """ + document = Document.objects.create( + mime_type="application/pdf", + storage_path=self.sp1, + ) + response = self.client.delete( + f"{self.ENDPOINT}{self.sp1.pk}/", + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + # sp with no documents + sp2 = StoragePath.objects.create(name="sp2", path="Something2/{checksum}") + response = self.client.delete( + f"{self.ENDPOINT}{sp2.pk}/", + ) + self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + + # only called once + bulk_update_mock.assert_called_once_with([document.pk]) + class TestBulkEditObjects(APITestCase): # See test_api_permissions.py for bulk tests on permissions diff --git a/src/documents/views.py b/src/documents/views.py index 006ca0822..99db0a5d8 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -1211,6 +1211,22 @@ class StoragePathViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin): filterset_class = StoragePathFilterSet ordering_fields = ("name", "path", "matching_algorithm", "match", "document_count") + def destroy(self, request, *args, **kwargs): + """ + When a storage path is deleted, see if documents + using it require a rename/move + """ + instance = self.get_object() + doc_ids = [doc.id for doc in instance.documents.all()] + + # perform the deletion so renaming/moving can happen + response = super().destroy(request, *args, **kwargs) + + if 
len(doc_ids): + bulk_edit.bulk_update_documents.delay(doc_ids) + + return response + class UiSettingsView(GenericAPIView): queryset = UiSettings.objects.all()