diff --git a/ansible/defaults/main.yml b/ansible/defaults/main.yml index 93f48e730..c5a34e87b 100644 --- a/ansible/defaults/main.yml +++ b/ansible/defaults/main.yml @@ -17,6 +17,7 @@ paperlessng_db_sslmode: prefer paperlessng_directory: /opt/paperless-ng paperlessng_consumption_dir: "{{ paperlessng_directory }}/consumption" paperlessng_data_dir: "{{ paperlessng_directory }}/data" +paperlessng_trash_dir: paperlessng_media_root: "{{ paperlessng_directory }}/media" paperlessng_staticdir: "{{ paperlessng_directory }}/static" paperlessng_filename_format: diff --git a/ansible/tasks/main.yml b/ansible/tasks/main.yml index c404e2889..a38c3b134 100644 --- a/ansible/tasks/main.yml +++ b/ansible/tasks/main.yml @@ -252,9 +252,11 @@ owner: "{{ paperlessng_system_user }}" group: "{{ paperlessng_system_group }}" mode: "750" + when: item with_items: - "{{ paperlessng_consumption_dir }}" - "{{ paperlessng_data_dir }}" + - "{{ paperlessng_trash_dir }}" - "{{ paperlessng_media_root }}" - "{{ paperlessng_staticdir }}" @@ -277,6 +279,8 @@ line: "PAPERLESS_CONSUMPTION_DIR={{ paperlessng_consumption_dir }}" - regexp: PAPERLESS_DATA_DIR line: "PAPERLESS_DATA_DIR={{ paperlessng_data_dir }}" + - regexp: PAPERLESS_TRASH_DIR + line: "PAPERLESS_TRASH_DIR={{ paperlessng_trash_dir }}" - regexp: PAPERLESS_MEDIA_ROOT line: "PAPERLESS_MEDIA_ROOT={{ paperlessng_media_root }}" - regexp: PAPERLESS_STATICDIR diff --git a/docs/configuration.rst b/docs/configuration.rst index 312b3b0ab..4813ad932 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -80,6 +80,15 @@ PAPERLESS_DATA_DIR= Defaults to "../data/", relative to the "src" directory. +PAPERLESS_TRASH_DIR= + Instead of being removed, deleted documents are moved to this directory. + + This must be writeable by the user running paperless. When running inside + docker, ensure that this path is within a permanent volume (such as + "../media/trash") so it won't get lost on upgrades. + + Defaults to empty (i.e. deleted documents are permanently removed). 
+ PAPERLESS_MEDIA_ROOT= This is where your documents and thumbnails are stored. diff --git a/paperless.conf.example b/paperless.conf.example index 397314e6e..bc8e76e55 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -19,6 +19,7 @@ #PAPERLESS_CONSUMPTION_DIR=../consume #PAPERLESS_DATA_DIR=../data +#PAPERLESS_TRASH_DIR= #PAPERLESS_MEDIA_ROOT=../media #PAPERLESS_STATICDIR=../static #PAPERLESS_FILENAME_FORMAT= diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index fb7df5120..39e94d025 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -225,6 +225,37 @@ def set_tags(sender, @receiver(models.signals.post_delete, sender=Document) def cleanup_document_deletion(sender, instance, using, **kwargs): with FileLock(settings.MEDIA_LOCK): + if settings.TRASH_DIR: + # Find a non-conflicting filename in case a document with the same + # name was moved to trash earlier + counter = 0 + old_filename = os.path.split(instance.source_path)[1] + (old_filebase, old_fileext) = os.path.splitext(old_filename) + + while True: + new_file_path = os.path.join( + settings.TRASH_DIR, + old_filebase + + (f"_{counter:02}" if counter else "") + + old_fileext + ) + + if os.path.exists(new_file_path): + counter += 1 + else: + break + + logger.debug( + f"Moving {instance.source_path} to trash at {new_file_path}") + try: + os.rename(instance.source_path, new_file_path) + except OSError as e: + logger.error( + f"Failed to move {instance.source_path} to trash at " + f"{new_file_path}: {e}. Skipping cleanup!" 
+ ) + return + for filename in (instance.source_path, instance.archive_path, instance.thumbnail_path): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 59af7e317..8be4b568b 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -2,6 +2,7 @@ import datetime import hashlib import os import random +import tempfile import uuid from pathlib import Path from unittest import mock @@ -154,6 +155,40 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}", TRASH_DIR=tempfile.mkdtemp()) + def test_document_delete_trash(self): + document = Document() + document.mime_type = "application/pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none.pdf") + + create_source_path_directory(document.source_path) + Path(document.source_path).touch() + + # Ensure file was moved to trash after delete + self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False) + document.delete() + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) + self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True) + self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), False) + + # Create an identical document and ensure it is trashed under a new name + document = Document() + document.mime_type = "application/pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + document.filename = generate_filename(document) + 
create_source_path_directory(document.source_path) + Path(document.source_path).touch() + document.delete() + self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), True) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_document_delete_nofile(self): document = Document() diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 1b19f1a72..24830a9e0 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -50,6 +50,7 @@ def paths_check(app_configs, **kwargs): """ return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \ + path_check("PAPERLESS_TRASH_DIR", settings.TRASH_DIR) + \ path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \ path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index c10836fb3..124f13ccb 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -57,6 +57,8 @@ THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) +TRASH_DIR = os.getenv('PAPERLESS_TRASH_DIR') + # Lock file for synchronizing changes to the MEDIA directory across multiple # threads. MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")