class Command(BaseCommand, ProgressBarMixin):
    """
    Prune the audit logs of objects that no longer exist.

    Every audit log entry is resolved to the model named by its content type
    and is deleted when no row with the recorded ``object_id`` exists anymore.
    Entries without an ``object_id`` and entries whose content type no longer
    maps to an installed model are left untouched, because their target cannot
    be checked.
    """

    help = "Prunes the audit logs of objects that no longer exist."

    def add_arguments(self, parser):
        """Register the command line arguments contributed by ProgressBarMixin."""
        self.add_argument_progress_bar_mixin(parser)

    def handle(self, **options):
        """
        Delete all audit log entries whose target object is gone.

        The whole run happens inside one transaction, so an exception part-way
        through rolls back every deletion made so far.
        """
        self.handle_progress_bar_mixin(**options)
        with transaction.atomic():
            # Load the content type together with each entry rather than
            # issuing one additional query per entry inside the loop.
            log_entries = LogEntry.objects.select_related("content_type")
            for log_entry in tqdm(log_entries, disable=self.no_progress_bar):
                if not log_entry.object_id:
                    # Nothing to look up for entries without an object id.
                    continue

                model_class = log_entry.content_type.model_class()
                if model_class is None:
                    # Stale content type: the model is not installed anymore,
                    # so there is no manager to query. Keep the entry instead
                    # of crashing on the attribute access below.
                    continue

                # use global_objects for SoftDeleteModel
                objects = (
                    model_class.global_objects
                    if hasattr(model_class, "global_objects")
                    else model_class.objects
                )
                if objects.filter(pk=log_entry.object_id).exists():
                    continue

                log_entry.delete()
                tqdm.write(
                    self.style.NOTICE(
                        f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
                    ),
                )
class TestPruneAuditLogs(TestCase):
    """Tests for the ``prune_audit_logs`` management command."""

    def test_prune_audit_logs(self):
        """
        GIVEN:
            - One audit log entry for a Document id that this test never creates
        WHEN:
            - The prune_audit_logs command is called
        THEN:
            - No audit log entries are left
        """
        document_content_type = ContentType.objects.get_for_model(Document)
        LogEntry.objects.create(
            content_type=document_content_type,
            object_id=1,
            action=LogEntry.Action.CREATE,
        )

        call_command("prune_audit_logs")

        remaining_entries = LogEntry.objects.count()
        self.assertEqual(remaining_entries, 0)