Enhancement: prune audit logs and management command (#8416)

This commit is contained in:
shamoon 2024-12-03 11:28:27 -08:00 committed by GitHub
parent 51c339d1b7
commit 7d182ab894
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 73 additions and 1 deletions

View File

@ -15,7 +15,8 @@ for command in decrypt_documents \
document_sanity_checker \
document_fuzzy_match \
manage_superuser \
convert_mariadb_uuid;
convert_mariadb_uuid \
prune_audit_logs;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

View File

@ -624,3 +624,12 @@ document_fuzzy_match [--ratio] [--processes N]
If providing the `--delete` option, it is highly recommended to have a backup.
While every effort has been taken to ensure proper operation, there is always the
chance of deletion of a file you want to keep.
### Prune history (audit log) entries {#prune-history}
If the audit log is enabled, Paperless-ngx keeps an audit log of all changes made to documents. Functionality to automatically remove entries for deleted documents was added, but
entries created prior to this are not removed. This command allows you to prune the audit log of entries that are no longer needed.
```shell
prune_audit_logs
```

View File

@ -0,0 +1,39 @@
from auditlog.models import LogEntry
from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm
from documents.management.commands.mixins import ProgressBarMixin
class Command(BaseCommand, ProgressBarMixin):
    """
    Prune the audit logs of objects that no longer exist.

    Each audit log entry is checked against the model it refers to; entries
    whose referenced object can no longer be found are deleted.
    """

    help = "Prunes the audit logs of objects that no longer exist."

    def add_arguments(self, parser):
        """Add the command line arguments contributed by ProgressBarMixin."""
        self.add_argument_progress_bar_mixin(parser)

    def handle(self, **options):
        """
        Delete every audit log entry whose referenced object is gone.

        All deletions happen inside a single transaction, so an unexpected
        error leaves the audit log untouched. Entries without an object id,
        and entries whose content type no longer maps to an installed model,
        are kept because their target cannot be checked.
        """
        # NOTE(review): presumably this is what sets self.no_progress_bar
        # from the parsed options - confirm against ProgressBarMixin.
        self.handle_progress_bar_mixin(**options)
        with transaction.atomic():
            # Fetch the content type together with each entry instead of
            # issuing one additional query per entry inside the loop.
            log_entries = LogEntry.objects.select_related("content_type")
            for log_entry in tqdm(log_entries, disable=self.no_progress_bar):
                model_class = log_entry.content_type.model_class()
                if model_class is None:
                    # Stale content type: the model is not installed anymore,
                    # so there is no manager to query. Skip the entry rather
                    # than crash and roll back everything pruned so far.
                    continue
                # use global_objects for SoftDeleteModel
                objects = (
                    model_class.global_objects
                    if hasattr(model_class, "global_objects")
                    else model_class.objects
                )
                if (
                    log_entry.object_id
                    and not objects.filter(pk=log_entry.object_id).exists()
                ):
                    log_entry.delete()
                    tqdm.write(
                        self.style.NOTICE(
                            f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
                        ),
                    )

View File

@ -10,6 +10,7 @@ import tqdm
from celery import Task
from celery import shared_task
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.db import models
from django.db import transaction
from django.db.models.signals import post_save
@ -332,9 +333,17 @@ def empty_trash(doc_ids=None):
)
try:
deleted_document_ids = documents.values_list("id", flat=True)
# Temporarily connect the cleanup handler
models.signals.post_delete.connect(cleanup_document_deletion, sender=Document)
documents.delete() # this is effectively a hard delete
if settings.AUDIT_LOG_ENABLED:
# Delete the audit log entries for documents that dont exist anymore
LogEntry.objects.filter(
content_type=ContentType.objects.get_for_model(Document),
object_id__in=deleted_document_ids,
).delete()
except Exception as e: # pragma: no cover
logger.exception(f"Error while emptying trash: {e}")
finally:

View File

@ -7,6 +7,8 @@ from io import StringIO
from pathlib import Path
from unittest import mock
from auditlog.models import LogEntry
from django.contrib.contenttypes.models import ContentType
from django.core.management import call_command
from django.test import TestCase
from django.test import override_settings
@ -252,3 +254,15 @@ class TestConvertMariaDBUUID(TestCase):
m.assert_called_once()
self.assertIn("Successfully converted", stdout.getvalue())
class TestPruneAuditLogs(TestCase):
    """Tests for the ``prune_audit_logs`` management command."""

    def test_prune_audit_logs(self):
        """
        GIVEN:
            - An audit log entry for a Document with id 1
            - No Document is created by this test
        WHEN:
            - The prune_audit_logs command is called
        THEN:
            - No audit log entries remain
        """
        document_content_type = ContentType.objects.get_for_model(Document)
        LogEntry.objects.create(
            content_type=document_content_type,
            object_id=1,
            action=LogEntry.Action.CREATE,
        )

        call_command("prune_audit_logs")

        remaining_entries = LogEntry.objects.count()
        self.assertEqual(remaining_entries, 0)