added sanity checker management command for manual execution #534

This commit is contained in:
jonaswinkler 2021-02-13 16:39:29 +01:00
parent ed478a1d73
commit 8b2965d55b
6 changed files with 101 additions and 3 deletions

View File

@ -1,4 +1,4 @@
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command

View File

@ -410,6 +410,34 @@ the naming scheme.
The command takes no arguments and processes all your documents at once.
.. _utilities-sanity-checker:
Sanity checker
==============
Paperless has a built-in sanity checker that inspects your document collection for issues.
The issues detected by the sanity checker are as follows:
* Missing original files.
* Missing archive files.
* Inaccessible original files due to improper permissions.
* Inaccessible archive files due to improper permissions.
* Corrupted original documents, detected by comparing their checksum against what is stored in the database.
* Corrupted archive documents, detected by comparing their checksum against what is stored in the database.
* Missing thumbnails.
* Inaccessible thumbnails due to improper permissions.
* Documents without any content (warning).
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
.. code::
document_sanity_checker
The command takes no arguments. Depending on the size of your document archive, this may take some time.
Fetching e-mail
===============

View File

@ -10,6 +10,8 @@ paperless-ng 1.1.2
* Always show top left corner of thumbnails, even for extra wide documents.
* Added a management command for executing the sanity checker directly. See :ref:`utilities-sanity-checker`.
paperless-ng 1.1.1
##################

View File

@ -0,0 +1,27 @@
import logging
from django.core.management.base import BaseCommand
from documents.sanity_checker import check_sanity, SanityError, SanityWarning
logger = logging.getLogger("paperless.management.sanity_checker")
class Command(BaseCommand):
    """Management command that runs the sanity checker and logs its findings."""

    # Plain literal: stripping characters from a triple-quoted block is fragile
    # and can remove the spaces between words as well as the indentation.
    help = "This command checks your document archive for issues."

    def handle(self, *args, **options):
        """Run the sanity checker with progress output and log every message.

        ``SanityError`` messages are logged at ERROR level, ``SanityWarning``
        messages at WARNING level, and anything else at INFO level. When the
        checker reports nothing, a single "No issues found." line is logged
        at INFO level instead.
        """
        messages = check_sanity(progress=True)

        if not messages:
            logger.info("No issues found.")
            return

        for msg in messages:
            # isinstance (rather than an exact type comparison) also routes
            # subclasses of the message types to the matching log level.
            if isinstance(msg, SanityError):
                logger.error(str(msg))
            elif isinstance(msg, SanityWarning):
                logger.warning(str(msg))
            else:
                logger.info(str(msg))

View File

@ -2,6 +2,7 @@ import hashlib
import os
from django.conf import settings
from tqdm import tqdm
from documents.models import Document
@ -38,7 +39,7 @@ class SanityFailedError(Exception):
f"{message_string}\n\n===============\n\n")
def check_sanity():
def check_sanity(progress=False):
messages = []
present_files = []
@ -50,7 +51,12 @@ def check_sanity():
if lockfile in present_files:
present_files.remove(lockfile)
for doc in Document.objects.all():
if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()
for doc in docs:
# Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path):
messages.append(SanityError(

View File

@ -65,6 +65,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
self.assertEqual(doc1.archive_filename, "document.pdf")
self.assertEqual(doc2.archive_filename, "document_01.pdf")
class TestDecryptDocuments(TestCase):
@override_settings(
@ -154,3 +155,37 @@ class TestCreateClassifier(TestCase):
call_command("document_create_classifier")
m.assert_called_once()
class TestSanityChecker(DirectoriesMixin, TestCase):
    """Exercise the ``document_sanity_checker`` management command."""

    def test_no_errors(self):
        """Without any documents created, exactly one log line is emitted."""
        with self.assertLogs() as captured:
            call_command("document_sanity_checker")

        log_lines = captured.output
        self.assertEqual(len(log_lines), 1)
        self.assertIn("No issues found.", log_lines[0])

    @mock.patch("documents.management.commands.document_sanity_checker.logger.warning")
    @mock.patch("documents.management.commands.document_sanity_checker.logger.error")
    def test_warnings(self, log_error, log_warning):
        """A document created without content yields warnings but no errors."""
        # The checksum is the MD5 digest of empty input, which matches the
        # empty files touched below.
        doc = Document.objects.create(
            title="test",
            filename="test.pdf",
            checksum="d41d8cd98f00b204e9800998ecf8427e",
        )
        for file_path in (doc.source_path, doc.thumbnail_path):
            Path(file_path).touch()

        call_command("document_sanity_checker")

        log_error.assert_not_called()
        log_warning.assert_called()

    @mock.patch("documents.management.commands.document_sanity_checker.logger.warning")
    @mock.patch("documents.management.commands.document_sanity_checker.logger.error")
    def test_errors(self, log_error, log_warning):
        """A document whose stored checksum is "abc" yields errors but no warnings."""
        doc = Document.objects.create(
            title="test",
            content="test",
            filename="test.pdf",
            checksum="abc",
        )
        for file_path in (doc.source_path, doc.thumbnail_path):
            Path(file_path).touch()

        call_command("document_sanity_checker")

        log_warning.assert_not_called()
        log_error.assert_called()