From 8b2965d55b160d5fe2b889ddc0f5b8591e39230c Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sat, 13 Feb 2021 16:39:29 +0100 Subject: [PATCH] added sanity checker management command for manual execution #534 --- docker/install_management_commands.sh | 2 +- docs/administration.rst | 28 +++++++++++++++ docs/changelog.rst | 2 ++ .../commands/document_sanity_checker.py | 27 ++++++++++++++ src/documents/sanity_checker.py | 10 ++++-- src/documents/tests/test_management.py | 35 +++++++++++++++++++ 6 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 src/documents/management/commands/document_sanity_checker.py diff --git a/docker/install_management_commands.sh b/docker/install_management_commands.sh index 6aeca635d..17fb8f277 100755 --- a/docker/install_management_commands.sh +++ b/docker/install_management_commands.sh @@ -1,4 +1,4 @@ -for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails; +for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker; do echo "installing $command..." sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command diff --git a/docs/administration.rst b/docs/administration.rst index 5773a80cc..c91f501bd 100644 --- a/docs/administration.rst +++ b/docs/administration.rst @@ -410,6 +410,34 @@ the naming scheme. The command takes no arguments and processes all your documents at once. +.. _utilities-sanity-checker: + +Sanity checker +============== + +Paperless has a built-in sanity checker that inspects your document collection for issues. + +The issues detected by the sanity checker are as follows: + +* Missing original files. +* Missing archive files. +* Inaccessible original files due to improper permissions. 
+* Inaccessible archive files due to improper permissions. +* Corrupted original documents by comparing their checksum against what is stored in the database. +* Corrupted archive documents by comparing their checksum against what is stored in the database. +* Missing thumbnails. +* Inaccessible thumbnails due to improper permissions. +* Documents without any content (warning). +* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless. + + +.. code:: + + document_sanity_checker + +The command takes no arguments. Depending on the size of your document archive, this may take some time. + + Fetching e-mail =============== diff --git a/docs/changelog.rst b/docs/changelog.rst index 47dd1bc02..f15c73605 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,8 @@ paperless-ng 1.1.2 * Always show top left corner of thumbnails, even for extra wide documents. +* Added a management command for executing the sanity checker directly. See :ref:`utilities-sanity-checker`. + paperless-ng 1.1.1 ################## diff --git a/src/documents/management/commands/document_sanity_checker.py b/src/documents/management/commands/document_sanity_checker.py new file mode 100644 index 000000000..b5982ed90 --- /dev/null +++ b/src/documents/management/commands/document_sanity_checker.py @@ -0,0 +1,27 @@ +import logging +from django.core.management.base import BaseCommand +from documents.sanity_checker import check_sanity, SanityError, SanityWarning + +logger = logging.getLogger("paperless.management.sanity_checker") + + +class Command(BaseCommand): + + help = """ + This command checks your document archive for issues. 
+ """.replace(" ", "") + + def handle(self, *args, **options): + + messages = check_sanity(progress=True) + + if len(messages) == 0: + logger.info("No issues found.") + else: + for msg in messages: + if type(msg) == SanityError: + logger.error(str(msg)) + elif type(msg) == SanityWarning: + logger.warning(str(msg)) + else: + logger.info((str(msg))) diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index 2d30a59d7..f42062154 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -2,6 +2,7 @@ import hashlib import os from django.conf import settings +from tqdm import tqdm from documents.models import Document @@ -38,7 +39,7 @@ class SanityFailedError(Exception): f"{message_string}\n\n===============\n\n") -def check_sanity(): +def check_sanity(progress=False): messages = [] present_files = [] @@ -50,7 +51,12 @@ def check_sanity(): if lockfile in present_files: present_files.remove(lockfile) - for doc in Document.objects.all(): + if progress: + docs = tqdm(Document.objects.all()) + else: + docs = Document.objects.all() + + for doc in docs: # Check sanity of the thumbnail if not os.path.isfile(doc.thumbnail_path): messages.append(SanityError( diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py index 1a550a4b4..56710c6e5 100644 --- a/src/documents/tests/test_management.py +++ b/src/documents/tests/test_management.py @@ -65,6 +65,7 @@ class TestArchiver(DirectoriesMixin, TestCase): self.assertEqual(doc1.archive_filename, "document.pdf") self.assertEqual(doc2.archive_filename, "document_01.pdf") + class TestDecryptDocuments(TestCase): @override_settings( @@ -154,3 +155,37 @@ class TestCreateClassifier(TestCase): call_command("document_create_classifier") m.assert_called_once() + + +class TestSanityChecker(DirectoriesMixin, TestCase): + + def test_no_errors(self): + with self.assertLogs() as capture: + call_command("document_sanity_checker") + + 
self.assertEqual(len(capture.output), 1) + self.assertIn("No issues found.", capture.output[0]) + + @mock.patch("documents.management.commands.document_sanity_checker.logger.warning") + @mock.patch("documents.management.commands.document_sanity_checker.logger.error") + def test_warnings(self, error, warning): + doc = Document.objects.create(title="test", filename="test.pdf", checksum="d41d8cd98f00b204e9800998ecf8427e") + Path(doc.source_path).touch() + Path(doc.thumbnail_path).touch() + + call_command("document_sanity_checker") + + error.assert_not_called() + warning.assert_called() + + @mock.patch("documents.management.commands.document_sanity_checker.logger.warning") + @mock.patch("documents.management.commands.document_sanity_checker.logger.error") + def test_errors(self, error, warning): + doc = Document.objects.create(title="test", content="test", filename="test.pdf", checksum="abc") + Path(doc.source_path).touch() + Path(doc.thumbnail_path).touch() + + call_command("document_sanity_checker") + + warning.assert_not_called() + error.assert_called()