mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
added sanity checker management command for manual execution #534
This commit is contained in:
parent
ed478a1d73
commit
8b2965d55b
@ -1,4 +1,4 @@
|
||||
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
|
||||
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
|
||||
do
|
||||
echo "installing $command..."
|
||||
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
|
||||
|
@ -410,6 +410,34 @@ the naming scheme.
|
||||
The command takes no arguments and processes all your documents at once.
|
||||
|
||||
|
||||
.. _utilities-sanity-checker:
|
||||
|
||||
Sanity checker
|
||||
==============
|
||||
|
||||
Paperless has a built-in sanity checker that inspects your document collection for issues.
|
||||
|
||||
The issues detected by the sanity checker are as follows:
|
||||
|
||||
* Missing original files.
|
||||
* Missing archive files.
|
||||
* Inaccessible original files due to improper permissions.
|
||||
* Inaccessible archive files due to improper permissions.
|
||||
* Corrupted original documents, detected by comparing their checksum against what is stored in the database.
|
||||
* Corrupted archive documents, detected by comparing their checksum against what is stored in the database.
|
||||
* Missing thumbnails.
|
||||
* Inaccessible thumbnails due to improper permissions.
|
||||
* Documents without any content (warning).
|
||||
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
|
||||
|
||||
|
||||
.. code::
|
||||
|
||||
document_sanity_checker
|
||||
|
||||
The command takes no arguments. Depending on the size of your document archive, this may take some time.
|
||||
|
||||
|
||||
Fetching e-mail
|
||||
===============
|
||||
|
||||
|
@ -10,6 +10,8 @@ paperless-ng 1.1.2
|
||||
|
||||
* Always show top left corner of thumbnails, even for extra wide documents.
|
||||
|
||||
* Added a management command for executing the sanity checker directly. See :ref:`utilities-sanity-checker`.
|
||||
|
||||
paperless-ng 1.1.1
|
||||
##################
|
||||
|
||||
|
27
src/documents/management/commands/document_sanity_checker.py
Normal file
27
src/documents/management/commands/document_sanity_checker.py
Normal file
@ -0,0 +1,27 @@
|
||||
import logging
|
||||
from django.core.management.base import BaseCommand
|
||||
from documents.sanity_checker import check_sanity, SanityError, SanityWarning
|
||||
|
||||
# Module-level logger for this management command.
logger = logging.getLogger("paperless.management.sanity_checker")
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that runs the document sanity checker manually.

    Every message returned by ``check_sanity`` is written to the module
    logger at a level matching the message type.
    """

    # A plain literal is used on purpose: post-processing the text with
    # ``str.replace`` on whitespace risks stripping the spaces between words.
    help = "This command checks your document archive for issues."

    def handle(self, *args, **options):
        """Run the sanity check and log its outcome.

        Logs one informational line when the checker returns no messages.
        Otherwise each message is logged as an error (``SanityError``),
        as a warning (``SanityWarning``), or as an informational line for
        any other message type.
        """
        # progress=True asks the checker to report progress while it runs.
        messages = check_sanity(progress=True)

        if not messages:
            logger.info("No issues found.")
            return

        for msg in messages:
            # isinstance() rather than an exact type comparison, so that
            # subclasses of the message types keep their severity.
            if isinstance(msg, SanityError):
                logger.error(str(msg))
            elif isinstance(msg, SanityWarning):
                logger.warning(str(msg))
            else:
                logger.info(str(msg))
|
@ -2,6 +2,7 @@ import hashlib
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from tqdm import tqdm
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
@ -38,7 +39,7 @@ class SanityFailedError(Exception):
|
||||
f"{message_string}\n\n===============\n\n")
|
||||
|
||||
|
||||
def check_sanity():
|
||||
def check_sanity(progress=False):
|
||||
messages = []
|
||||
|
||||
present_files = []
|
||||
@ -50,7 +51,12 @@ def check_sanity():
|
||||
if lockfile in present_files:
|
||||
present_files.remove(lockfile)
|
||||
|
||||
for doc in Document.objects.all():
|
||||
if progress:
|
||||
docs = tqdm(Document.objects.all())
|
||||
else:
|
||||
docs = Document.objects.all()
|
||||
|
||||
for doc in docs:
|
||||
# Check sanity of the thumbnail
|
||||
if not os.path.isfile(doc.thumbnail_path):
|
||||
messages.append(SanityError(
|
||||
|
@ -65,6 +65,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(doc1.archive_filename, "document.pdf")
|
||||
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
@ -154,3 +155,37 @@ class TestCreateClassifier(TestCase):
|
||||
call_command("document_create_classifier")
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
class TestSanityChecker(DirectoriesMixin, TestCase):
    """Tests for the ``document_sanity_checker`` management command."""

    def test_no_errors(self):
        # No documents are created here, so the command is expected to
        # emit exactly one log record.
        with self.assertLogs() as logs:
            call_command("document_sanity_checker")

        self.assertEqual(len(logs.output), 1)
        self.assertIn("No issues found.", logs.output[0])

    def test_warnings(self):
        with mock.patch(
            "documents.management.commands.document_sanity_checker.logger.error"
        ) as error, mock.patch(
            "documents.management.commands.document_sanity_checker.logger.warning"
        ) as warning:
            # The document is created without content; the checksum is the
            # MD5 digest of empty input, presumably matching the empty
            # files touched below so that only a warning is produced.
            document = Document.objects.create(
                title="test",
                filename="test.pdf",
                checksum="d41d8cd98f00b204e9800998ecf8427e",
            )
            Path(document.source_path).touch()
            Path(document.thumbnail_path).touch()

            call_command("document_sanity_checker")

            error.assert_not_called()
            warning.assert_called()

    def test_errors(self):
        with mock.patch(
            "documents.management.commands.document_sanity_checker.logger.error"
        ) as error, mock.patch(
            "documents.management.commands.document_sanity_checker.logger.warning"
        ) as warning:
            # The document has content, but "abc" presumably cannot match
            # the checksum of the touched file, so an error is expected.
            document = Document.objects.create(
                title="test",
                content="test",
                filename="test.pdf",
                checksum="abc",
            )
            Path(document.source_path).touch()
            Path(document.thumbnail_path).touch()

            call_command("document_sanity_checker")

            warning.assert_not_called()
            error.assert_called()
|
||||
|
Loading…
x
Reference in New Issue
Block a user