mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
added sanity checker management command for manual execution #534
This commit is contained in:
parent
ed478a1d73
commit
8b2965d55b
@ -1,4 +1,4 @@
|
|||||||
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
|
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
|
||||||
do
|
do
|
||||||
echo "installing $command..."
|
echo "installing $command..."
|
||||||
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
|
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
|
||||||
|
@ -410,6 +410,34 @@ the naming scheme.
|
|||||||
The command takes no arguments and processes all your documents at once.
|
The command takes no arguments and processes all your documents at once.
|
||||||
|
|
||||||
|
|
||||||
|
.. _utilities-sanity-checker:
|
||||||
|
|
||||||
|
Sanity checker
|
||||||
|
==============
|
||||||
|
|
||||||
|
Paperless has a built-in sanity checker that inspects your document collection for issues.
|
||||||
|
|
||||||
|
The issues detected by the sanity checker are as follows:
|
||||||
|
|
||||||
|
* Missing original files.
|
||||||
|
* Missing archive files.
|
||||||
|
* Inaccessible original files due to improper permissions.
|
||||||
|
* Inaccessible archive files due to improper permissions.
|
||||||
|
* Corrupted original documents by comparing their checksum against what is stored in the database.
|
||||||
|
* Corrupted archive documents by comparing their checksum against what is stored in the database.
|
||||||
|
* Missing thumbnails.
|
||||||
|
* Inaccessible thumbnails due to improper permissions.
|
||||||
|
* Documents without any content (warning).
|
||||||
|
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
|
||||||
|
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
|
||||||
|
document_sanity_checker
|
||||||
|
|
||||||
|
The command takes no arguments. Depending on the size of your document archive, this may take some time.
|
||||||
|
|
||||||
|
|
||||||
Fetching e-mail
|
Fetching e-mail
|
||||||
===============
|
===============
|
||||||
|
|
||||||
|
@ -10,6 +10,8 @@ paperless-ng 1.1.2
|
|||||||
|
|
||||||
* Always show top left corner of thumbnails, even for extra wide documents.
|
* Always show top left corner of thumbnails, even for extra wide documents.
|
||||||
|
|
||||||
|
* Added a management command for executing the sanity checker directly. See :ref:`utilities-sanity-checker`.
|
||||||
|
|
||||||
paperless-ng 1.1.1
|
paperless-ng 1.1.1
|
||||||
##################
|
##################
|
||||||
|
|
||||||
|
27
src/documents/management/commands/document_sanity_checker.py
Normal file
27
src/documents/management/commands/document_sanity_checker.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import logging
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from documents.sanity_checker import check_sanity, SanityError, SanityWarning
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.management.sanity_checker")
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that runs the sanity checker and logs its findings.

    Every message returned by ``check_sanity`` is written to the
    ``paperless.management.sanity_checker`` logger at a level matching
    its type.
    """

    # A plain literal: stripping spaces from an indented triple-quoted
    # string would also remove the spaces between the words.
    help = "This command checks your document archive for issues."

    def handle(self, *args, **options):
        """Run the sanity check with a progress bar and log each message.

        Logs a single "No issues found." info line when the checker
        returns no messages. Otherwise errors are logged at ERROR level,
        warnings at WARNING level and anything else at INFO level.
        """
        messages = check_sanity(progress=True)

        if not messages:
            logger.info("No issues found.")
            return

        for msg in messages:
            # NOTE(review): isinstance assumes SanityError and
            # SanityWarning are not subclasses of one another - confirm
            # against documents.sanity_checker.
            if isinstance(msg, SanityError):
                logger.error(str(msg))
            elif isinstance(msg, SanityWarning):
                logger.warning(str(msg))
            else:
                logger.info(str(msg))
|
@ -2,6 +2,7 @@ import hashlib
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
@ -38,7 +39,7 @@ class SanityFailedError(Exception):
|
|||||||
f"{message_string}\n\n===============\n\n")
|
f"{message_string}\n\n===============\n\n")
|
||||||
|
|
||||||
|
|
||||||
def check_sanity():
|
def check_sanity(progress=False):
|
||||||
messages = []
|
messages = []
|
||||||
|
|
||||||
present_files = []
|
present_files = []
|
||||||
@ -50,7 +51,12 @@ def check_sanity():
|
|||||||
if lockfile in present_files:
|
if lockfile in present_files:
|
||||||
present_files.remove(lockfile)
|
present_files.remove(lockfile)
|
||||||
|
|
||||||
for doc in Document.objects.all():
|
if progress:
|
||||||
|
docs = tqdm(Document.objects.all())
|
||||||
|
else:
|
||||||
|
docs = Document.objects.all()
|
||||||
|
|
||||||
|
for doc in docs:
|
||||||
# Check sanity of the thumbnail
|
# Check sanity of the thumbnail
|
||||||
if not os.path.isfile(doc.thumbnail_path):
|
if not os.path.isfile(doc.thumbnail_path):
|
||||||
messages.append(SanityError(
|
messages.append(SanityError(
|
||||||
|
@ -65,6 +65,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(doc1.archive_filename, "document.pdf")
|
self.assertEqual(doc1.archive_filename, "document.pdf")
|
||||||
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
||||||
|
|
||||||
|
|
||||||
class TestDecryptDocuments(TestCase):
|
class TestDecryptDocuments(TestCase):
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
@ -154,3 +155,37 @@ class TestCreateClassifier(TestCase):
|
|||||||
call_command("document_create_classifier")
|
call_command("document_create_classifier")
|
||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
class TestSanityChecker(DirectoriesMixin, TestCase):
    """Tests for the ``document_sanity_checker`` management command."""

    def test_no_errors(self):
        """With no documents, exactly one "No issues found." line is logged."""
        with self.assertLogs() as captured_logs:
            call_command("document_sanity_checker")

        output = captured_logs.output
        self.assertEqual(len(output), 1)
        self.assertIn("No issues found.", output[0])

    def test_warnings(self):
        """A document created without content logs a warning and no error."""
        logger_path = "documents.management.commands.document_sanity_checker.logger"
        with mock.patch(f"{logger_path}.warning") as warning, \
                mock.patch(f"{logger_path}.error") as error:
            # The checksum is the MD5 of empty input, matching the empty
            # files created by touch() below.
            document = Document.objects.create(
                title="test",
                filename="test.pdf",
                checksum="d41d8cd98f00b204e9800998ecf8427e",
            )
            Path(document.source_path).touch()
            Path(document.thumbnail_path).touch()

            call_command("document_sanity_checker")

            error.assert_not_called()
            warning.assert_called()

    def test_errors(self):
        """A document whose checksum is "abc" logs an error and no warning."""
        logger_path = "documents.management.commands.document_sanity_checker.logger"
        with mock.patch(f"{logger_path}.warning") as warning, \
                mock.patch(f"{logger_path}.error") as error:
            document = Document.objects.create(
                title="test",
                content="test",
                filename="test.pdf",
                checksum="abc",
            )
            Path(document.source_path).touch()
            Path(document.thumbnail_path).touch()

            call_command("document_sanity_checker")

            warning.assert_not_called()
            error.assert_called()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user