mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-27 10:59:27 -05:00
273 lines
9.1 KiB
Python
273 lines
9.1 KiB
Python
import shutil
|
|
from datetime import timedelta
|
|
from pathlib import Path
|
|
from unittest import mock
|
|
|
|
from django.conf import settings
|
|
from django.test import TestCase
|
|
from django.utils import timezone
|
|
|
|
from documents.tests.test_classifier import dummy_preprocess
|
|
from documents.tests.utils import DirectoriesMixin
|
|
from documents.tests.utils import FileSystemAssertsMixin
|
|
from paperless import tasks
|
|
from paperless.models import Correspondent
|
|
from paperless.models import Document
|
|
from paperless.models import DocumentType
|
|
from paperless.models import Tag
|
|
from paperless.sanity_checker import SanityCheckFailedException
|
|
from paperless.sanity_checker import SanityCheckMessages
|
|
|
|
|
|
class TestIndexReindex(DirectoriesMixin, TestCase):
    """Smoke tests for the index maintenance tasks with one stored document."""

    @staticmethod
    def _store_sample_document():
        """Persist the single document both index tests operate on."""
        fields = {
            "title": "test",
            "content": "my document",
            "checksum": "wow",
            "added": timezone.now(),
            "created": timezone.now(),
            "modified": timezone.now(),
        }
        return Document.objects.create(**fields)

    def test_index_reindex(self):
        """
        GIVEN:
            - One existing document
        WHEN:
            - The reindex task is called
        THEN:
            - The task completes without raising
        """
        self._store_sample_document()

        # No explicit assertion: the test passes as long as nothing raises.
        tasks.index_reindex()

    def test_index_optimize(self):
        """
        GIVEN:
            - One existing document
        WHEN:
            - The index optimize task is called
        THEN:
            - The task completes without raising
        """
        self._store_sample_document()

        # No explicit assertion: the test passes as long as nothing raises.
        tasks.index_optimize()
|
|
|
|
|
|
class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """Tests for ``tasks.train_classifier``."""

    # NOTE(review): ``tasks`` is imported from ``paperless`` in this module,
    # while the patches below target ``documents.tasks`` — confirm this is the
    # name ``train_classifier`` actually looks up at call time.

    def _train_with_single_auto_object(self, model_cls):
        """
        Create one ``model_cls`` row using automatic matching, run training
        with ``load_classifier`` patched to return ``None``, then check that
        the loader was called exactly once and no model file exists.
        """
        with mock.patch("documents.tasks.load_classifier") as loader:
            loader.return_value = None
            model_cls.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
            tasks.train_classifier()
            loader.assert_called_once()
            self.assertIsNotFile(settings.MODEL_FILE)

    @staticmethod
    def _model_file_mtime():
        """Return the modification time of the classifier model file."""
        return Path(settings.MODEL_FILE).stat().st_mtime

    def test_train_classifier_no_auto_matching(self):
        """
        GIVEN:
            - No objects using automatic matching
        WHEN:
            - The train classifier task is called
        THEN:
            - The classifier loader is never called
        """
        with mock.patch("documents.tasks.load_classifier") as loader:
            tasks.train_classifier()
            loader.assert_not_called()

    def test_train_classifier_with_auto_tag(self):
        """One auto-matching tag: loader called once, no model file written."""
        self._train_with_single_auto_object(Tag)

    def test_train_classifier_with_auto_type(self):
        """One auto-matching document type: loader called once, no model file."""
        self._train_with_single_auto_object(DocumentType)

    def test_train_classifier_with_auto_correspondent(self):
        """One auto-matching correspondent: loader called once, no model file."""
        self._train_with_single_auto_object(Correspondent)

    def test_train_classifier(self):
        """
        GIVEN:
            - A document assigned to an auto-matching correspondent
        WHEN:
            - Training runs twice on unchanged data, then once after the
              document content changes
        THEN:
            - The model file is created by the first run
            - Its mtime is unchanged by the second run
            - Its mtime differs after the third run
        """
        correspondent = Correspondent.objects.create(
            matching_algorithm=Tag.MATCH_AUTO,
            name="test",
        )
        document = Document.objects.create(
            correspondent=correspondent,
            content="test",
            title="test",
        )
        self.assertIsNotFile(settings.MODEL_FILE)

        # Content preprocessing is replaced by the test double for the
        # duration of all three training runs.
        with mock.patch(
            "documents.classifier.DocumentClassifier.preprocess_content",
            side_effect=dummy_preprocess,
        ):
            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            first_mtime = self._model_file_mtime()

            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            second_mtime = self._model_file_mtime()
            self.assertEqual(first_mtime, second_mtime)

            document.content = "test2"
            document.save()
            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            third_mtime = self._model_file_mtime()
            self.assertNotEqual(second_mtime, third_mtime)
|
|
|
|
|
|
class TestSanityCheck(DirectoriesMixin, TestCase):
    """Tests for the value returned (or exception raised) by ``tasks.sanity_check``."""

    # NOTE(review): ``tasks`` is imported from ``paperless`` in this module,
    # while the patches below target ``documents.tasks`` — confirm this is the
    # name ``sanity_check`` actually looks up at call time.

    def test_sanity_check_success(self):
        """An empty message collection yields the "no issues" summary."""
        with mock.patch("documents.tasks.sanity_checker.check_sanity") as check_sanity:
            check_sanity.return_value = SanityCheckMessages()
            outcome = tasks.sanity_check()

        self.assertEqual(outcome, "No issues detected.")
        check_sanity.assert_called_once()

    def test_sanity_check_error(self):
        """An error message makes the task raise by default."""
        report = SanityCheckMessages()
        report.error(None, "Some error")

        with mock.patch("documents.tasks.sanity_checker.check_sanity") as check_sanity:
            check_sanity.return_value = report
            with self.assertRaises(SanityCheckFailedException):
                tasks.sanity_check()

        check_sanity.assert_called_once()

    def test_sanity_check_error_no_raise(self):
        """With ``raise_on_error=False`` an error is reported via the return value."""
        report = SanityCheckMessages()
        report.error(None, "Some error")

        with mock.patch("documents.tasks.sanity_checker.check_sanity") as check_sanity:
            check_sanity.return_value = report
            # No exception should be raised
            outcome = tasks.sanity_check(raise_on_error=False)

        self.assertEqual(outcome, "Sanity check exited with errors. See log.")
        check_sanity.assert_called_once()

    def test_sanity_check_warning(self):
        """A warning message yields the "warnings" summary."""
        report = SanityCheckMessages()
        report.warning(None, "Some warning")

        with mock.patch("documents.tasks.sanity_checker.check_sanity") as check_sanity:
            check_sanity.return_value = report
            outcome = tasks.sanity_check()

        self.assertEqual(outcome, "Sanity check exited with warnings. See log.")
        check_sanity.assert_called_once()

    def test_sanity_check_info(self):
        """An info message yields the "infos" summary."""
        report = SanityCheckMessages()
        report.info(None, "Some info")

        with mock.patch("documents.tasks.sanity_checker.check_sanity") as check_sanity:
            check_sanity.return_value = report
            outcome = tasks.sanity_check()

        self.assertEqual(outcome, "Sanity check exited with infos. See log.")
        check_sanity.assert_called_once()
|
|
|
|
|
|
class TestBulkUpdate(DirectoriesMixin, TestCase):
    """Smoke test for ``tasks.bulk_update_documents``."""

    def test_bulk_update_documents(self):
        """
        GIVEN:
            - One existing document
        WHEN:
            - The bulk update task is called with that document's primary key
        THEN:
            - The task completes without raising
        """
        fields = {
            "title": "test",
            "content": "my document",
            "checksum": "wow",
            "added": timezone.now(),
            "created": timezone.now(),
            "modified": timezone.now(),
        }
        document = Document.objects.create(**fields)

        # No explicit assertion: the test passes as long as nothing raises.
        tasks.bulk_update_documents([document.pk])
|
|
|
|
|
|
class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """
    GIVEN:
        - Existing document in trash
    WHEN:
        - Empty trash task is called without doc_ids
    THEN:
        - Document is only deleted if it has been in trash for more than delay (default 30 days)
    """

    def test_empty_trash(self):
        """A freshly trashed document survives; one trashed 31 days ago is purged."""
        trashed = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            added=timezone.now(),
            created=timezone.now(),
            modified=timezone.now(),
        )
        trashed.delete()

        # After delete() the row is hidden from the default manager but is
        # still counted by ``global_objects``.
        self.assertEqual(Document.objects.count(), 0)
        self.assertEqual(Document.global_objects.count(), 1)

        # Just trashed: the task must leave the document in place.
        tasks.empty_trash()
        self.assertEqual(Document.global_objects.count(), 1)

        # Backdate the deletion to 31 days ago and run the task again.
        trashed.deleted_at = timezone.now() - timedelta(days=31)
        trashed.save()
        tasks.empty_trash()

        self.assertEqual(Document.global_objects.count(), 0)
|
|
|
|
|
|
class TestUpdateContent(DirectoriesMixin, TestCase):
    """Tests for ``tasks.update_document_content_maybe_archive_file``."""

    # Values the documents are created with. The assertions compare against
    # these so that a task which leaves the row untouched fails the test.
    ORIGINAL_CONTENT = "my document"
    ORIGINAL_CHECKSUM = "wow"

    @staticmethod
    def _copy_sample_pdf(destination):
        """
        Copy the bundled sample PDF to ``destination``.

        Returns ``destination`` so the call can be used inline.
        """
        sample = (
            Path(__file__).parent
            / "samples"
            / "documents"
            / "originals"
            / "0000001.pdf"
        )
        shutil.copy(sample, destination)
        return destination

    def test_update_content_maybe_archive_file(self):
        """
        GIVEN:
            - Existing document with archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content and checksum are updated
        """
        sample1 = self._copy_sample_pdf(self.dirs.scratch_dir / "sample.pdf")
        sample1_archive = self._copy_sample_pdf(
            self.dirs.archive_dir / "sample_archive.pdf",
        )
        doc = Document.objects.create(
            title="test",
            content=self.ORIGINAL_CONTENT,
            checksum=self.ORIGINAL_CHECKSUM,
            archive_checksum=self.ORIGINAL_CHECKSUM,
            filename=sample1,
            mime_type="application/pdf",
            archive_filename=sample1_archive,
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        # Re-read the row once; ``doc`` still holds the pre-task values.
        updated = Document.objects.get(pk=doc.pk)
        # Compare against the content the document was created with. The
        # previous comparison against "test" (the title) could never fail.
        self.assertNotEqual(updated.content, self.ORIGINAL_CONTENT)
        self.assertNotEqual(updated.archive_checksum, self.ORIGINAL_CHECKSUM)

    def test_update_content_maybe_archive_file_no_archive(self):
        """
        GIVEN:
            - Existing document without archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content is updated
        """
        sample1 = self._copy_sample_pdf(self.dirs.scratch_dir / "sample.pdf")
        doc = Document.objects.create(
            title="test",
            content=self.ORIGINAL_CONTENT,
            checksum=self.ORIGINAL_CHECKSUM,
            filename=sample1,
            mime_type="application/pdf",
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        # Compare against the content the document was created with. The
        # previous comparison against "test" (the title) could never fail.
        updated = Document.objects.get(pk=doc.pk)
        self.assertNotEqual(updated.content, self.ORIGINAL_CONTENT)
|