2024-10-30 01:04:14 -07:00

261 lines
8.7 KiB
Python

import os
import shutil
import uuid
from datetime import timedelta
from pathlib import Path
from unittest import mock
from django.conf import settings
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from documents import tasks
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.signals.handlers import before_task_publish_handler
from documents.signals.handlers import task_failure_handler
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DummyProgressManager
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
class TestIndexReindex(DirectoriesMixin, TestCase):
def test_index_reindex(self):
Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
tasks.index_reindex()
def test_index_optimize(self):
Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
tasks.index_optimize()
class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_no_auto_matching(self, load_classifier):
tasks.train_classifier()
load_classifier.assert_not_called()
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_tag(self, load_classifier):
load_classifier.return_value = None
Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_type(self, load_classifier):
load_classifier.return_value = None
DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_correspondent(self, load_classifier):
load_classifier.return_value = None
Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test")
self.assertIsNotFile(settings.MODEL_FILE)
with mock.patch(
"documents.classifier.DocumentClassifier.preprocess_content",
) as pre_proc_mock:
pre_proc_mock.side_effect = dummy_preprocess
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime = os.stat(settings.MODEL_FILE).st_mtime
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
self.assertEqual(mtime, mtime2)
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
self.assertNotEqual(mtime2, mtime3)
class TestSanityCheck(DirectoriesMixin, TestCase):
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_success(self, m):
m.return_value = SanityCheckMessages()
self.assertEqual(tasks.sanity_check(), "No issues detected.")
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_error(self, m):
messages = SanityCheckMessages()
messages.error(None, "Some error")
m.return_value = messages
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_warning(self, m):
messages = SanityCheckMessages()
messages.warning(None, "Some warning")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(),
"Sanity check exited with warnings. See log.",
)
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_info(self, m):
messages = SanityCheckMessages()
messages.info(None, "Some info")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(),
"Sanity check exited with infos. See log.",
)
m.assert_called_once()
class TestBulkUpdate(DirectoriesMixin, TestCase):
def test_bulk_update_documents(self):
doc1 = Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
tasks.bulk_update_documents([doc1.pk])
class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
"""
GIVEN:
- Existing document in trash
WHEN:
- Empty trash task is called without doc_ids
THEN:
- Document is only deleted if it has been in trash for more than delay (default 30 days)
"""
def test_empty_trash(self):
doc = Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
doc.delete()
self.assertEqual(Document.global_objects.count(), 1)
self.assertEqual(Document.objects.count(), 0)
tasks.empty_trash()
self.assertEqual(Document.global_objects.count(), 1)
doc.deleted_at = timezone.now() - timedelta(days=31)
doc.save()
tasks.empty_trash()
self.assertEqual(Document.global_objects.count(), 0)
class TestRetryConsumeTask(
DirectoriesMixin,
SampleDirMixin,
FileSystemAssertsMixin,
TestCase,
):
@override_settings(CONSUMPTION_FAILED_DIR=Path(__file__).parent / "samples")
@mock.patch("documents.consumer.run_subprocess")
def test_retry_consume(self, m):
test_file = self.SAMPLE_DIR / "corrupted.pdf"
temp_copy = self.dirs.scratch_dir / test_file.name
shutil.copy(test_file, temp_copy)
headers = {
"id": str(uuid.uuid4()),
"task": "documents.tasks.consume_file",
}
body = (
# args
(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=str(temp_copy),
),
None,
),
# kwargs
{},
# celery stuff
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
before_task_publish_handler(headers=headers, body=body)
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
with self.assertRaises(Exception):
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=temp_copy,
),
)
task_failure_handler(
task_id=headers["id"],
exception="Example failure",
)
task = PaperlessTask.objects.first()
# Ensure the file is moved to the failed dir
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
tasks.retry_failed_file(task_id=task.task_id)
m.assert_called_once()
args, _ = m.call_args
command = args[0]
self.assertEqual(command[0], "qpdf")
self.assertEqual(command[1], "--replace-input")