|
|
|
@@ -1,8 +1,17 @@
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import shutil
|
|
|
|
|
import tempfile
|
|
|
|
|
from unittest import mock
|
|
|
|
|
from unittest.mock import MagicMock
|
|
|
|
|
|
|
|
|
|
from django.test import TestCase
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
from django.db import DatabaseError
|
|
|
|
|
from django.test import TestCase, override_settings
|
|
|
|
|
|
|
|
|
|
from ..models import FileInfo, Tag
|
|
|
|
|
from ..consumer import Consumer, ConsumerError
|
|
|
|
|
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
|
|
|
|
|
from ..parsers import DocumentParser, ParseError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestAttributes(TestCase):
|
|
|
|
@@ -394,3 +403,251 @@ class TestFieldPermutations(TestCase):
|
|
|
|
|
self.assertEqual(info.created.year, 2019)
|
|
|
|
|
self.assertEqual(info.created.month, 9)
|
|
|
|
|
self.assertEqual(info.created.day, 8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DummyParser(DocumentParser):
|
|
|
|
|
|
|
|
|
|
def get_thumbnail(self):
|
|
|
|
|
# not important during tests
|
|
|
|
|
raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
def __init__(self, path, logging_group, scratch_dir):
|
|
|
|
|
super(DummyParser, self).__init__(path, logging_group)
|
|
|
|
|
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
|
|
|
|
|
|
|
|
|
def get_optimised_thumbnail(self):
|
|
|
|
|
return self.fake_thumb
|
|
|
|
|
|
|
|
|
|
def get_text(self):
|
|
|
|
|
return "The Text"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FaultyParser(DocumentParser):
|
|
|
|
|
|
|
|
|
|
def get_thumbnail(self):
|
|
|
|
|
# not important during tests
|
|
|
|
|
raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
def __init__(self, path, logging_group, scratch_dir):
|
|
|
|
|
super(FaultyParser, self).__init__(path, logging_group)
|
|
|
|
|
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
|
|
|
|
|
|
|
|
|
|
def get_optimised_thumbnail(self):
|
|
|
|
|
return self.fake_thumb
|
|
|
|
|
|
|
|
|
|
def get_text(self):
|
|
|
|
|
raise ParseError("Does not compute.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestConsumer(TestCase):
|
|
|
|
|
|
|
|
|
|
def make_dummy_parser(self, path, logging_group):
|
|
|
|
|
return DummyParser(path, logging_group, self.scratch_dir)
|
|
|
|
|
|
|
|
|
|
def make_faulty_parser(self, path, logging_group):
|
|
|
|
|
return FaultyParser(path, logging_group, self.scratch_dir)
|
|
|
|
|
|
|
|
|
|
def setUp(self):
|
|
|
|
|
self.scratch_dir = tempfile.mkdtemp()
|
|
|
|
|
self.media_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
|
|
|
override_settings(
|
|
|
|
|
SCRATCH_DIR=self.scratch_dir,
|
|
|
|
|
MEDIA_ROOT=self.media_dir,
|
|
|
|
|
ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
|
|
|
|
|
THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails")
|
|
|
|
|
).enable()
|
|
|
|
|
|
|
|
|
|
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
|
|
|
m = patcher.start()
|
|
|
|
|
m.return_value = [(None, {
|
|
|
|
|
"parser": self.make_dummy_parser,
|
|
|
|
|
"test": lambda _: True,
|
|
|
|
|
"weight": 0
|
|
|
|
|
})]
|
|
|
|
|
|
|
|
|
|
self.addCleanup(patcher.stop)
|
|
|
|
|
|
|
|
|
|
self.consumer = Consumer()
|
|
|
|
|
|
|
|
|
|
def tearDown(self):
|
|
|
|
|
shutil.rmtree(self.scratch_dir, ignore_errors=True)
|
|
|
|
|
shutil.rmtree(self.media_dir, ignore_errors=True)
|
|
|
|
|
|
|
|
|
|
def get_test_file(self):
|
|
|
|
|
fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
|
|
|
|
|
return f
|
|
|
|
|
|
|
|
|
|
def testNormalOperation(self):
|
|
|
|
|
|
|
|
|
|
filename = self.get_test_file()
|
|
|
|
|
document = self.consumer.try_consume_file(filename)
|
|
|
|
|
|
|
|
|
|
self.assertEqual(document.content, "The Text")
|
|
|
|
|
self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
|
|
|
|
|
self.assertIsNone(document.correspondent)
|
|
|
|
|
self.assertIsNone(document.document_type)
|
|
|
|
|
self.assertEqual(document.filename, "0000001.pdf")
|
|
|
|
|
|
|
|
|
|
self.assertTrue(os.path.isfile(
|
|
|
|
|
document.source_path
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
self.assertTrue(os.path.isfile(
|
|
|
|
|
document.thumbnail_path
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
self.assertFalse(os.path.isfile(filename))
|
|
|
|
|
|
|
|
|
|
def testOverrideFilename(self):
|
|
|
|
|
filename = self.get_test_file()
|
|
|
|
|
overrideFilename = "My Bank - Statement for November.pdf"
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(filename, original_filename=overrideFilename)
|
|
|
|
|
|
|
|
|
|
self.assertEqual(document.correspondent.name, "My Bank")
|
|
|
|
|
self.assertEqual(document.title, "Statement for November")
|
|
|
|
|
|
|
|
|
|
def testOverrideTitle(self):
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(self.get_test_file(), force_title="Override Title")
|
|
|
|
|
self.assertEqual(document.title, "Override Title")
|
|
|
|
|
|
|
|
|
|
def testOverrideCorrespondent(self):
|
|
|
|
|
c = Correspondent.objects.create(name="test")
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(self.get_test_file(), force_correspondent_id=c.pk)
|
|
|
|
|
self.assertEqual(document.correspondent.id, c.id)
|
|
|
|
|
|
|
|
|
|
def testOverrideDocumentType(self):
|
|
|
|
|
dt = DocumentType.objects.create(name="test")
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(self.get_test_file(), force_document_type_id=dt.pk)
|
|
|
|
|
self.assertEqual(document.document_type.id, dt.id)
|
|
|
|
|
|
|
|
|
|
def testOverrideTags(self):
|
|
|
|
|
t1 = Tag.objects.create(name="t1")
|
|
|
|
|
t2 = Tag.objects.create(name="t2")
|
|
|
|
|
t3 = Tag.objects.create(name="t3")
|
|
|
|
|
document = self.consumer.try_consume_file(self.get_test_file(), force_tag_ids=[t1.id, t3.id])
|
|
|
|
|
|
|
|
|
|
self.assertIn(t1, document.tags.all())
|
|
|
|
|
self.assertNotIn(t2, document.tags.all())
|
|
|
|
|
self.assertIn(t3, document.tags.all())
|
|
|
|
|
|
|
|
|
|
def testNotAFile(self):
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file("non-existing-file")
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertTrue(str(e).endswith('It is not a file'))
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception")
|
|
|
|
|
|
|
|
|
|
@override_settings(CONSUMPTION_DIR=None)
|
|
|
|
|
def testConsumptionDirUnset(self):
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception")
|
|
|
|
|
|
|
|
|
|
@override_settings(CONSUMPTION_DIR="asd")
|
|
|
|
|
def testNoConsumptionDir(self):
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertEqual(str(e), "Consumption directory asd does not exist")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception")
|
|
|
|
|
|
|
|
|
|
def testDuplicates(self):
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertTrue(str(e).endswith("It is a duplicate."))
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception")
|
|
|
|
|
|
|
|
|
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
|
|
|
def testNoParsers(self, m):
|
|
|
|
|
m.return_value = []
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertTrue(str(e).startswith("No parsers abvailable"))
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception")
|
|
|
|
|
|
|
|
|
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
|
|
|
def testFaultyParser(self, m):
|
|
|
|
|
m.return_value = [(None, {
|
|
|
|
|
"parser": self.make_faulty_parser,
|
|
|
|
|
"test": lambda _: True,
|
|
|
|
|
"weight": 0
|
|
|
|
|
})]
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertEqual(str(e), "Does not compute.")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self.fail("Should throw exception.")
|
|
|
|
|
|
|
|
|
|
@mock.patch("documents.consumer.Consumer._write")
|
|
|
|
|
def testPostSaveError(self, m):
|
|
|
|
|
filename = self.get_test_file()
|
|
|
|
|
m.side_effect = OSError("NO.")
|
|
|
|
|
try:
|
|
|
|
|
self.consumer.try_consume_file(filename)
|
|
|
|
|
except ConsumerError as e:
|
|
|
|
|
self.assertEqual(str(e), "NO.")
|
|
|
|
|
else:
|
|
|
|
|
self.fail("Should raise exception")
|
|
|
|
|
|
|
|
|
|
# file not deleted
|
|
|
|
|
self.assertTrue(os.path.isfile(filename))
|
|
|
|
|
|
|
|
|
|
# Database empty
|
|
|
|
|
self.assertEqual(len(Document.objects.all()), 0)
|
|
|
|
|
|
|
|
|
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
|
|
|
|
def testFilenameHandling(self):
|
|
|
|
|
filename = self.get_test_file()
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(filename, original_filename="Bank - Test.pdf", force_title="new docs")
|
|
|
|
|
|
|
|
|
|
print(document.source_path)
|
|
|
|
|
print("===")
|
|
|
|
|
|
|
|
|
|
self.assertEqual(document.title, "new docs")
|
|
|
|
|
self.assertEqual(document.correspondent.name, "Bank")
|
|
|
|
|
self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
|
|
|
|
|
|
|
|
|
|
@mock.patch("documents.consumer.DocumentClassifier")
|
|
|
|
|
def testClassifyDocument(self, m):
|
|
|
|
|
correspondent = Correspondent.objects.create(name="test")
|
|
|
|
|
dtype = DocumentType.objects.create(name="test")
|
|
|
|
|
t1 = Tag.objects.create(name="t1")
|
|
|
|
|
t2 = Tag.objects.create(name="t2")
|
|
|
|
|
|
|
|
|
|
m.return_value = MagicMock()
|
|
|
|
|
m.return_value.predict_correspondent.return_value = correspondent.pk
|
|
|
|
|
m.return_value.predict_document_type.return_value = dtype.pk
|
|
|
|
|
m.return_value.predict_tags.return_value = [t1.pk]
|
|
|
|
|
|
|
|
|
|
document = self.consumer.try_consume_file(self.get_test_file())
|
|
|
|
|
|
|
|
|
|
self.assertEqual(document.correspondent, correspondent)
|
|
|
|
|
self.assertEqual(document.document_type, dtype)
|
|
|
|
|
self.assertIn(t1, document.tags.all())
|
|
|
|
|
self.assertNotIn(t2, document.tags.all())
|
|
|
|
|