Merge pull request #1648 from paperless-ngx/feature-use-celery

Feature: Transition to celery for background tasks
This commit is contained in:
shamoon
2022-10-10 00:07:55 -07:00
committed by GitHub
34 changed files with 959 additions and 297 deletions

View File

@@ -10,6 +10,8 @@ import zipfile
from unittest import mock
from unittest.mock import MagicMock
import celery
try:
import zoneinfo
except ImportError:
@@ -20,7 +22,6 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from django_q.models import Task
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
@@ -31,7 +32,7 @@ from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from django_celery_results.models import TaskResult
from documents.models import Comment
from documents.models import StoragePath
from documents.tests.utils import DirectoriesMixin
@@ -790,7 +791,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data["documents_inbox"], None)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload(self, m):
with open(
@@ -813,7 +814,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m):
with open(
@@ -836,7 +837,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m):
with open(
@@ -850,7 +851,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m):
with open(
@@ -864,7 +865,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -882,7 +883,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_title"], "my custom title")
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task):
c = Correspondent.objects.create(name="test-corres")
with open(
@@ -901,7 +902,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_correspondent_id"], c.id)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -915,7 +916,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task):
dt = DocumentType.objects.create(name="invoice")
with open(
@@ -934,7 +935,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["override_document_type_id"], dt.id)
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -948,7 +949,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -968,7 +969,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual(kwargs["override_tag_ids"], [t1.id, t2.id])
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -984,7 +985,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task):
created = datetime.datetime(
2022,
@@ -1619,7 +1620,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
user = User.objects.create_superuser(username="temp_admin")
self.client.force_authenticate(user=user)
patcher = mock.patch("documents.bulk_edit.async_task")
patcher = mock.patch("documents.bulk_edit.bulk_update_documents.delay")
self.async_task = patcher.start()
self.addCleanup(patcher.stop)
self.c1 = Correspondent.objects.create(name="c1")
@@ -2738,7 +2739,7 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
class TestTasks(APITestCase):
ENDPOINT = "/api/tasks/"
ENDPOINT_ACKOWLEDGE = "/api/acknowledge_tasks/"
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
def setUp(self):
super().setUp()
@@ -2747,16 +2748,27 @@ class TestTasks(APITestCase):
self.client.force_authenticate(user=self.user)
def test_get_tasks(self):
task_id1 = str(uuid.uuid4())
PaperlessTask.objects.create(task_id=task_id1)
Task.objects.create(
id=task_id1,
started=timezone.now() - datetime.timedelta(seconds=30),
stopped=timezone.now(),
func="documents.tasks.consume_file",
"""
GIVEN:
- Attempted celery tasks
WHEN:
- API call is made to get tasks
THEN:
- Attempting and pending tasks are serialized and provided
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_great_task",
status=celery.states.PENDING,
)
task_id2 = str(uuid.uuid4())
PaperlessTask.objects.create(task_id=task_id2)
PaperlessTask.objects.create(attempted_task=result1)
result2 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_awesome_task",
status=celery.states.STARTED,
)
PaperlessTask.objects.create(attempted_task=result2)
response = self.client.get(self.ENDPOINT)
@@ -2764,25 +2776,155 @@ class TestTasks(APITestCase):
self.assertEqual(len(response.data), 2)
returned_task1 = response.data[1]
returned_task2 = response.data[0]
self.assertEqual(returned_task1["task_id"], task_id1)
self.assertEqual(returned_task1["status"], "complete")
self.assertIsNotNone(returned_task1["attempted_task"])
self.assertEqual(returned_task2["task_id"], task_id2)
self.assertEqual(returned_task2["status"], "queued")
self.assertIsNone(returned_task2["attempted_task"])
self.assertEqual(returned_task1["task_id"], result1.task_id)
self.assertEqual(returned_task1["status"], celery.states.PENDING)
self.assertEqual(returned_task1["task_name"], result1.task_name)
self.assertEqual(returned_task2["task_id"], result2.task_id)
self.assertEqual(returned_task2["status"], celery.states.STARTED)
self.assertEqual(returned_task2["task_name"], result2.task_name)
def test_acknowledge_tasks(self):
task_id = str(uuid.uuid4())
task = PaperlessTask.objects.create(task_id=task_id)
"""
GIVEN:
- Attempted celery tasks
WHEN:
- API call is made to get mark task as acknowledged
THEN:
- Task is marked as acknowledged
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.PENDING,
)
task = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(len(response.data), 1)
response = self.client.post(
self.ENDPOINT_ACKOWLEDGE,
self.ENDPOINT_ACKNOWLEDGE,
{"tasks": [task.id]},
)
self.assertEqual(response.status_code, 200)
response = self.client.get(self.ENDPOINT)
self.assertEqual(len(response.data), 0)
def test_task_result_no_error(self):
"""
GIVEN:
- A celery task completed without error
WHEN:
- API call is made to get tasks
THEN:
- The returned data includes the task result
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
result="Success. New document id 1 created",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["result"], "Success. New document id 1 created")
self.assertEqual(returned_data["related_document"], "1")
def test_task_result_with_error(self):
"""
GIVEN:
- A celery task completed with an exception
WHEN:
- API call is made to get tasks
THEN:
- The returned result is the exception info
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
result={
"exc_type": "ConsumerError",
"exc_message": ["test.pdf: Not consuming test.pdf: It is a duplicate."],
"exc_module": "documents.consumer",
},
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(
returned_data["result"],
"test.pdf: Not consuming test.pdf: It is a duplicate.",
)
def test_task_name_webui(self):
"""
GIVEN:
- Attempted celery task
- Task was created through the webui
WHEN:
- API call is made to get tasks
THEN:
- Returned data include the filename
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
task_args="\"('/tmp/paperless/paperless-upload-5iq7skzc',)\"",
task_kwargs="\"{'override_filename': 'test.pdf', 'override_title': None, 'override_correspondent_id': None, 'override_document_type_id': None, 'override_tag_ids': None, 'task_id': '466e8fe7-7193-4698-9fff-72f0340e2082', 'override_created': None}\"",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["name"], "test.pdf")
def test_task_name_consume_folder(self):
"""
GIVEN:
- Attempted celery task
- Task was created through the consume folder
WHEN:
- API call is made to get tasks
THEN:
- Returned data include the filename
"""
result1 = TaskResult.objects.create(
task_id=str(uuid.uuid4()),
task_name="documents.tasks.some_task",
status=celery.states.SUCCESS,
task_args="\"('/consume/anothertest.pdf',)\"",
task_kwargs="\"{'override_tag_ids': None}\"",
)
_ = PaperlessTask.objects.create(attempted_task=result1)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
returned_data = response.data[0]
self.assertEqual(returned_data["name"], "anothertest.pdf")

View File

@@ -43,7 +43,7 @@ class ConsumerMixin:
super().setUp()
self.t = None
patcher = mock.patch(
"documents.management.commands.document_consumer.async_task",
"documents.tasks.consume_file.delay",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
@@ -76,7 +76,7 @@ class ConsumerMixin:
# A bogus async_task that will simply check the file for
# completeness and raise an exception otherwise.
def bogus_task(self, func, filename, **kwargs):
def bogus_task(self, filename, **kwargs):
eq = filecmp.cmp(filename, self.sample_file, shallow=False)
if not eq:
print("Consumed an INVALID file.")
@@ -115,7 +115,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
def test_consume_file_invalid_ext(self):
self.t_start()
@@ -135,7 +135,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_pdf(self, error_logger):
@@ -155,7 +155,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname)
self.assertEqual(args[0], fname)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_and_move(self, error_logger):
@@ -175,7 +175,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname2)
self.assertEqual(args[0], fname2)
error_logger.assert_not_called()
@@ -193,7 +193,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], fname)
self.assertEqual(args[0], fname)
# assert that we have an error logged with this invalid file.
error_logger.assert_called_once()
@@ -241,7 +241,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertEqual(2, self.task_mock.call_count)
fnames = [
os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list
os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
@@ -338,7 +338,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
args, kwargs = self.task_mock.call_args
self.assertEqual(args[1], f)
self.assertEqual(args[0], f)
# assertCountEqual has a bad name, but test that the first
# sequence contains the same elements as second, regardless of