Merge branch 'dev' into feature-permissions

This commit is contained in:
Michael Shamoon
2023-01-01 17:51:41 -08:00
49 changed files with 2587 additions and 1172 deletions

View File

@@ -797,6 +797,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -820,6 +822,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -843,6 +847,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -857,6 +863,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
@@ -870,6 +878,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -888,6 +899,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
c = Correspondent.objects.create(name="test-corres")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -907,6 +921,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -921,6 +938,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
dt = DocumentType.objects.create(name="invoice")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -940,6 +960,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -954,6 +977,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
@@ -974,6 +1000,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
@@ -990,6 +1019,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
created = datetime.datetime(
2022,
5,
@@ -3040,6 +3072,59 @@ class TestTasks(APITestCase):
self.assertEqual(returned_task2["status"], celery.states.PENDING)
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
def test_get_single_task_status(self):
    """
    GIVEN:
        - Two stored tasks, one of them with a known task ID
    WHEN:
        - The task endpoint is queried with that ID as ``task_id``
    THEN:
        - The response is a 200 that contains only the matching task
    """
    wanted_id = str(uuid.uuid4())
    wanted_task = PaperlessTask.objects.create(
        task_id=wanted_id,
        task_file_name="task_one.pdf",
    )
    # Second, unrelated task that the task_id filter has to exclude.
    PaperlessTask.objects.create(
        task_id=str(uuid.uuid4()),
        task_file_name="task_two.pdf",
    )

    url = self.ENDPOINT + f"?task_id={wanted_id}"
    response = self.client.get(url)

    self.assertEqual(response.status_code, 200)
    self.assertEqual(len(response.data), 1)
    self.assertEqual(response.data[0]["task_id"], wanted_task.task_id)
def test_get_single_task_status_not_valid(self):
    """
    GIVEN:
        - Two stored tasks, neither of which has the queried task ID
    WHEN:
        - The task endpoint is queried with ``task_id=bad-task-id``
    THEN:
        - The response is a 200 with no task data
    """
    # The tasks only need to exist so the filter has something to reject;
    # neither object is inspected afterwards, so no names are bound.
    PaperlessTask.objects.create(
        task_id=str(uuid.uuid4()),
        task_file_name="task_one.pdf",
    )
    PaperlessTask.objects.create(
        task_id=str(uuid.uuid4()),
        task_file_name="task_two.pdf",
    )

    response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")

    self.assertEqual(response.status_code, 200)
    self.assertEqual(len(response.data), 0)
def test_acknowledge_tasks(self):
"""
GIVEN:

View File

@@ -660,7 +660,7 @@ class PostDocumentView(GenericAPIView):
task_id = str(uuid.uuid4())
consume_file.delay(
async_task = consume_file.delay(
temp_filename,
override_filename=doc_name,
override_title=title,
@@ -672,7 +672,7 @@ class PostDocumentView(GenericAPIView):
override_owner_id=owner_id,
)
return Response("OK")
return Response(async_task.id)
class SelectionDataView(GenericAPIView):
@@ -929,13 +929,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
permission_classes = (IsAuthenticated,)
serializer_class = TasksViewSerializer
def get_queryset(self):
    """
    Build the queryset of tasks for the current request.

    If the request has a ``task_id`` query parameter, the result is every
    task with that ID, whether or not it has been acknowledged. Without
    the parameter, the result is all unacknowledged tasks ordered by
    ``date_created`` descending.
    """
    task_id = self.request.query_params.get("task_id")
    if task_id is not None:
        # An explicit lookup deliberately skips the acknowledged filter.
        return PaperlessTask.objects.filter(task_id=task_id)

    return (
        PaperlessTask.objects.filter(acknowledged=False)
        .order_by("date_created")
        .reverse()
    )
class AcknowledgeTasksView(GenericAPIView):

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-11-09 21:50+0000\n"
"PO-Revision-Date: 2022-11-09 23:11\n"
"PO-Revision-Date: 2022-12-30 15:36\n"
"Last-Translator: \n"
"Language-Team: Portuguese\n"
"Language: pt_PT\n"
@@ -100,7 +100,7 @@ msgstr "tipos de documento"
#: documents/models.py:93
msgid "path"
msgstr ""
msgstr "caminho"
#: documents/models.py:99 documents/models.py:127
msgid "storage path"
@@ -396,7 +396,7 @@ msgstr "regras de filtragem"
#: documents/models.py:536
msgid "Task ID"
msgstr ""
msgstr "ID da tarefa"
#: documents/models.py:537
msgid "Celery ID for the Task that was run"
@@ -412,7 +412,7 @@ msgstr ""
#: documents/models.py:549 documents/models.py:556
msgid "Task Name"
msgstr ""
msgstr "Nome da Tarefa"
#: documents/models.py:550
msgid "Name of the file which the Task was run for"
@@ -626,7 +626,7 @@ msgstr ""
#: paperless/settings.py:395
msgid "Serbian"
msgstr ""
msgstr "Sérvio"
#: paperless/settings.py:396
msgid "Swedish"
@@ -634,11 +634,11 @@ msgstr "Sueco"
#: paperless/settings.py:397
msgid "Turkish"
msgstr ""
msgstr "Turco"
#: paperless/settings.py:398
msgid "Chinese Simplified"
msgstr ""
msgstr "Chinês Simplificado"
#: paperless/urls.py:161
msgid "Paperless-ngx administration"

View File

@@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple
__version__: Final[Tuple[int, int, int]] = (1, 11, 0)
__version__: Final[Tuple[int, int, int]] = (1, 11, 3)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y

View File

@@ -8,6 +8,8 @@ import requests
from bleach import clean
from bleach import linkify
from django.conf import settings
from django.utils.timezone import is_naive
from django.utils.timezone import make_aware
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
@@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):
self.text += f"\n\n{strip_text(mail.text)}"
self.date = mail.date
if is_naive(mail.date):
self.date = make_aware(mail.date)
else:
self.date = mail.date
self.archive_path = self.generate_pdf(document_path)
def tika_parse(self, html: str):

View File

@@ -86,6 +86,7 @@ class MailRuleSerializer(serializers.ModelSerializer):
"assign_document_type",
"order",
"attachment_type",
"consumption_scope",
]
def update(self, instance, validated_data):

View File

@@ -1,6 +1,8 @@
import json
import os
import re
from pathlib import Path
from typing import Optional
from django.conf import settings
from documents.parsers import DocumentParser
@@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
self.log("warning", f"Error while calculating DPI for image {image}: {e}")
return None
def extract_text(self, sidecar_file, pdf_file):
def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
# When re-doing OCR, the sidecar contains ONLY the new text, not
# the whole text, so do not utilize it in that case
if (
@@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):
self.log("debug", f"Detected language {lang}")
if lang in {
"ar", # Arabic
"he", # Hebrew,
"fa", # Persian
}:
if (
lang
in {
"ar", # Arabic
"he", # Hebrew,
"fa", # Persian
}
and pdf_file.name != "archive-fallback.pdf"
):
raise RtlLanguageException()
return stripped
except RtlLanguageException:
@@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):
return ocrmypdf_args
def parse(self, document_path, mime_type, file_name=None):
def parse(self, document_path: Path, mime_type, file_name=None):
# This forces tesseract to use one core per page.
os.environ["OMP_THREAD_LIMIT"] = "1"
@@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
import ocrmypdf
from ocrmypdf import InputFileError, EncryptedPdfError
archive_path = os.path.join(self.tempdir, "archive.pdf")
sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
args = self.construct_ocrmypdf_parameters(
document_path,
@@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
f"Attempting force OCR to get the text.",
)
archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf")
sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt")
archive_path_fallback = Path(
os.path.join(self.tempdir, "archive-fallback.pdf"),
)
sidecar_file_fallback = Path(
os.path.join(self.tempdir, "sidecar-fallback.txt"),
)
# Attempt to run OCR with safe settings.