mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into feature-permissions
This commit is contained in:
@@ -797,6 +797,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -820,6 +822,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_empty_metadata(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -843,6 +847,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_invalid_form(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -857,6 +863,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_invalid_file(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
|
||||
"rb",
|
||||
@@ -870,6 +878,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_title(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -888,6 +899,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_correspondent(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
c = Correspondent.objects.create(name="test-corres")
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
@@ -907,6 +921,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_correspondent(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -921,6 +938,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_document_type(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
dt = DocumentType.objects.create(name="invoice")
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
@@ -940,6 +960,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_document_type(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@@ -954,6 +977,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_tags(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
t1 = Tag.objects.create(name="tag1")
|
||||
t2 = Tag.objects.create(name="tag2")
|
||||
with open(
|
||||
@@ -974,6 +1000,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_tags(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
t1 = Tag.objects.create(name="tag1")
|
||||
t2 = Tag.objects.create(name="tag2")
|
||||
with open(
|
||||
@@ -990,6 +1019,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_created(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
created = datetime.datetime(
|
||||
2022,
|
||||
5,
|
||||
@@ -3040,6 +3072,59 @@ class TestTasks(APITestCase):
|
||||
self.assertEqual(returned_task2["status"], celery.states.PENDING)
|
||||
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
|
||||
|
||||
def test_get_single_task_status(self):
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a valid task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- Single task data is returned
|
||||
"""
|
||||
|
||||
id1 = str(uuid.uuid4())
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=id1,
|
||||
task_file_name="task_one.pdf",
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
returned_task1 = response.data[0]
|
||||
|
||||
self.assertEqual(returned_task1["task_id"], task1.task_id)
|
||||
|
||||
def test_get_single_task_status_not_valid(self):
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a non-existent task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- No task data is returned
|
||||
"""
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(len(response.data), 0)
|
||||
|
||||
def test_acknowledge_tasks(self):
|
||||
"""
|
||||
GIVEN:
|
||||
|
@@ -660,7 +660,7 @@ class PostDocumentView(GenericAPIView):
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
consume_file.delay(
|
||||
async_task = consume_file.delay(
|
||||
temp_filename,
|
||||
override_filename=doc_name,
|
||||
override_title=title,
|
||||
@@ -672,7 +672,7 @@ class PostDocumentView(GenericAPIView):
|
||||
override_owner_id=owner_id,
|
||||
)
|
||||
|
||||
return Response("OK")
|
||||
return Response(async_task.id)
|
||||
|
||||
|
||||
class SelectionDataView(GenericAPIView):
|
||||
@@ -929,13 +929,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
serializer_class = TasksViewSerializer
|
||||
|
||||
queryset = (
|
||||
PaperlessTask.objects.filter(
|
||||
acknowledged=False,
|
||||
def get_queryset(self):
|
||||
queryset = (
|
||||
PaperlessTask.objects.filter(
|
||||
acknowledged=False,
|
||||
)
|
||||
.order_by("date_created")
|
||||
.reverse()
|
||||
)
|
||||
.order_by("date_created")
|
||||
.reverse()
|
||||
)
|
||||
task_id = self.request.query_params.get("task_id")
|
||||
if task_id is not None:
|
||||
queryset = PaperlessTask.objects.filter(task_id=task_id)
|
||||
return queryset
|
||||
|
||||
|
||||
class AcknowledgeTasksView(GenericAPIView):
|
||||
|
@@ -3,7 +3,7 @@ msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2022-11-09 21:50+0000\n"
|
||||
"PO-Revision-Date: 2022-11-09 23:11\n"
|
||||
"PO-Revision-Date: 2022-12-30 15:36\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: Portuguese\n"
|
||||
"Language: pt_PT\n"
|
||||
@@ -100,7 +100,7 @@ msgstr "tipos de documento"
|
||||
|
||||
#: documents/models.py:93
|
||||
msgid "path"
|
||||
msgstr ""
|
||||
msgstr "caminho"
|
||||
|
||||
#: documents/models.py:99 documents/models.py:127
|
||||
msgid "storage path"
|
||||
@@ -396,7 +396,7 @@ msgstr "regras de filtragem"
|
||||
|
||||
#: documents/models.py:536
|
||||
msgid "Task ID"
|
||||
msgstr ""
|
||||
msgstr "ID da tarefa"
|
||||
|
||||
#: documents/models.py:537
|
||||
msgid "Celery ID for the Task that was run"
|
||||
@@ -412,7 +412,7 @@ msgstr ""
|
||||
|
||||
#: documents/models.py:549 documents/models.py:556
|
||||
msgid "Task Name"
|
||||
msgstr ""
|
||||
msgstr "Nome da Tarefa"
|
||||
|
||||
#: documents/models.py:550
|
||||
msgid "Name of the file which the Task was run for"
|
||||
@@ -626,7 +626,7 @@ msgstr ""
|
||||
|
||||
#: paperless/settings.py:395
|
||||
msgid "Serbian"
|
||||
msgstr ""
|
||||
msgstr "Sérvio"
|
||||
|
||||
#: paperless/settings.py:396
|
||||
msgid "Swedish"
|
||||
@@ -634,11 +634,11 @@ msgstr "Sueco"
|
||||
|
||||
#: paperless/settings.py:397
|
||||
msgid "Turkish"
|
||||
msgstr ""
|
||||
msgstr "Turco"
|
||||
|
||||
#: paperless/settings.py:398
|
||||
msgid "Chinese Simplified"
|
||||
msgstr ""
|
||||
msgstr "Chinês Simplificado"
|
||||
|
||||
#: paperless/urls.py:161
|
||||
msgid "Paperless-ngx administration"
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from typing import Final
|
||||
from typing import Tuple
|
||||
|
||||
__version__: Final[Tuple[int, int, int]] = (1, 11, 0)
|
||||
__version__: Final[Tuple[int, int, int]] = (1, 11, 3)
|
||||
# Version string like X.Y.Z
|
||||
__full_version_str__: Final[str] = ".".join(map(str, __version__))
|
||||
# Version string like X.Y
|
||||
|
@@ -8,6 +8,8 @@ import requests
|
||||
from bleach import clean
|
||||
from bleach import linkify
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import is_naive
|
||||
from django.utils.timezone import make_aware
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from documents.parsers import ParseError
|
||||
@@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):
|
||||
|
||||
self.text += f"\n\n{strip_text(mail.text)}"
|
||||
|
||||
self.date = mail.date
|
||||
if is_naive(mail.date):
|
||||
self.date = make_aware(mail.date)
|
||||
else:
|
||||
self.date = mail.date
|
||||
|
||||
self.archive_path = self.generate_pdf(document_path)
|
||||
|
||||
def tika_parse(self, html: str):
|
||||
|
@@ -86,6 +86,7 @@ class MailRuleSerializer(serializers.ModelSerializer):
|
||||
"assign_document_type",
|
||||
"order",
|
||||
"attachment_type",
|
||||
"consumption_scope",
|
||||
]
|
||||
|
||||
def update(self, instance, validated_data):
|
||||
|
@@ -1,6 +1,8 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from documents.parsers import DocumentParser
|
||||
@@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
self.log("warning", f"Error while calculating DPI for image {image}: {e}")
|
||||
return None
|
||||
|
||||
def extract_text(self, sidecar_file, pdf_file):
|
||||
def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
|
||||
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
||||
# the whole text, so do not utilize it in that case
|
||||
if (
|
||||
@@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
self.log("debug", f"Detected language {lang}")
|
||||
|
||||
if lang in {
|
||||
"ar", # Arabic
|
||||
"he", # Hebrew,
|
||||
"fa", # Persian
|
||||
}:
|
||||
if (
|
||||
lang
|
||||
in {
|
||||
"ar", # Arabic
|
||||
"he", # Hebrew,
|
||||
"fa", # Persian
|
||||
}
|
||||
and pdf_file.name != "archive-fallback.pdf"
|
||||
):
|
||||
raise RtlLanguageException()
|
||||
return stripped
|
||||
except RtlLanguageException:
|
||||
@@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
return ocrmypdf_args
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
def parse(self, document_path: Path, mime_type, file_name=None):
|
||||
# This forces tesseract to use one core per page.
|
||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||
|
||||
@@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
import ocrmypdf
|
||||
from ocrmypdf import InputFileError, EncryptedPdfError
|
||||
|
||||
archive_path = os.path.join(self.tempdir, "archive.pdf")
|
||||
sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
|
||||
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
|
||||
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
|
||||
|
||||
args = self.construct_ocrmypdf_parameters(
|
||||
document_path,
|
||||
@@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
f"Attempting force OCR to get the text.",
|
||||
)
|
||||
|
||||
archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf")
|
||||
sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt")
|
||||
archive_path_fallback = Path(
|
||||
os.path.join(self.tempdir, "archive-fallback.pdf"),
|
||||
)
|
||||
sidecar_file_fallback = Path(
|
||||
os.path.join(self.tempdir, "sidecar-fallback.txt"),
|
||||
)
|
||||
|
||||
# Attempt to run OCR with safe settings.
|
||||
|
||||
|
Reference in New Issue
Block a user