mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'dev' into feature-permissions
This commit is contained in:
		| @@ -797,6 +797,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload(self, m): | ||||
|  | ||||
|         m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -820,6 +822,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_empty_metadata(self, m): | ||||
|  | ||||
|         m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -843,6 +847,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_invalid_form(self, m): | ||||
|  | ||||
|         m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -857,6 +863,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_invalid_file(self, m): | ||||
|  | ||||
|         m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), | ||||
|             "rb", | ||||
| @@ -870,6 +878,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_title(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -888,6 +899,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_correspondent(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         c = Correspondent.objects.create(name="test-corres") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
| @@ -907,6 +921,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_invalid_correspondent(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -921,6 +938,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_document_type(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         dt = DocumentType.objects.create(name="invoice") | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
| @@ -940,6 +960,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_invalid_document_type(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         with open( | ||||
|             os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), | ||||
|             "rb", | ||||
| @@ -954,6 +977,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_tags(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         t1 = Tag.objects.create(name="tag1") | ||||
|         t2 = Tag.objects.create(name="tag2") | ||||
|         with open( | ||||
| @@ -974,6 +1000,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_invalid_tags(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         t1 = Tag.objects.create(name="tag1") | ||||
|         t2 = Tag.objects.create(name="tag2") | ||||
|         with open( | ||||
| @@ -990,6 +1019,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     @mock.patch("documents.views.consume_file.delay") | ||||
|     def test_upload_with_created(self, async_task): | ||||
|  | ||||
|         async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) | ||||
|  | ||||
|         created = datetime.datetime( | ||||
|             2022, | ||||
|             5, | ||||
| @@ -3040,6 +3072,59 @@ class TestTasks(APITestCase): | ||||
|         self.assertEqual(returned_task2["status"], celery.states.PENDING) | ||||
|         self.assertEqual(returned_task2["task_file_name"], task2.task_file_name) | ||||
|  | ||||
|     def test_get_single_task_status(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - Two stored tasks | ||||
|             - Query parameter for a valid task ID | ||||
|         WHEN: | ||||
|             - API call is made to get task status | ||||
|         THEN: | ||||
|             - Single task data is returned | ||||
|         """ | ||||
|  | ||||
|         # Keep the generated ID so it can be used in the query string below. | ||||
|         id1 = str(uuid.uuid4()) | ||||
|         task1 = PaperlessTask.objects.create( | ||||
|             task_id=id1, | ||||
|             task_file_name="task_one.pdf", | ||||
|         ) | ||||
|  | ||||
|         # Second task with a different ID; it must not appear in the response. | ||||
|         _ = PaperlessTask.objects.create( | ||||
|             task_id=str(uuid.uuid4()), | ||||
|             task_file_name="task_two.pdf", | ||||
|         ) | ||||
|  | ||||
|         # Ask the endpoint for the first task only via the task_id parameter. | ||||
|         response = self.client.get(self.ENDPOINT + f"?task_id={id1}") | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         # Exactly one entry is expected, and it must be the requested task. | ||||
|         self.assertEqual(len(response.data), 1) | ||||
|         returned_task1 = response.data[0] | ||||
|  | ||||
|         self.assertEqual(returned_task1["task_id"], task1.task_id) | ||||
|  | ||||
|     def test_get_single_task_status_not_valid(self): | ||||
|         """ | ||||
|         GIVEN | ||||
|             - Query parameter for a non-existent task ID | ||||
|         WHEN: | ||||
|             - API call is made to get task status | ||||
|         THEN: | ||||
|             - No task data is returned | ||||
|         """ | ||||
|         task1 = PaperlessTask.objects.create( | ||||
|             task_id=str(uuid.uuid4()), | ||||
|             task_file_name="task_one.pdf", | ||||
|         ) | ||||
|  | ||||
|         _ = PaperlessTask.objects.create( | ||||
|             task_id=str(uuid.uuid4()), | ||||
|             task_file_name="task_two.pdf", | ||||
|         ) | ||||
|  | ||||
|         response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id") | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|         self.assertEqual(len(response.data), 0) | ||||
|  | ||||
|     def test_acknowledge_tasks(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|   | ||||
| @@ -660,7 +660,7 @@ class PostDocumentView(GenericAPIView): | ||||
|  | ||||
|         task_id = str(uuid.uuid4()) | ||||
|  | ||||
|         consume_file.delay( | ||||
|         async_task = consume_file.delay( | ||||
|             temp_filename, | ||||
|             override_filename=doc_name, | ||||
|             override_title=title, | ||||
| @@ -672,7 +672,7 @@ class PostDocumentView(GenericAPIView): | ||||
|             override_owner_id=owner_id, | ||||
|         ) | ||||
|  | ||||
|         return Response("OK") | ||||
|         return Response(async_task.id) | ||||
|  | ||||
|  | ||||
| class SelectionDataView(GenericAPIView): | ||||
| @@ -929,13 +929,18 @@ class TasksViewSet(ReadOnlyModelViewSet): | ||||
|     permission_classes = (IsAuthenticated,) | ||||
|     serializer_class = TasksViewSerializer | ||||
|  | ||||
|     queryset = ( | ||||
|         PaperlessTask.objects.filter( | ||||
|             acknowledged=False, | ||||
|     def get_queryset(self): | ||||
|         queryset = ( | ||||
|             PaperlessTask.objects.filter( | ||||
|                 acknowledged=False, | ||||
|             ) | ||||
|             .order_by("date_created") | ||||
|             .reverse() | ||||
|         ) | ||||
|         .order_by("date_created") | ||||
|         .reverse() | ||||
|     ) | ||||
|         task_id = self.request.query_params.get("task_id") | ||||
|         if task_id is not None: | ||||
|             queryset = PaperlessTask.objects.filter(task_id=task_id) | ||||
|         return queryset | ||||
|  | ||||
|  | ||||
| class AcknowledgeTasksView(GenericAPIView): | ||||
|   | ||||
| @@ -3,7 +3,7 @@ msgstr "" | ||||
| "Project-Id-Version: paperless-ngx\n" | ||||
| "Report-Msgid-Bugs-To: \n" | ||||
| "POT-Creation-Date: 2022-11-09 21:50+0000\n" | ||||
| "PO-Revision-Date: 2022-11-09 23:11\n" | ||||
| "PO-Revision-Date: 2022-12-30 15:36\n" | ||||
| "Last-Translator: \n" | ||||
| "Language-Team: Portuguese\n" | ||||
| "Language: pt_PT\n" | ||||
| @@ -100,7 +100,7 @@ msgstr "tipos de documento" | ||||
|  | ||||
| #: documents/models.py:93 | ||||
| msgid "path" | ||||
| msgstr "" | ||||
| msgstr "caminho" | ||||
|  | ||||
| #: documents/models.py:99 documents/models.py:127 | ||||
| msgid "storage path" | ||||
| @@ -396,7 +396,7 @@ msgstr "regras de filtragem" | ||||
|  | ||||
| #: documents/models.py:536 | ||||
| msgid "Task ID" | ||||
| msgstr "" | ||||
| msgstr "ID da tarefa" | ||||
|  | ||||
| #: documents/models.py:537 | ||||
| msgid "Celery ID for the Task that was run" | ||||
| @@ -412,7 +412,7 @@ msgstr "" | ||||
|  | ||||
| #: documents/models.py:549 documents/models.py:556 | ||||
| msgid "Task Name" | ||||
| msgstr "" | ||||
| msgstr "Nome da Tarefa" | ||||
|  | ||||
| #: documents/models.py:550 | ||||
| msgid "Name of the file which the Task was run for" | ||||
| @@ -626,7 +626,7 @@ msgstr "" | ||||
|  | ||||
| #: paperless/settings.py:395 | ||||
| msgid "Serbian" | ||||
| msgstr "" | ||||
| msgstr "Sérvio" | ||||
|  | ||||
| #: paperless/settings.py:396 | ||||
| msgid "Swedish" | ||||
| @@ -634,11 +634,11 @@ msgstr "Sueco" | ||||
|  | ||||
| #: paperless/settings.py:397 | ||||
| msgid "Turkish" | ||||
| msgstr "" | ||||
| msgstr "Turco" | ||||
|  | ||||
| #: paperless/settings.py:398 | ||||
| msgid "Chinese Simplified" | ||||
| msgstr "" | ||||
| msgstr "Chinês Simplificado" | ||||
|  | ||||
| #: paperless/urls.py:161 | ||||
| msgid "Paperless-ngx administration" | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from typing import Final | ||||
| from typing import Tuple | ||||
|  | ||||
| __version__: Final[Tuple[int, int, int]] = (1, 11, 0) | ||||
| __version__: Final[Tuple[int, int, int]] = (1, 11, 3) | ||||
| # Version string like X.Y.Z | ||||
| __full_version_str__: Final[str] = ".".join(map(str, __version__)) | ||||
| # Version string like X.Y | ||||
|   | ||||
| @@ -8,6 +8,8 @@ import requests | ||||
| from bleach import clean | ||||
| from bleach import linkify | ||||
| from django.conf import settings | ||||
| from django.utils.timezone import is_naive | ||||
| from django.utils.timezone import make_aware | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import make_thumbnail_from_pdf | ||||
| from documents.parsers import ParseError | ||||
| @@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser): | ||||
|  | ||||
|         self.text += f"\n\n{strip_text(mail.text)}" | ||||
|  | ||||
|         self.date = mail.date | ||||
|         if is_naive(mail.date): | ||||
|             self.date = make_aware(mail.date) | ||||
|         else: | ||||
|             self.date = mail.date | ||||
|  | ||||
|         self.archive_path = self.generate_pdf(document_path) | ||||
|  | ||||
|     def tika_parse(self, html: str): | ||||
|   | ||||
| @@ -86,6 +86,7 @@ class MailRuleSerializer(serializers.ModelSerializer): | ||||
|             "assign_document_type", | ||||
|             "order", | ||||
|             "attachment_type", | ||||
|             "consumption_scope", | ||||
|         ] | ||||
|  | ||||
|     def update(self, instance, validated_data): | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from typing import Optional | ||||
|  | ||||
| from django.conf import settings | ||||
| from documents.parsers import DocumentParser | ||||
| @@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|             self.log("warning", f"Error while calculating DPI for image {image}: {e}") | ||||
|             return None | ||||
|  | ||||
|     def extract_text(self, sidecar_file, pdf_file): | ||||
|     def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path): | ||||
|         # When re-doing OCR, the sidecar contains ONLY the new text, not | ||||
|         # the whole text, so do not utilize it in that case | ||||
|         if ( | ||||
| @@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|  | ||||
|             self.log("debug", f"Detected language {lang}") | ||||
|  | ||||
|             if lang in { | ||||
|                 "ar",  # Arabic | ||||
|                 "he",  # Hebrew, | ||||
|                 "fa",  # Persian | ||||
|             }: | ||||
|             if ( | ||||
|                 lang | ||||
|                 in { | ||||
|                     "ar",  # Arabic | ||||
|                     "he",  # Hebrew, | ||||
|                     "fa",  # Persian | ||||
|                 } | ||||
|                 and pdf_file.name != "archive-fallback.pdf" | ||||
|             ): | ||||
|                 raise RtlLanguageException() | ||||
|             return stripped | ||||
|         except RtlLanguageException: | ||||
| @@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|  | ||||
|         return ocrmypdf_args | ||||
|  | ||||
|     def parse(self, document_path, mime_type, file_name=None): | ||||
|     def parse(self, document_path: Path, mime_type, file_name=None): | ||||
|         # This forces tesseract to use one core per page. | ||||
|         os.environ["OMP_THREAD_LIMIT"] = "1" | ||||
|  | ||||
| @@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|         import ocrmypdf | ||||
|         from ocrmypdf import InputFileError, EncryptedPdfError | ||||
|  | ||||
|         archive_path = os.path.join(self.tempdir, "archive.pdf") | ||||
|         sidecar_file = os.path.join(self.tempdir, "sidecar.txt") | ||||
|         archive_path = Path(os.path.join(self.tempdir, "archive.pdf")) | ||||
|         sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt")) | ||||
|  | ||||
|         args = self.construct_ocrmypdf_parameters( | ||||
|             document_path, | ||||
| @@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                 f"Attempting force OCR to get the text.", | ||||
|             ) | ||||
|  | ||||
|             archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf") | ||||
|             sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt") | ||||
|             archive_path_fallback = Path( | ||||
|                 os.path.join(self.tempdir, "archive-fallback.pdf"), | ||||
|             ) | ||||
|             sidecar_file_fallback = Path( | ||||
|                 os.path.join(self.tempdir, "sidecar-fallback.txt"), | ||||
|             ) | ||||
|  | ||||
|             # Attempt to run OCR with safe settings. | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Michael Shamoon
					Michael Shamoon