diff --git a/Dockerfile b/Dockerfile index 11915937a..b616c70e7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -165,6 +165,7 @@ COPY [ \ "docker/docker-prepare.sh", \ "docker/paperless_cmd.sh", \ "docker/wait-for-redis.py", \ + "docker/env-from-file.sh", \ "docker/management_script.sh", \ "docker/flower-conditional.sh", \ "docker/install_management_commands.sh", \ @@ -184,6 +185,8 @@ RUN set -eux \ && chmod 755 /sbin/docker-prepare.sh \ && mv wait-for-redis.py /sbin/wait-for-redis.py \ && chmod 755 /sbin/wait-for-redis.py \ + && mv env-from-file.sh /sbin/env-from-file.sh \ + && chmod 755 /sbin/env-from-file.sh \ && mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \ && chmod 755 /usr/local/bin/paperless_cmd.sh \ && mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \ diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 00be59add..58e46bd01 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -2,37 +2,6 @@ set -e -# Adapted from: -# https://github.com/docker-library/postgres/blob/master/docker-entrypoint.sh -# usage: file_env VAR -# ie: file_env 'XYZ_DB_PASSWORD' will allow for "$XYZ_DB_PASSWORD_FILE" to -# fill in the value of "$XYZ_DB_PASSWORD" from a file, especially for Docker's -# secrets feature -file_env() { - local -r var="$1" - local -r fileVar="${var}_FILE" - - # Basic validation - if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then - echo >&2 "error: both $var and $fileVar are set (but are exclusive)" - exit 1 - fi - - # Only export var if the _FILE exists - if [ "${!fileVar:-}" ]; then - # And the file exists - if [[ -f ${!fileVar} ]]; then - echo "Setting ${var} from file" - val="$(< "${!fileVar}")" - export "$var"="$val" - else - echo "File ${!fileVar} doesn't exist" - exit 1 - fi - fi - -} - # Source: https://github.com/sameersbn/docker-gitlab/ map_uidgid() { local -r usermap_original_uid=$(id -u paperless) @@ -96,19 +65,11 @@ custom_container_init() { initialize() { # Setup environment from 
secrets before anything else - for env_var in \ - PAPERLESS_DBUSER \ - PAPERLESS_DBPASS \ - PAPERLESS_SECRET_KEY \ - PAPERLESS_AUTO_LOGIN_USERNAME \ - PAPERLESS_ADMIN_USER \ - PAPERLESS_ADMIN_MAIL \ - PAPERLESS_ADMIN_PASSWORD \ - PAPERLESS_REDIS; do - # Check for a version of this var with _FILE appended - # and convert the contents to the env var value - file_env ${env_var} - done + # Check for a version of this var with _FILE appended + # and convert the contents to the env var value + # Source it so export is persistent + # shellcheck disable=SC1091 + source /sbin/env-from-file.sh # Change the user and group IDs if needed map_uidgid diff --git a/docker/env-from-file.sh b/docker/env-from-file.sh new file mode 100644 index 000000000..71247f5f6 --- /dev/null +++ b/docker/env-from-file.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Scans the environment variables for those with the suffix _FILE +# When located, checks the file exists, and exports the contents +# of the file as the same name, minus the suffix +# This allows the use of Docker secrets or mounted files +# to fill in any of the settings configurable via environment +# variables + +set -eu + +for line in $(printenv) +do + # Extract the name of the environment variable + env_name=${line%%=*} + # Check if it ends in "_FILE" + if [[ ${env_name} == *_FILE ]]; then + # Extract the value of the environment variable + env_value=${line#*=} + + # Check the file exists + if [[ -f ${env_value} ]]; then + + # Trim off the _FILE suffix + non_file_env_name=${env_name%"_FILE"} + echo "Setting ${non_file_env_name} from file" + + # Reads the value from the file + val="$(< "${!env_name}")" + + # Sets the normal name to the read file contents + export "${non_file_env_name}"="${val}" + + else + echo "File ${env_value} doesn't exist" + exit 1 + fi + fi +done diff --git a/docker/management_script.sh b/docker/management_script.sh index 4e601f4a6..996435745 100755 --- a/docker/management_script.sh +++ b/docker/management_script.sh @@ -3,6 
+3,9 @@ set -e cd /usr/src/paperless/src/ +# This ensures environment is setup +# shellcheck disable=SC1091 +source /sbin/env-from-file.sh if [[ $(id -u) == 0 ]] ; then diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html index d8345fd81..aef47b605 100644 --- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html +++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html @@ -31,7 +31,7 @@ [editing]="true" [multiple]="true" [applyOnClose]="applyOnClose" - (open)="openTagsDropdown()" + (opened)="openTagsDropdown()" [(selectionModel)]="tagSelectionModel" (apply)="setTags($event)"> @@ -40,7 +40,7 @@ [items]="correspondents" [editing]="true" [applyOnClose]="applyOnClose" - (open)="openCorrespondentDropdown()" + (opened)="openCorrespondentDropdown()" [(selectionModel)]="correspondentSelectionModel" (apply)="setCorrespondents($event)"> @@ -49,7 +49,7 @@ [items]="documentTypes" [editing]="true" [applyOnClose]="applyOnClose" - (open)="openDocumentTypeDropdown()" + (opened)="openDocumentTypeDropdown()" [(selectionModel)]="documentTypeSelectionModel" (apply)="setDocumentTypes($event)"> @@ -58,7 +58,7 @@ [items]="storagePaths" [editing]="true" [applyOnClose]="applyOnClose" - (open)="openStoragePathDropdown()" + (opened)="openStoragePathDropdown()" [(selectionModel)]="storagePathsSelectionModel" (apply)="setStoragePaths($event)"> diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html index 0a6b95939..3b78d8445 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html @@ -30,27 +30,27 @@ [(selectionModel)]="tagSelectionModel" 
(selectionModelChange)="updateRules()" [multiple]="true" - (open)="onTagsDropdownOpen()" + (opened)="onTagsDropdownOpen()" [allowSelectNone]="true"> diff --git a/src-ui/src/environments/environment.prod.ts b/src-ui/src/environments/environment.prod.ts index ca9bb4400..22af10820 100644 --- a/src-ui/src/environments/environment.prod.ts +++ b/src-ui/src/environments/environment.prod.ts @@ -5,7 +5,7 @@ export const environment = { apiBaseUrl: document.baseURI + 'api/', apiVersion: '2', appTitle: 'Paperless-ngx', - version: '1.11.2', + version: '1.11.2-dev', webSocketHost: window.location.host, webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:', webSocketBaseUrl: base_url.pathname + 'ws/', diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 0f890249c..55a176247 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -793,6 +793,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload(self, m): + m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -816,6 +818,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_empty_metadata(self, m): + m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -839,6 +843,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_invalid_form(self, m): + m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -853,6 +859,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_invalid_file(self, 
m): + m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb", @@ -866,6 +874,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_title(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -884,6 +895,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_correspondent(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + c = Correspondent.objects.create(name="test-corres") with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), @@ -903,6 +917,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_invalid_correspondent(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -917,6 +934,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_document_type(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + dt = DocumentType.objects.create(name="invoice") with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), @@ -936,6 +956,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_invalid_document_type(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + with open( os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb", @@ -950,6 +973,9 @@ class 
TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_tags(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") with open( @@ -970,6 +996,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_invalid_tags(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") with open( @@ -986,6 +1015,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): @mock.patch("documents.views.consume_file.delay") def test_upload_with_created(self, async_task): + + async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4())) + created = datetime.datetime( 2022, 5, @@ -2948,6 +2980,59 @@ class TestTasks(APITestCase): self.assertEqual(returned_task2["status"], celery.states.PENDING) self.assertEqual(returned_task2["task_file_name"], task2.task_file_name) + def test_get_single_task_status(self): + """ + GIVEN + - Query parameter for a valid task ID + WHEN: + - API call is made to get task status + THEN: + - Single task data is returned + """ + + id1 = str(uuid.uuid4()) + task1 = PaperlessTask.objects.create( + task_id=id1, + task_file_name="task_one.pdf", + ) + + _ = PaperlessTask.objects.create( + task_id=str(uuid.uuid4()), + task_file_name="task_two.pdf", + ) + + response = self.client.get(self.ENDPOINT + f"?task_id={id1}") + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(response.data), 1) + returned_task1 = response.data[0] + + self.assertEqual(returned_task1["task_id"], task1.task_id) + + def test_get_single_task_status_not_valid(self): + """ + GIVEN + - Query parameter for a non-existent task ID + WHEN: + - API call is made to get task status + THEN: + - No task data is 
returned + """ + task1 = PaperlessTask.objects.create( + task_id=str(uuid.uuid4()), + task_file_name="task_one.pdf", + ) + + _ = PaperlessTask.objects.create( + task_id=str(uuid.uuid4()), + task_file_name="task_two.pdf", + ) + + response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id") + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(response.data), 0) + def test_acknowledge_tasks(self): """ GIVEN: diff --git a/src/documents/views.py b/src/documents/views.py index 7ff6e90d9..e313ae17e 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -617,7 +617,7 @@ class PostDocumentView(GenericAPIView): task_id = str(uuid.uuid4()) - consume_file.delay( + async_task = consume_file.delay( temp_filename, override_filename=doc_name, override_title=title, @@ -628,7 +628,7 @@ class PostDocumentView(GenericAPIView): override_created=created, ) - return Response("OK") + return Response(async_task.id) class SelectionDataView(GenericAPIView): @@ -886,13 +886,18 @@ class TasksViewSet(ReadOnlyModelViewSet): permission_classes = (IsAuthenticated,) serializer_class = TasksViewSerializer - queryset = ( - PaperlessTask.objects.filter( - acknowledged=False, + def get_queryset(self): + queryset = ( + PaperlessTask.objects.filter( + acknowledged=False, + ) + .order_by("date_created") + .reverse() ) - .order_by("date_created") - .reverse() - ) + task_id = self.request.query_params.get("task_id") + if task_id is not None: + queryset = PaperlessTask.objects.filter(task_id=task_id) + return queryset class AcknowledgeTasksView(GenericAPIView): diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index d50217f2e..cc5d4e3c8 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -8,6 +8,8 @@ import requests from bleach import clean from bleach import linkify from django.conf import settings +from django.utils.timezone import is_naive +from django.utils.timezone import make_aware from documents.parsers import 
DocumentParser from documents.parsers import make_thumbnail_from_pdf from documents.parsers import ParseError @@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser): self.text += f"\n\n{strip_text(mail.text)}" - self.date = mail.date + if is_naive(mail.date): + self.date = make_aware(mail.date) + else: + self.date = mail.date + self.archive_path = self.generate_pdf(document_path) def tika_parse(self, html: str): diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 4cc9b8e5f..4107cace8 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -1,6 +1,8 @@ import json import os import re +from pathlib import Path +from typing import Optional from django.conf import settings from documents.parsers import DocumentParser @@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser): self.log("warning", f"Error while calculating DPI for image {image}: {e}") return None - def extract_text(self, sidecar_file, pdf_file): + def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path): # When re-doing OCR, the sidecar contains ONLY the new text, not # the whole text, so do not utilize it in that case if ( @@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser): self.log("debug", f"Detected language {lang}") - if lang in { - "ar", # Arabic - "he", # Hebrew, - "fa", # Persian - }: + if ( + lang + in { + "ar", # Arabic + "he", # Hebrew, + "fa", # Persian + } + and pdf_file.name != "archive-fallback.pdf" + ): raise RtlLanguageException() return stripped except RtlLanguageException: @@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser): return ocrmypdf_args - def parse(self, document_path, mime_type, file_name=None): + def parse(self, document_path: Path, mime_type, file_name=None): # This forces tesseract to use one core per page. 
os.environ["OMP_THREAD_LIMIT"] = "1" @@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser): import ocrmypdf from ocrmypdf import InputFileError, EncryptedPdfError - archive_path = os.path.join(self.tempdir, "archive.pdf") - sidecar_file = os.path.join(self.tempdir, "sidecar.txt") + archive_path = Path(os.path.join(self.tempdir, "archive.pdf")) + sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt")) args = self.construct_ocrmypdf_parameters( document_path, @@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser): f"Attempting force OCR to get the text.", ) - archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf") - sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt") + archive_path_fallback = Path( + os.path.join(self.tempdir, "archive-fallback.pdf"), + ) + sidecar_file_fallback = Path( + os.path.join(self.tempdir, "sidecar-fallback.txt"), + ) # Attempt to run OCR with safe settings.