diff --git a/Dockerfile b/Dockerfile
index 11915937a..b616c70e7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -165,6 +165,7 @@ COPY [ \
"docker/docker-prepare.sh", \
"docker/paperless_cmd.sh", \
"docker/wait-for-redis.py", \
+ "docker/env-from-file.sh", \
"docker/management_script.sh", \
"docker/flower-conditional.sh", \
"docker/install_management_commands.sh", \
@@ -184,6 +185,8 @@ RUN set -eux \
&& chmod 755 /sbin/docker-prepare.sh \
&& mv wait-for-redis.py /sbin/wait-for-redis.py \
&& chmod 755 /sbin/wait-for-redis.py \
+ && mv env-from-file.sh /sbin/env-from-file.sh \
+ && chmod 755 /sbin/env-from-file.sh \
&& mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \
&& chmod 755 /usr/local/bin/paperless_cmd.sh \
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \
diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh
index 00be59add..58e46bd01 100755
--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -2,37 +2,6 @@
set -e
-# Adapted from:
-# https://github.com/docker-library/postgres/blob/master/docker-entrypoint.sh
-# usage: file_env VAR
-# ie: file_env 'XYZ_DB_PASSWORD' will allow for "$XYZ_DB_PASSWORD_FILE" to
-# fill in the value of "$XYZ_DB_PASSWORD" from a file, especially for Docker's
-# secrets feature
-file_env() {
- local -r var="$1"
- local -r fileVar="${var}_FILE"
-
- # Basic validation
- if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then
- echo >&2 "error: both $var and $fileVar are set (but are exclusive)"
- exit 1
- fi
-
- # Only export var if the _FILE exists
- if [ "${!fileVar:-}" ]; then
- # And the file exists
- if [[ -f ${!fileVar} ]]; then
- echo "Setting ${var} from file"
- val="$(< "${!fileVar}")"
- export "$var"="$val"
- else
- echo "File ${!fileVar} doesn't exist"
- exit 1
- fi
- fi
-
-}
-
# Source: https://github.com/sameersbn/docker-gitlab/
map_uidgid() {
local -r usermap_original_uid=$(id -u paperless)
@@ -96,19 +65,11 @@ custom_container_init() {
initialize() {
# Setup environment from secrets before anything else
- for env_var in \
- PAPERLESS_DBUSER \
- PAPERLESS_DBPASS \
- PAPERLESS_SECRET_KEY \
- PAPERLESS_AUTO_LOGIN_USERNAME \
- PAPERLESS_ADMIN_USER \
- PAPERLESS_ADMIN_MAIL \
- PAPERLESS_ADMIN_PASSWORD \
- PAPERLESS_REDIS; do
- # Check for a version of this var with _FILE appended
- # and convert the contents to the env var value
- file_env ${env_var}
- done
+ # For each environment variable named *_FILE, read the file it points to
+ # and export the contents under the same name without the _FILE suffix
+ # Source it so export is persistent
+ # shellcheck disable=SC1091
+ source /sbin/env-from-file.sh
# Change the user and group IDs if needed
map_uidgid
diff --git a/docker/env-from-file.sh b/docker/env-from-file.sh
new file mode 100644
index 000000000..71247f5f6
--- /dev/null
+++ b/docker/env-from-file.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Meant to be sourced. For every exported variable whose name ends in
+# _FILE, reads the file that variable points to and exports its contents
+# under the same name minus the suffix (e.g. FOO_FILE=/path sets FOO).
+# This allows the use of Docker secrets or mounted files to fill in any
+# of the settings configurable via environment variables.
+# Exits with status 1 if a *_FILE variable does not name a regular file.
+
+# NOTE: when sourced, these options (and the exit below) affect the caller
+set -eu
+
+# Walk exported variable *names* (compgen -e) rather than word-splitting
+# the output of printenv: values containing spaces, newlines or glob
+# characters would otherwise be broken into bogus NAME=value entries.
+while IFS= read -r env_name; do
+  # Only variables ending in "_FILE" are of interest
+  if [[ ${env_name} != *_FILE ]]; then
+    continue
+  fi
+
+  # Path of the secret file: the value of the variable named by env_name
+  env_value=${!env_name}
+
+  # Check the file exists
+  if [[ ! -f ${env_value} ]]; then
+    echo "File ${env_value} doesn't exist" >&2
+    exit 1
+  fi
+
+  # Trim off the _FILE suffix
+  non_file_env_name=${env_name%"_FILE"}
+  echo "Setting ${non_file_env_name} from file"
+
+  # Read the file (command substitution drops trailing newlines) ...
+  val="$(< "${env_value}")"
+  # ... and set the normal name to the read file contents
+  export "${non_file_env_name}"="${val}"
+done < <(compgen -e)
diff --git a/docker/management_script.sh b/docker/management_script.sh
index 4e601f4a6..996435745 100755
--- a/docker/management_script.sh
+++ b/docker/management_script.sh
@@ -3,6 +3,9 @@
set -e
cd /usr/src/paperless/src/
+# This ensures the environment is set up (variables given as *_FILE are resolved)
+# shellcheck disable=SC1091
+source /sbin/env-from-file.sh
if [[ $(id -u) == 0 ]] ;
then
diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html
index d8345fd81..aef47b605 100644
--- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html
+++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.html
@@ -31,7 +31,7 @@
[editing]="true"
[multiple]="true"
[applyOnClose]="applyOnClose"
- (open)="openTagsDropdown()"
+ (opened)="openTagsDropdown()"
[(selectionModel)]="tagSelectionModel"
(apply)="setTags($event)">
@@ -40,7 +40,7 @@
[items]="correspondents"
[editing]="true"
[applyOnClose]="applyOnClose"
- (open)="openCorrespondentDropdown()"
+ (opened)="openCorrespondentDropdown()"
[(selectionModel)]="correspondentSelectionModel"
(apply)="setCorrespondents($event)">
@@ -49,7 +49,7 @@
[items]="documentTypes"
[editing]="true"
[applyOnClose]="applyOnClose"
- (open)="openDocumentTypeDropdown()"
+ (opened)="openDocumentTypeDropdown()"
[(selectionModel)]="documentTypeSelectionModel"
(apply)="setDocumentTypes($event)">
@@ -58,7 +58,7 @@
[items]="storagePaths"
[editing]="true"
[applyOnClose]="applyOnClose"
- (open)="openStoragePathDropdown()"
+ (opened)="openStoragePathDropdown()"
[(selectionModel)]="storagePathsSelectionModel"
(apply)="setStoragePaths($event)">
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
index 0a6b95939..3b78d8445 100644
--- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
+++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.html
@@ -30,27 +30,27 @@
[(selectionModel)]="tagSelectionModel"
(selectionModelChange)="updateRules()"
[multiple]="true"
- (open)="onTagsDropdownOpen()"
+ (opened)="onTagsDropdownOpen()"
[allowSelectNone]="true">
diff --git a/src-ui/src/environments/environment.prod.ts b/src-ui/src/environments/environment.prod.ts
index ca9bb4400..22af10820 100644
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -5,7 +5,7 @@ export const environment = {
apiBaseUrl: document.baseURI + 'api/',
apiVersion: '2',
appTitle: 'Paperless-ngx',
- version: '1.11.2',
+ version: '1.11.2-dev',
webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/',
diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py
index 0f890249c..55a176247 100644
--- a/src/documents/tests/test_api.py
+++ b/src/documents/tests/test_api.py
@@ -793,6 +793,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload(self, m):
+ m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -816,6 +818,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m):
+ m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -839,6 +843,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m):
+ m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -853,6 +859,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m):
+ m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
@@ -866,6 +874,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -884,6 +895,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
c = Correspondent.objects.create(name="test-corres")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -903,6 +917,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -917,6 +934,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
dt = DocumentType.objects.create(name="invoice")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -936,6 +956,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
@@ -950,6 +973,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
@@ -970,6 +996,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
@@ -986,6 +1015,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task):
+
+ async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
+
created = datetime.datetime(
2022,
5,
@@ -2948,6 +2980,59 @@ class TestTasks(APITestCase):
self.assertEqual(returned_task2["status"], celery.states.PENDING)
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
+ def test_get_single_task_status(self):
+ """
+ GIVEN
+ - Query parameter for a valid task ID
+ WHEN:
+ - API call is made to get task status
+ THEN:
+ - Single task data is returned
+ """
+
+ id1 = str(uuid.uuid4())
+ task1 = PaperlessTask.objects.create(
+ task_id=id1,
+ task_file_name="task_one.pdf",
+ )
+
+ _ = PaperlessTask.objects.create(
+ task_id=str(uuid.uuid4()),
+ task_file_name="task_two.pdf",
+ )
+
+ response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
+
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(len(response.data), 1)
+ returned_task1 = response.data[0]
+
+ self.assertEqual(returned_task1["task_id"], task1.task_id)
+
+ def test_get_single_task_status_not_valid(self):
+ """
+ GIVEN
+ - Query parameter for a non-existent task ID
+ WHEN:
+ - API call is made to get task status
+ THEN:
+ - No task data is returned
+ """
+ task1 = PaperlessTask.objects.create(
+ task_id=str(uuid.uuid4()),
+ task_file_name="task_one.pdf",
+ )
+
+ _ = PaperlessTask.objects.create(
+ task_id=str(uuid.uuid4()),
+ task_file_name="task_two.pdf",
+ )
+
+ response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
+
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(len(response.data), 0)
+
def test_acknowledge_tasks(self):
"""
GIVEN:
diff --git a/src/documents/views.py b/src/documents/views.py
index 7ff6e90d9..e313ae17e 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -617,7 +617,7 @@ class PostDocumentView(GenericAPIView):
task_id = str(uuid.uuid4())
- consume_file.delay(
+ async_task = consume_file.delay(
temp_filename,
override_filename=doc_name,
override_title=title,
@@ -628,7 +628,7 @@ class PostDocumentView(GenericAPIView):
override_created=created,
)
- return Response("OK")
+ return Response(async_task.id)
class SelectionDataView(GenericAPIView):
@@ -886,13 +886,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
permission_classes = (IsAuthenticated,)
serializer_class = TasksViewSerializer
- queryset = (
- PaperlessTask.objects.filter(
- acknowledged=False,
+ def get_queryset(self):
+ queryset = (
+ PaperlessTask.objects.filter(
+ acknowledged=False,
+ )
+ .order_by("date_created")
+ .reverse()
)
- .order_by("date_created")
- .reverse()
- )
+ task_id = self.request.query_params.get("task_id")
+ if task_id is not None:
+ queryset = PaperlessTask.objects.filter(task_id=task_id)
+ return queryset
class AcknowledgeTasksView(GenericAPIView):
diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py
index d50217f2e..cc5d4e3c8 100644
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -8,6 +8,8 @@ import requests
from bleach import clean
from bleach import linkify
from django.conf import settings
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
@@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):
self.text += f"\n\n{strip_text(mail.text)}"
- self.date = mail.date
+ if is_naive(mail.date):
+ self.date = make_aware(mail.date)
+ else:
+ self.date = mail.date
+
self.archive_path = self.generate_pdf(document_path)
def tika_parse(self, html: str):
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 4cc9b8e5f..4107cace8 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -1,6 +1,8 @@
import json
import os
import re
+from pathlib import Path
+from typing import Optional
from django.conf import settings
from documents.parsers import DocumentParser
@@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
self.log("warning", f"Error while calculating DPI for image {image}: {e}")
return None
- def extract_text(self, sidecar_file, pdf_file):
+ def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
# When re-doing OCR, the sidecar contains ONLY the new text, not
# the whole text, so do not utilize it in that case
if (
@@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):
self.log("debug", f"Detected language {lang}")
- if lang in {
- "ar", # Arabic
- "he", # Hebrew,
- "fa", # Persian
- }:
+ if (
+ lang
+ in {
+ "ar", # Arabic
+ "he", # Hebrew,
+ "fa", # Persian
+ }
+ and pdf_file.name != "archive-fallback.pdf"
+ ):
raise RtlLanguageException()
return stripped
except RtlLanguageException:
@@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):
return ocrmypdf_args
- def parse(self, document_path, mime_type, file_name=None):
+ def parse(self, document_path: Path, mime_type, file_name=None):
# This forces tesseract to use one core per page.
os.environ["OMP_THREAD_LIMIT"] = "1"
@@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
import ocrmypdf
from ocrmypdf import InputFileError, EncryptedPdfError
- archive_path = os.path.join(self.tempdir, "archive.pdf")
- sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
+ archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
+ sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
args = self.construct_ocrmypdf_parameters(
document_path,
@@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
f"Attempting force OCR to get the text.",
)
- archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf")
- sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt")
+ archive_path_fallback = Path(
+ os.path.join(self.tempdir, "archive-fallback.pdf"),
+ )
+ sidecar_file_fallback = Path(
+ os.path.join(self.tempdir, "sidecar-fallback.txt"),
+ )
# Attempt to run OCR with safe settings.