mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'dev'
This commit is contained in:
commit
729f25c435
@ -165,6 +165,7 @@ COPY [ \
|
||||
"docker/docker-prepare.sh", \
|
||||
"docker/paperless_cmd.sh", \
|
||||
"docker/wait-for-redis.py", \
|
||||
"docker/env-from-file.sh", \
|
||||
"docker/management_script.sh", \
|
||||
"docker/flower-conditional.sh", \
|
||||
"docker/install_management_commands.sh", \
|
||||
@ -184,6 +185,8 @@ RUN set -eux \
|
||||
&& chmod 755 /sbin/docker-prepare.sh \
|
||||
&& mv wait-for-redis.py /sbin/wait-for-redis.py \
|
||||
&& chmod 755 /sbin/wait-for-redis.py \
|
||||
&& mv env-from-file.sh /sbin/env-from-file.sh \
|
||||
&& chmod 755 /sbin/env-from-file.sh \
|
||||
&& mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \
|
||||
&& chmod 755 /usr/local/bin/paperless_cmd.sh \
|
||||
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \
|
||||
|
@ -2,37 +2,6 @@
|
||||
|
||||
set -e
|
||||
|
||||
# Adapted from:
|
||||
# https://github.com/docker-library/postgres/blob/master/docker-entrypoint.sh
|
||||
# usage: file_env VAR
|
||||
# ie: file_env 'XYZ_DB_PASSWORD' will allow for "$XYZ_DB_PASSWORD_FILE" to
|
||||
# fill in the value of "$XYZ_DB_PASSWORD" from a file, especially for Docker's
|
||||
# secrets feature
|
||||
file_env() {
|
||||
local -r var="$1"
|
||||
local -r fileVar="${var}_FILE"
|
||||
|
||||
# Basic validation
|
||||
if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then
|
||||
echo >&2 "error: both $var and $fileVar are set (but are exclusive)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Only export var if the _FILE exists
|
||||
if [ "${!fileVar:-}" ]; then
|
||||
# And the file exists
|
||||
if [[ -f ${!fileVar} ]]; then
|
||||
echo "Setting ${var} from file"
|
||||
val="$(< "${!fileVar}")"
|
||||
export "$var"="$val"
|
||||
else
|
||||
echo "File ${!fileVar} doesn't exist"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
# Source: https://github.com/sameersbn/docker-gitlab/
|
||||
map_uidgid() {
|
||||
local -r usermap_original_uid=$(id -u paperless)
|
||||
@ -96,19 +65,11 @@ custom_container_init() {
|
||||
initialize() {
|
||||
|
||||
# Setup environment from secrets before anything else
|
||||
for env_var in \
|
||||
PAPERLESS_DBUSER \
|
||||
PAPERLESS_DBPASS \
|
||||
PAPERLESS_SECRET_KEY \
|
||||
PAPERLESS_AUTO_LOGIN_USERNAME \
|
||||
PAPERLESS_ADMIN_USER \
|
||||
PAPERLESS_ADMIN_MAIL \
|
||||
PAPERLESS_ADMIN_PASSWORD \
|
||||
PAPERLESS_REDIS; do
|
||||
# Check for a version of this var with _FILE appended
|
||||
# and convert the contents to the env var value
|
||||
file_env ${env_var}
|
||||
done
|
||||
# Check for a version of this var with _FILE appended
|
||||
# and convert the contents to the env var value
|
||||
# Source it so export is persistent
|
||||
# shellcheck disable=SC1091
|
||||
source /sbin/env-from-file.sh
|
||||
|
||||
# Change the user and group IDs if needed
|
||||
map_uidgid
|
||||
|
39
docker/env-from-file.sh
Normal file
39
docker/env-from-file.sh
Normal file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Scans the environment variables for those with the suffix _FILE
|
||||
# When located, checks the file exists, and exports the contents
|
||||
# of the file as the same name, minus the suffix
|
||||
# This allows the use of Docker secrets or mounted files
|
||||
# to fill in any of the settings configurable via environment
|
||||
# variables
|
||||
|
||||
set -eu
|
||||
|
||||
for line in $(printenv)
|
||||
do
|
||||
# Extract the name of the environment variable
|
||||
env_name=${line%%=*}
|
||||
# Check if it ends in "_FILE"
|
||||
if [[ ${env_name} == *_FILE ]]; then
|
||||
# Extract the value of the environment
|
||||
env_value=${line#*=}
|
||||
|
||||
# Check the file exists
|
||||
if [[ -f ${env_value} ]]; then
|
||||
|
||||
# Trim off the _FILE suffix
|
||||
non_file_env_name=${env_name%"_FILE"}
|
||||
echo "Setting ${non_file_env_name} from file"
|
||||
|
||||
# Reads the value from the file
|
||||
val="$(< "${!env_name}")"
|
||||
|
||||
# Sets the normal name to the read file contents
|
||||
export "${non_file_env_name}"="${val}"
|
||||
|
||||
else
|
||||
echo "File ${env_value} doesn't exist"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
done
|
@ -3,6 +3,9 @@
|
||||
set -e
|
||||
|
||||
cd /usr/src/paperless/src/
|
||||
# This ensures the environment is set up
|
||||
# shellcheck disable=SC1091
|
||||
source /sbin/env-from-file.sh
|
||||
|
||||
if [[ $(id -u) == 0 ]] ;
|
||||
then
|
||||
|
@ -31,7 +31,7 @@
|
||||
[editing]="true"
|
||||
[multiple]="true"
|
||||
[applyOnClose]="applyOnClose"
|
||||
(open)="openTagsDropdown()"
|
||||
(opened)="openTagsDropdown()"
|
||||
[(selectionModel)]="tagSelectionModel"
|
||||
(apply)="setTags($event)">
|
||||
</app-filterable-dropdown>
|
||||
@ -40,7 +40,7 @@
|
||||
[items]="correspondents"
|
||||
[editing]="true"
|
||||
[applyOnClose]="applyOnClose"
|
||||
(open)="openCorrespondentDropdown()"
|
||||
(opened)="openCorrespondentDropdown()"
|
||||
[(selectionModel)]="correspondentSelectionModel"
|
||||
(apply)="setCorrespondents($event)">
|
||||
</app-filterable-dropdown>
|
||||
@ -49,7 +49,7 @@
|
||||
[items]="documentTypes"
|
||||
[editing]="true"
|
||||
[applyOnClose]="applyOnClose"
|
||||
(open)="openDocumentTypeDropdown()"
|
||||
(opened)="openDocumentTypeDropdown()"
|
||||
[(selectionModel)]="documentTypeSelectionModel"
|
||||
(apply)="setDocumentTypes($event)">
|
||||
</app-filterable-dropdown>
|
||||
@ -58,7 +58,7 @@
|
||||
[items]="storagePaths"
|
||||
[editing]="true"
|
||||
[applyOnClose]="applyOnClose"
|
||||
(open)="openStoragePathDropdown()"
|
||||
(opened)="openStoragePathDropdown()"
|
||||
[(selectionModel)]="storagePathsSelectionModel"
|
||||
(apply)="setStoragePaths($event)">
|
||||
</app-filterable-dropdown>
|
||||
|
@ -30,27 +30,27 @@
|
||||
[(selectionModel)]="tagSelectionModel"
|
||||
(selectionModelChange)="updateRules()"
|
||||
[multiple]="true"
|
||||
(open)="onTagsDropdownOpen()"
|
||||
(opened)="onTagsDropdownOpen()"
|
||||
[allowSelectNone]="true"></app-filterable-dropdown>
|
||||
<app-filterable-dropdown class="flex-fill" title="Correspondent" icon="person-fill" i18n-title
|
||||
filterPlaceholder="Filter correspondents" i18n-filterPlaceholder
|
||||
[items]="correspondents"
|
||||
[(selectionModel)]="correspondentSelectionModel"
|
||||
(selectionModelChange)="updateRules()"
|
||||
(open)="onCorrespondentDropdownOpen()"
|
||||
(opened)="onCorrespondentDropdownOpen()"
|
||||
[allowSelectNone]="true"></app-filterable-dropdown>
|
||||
<app-filterable-dropdown class="flex-fill" title="Document type" icon="file-earmark-fill" i18n-title
|
||||
filterPlaceholder="Filter document types" i18n-filterPlaceholder
|
||||
[items]="documentTypes"
|
||||
[(selectionModel)]="documentTypeSelectionModel"
|
||||
(open)="onDocumentTypeDropdownOpen()"
|
||||
(opened)="onDocumentTypeDropdownOpen()"
|
||||
(selectionModelChange)="updateRules()"
|
||||
[allowSelectNone]="true"></app-filterable-dropdown>
|
||||
<app-filterable-dropdown class="me-2 flex-fill" title="Storage path" icon="folder-fill" i18n-title
|
||||
filterPlaceholder="Filter storage paths" i18n-filterPlaceholder
|
||||
[items]="storagePaths"
|
||||
[(selectionModel)]="storagePathSelectionModel"
|
||||
(open)="onStoragePathDropdownOpen()"
|
||||
(opened)="onStoragePathDropdownOpen()"
|
||||
(selectionModelChange)="updateRules()"
|
||||
[allowSelectNone]="true"></app-filterable-dropdown>
|
||||
</div>
|
||||
|
@ -5,7 +5,7 @@ export const environment = {
|
||||
apiBaseUrl: document.baseURI + 'api/',
|
||||
apiVersion: '2',
|
||||
appTitle: 'Paperless-ngx',
|
||||
version: '1.11.2',
|
||||
version: '1.11.2-dev',
|
||||
webSocketHost: window.location.host,
|
||||
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
||||
webSocketBaseUrl: base_url.pathname + 'ws/',
|
||||
|
@ -793,6 +793,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -816,6 +818,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_empty_metadata(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -839,6 +843,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_invalid_form(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -853,6 +859,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_invalid_file(self, m):
|
||||
|
||||
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
|
||||
"rb",
|
||||
@ -866,6 +874,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_title(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -884,6 +895,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_correspondent(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
c = Correspondent.objects.create(name="test-corres")
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
@ -903,6 +917,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_correspondent(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -917,6 +934,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_document_type(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
dt = DocumentType.objects.create(name="invoice")
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
@ -936,6 +956,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_document_type(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
"rb",
|
||||
@ -950,6 +973,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_tags(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
t1 = Tag.objects.create(name="tag1")
|
||||
t2 = Tag.objects.create(name="tag2")
|
||||
with open(
|
||||
@ -970,6 +996,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_invalid_tags(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
t1 = Tag.objects.create(name="tag1")
|
||||
t2 = Tag.objects.create(name="tag2")
|
||||
with open(
|
||||
@ -986,6 +1015,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
@mock.patch("documents.views.consume_file.delay")
|
||||
def test_upload_with_created(self, async_task):
|
||||
|
||||
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
|
||||
|
||||
created = datetime.datetime(
|
||||
2022,
|
||||
5,
|
||||
@ -2948,6 +2980,59 @@ class TestTasks(APITestCase):
|
||||
self.assertEqual(returned_task2["status"], celery.states.PENDING)
|
||||
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
|
||||
|
||||
def test_get_single_task_status(self):
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a valid task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- Single task data is returned
|
||||
"""
|
||||
|
||||
id1 = str(uuid.uuid4())
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=id1,
|
||||
task_file_name="task_one.pdf",
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
returned_task1 = response.data[0]
|
||||
|
||||
self.assertEqual(returned_task1["task_id"], task1.task_id)
|
||||
|
||||
def test_get_single_task_status_not_valid(self):
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a non-existent task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- No task data is returned
|
||||
"""
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(len(response.data), 0)
|
||||
|
||||
def test_acknowledge_tasks(self):
|
||||
"""
|
||||
GIVEN:
|
||||
|
@ -617,7 +617,7 @@ class PostDocumentView(GenericAPIView):
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
consume_file.delay(
|
||||
async_task = consume_file.delay(
|
||||
temp_filename,
|
||||
override_filename=doc_name,
|
||||
override_title=title,
|
||||
@ -628,7 +628,7 @@ class PostDocumentView(GenericAPIView):
|
||||
override_created=created,
|
||||
)
|
||||
|
||||
return Response("OK")
|
||||
return Response(async_task.id)
|
||||
|
||||
|
||||
class SelectionDataView(GenericAPIView):
|
||||
@ -886,13 +886,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
serializer_class = TasksViewSerializer
|
||||
|
||||
queryset = (
|
||||
PaperlessTask.objects.filter(
|
||||
acknowledged=False,
|
||||
def get_queryset(self):
|
||||
queryset = (
|
||||
PaperlessTask.objects.filter(
|
||||
acknowledged=False,
|
||||
)
|
||||
.order_by("date_created")
|
||||
.reverse()
|
||||
)
|
||||
.order_by("date_created")
|
||||
.reverse()
|
||||
)
|
||||
task_id = self.request.query_params.get("task_id")
|
||||
if task_id is not None:
|
||||
queryset = PaperlessTask.objects.filter(task_id=task_id)
|
||||
return queryset
|
||||
|
||||
|
||||
class AcknowledgeTasksView(GenericAPIView):
|
||||
|
@ -8,6 +8,8 @@ import requests
|
||||
from bleach import clean
|
||||
from bleach import linkify
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import is_naive
|
||||
from django.utils.timezone import make_aware
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from documents.parsers import ParseError
|
||||
@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):
|
||||
|
||||
self.text += f"\n\n{strip_text(mail.text)}"
|
||||
|
||||
self.date = mail.date
|
||||
if is_naive(mail.date):
|
||||
self.date = make_aware(mail.date)
|
||||
else:
|
||||
self.date = mail.date
|
||||
|
||||
self.archive_path = self.generate_pdf(document_path)
|
||||
|
||||
def tika_parse(self, html: str):
|
||||
|
@ -1,6 +1,8 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from documents.parsers import DocumentParser
|
||||
@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
self.log("warning", f"Error while calculating DPI for image {image}: {e}")
|
||||
return None
|
||||
|
||||
def extract_text(self, sidecar_file, pdf_file):
|
||||
def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
|
||||
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
||||
# the whole text, so do not utilize it in that case
|
||||
if (
|
||||
@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
self.log("debug", f"Detected language {lang}")
|
||||
|
||||
if lang in {
|
||||
"ar", # Arabic
|
||||
"he", # Hebrew,
|
||||
"fa", # Persian
|
||||
}:
|
||||
if (
|
||||
lang
|
||||
in {
|
||||
"ar", # Arabic
|
||||
"he", # Hebrew,
|
||||
"fa", # Persian
|
||||
}
|
||||
and pdf_file.name != "archive-fallback.pdf"
|
||||
):
|
||||
raise RtlLanguageException()
|
||||
return stripped
|
||||
except RtlLanguageException:
|
||||
@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
return ocrmypdf_args
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
def parse(self, document_path: Path, mime_type, file_name=None):
|
||||
# This forces tesseract to use one core per page.
|
||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||
|
||||
@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
import ocrmypdf
|
||||
from ocrmypdf import InputFileError, EncryptedPdfError
|
||||
|
||||
archive_path = os.path.join(self.tempdir, "archive.pdf")
|
||||
sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
|
||||
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
|
||||
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
|
||||
|
||||
args = self.construct_ocrmypdf_parameters(
|
||||
document_path,
|
||||
@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
f"Attempting force OCR to get the text.",
|
||||
)
|
||||
|
||||
archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf")
|
||||
sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt")
|
||||
archive_path_fallback = Path(
|
||||
os.path.join(self.tempdir, "archive-fallback.pdf"),
|
||||
)
|
||||
sidecar_file_fallback = Path(
|
||||
os.path.join(self.tempdir, "sidecar-fallback.txt"),
|
||||
)
|
||||
|
||||
# Attempt to run OCR with safe settings.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user