Merge branch 'dev'

This commit is contained in:
Michael Shamoon 2023-01-01 14:11:46 -08:00
commit 729f25c435
11 changed files with 185 additions and 73 deletions

View File

@ -165,6 +165,7 @@ COPY [ \
"docker/docker-prepare.sh", \ "docker/docker-prepare.sh", \
"docker/paperless_cmd.sh", \ "docker/paperless_cmd.sh", \
"docker/wait-for-redis.py", \ "docker/wait-for-redis.py", \
"docker/env-from-file.sh", \
"docker/management_script.sh", \ "docker/management_script.sh", \
"docker/flower-conditional.sh", \ "docker/flower-conditional.sh", \
"docker/install_management_commands.sh", \ "docker/install_management_commands.sh", \
@ -184,6 +185,8 @@ RUN set -eux \
&& chmod 755 /sbin/docker-prepare.sh \ && chmod 755 /sbin/docker-prepare.sh \
&& mv wait-for-redis.py /sbin/wait-for-redis.py \ && mv wait-for-redis.py /sbin/wait-for-redis.py \
&& chmod 755 /sbin/wait-for-redis.py \ && chmod 755 /sbin/wait-for-redis.py \
&& mv env-from-file.sh /sbin/env-from-file.sh \
&& chmod 755 /sbin/env-from-file.sh \
&& mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \ && mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \
&& chmod 755 /usr/local/bin/paperless_cmd.sh \ && chmod 755 /usr/local/bin/paperless_cmd.sh \
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \ && mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \

View File

@ -2,37 +2,6 @@
set -e set -e
# Adapted from:
# https://github.com/docker-library/postgres/blob/master/docker-entrypoint.sh
# usage: file_env VAR
# ie: file_env 'XYZ_DB_PASSWORD' will allow for "$XYZ_DB_PASSWORD_FILE" to
# fill in the value of "$XYZ_DB_PASSWORD" from a file, especially for Docker's
# secrets feature
file_env() {
local -r var="$1"
local -r fileVar="${var}_FILE"
# Basic validation
if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then
echo >&2 "error: both $var and $fileVar are set (but are exclusive)"
exit 1
fi
# Only export var if the _FILE exists
if [ "${!fileVar:-}" ]; then
# And the file exists
if [[ -f ${!fileVar} ]]; then
echo "Setting ${var} from file"
val="$(< "${!fileVar}")"
export "$var"="$val"
else
echo "File ${!fileVar} doesn't exist"
exit 1
fi
fi
}
# Source: https://github.com/sameersbn/docker-gitlab/ # Source: https://github.com/sameersbn/docker-gitlab/
map_uidgid() { map_uidgid() {
local -r usermap_original_uid=$(id -u paperless) local -r usermap_original_uid=$(id -u paperless)
@ -96,19 +65,11 @@ custom_container_init() {
initialize() { initialize() {
# Setup environment from secrets before anything else # Setup environment from secrets before anything else
for env_var in \
PAPERLESS_DBUSER \
PAPERLESS_DBPASS \
PAPERLESS_SECRET_KEY \
PAPERLESS_AUTO_LOGIN_USERNAME \
PAPERLESS_ADMIN_USER \
PAPERLESS_ADMIN_MAIL \
PAPERLESS_ADMIN_PASSWORD \
PAPERLESS_REDIS; do
# Check for a version of this var with _FILE appended # Check for a version of this var with _FILE appended
# and convert the contents to the env var value # and convert the contents to the env var value
file_env ${env_var} # Source it so export is persistent
done # shellcheck disable=SC1091
source /sbin/env-from-file.sh
# Change the user and group IDs if needed # Change the user and group IDs if needed
map_uidgid map_uidgid

39
docker/env-from-file.sh Normal file
View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash

# Scans the exported environment variables for those with the suffix _FILE.
# When one is found, checks that the file it names exists and exports the
# contents of that file under the same name, minus the suffix.
# This allows the use of Docker secrets or mounted files to fill in any of
# the settings configurable via environment variables.
#
# On a missing file the script prints a message and exits with status 1.
#
# NOTE(review): if this file is sourced rather than executed, the options set
# below and the `exit 1` apply to the calling shell as well - confirm that is
# what every caller wants.

set -eu

# Iterate over variable NAMES, one per line, instead of word-splitting the
# output of printenv: values may contain spaces, newlines or glob characters,
# which would otherwise be split into bogus "NAME=value" tokens.
while IFS= read -r env_name
do
  # Only variables whose name ends in "_FILE" are of interest
  if [[ ${env_name} != *_FILE ]]; then
    continue
  fi

  # Value of the _FILE variable, i.e. the path to read from
  env_value=${!env_name}

  # Check the file exists
  if [[ -f ${env_value} ]]; then
    # Trim off the _FILE suffix
    non_file_env_name=${env_name%"_FILE"}
    echo "Setting ${non_file_env_name} from file"

    # Reads the value from the file
    val="$(< "${env_value}")"

    # Sets the normal name to the read file contents
    export "${non_file_env_name}"="${val}"
  else
    echo "File ${env_value} doesn't exist"
    exit 1
  fi
done < <(compgen -e)

View File

@ -3,6 +3,9 @@
set -e set -e
cd /usr/src/paperless/src/ cd /usr/src/paperless/src/
# This ensures environment is setup
# shellcheck disable=SC1091
source /sbin/env-from-file.sh
if [[ $(id -u) == 0 ]] ; if [[ $(id -u) == 0 ]] ;
then then

View File

@ -31,7 +31,7 @@
[editing]="true" [editing]="true"
[multiple]="true" [multiple]="true"
[applyOnClose]="applyOnClose" [applyOnClose]="applyOnClose"
(open)="openTagsDropdown()" (opened)="openTagsDropdown()"
[(selectionModel)]="tagSelectionModel" [(selectionModel)]="tagSelectionModel"
(apply)="setTags($event)"> (apply)="setTags($event)">
</app-filterable-dropdown> </app-filterable-dropdown>
@ -40,7 +40,7 @@
[items]="correspondents" [items]="correspondents"
[editing]="true" [editing]="true"
[applyOnClose]="applyOnClose" [applyOnClose]="applyOnClose"
(open)="openCorrespondentDropdown()" (opened)="openCorrespondentDropdown()"
[(selectionModel)]="correspondentSelectionModel" [(selectionModel)]="correspondentSelectionModel"
(apply)="setCorrespondents($event)"> (apply)="setCorrespondents($event)">
</app-filterable-dropdown> </app-filterable-dropdown>
@ -49,7 +49,7 @@
[items]="documentTypes" [items]="documentTypes"
[editing]="true" [editing]="true"
[applyOnClose]="applyOnClose" [applyOnClose]="applyOnClose"
(open)="openDocumentTypeDropdown()" (opened)="openDocumentTypeDropdown()"
[(selectionModel)]="documentTypeSelectionModel" [(selectionModel)]="documentTypeSelectionModel"
(apply)="setDocumentTypes($event)"> (apply)="setDocumentTypes($event)">
</app-filterable-dropdown> </app-filterable-dropdown>
@ -58,7 +58,7 @@
[items]="storagePaths" [items]="storagePaths"
[editing]="true" [editing]="true"
[applyOnClose]="applyOnClose" [applyOnClose]="applyOnClose"
(open)="openStoragePathDropdown()" (opened)="openStoragePathDropdown()"
[(selectionModel)]="storagePathsSelectionModel" [(selectionModel)]="storagePathsSelectionModel"
(apply)="setStoragePaths($event)"> (apply)="setStoragePaths($event)">
</app-filterable-dropdown> </app-filterable-dropdown>

View File

@ -30,27 +30,27 @@
[(selectionModel)]="tagSelectionModel" [(selectionModel)]="tagSelectionModel"
(selectionModelChange)="updateRules()" (selectionModelChange)="updateRules()"
[multiple]="true" [multiple]="true"
(open)="onTagsDropdownOpen()" (opened)="onTagsDropdownOpen()"
[allowSelectNone]="true"></app-filterable-dropdown> [allowSelectNone]="true"></app-filterable-dropdown>
<app-filterable-dropdown class="flex-fill" title="Correspondent" icon="person-fill" i18n-title <app-filterable-dropdown class="flex-fill" title="Correspondent" icon="person-fill" i18n-title
filterPlaceholder="Filter correspondents" i18n-filterPlaceholder filterPlaceholder="Filter correspondents" i18n-filterPlaceholder
[items]="correspondents" [items]="correspondents"
[(selectionModel)]="correspondentSelectionModel" [(selectionModel)]="correspondentSelectionModel"
(selectionModelChange)="updateRules()" (selectionModelChange)="updateRules()"
(open)="onCorrespondentDropdownOpen()" (opened)="onCorrespondentDropdownOpen()"
[allowSelectNone]="true"></app-filterable-dropdown> [allowSelectNone]="true"></app-filterable-dropdown>
<app-filterable-dropdown class="flex-fill" title="Document type" icon="file-earmark-fill" i18n-title <app-filterable-dropdown class="flex-fill" title="Document type" icon="file-earmark-fill" i18n-title
filterPlaceholder="Filter document types" i18n-filterPlaceholder filterPlaceholder="Filter document types" i18n-filterPlaceholder
[items]="documentTypes" [items]="documentTypes"
[(selectionModel)]="documentTypeSelectionModel" [(selectionModel)]="documentTypeSelectionModel"
(open)="onDocumentTypeDropdownOpen()" (opened)="onDocumentTypeDropdownOpen()"
(selectionModelChange)="updateRules()" (selectionModelChange)="updateRules()"
[allowSelectNone]="true"></app-filterable-dropdown> [allowSelectNone]="true"></app-filterable-dropdown>
<app-filterable-dropdown class="me-2 flex-fill" title="Storage path" icon="folder-fill" i18n-title <app-filterable-dropdown class="me-2 flex-fill" title="Storage path" icon="folder-fill" i18n-title
filterPlaceholder="Filter storage paths" i18n-filterPlaceholder filterPlaceholder="Filter storage paths" i18n-filterPlaceholder
[items]="storagePaths" [items]="storagePaths"
[(selectionModel)]="storagePathSelectionModel" [(selectionModel)]="storagePathSelectionModel"
(open)="onStoragePathDropdownOpen()" (opened)="onStoragePathDropdownOpen()"
(selectionModelChange)="updateRules()" (selectionModelChange)="updateRules()"
[allowSelectNone]="true"></app-filterable-dropdown> [allowSelectNone]="true"></app-filterable-dropdown>
</div> </div>

View File

@ -5,7 +5,7 @@ export const environment = {
apiBaseUrl: document.baseURI + 'api/', apiBaseUrl: document.baseURI + 'api/',
apiVersion: '2', apiVersion: '2',
appTitle: 'Paperless-ngx', appTitle: 'Paperless-ngx',
version: '1.11.2', version: '1.11.2-dev',
webSocketHost: window.location.host, webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:', webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/', webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@ -793,6 +793,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload(self, m): def test_upload(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -816,6 +818,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m): def test_upload_empty_metadata(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -839,6 +843,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m): def test_upload_invalid_form(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -853,6 +859,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m): def test_upload_invalid_file(self, m):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb", "rb",
@ -866,6 +874,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task): def test_upload_with_title(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -884,6 +895,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task): def test_upload_with_correspondent(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
c = Correspondent.objects.create(name="test-corres") c = Correspondent.objects.create(name="test-corres")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@ -903,6 +917,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task): def test_upload_with_invalid_correspondent(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -917,6 +934,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task): def test_upload_with_document_type(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
dt = DocumentType.objects.create(name="invoice") dt = DocumentType.objects.create(name="invoice")
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@ -936,6 +956,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task): def test_upload_with_invalid_document_type(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
with open( with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb", "rb",
@ -950,6 +973,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task): def test_upload_with_tags(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
t1 = Tag.objects.create(name="tag1") t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2") t2 = Tag.objects.create(name="tag2")
with open( with open(
@ -970,6 +996,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task): def test_upload_with_invalid_tags(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
t1 = Tag.objects.create(name="tag1") t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2") t2 = Tag.objects.create(name="tag2")
with open( with open(
@ -986,6 +1015,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.consume_file.delay") @mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task): def test_upload_with_created(self, async_task):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
created = datetime.datetime( created = datetime.datetime(
2022, 2022,
5, 5,
@ -2948,6 +2980,59 @@ class TestTasks(APITestCase):
self.assertEqual(returned_task2["status"], celery.states.PENDING) self.assertEqual(returned_task2["status"], celery.states.PENDING)
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name) self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
def test_get_single_task_status(self):
    """
    GIVEN:
        - Two stored tasks, one of which has a known task ID
    WHEN:
        - The task endpoint is queried with that ID as the task_id parameter
    THEN:
        - Exactly one task is returned and it carries the requested ID
    """
    wanted_id = str(uuid.uuid4())
    wanted_task = PaperlessTask.objects.create(
        task_id=wanted_id,
        task_file_name="task_one.pdf",
    )
    # A second task that the task_id filter is expected to leave out
    PaperlessTask.objects.create(
        task_id=str(uuid.uuid4()),
        task_file_name="task_two.pdf",
    )

    response = self.client.get(self.ENDPOINT + f"?task_id={wanted_id}")

    self.assertEqual(response.status_code, 200)
    self.assertEqual(len(response.data), 1)
    self.assertEqual(response.data[0]["task_id"], wanted_task.task_id)
def test_get_single_task_status_not_valid(self):
    """
    GIVEN:
        - Two stored tasks
        - Query parameter for a task ID that matches neither of them
    WHEN:
        - API call is made to get task status
    THEN:
        - No task data is returned
    """
    # The created objects are never inspected; they only need to exist so
    # that an empty response shows the task_id filter excluded them.
    for file_name in ("task_one.pdf", "task_two.pdf"):
        PaperlessTask.objects.create(
            task_id=str(uuid.uuid4()),
            task_file_name=file_name,
        )

    response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")

    self.assertEqual(response.status_code, 200)
    self.assertEqual(len(response.data), 0)
def test_acknowledge_tasks(self): def test_acknowledge_tasks(self):
""" """
GIVEN: GIVEN:

View File

@ -617,7 +617,7 @@ class PostDocumentView(GenericAPIView):
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
consume_file.delay( async_task = consume_file.delay(
temp_filename, temp_filename,
override_filename=doc_name, override_filename=doc_name,
override_title=title, override_title=title,
@ -628,7 +628,7 @@ class PostDocumentView(GenericAPIView):
override_created=created, override_created=created,
) )
return Response("OK") return Response(async_task.id)
class SelectionDataView(GenericAPIView): class SelectionDataView(GenericAPIView):
@ -886,6 +886,7 @@ class TasksViewSet(ReadOnlyModelViewSet):
permission_classes = (IsAuthenticated,) permission_classes = (IsAuthenticated,)
serializer_class = TasksViewSerializer serializer_class = TasksViewSerializer
def get_queryset(self):
queryset = ( queryset = (
PaperlessTask.objects.filter( PaperlessTask.objects.filter(
acknowledged=False, acknowledged=False,
@ -893,6 +894,10 @@ class TasksViewSet(ReadOnlyModelViewSet):
.order_by("date_created") .order_by("date_created")
.reverse() .reverse()
) )
task_id = self.request.query_params.get("task_id")
if task_id is not None:
queryset = PaperlessTask.objects.filter(task_id=task_id)
return queryset
class AcknowledgeTasksView(GenericAPIView): class AcknowledgeTasksView(GenericAPIView):

View File

@ -8,6 +8,8 @@ import requests
from bleach import clean from bleach import clean
from bleach import linkify from bleach import linkify
from django.conf import settings from django.conf import settings
from django.utils.timezone import is_naive
from django.utils.timezone import make_aware
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError from documents.parsers import ParseError
@ -135,7 +137,11 @@ class MailDocumentParser(DocumentParser):
self.text += f"\n\n{strip_text(mail.text)}" self.text += f"\n\n{strip_text(mail.text)}"
if is_naive(mail.date):
self.date = make_aware(mail.date)
else:
self.date = mail.date self.date = mail.date
self.archive_path = self.generate_pdf(document_path) self.archive_path = self.generate_pdf(document_path)
def tika_parse(self, html: str): def tika_parse(self, html: str):

View File

@ -1,6 +1,8 @@
import json import json
import os import os
import re import re
from pathlib import Path
from typing import Optional
from django.conf import settings from django.conf import settings
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
@ -99,7 +101,7 @@ class RasterisedDocumentParser(DocumentParser):
self.log("warning", f"Error while calculating DPI for image {image}: {e}") self.log("warning", f"Error while calculating DPI for image {image}: {e}")
return None return None
def extract_text(self, sidecar_file, pdf_file): def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
# When re-doing OCR, the sidecar contains ONLY the new text, not # When re-doing OCR, the sidecar contains ONLY the new text, not
# the whole text, so do not utilize it in that case # the whole text, so do not utilize it in that case
if ( if (
@ -139,11 +141,15 @@ class RasterisedDocumentParser(DocumentParser):
self.log("debug", f"Detected language {lang}") self.log("debug", f"Detected language {lang}")
if lang in { if (
lang
in {
"ar", # Arabic "ar", # Arabic
"he", # Hebrew, "he", # Hebrew,
"fa", # Persian "fa", # Persian
}: }
and pdf_file.name != "archive-fallback.pdf"
):
raise RtlLanguageException() raise RtlLanguageException()
return stripped return stripped
except RtlLanguageException: except RtlLanguageException:
@ -275,7 +281,7 @@ class RasterisedDocumentParser(DocumentParser):
return ocrmypdf_args return ocrmypdf_args
def parse(self, document_path, mime_type, file_name=None): def parse(self, document_path: Path, mime_type, file_name=None):
# This forces tesseract to use one core per page. # This forces tesseract to use one core per page.
os.environ["OMP_THREAD_LIMIT"] = "1" os.environ["OMP_THREAD_LIMIT"] = "1"
@ -300,8 +306,8 @@ class RasterisedDocumentParser(DocumentParser):
import ocrmypdf import ocrmypdf
from ocrmypdf import InputFileError, EncryptedPdfError from ocrmypdf import InputFileError, EncryptedPdfError
archive_path = os.path.join(self.tempdir, "archive.pdf") archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
sidecar_file = os.path.join(self.tempdir, "sidecar.txt") sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
args = self.construct_ocrmypdf_parameters( args = self.construct_ocrmypdf_parameters(
document_path, document_path,
@ -335,8 +341,12 @@ class RasterisedDocumentParser(DocumentParser):
f"Attempting force OCR to get the text.", f"Attempting force OCR to get the text.",
) )
archive_path_fallback = os.path.join(self.tempdir, "archive-fallback.pdf") archive_path_fallback = Path(
sidecar_file_fallback = os.path.join(self.tempdir, "sidecar-fallback.txt") os.path.join(self.tempdir, "archive-fallback.pdf"),
)
sidecar_file_fallback = Path(
os.path.join(self.tempdir, "sidecar-fallback.txt"),
)
# Attempt to run OCR with safe settings. # Attempt to run OCR with safe settings.