Add timeout for Tika client, as suggested in the following discussion:

https://github.com/paperless-ngx/paperless-ngx/discussions/8509
This commit is contained in:
cube 2024-12-19 00:06:16 +01:00
parent 3f7a0802a4
commit e76c8689e9
2 changed files with 9 additions and 2 deletions

View File

@ -12,6 +12,7 @@ on:
pull_request: pull_request:
branches-ignore: branches-ignore:
- 'translations**' - 'translations**'
workflow_dispatch:
env: env:
# This is the version of pipenv all the steps will use # This is the version of pipenv all the steps will use

View File

@ -33,7 +33,10 @@ class TikaDocumentParser(DocumentParser):
def extract_metadata(self, document_path, mime_type): def extract_metadata(self, document_path, mime_type):
try: try:
with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: with TikaClient(
tika_url=settings.TIKA_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client:
parsed = client.metadata.from_file(document_path, mime_type) parsed = client.metadata.from_file(document_path, mime_type)
return [ return [
{ {
@ -54,7 +57,10 @@ class TikaDocumentParser(DocumentParser):
self.log.info(f"Sending {document_path} to Tika server") self.log.info(f"Sending {document_path} to Tika server")
try: try:
with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: with TikaClient(
tika_url=settings.TIKA_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client:
try: try:
parsed = client.tika.as_text.from_file(document_path, mime_type) parsed = client.tika.as_text.from_file(document_path, mime_type)
except httpx.HTTPStatusError as err: except httpx.HTTPStatusError as err: