From e76c8689e9778f4635b5aa81c643bd57a61dd3b9 Mon Sep 17 00:00:00 2001
From: cube
Date: Thu, 19 Dec 2024 00:06:16 +0100
Subject: [PATCH] Add timeout for Tika client as suggested in
 https://github.com/paperless-ngx/paperless-ngx/discussions/8509

---
 .github/workflows/ci.yml      |  1 +
 src/paperless_tika/parsers.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8bd31467b..401fffb15 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,6 +12,7 @@ on:
   pull_request:
     branches-ignore:
       - 'translations**'
+  workflow_dispatch:
 
 env:
   # This is the version of pipenv all the steps will use
diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py
index f51a03916..40aa8e581 100644
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -33,7 +33,10 @@ class TikaDocumentParser(DocumentParser):
 
     def extract_metadata(self, document_path, mime_type):
         try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+            with TikaClient(
+                tika_url=settings.TIKA_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client:
                 parsed = client.metadata.from_file(document_path, mime_type)
                 return [
                     {
@@ -54,7 +57,10 @@ class TikaDocumentParser(DocumentParser):
         self.log.info(f"Sending {document_path} to Tika server")
 
         try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+            with TikaClient(
+                tika_url=settings.TIKA_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client:
                 try:
                     parsed = client.tika.as_text.from_file(document_path, mime_type)
                 except httpx.HTTPStatusError as err: