From 452ea2ccf9a32206ca838b0577290759ec51350d Mon Sep 17 00:00:00 2001 From: HiranChaudhuri Date: Thu, 19 Dec 2024 17:58:26 +0100 Subject: [PATCH] Enhancement: add timeout for Tika client (#8520) Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com> --- src/paperless_tika/parsers.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index f51a03916..40aa8e581 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -33,7 +33,10 @@ class TikaDocumentParser(DocumentParser): def extract_metadata(self, document_path, mime_type): try: - with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: + with TikaClient( + tika_url=settings.TIKA_ENDPOINT, + timeout=settings.CELERY_TASK_TIME_LIMIT, + ) as client: parsed = client.metadata.from_file(document_path, mime_type) return [ { @@ -54,7 +57,10 @@ class TikaDocumentParser(DocumentParser): self.log.info(f"Sending {document_path} to Tika server") try: - with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: + with TikaClient( + tika_url=settings.TIKA_ENDPOINT, + timeout=settings.CELERY_TASK_TIME_LIMIT, + ) as client: try: parsed = client.tika.as_text.from_file(document_path, mime_type) except httpx.HTTPStatusError as err: