Working around current TIKA Library Bugs - lint

This commit is contained in:
Simon Siebert 2023-07-06 23:31:38 +02:00 committed by Trenton Holmes
parent 4664ff2f00
commit 29877d1ca3

View File

@@ -1,9 +1,6 @@
import os import os
from pathlib import Path from pathlib import Path
import array
import dateutil.parser
import httpx import httpx
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
@@ -56,7 +53,7 @@ class TikaDocumentParser(DocumentParser):
try: try:
with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
with open(document_path, 'rb') as f: with open(document_path, "rb") as f:
content = f.read() content = f.read()
parsed = client.tika.as_text.from_buffer(content, mime_type) parsed = client.tika.as_text.from_buffer(content, mime_type)
except Exception as err: except Exception as err: