Combine and extend the utility for calling the live services to be more robust against failures, reporting, etc

This commit is contained in:
Trenton H
2023-08-29 10:25:20 -07:00
parent b9fdf68be3
commit a1697ff21c
3 changed files with 79 additions and 94 deletions

View File

@@ -1,11 +1,11 @@
import os
import time
from pathlib import Path
from typing import Final
import pytest
from django.test import TestCase
from documents.tests.utils import util_call_with_backoff
from paperless_tika.parsers import TikaDocumentParser
@@ -28,44 +28,6 @@ class TestTikaParserAgainstServer(TestCase):
def tearDown(self) -> None:
self.parser.cleanup()
def try_parse_with_wait(self, test_file: Path, mime_type: str):
"""
For whatever reason, the image started during the test pipeline likes to
segfault sometimes, when run with the exact files that usually pass.
So, this function will retry the parsing up to 3 times, with larger backoff
periods between each attempt, in hopes the issue resolves itself during
one attempt to parse.
This will wait the following:
- Attempt 1 - 20s following failure
- Attempt 2 - 40s following failure
- Attempt 3 - 80s following failure
"""
succeeded = False
retry_time = 20.0
retry_count = 0
max_retry_count = 3
while retry_count < max_retry_count and not succeeded:
try:
self.parser.parse(test_file, mime_type)
succeeded = True
except Exception as e:
print(f"{e} during try #{retry_count}", flush=True)
retry_count = retry_count + 1
time.sleep(retry_time)
retry_time = retry_time * 2.0
self.assertTrue(
succeeded,
"Continued Tika server errors after multiple retries",
)
def test_basic_parse_odt(self):
"""
GIVEN:
@@ -78,7 +40,10 @@ class TestTikaParserAgainstServer(TestCase):
"""
test_file = self.SAMPLE_DIR / Path("sample.odt")
self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text")
util_call_with_backoff(
self.parser.parse,
[test_file, "application/vnd.oasis.opendocument.text"],
)
self.assertEqual(
self.parser.text,
@@ -104,9 +69,12 @@ class TestTikaParserAgainstServer(TestCase):
"""
test_file = self.SAMPLE_DIR / Path("sample.docx")
self.try_parse_with_wait(
test_file,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
util_call_with_backoff(
self.parser.parse,
[
test_file,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
],
)
self.assertEqual(
@@ -131,9 +99,9 @@ class TestTikaParserAgainstServer(TestCase):
"""
test_file = self.SAMPLE_DIR / "sample.doc"
self.try_parse_with_wait(
test_file,
"application/msword",
util_call_with_backoff(
self.parser.parse,
[test_file, "application/msword"],
)
self.assertIn(