Try waiting a little bit after a parser error during the live testing

This commit is contained in:
Trenton Holmes 2022-11-01 15:51:41 -07:00 committed by Trenton H
parent 853d13b6f2
commit 3c325582d9

View File

@ -1,10 +1,11 @@
import datetime
import os
import time
from pathlib import Path
from typing import Final
import pytest
from django.test import TestCase
from documents.parsers import ParseError
from paperless_tika.parsers import TikaDocumentParser
@ -24,6 +25,44 @@ class TestTikaParserAgainstServer(TestCase):
def tearDown(self) -> None:
self.parser.cleanup()
def try_parse_with_wait(self, test_file, mime_type):
"""
For whatever reason, the image started during the test pipeline likes to
segfault sometimes, when run with the exact files that usually pass.
So, this function will retry the parsing up to 3 times, with larger backoff
periods between each attempt, in hopes the issue resolves itself during
one attempt to parse.
This will wait the following:
- Attempt 1 - 20s following failure
- Attempt 2 - 40s following failure
- Attempt 3 - 80s following failure
"""
succeeded = False
retry_time = 20.0
retry_count = 0
max_retry_count = 3
while retry_count < max_retry_count and not succeeded:
try:
self.parser.parse(test_file, mime_type)
succeeded = True
except Exception as e:
print(f"{e} during try #{retry_count}", flush=True)
retry_count = retry_count + 1
time.sleep(retry_time)
retry_time = retry_time * 2.0
self.assertTrue(
succeeded,
"Continued Tika server errors after multiple retries",
)
def test_basic_parse_odt(self):
"""
GIVEN:
@ -36,7 +75,7 @@ class TestTikaParserAgainstServer(TestCase):
"""
test_file = self.SAMPLE_DIR / Path("sample.odt")
self.parser.parse(test_file, "application/vnd.oasis.opendocument.text")
self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text")
self.assertEqual(
self.parser.text,
@ -62,7 +101,7 @@ class TestTikaParserAgainstServer(TestCase):
"""
test_file = self.SAMPLE_DIR / Path("sample.docx")
self.parser.parse(
self.try_parse_with_wait(
test_file,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)