mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Try waiting a little bit after a parser error during the live testing
This commit is contained in:
		 Trenton Holmes
					Trenton Holmes
				
			
				
					committed by
					
						 Trenton H
						Trenton H
					
				
			
			
				
	
			
			
			 Trenton H
						Trenton H
					
				
			
						parent
						
							853d13b6f2
						
					
				
				
					commit
					3c325582d9
				
			| @@ -1,10 +1,11 @@ | ||||
| import datetime | ||||
| import os | ||||
| import time | ||||
| from pathlib import Path | ||||
| from typing import Final | ||||
|  | ||||
| import pytest | ||||
| from django.test import TestCase | ||||
| from documents.parsers import ParseError | ||||
| from paperless_tika.parsers import TikaDocumentParser | ||||
|  | ||||
|  | ||||
| @@ -24,6 +25,44 @@ class TestTikaParserAgainstServer(TestCase): | ||||
|     def tearDown(self) -> None: | ||||
|         self.parser.cleanup() | ||||
|  | ||||
|     def try_parse_with_wait(self, test_file, mime_type): | ||||
|         """ | ||||
|         For whatever reason, the image started during the test pipeline likes to | ||||
|         segfault sometimes, when run with the exact files that usually pass. | ||||
|  | ||||
|         So, this function will retry the parsing up to 3 times, with larger backoff | ||||
|         periods between each attempt, in hopes the issue resolves itself during | ||||
|         one attempt to parse. | ||||
|  | ||||
|         This will wait the following: | ||||
|             - Attempt 1 - 20s following failure | ||||
|             - Attempt 2 - 40s following failure | ||||
|             - Attempt 3 - 80s following failure | ||||
|  | ||||
|         """ | ||||
|         succeeded = False | ||||
|         retry_time = 20.0 | ||||
|         retry_count = 0 | ||||
|         max_retry_count = 3 | ||||
|  | ||||
|         while retry_count < max_retry_count and not succeeded: | ||||
|             try: | ||||
|                 self.parser.parse(test_file, mime_type) | ||||
|  | ||||
|                 succeeded = True | ||||
|             except Exception as e: | ||||
|                 print(f"{e} during try #{retry_count}", flush=True) | ||||
|  | ||||
|                 retry_count = retry_count + 1 | ||||
|  | ||||
|                 time.sleep(retry_time) | ||||
|                 retry_time = retry_time * 2.0 | ||||
|  | ||||
|         self.assertTrue( | ||||
|             succeeded, | ||||
|             "Continued Tika server errors after multiple retries", | ||||
|         ) | ||||
|  | ||||
|     def test_basic_parse_odt(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
| @@ -36,7 +75,7 @@ class TestTikaParserAgainstServer(TestCase): | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / Path("sample.odt") | ||||
|  | ||||
|         self.parser.parse(test_file, "application/vnd.oasis.opendocument.text") | ||||
|         self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text") | ||||
|  | ||||
|         self.assertEqual( | ||||
|             self.parser.text, | ||||
| @@ -62,7 +101,7 @@ class TestTikaParserAgainstServer(TestCase): | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / Path("sample.docx") | ||||
|  | ||||
|         self.parser.parse( | ||||
|         self.try_parse_with_wait( | ||||
|             test_file, | ||||
|             "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | ||||
|         ) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user