mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Try waiting a little bit after a parser error during the live testing
This commit is contained in:
		 Trenton Holmes
					Trenton Holmes
				
			
				
					committed by
					
						 Trenton H
						Trenton H
					
				
			
			
				
	
			
			
			 Trenton H
						Trenton H
					
				
			
						parent
						
							853d13b6f2
						
					
				
				
					commit
					3c325582d9
				
			| @@ -1,10 +1,11 @@ | |||||||
| import datetime |  | ||||||
| import os | import os | ||||||
|  | import time | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Final | from typing import Final | ||||||
|  |  | ||||||
| import pytest | import pytest | ||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|  | from documents.parsers import ParseError | ||||||
| from paperless_tika.parsers import TikaDocumentParser | from paperless_tika.parsers import TikaDocumentParser | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -24,6 +25,44 @@ class TestTikaParserAgainstServer(TestCase): | |||||||
|     def tearDown(self) -> None: |     def tearDown(self) -> None: | ||||||
|         self.parser.cleanup() |         self.parser.cleanup() | ||||||
|  |  | ||||||
|  |     def try_parse_with_wait(self, test_file, mime_type): | ||||||
|  |         """ | ||||||
|  |         For whatever reason, the image started during the test pipeline likes to | ||||||
|  |         segfault sometimes, when run with the exact files that usually pass. | ||||||
|  |  | ||||||
|  |         So, this function will retry the parsing up to 3 times, with larger backoff | ||||||
|  |         periods between each attempt, in hopes the issue resolves itself during | ||||||
|  |         one attempt to parse. | ||||||
|  |  | ||||||
|  |         This will wait the following: | ||||||
|  |             - Attempt 1 - 20s following failure | ||||||
|  |             - Attempt 2 - 40s following failure | ||||||
|  |             - Attempt 3 - 80s following failure | ||||||
|  |  | ||||||
|  |         """ | ||||||
|  |         succeeded = False | ||||||
|  |         retry_time = 20.0 | ||||||
|  |         retry_count = 0 | ||||||
|  |         max_retry_count = 3 | ||||||
|  |  | ||||||
|  |         while retry_count < max_retry_count and not succeeded: | ||||||
|  |             try: | ||||||
|  |                 self.parser.parse(test_file, mime_type) | ||||||
|  |  | ||||||
|  |                 succeeded = True | ||||||
|  |             except Exception as e: | ||||||
|  |                 print(f"{e} during try #{retry_count}", flush=True) | ||||||
|  |  | ||||||
|  |                 retry_count = retry_count + 1 | ||||||
|  |  | ||||||
|  |                 time.sleep(retry_time) | ||||||
|  |                 retry_time = retry_time * 2.0 | ||||||
|  |  | ||||||
|  |         self.assertTrue( | ||||||
|  |             succeeded, | ||||||
|  |             "Continued Tika server errors after multiple retries", | ||||||
|  |         ) | ||||||
|  |  | ||||||
|     def test_basic_parse_odt(self): |     def test_basic_parse_odt(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -36,7 +75,7 @@ class TestTikaParserAgainstServer(TestCase): | |||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / Path("sample.odt") |         test_file = self.SAMPLE_DIR / Path("sample.odt") | ||||||
|  |  | ||||||
|         self.parser.parse(test_file, "application/vnd.oasis.opendocument.text") |         self.try_parse_with_wait(test_file, "application/vnd.oasis.opendocument.text") | ||||||
|  |  | ||||||
|         self.assertEqual( |         self.assertEqual( | ||||||
|             self.parser.text, |             self.parser.text, | ||||||
| @@ -62,7 +101,7 @@ class TestTikaParserAgainstServer(TestCase): | |||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / Path("sample.docx") |         test_file = self.SAMPLE_DIR / Path("sample.docx") | ||||||
|  |  | ||||||
|         self.parser.parse( |         self.try_parse_with_wait( | ||||||
|             test_file, |             test_file, | ||||||
|             "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |             "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | ||||||
|         ) |         ) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user