Wrap Azure AI parsing in try/except

This commit is contained in:
shamoon
2025-11-18 12:07:16 -08:00
parent 4f53d1b6ee
commit cef100a955
2 changed files with 54 additions and 22 deletions

View File

@@ -77,31 +77,36 @@ class RemoteDocumentParser(RasterisedDocumentParser):
credential=AzureKeyCredential(self.settings.api_key), credential=AzureKeyCredential(self.settings.api_key),
) )
with file.open("rb") as f: try:
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read()) with file.open("rb") as f:
poller = client.begin_analyze_document( analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
model_id="prebuilt-read", poller = client.begin_analyze_document(
body=analyze_request, model_id="prebuilt-read",
output_content_format=DocumentContentFormat.TEXT, body=analyze_request,
output=[AnalyzeOutputOption.PDF], # request searchable PDF output output_content_format=DocumentContentFormat.TEXT,
content_type="application/json", output=[AnalyzeOutputOption.PDF], # request searchable PDF output
) content_type="application/json",
)
poller.wait() poller.wait()
result_id = poller.details["operation_id"] result_id = poller.details["operation_id"]
result = poller.result() result = poller.result()
# Download the PDF with embedded text # Download the PDF with embedded text
self.archive_path = self.tempdir / "archive.pdf" self.archive_path = self.tempdir / "archive.pdf"
with self.archive_path.open("wb") as f: with self.archive_path.open("wb") as f:
for chunk in client.get_analyze_result_pdf( for chunk in client.get_analyze_result_pdf(
model_id="prebuilt-read", model_id="prebuilt-read",
result_id=result_id, result_id=result_id,
): ):
f.write(chunk) f.write(chunk)
return result.content
except Exception as e:
self.log.error(f"Azure AI Vision parsing failed: {e}")
finally:
client.close()
client.close() return None
return result.content
def parse(self, document_path: Path, mime_type, file_name=None): def parse(self, document_path: Path, mime_type, file_name=None):
if not self.settings.engine_is_valid(): if not self.settings.engine_is_valid():

View File

@@ -68,6 +68,33 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
["This is a test document."], ["This is a test document."],
) )
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls):
    """
    Check the failure path of remote OCR parsing.

    The patched Azure client raises RuntimeError from
    begin_analyze_document. After parsing, the test expects parser.text
    to be None, exactly one error log entry containing
    "Azure AI Vision parsing failed", and the client to have been closed
    exactly once.
    """
    # Stand-in client whose analysis call always fails.
    failing_client = mock.Mock()
    failing_client.begin_analyze_document.side_effect = RuntimeError("fail")
    mock_client_cls.return_value = failing_client

    remote_ocr_settings = {
        "REMOTE_OCR_ENGINE": "azureai",
        "REMOTE_OCR_API_KEY": "somekey",
        "REMOTE_OCR_ENDPOINT": "https://endpoint.cognitiveservices.azure.com",
    }
    sample_pdf = self.SAMPLE_FILES / "simple-digital.pdf"

    with override_settings(**remote_ocr_settings):
        parser = get_parser(uuid.uuid4())
        # Capture error-level logging so the message can be inspected below.
        with mock.patch.object(parser.log, "error") as logged_error:
            parser.parse(sample_pdf, "application/pdf")

    self.assertIsNone(parser.text)

    # The failing call was attempted once and the client was still closed.
    failing_client.begin_analyze_document.assert_called_once()
    failing_client.close.assert_called_once()

    # Exactly one error was logged, and its message names the failure.
    logged_error.assert_called_once()
    first_positional_arg = logged_error.call_args.args[0]
    self.assertIn("Azure AI Vision parsing failed", first_positional_arg)
@override_settings( @override_settings(
REMOTE_OCR_ENGINE="azureai", REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="key", REMOTE_OCR_API_KEY="key",