mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-23 23:49:08 -06:00
Wrap in try/catch
This commit is contained in:
@@ -77,31 +77,36 @@ class RemoteDocumentParser(RasterisedDocumentParser):
|
|||||||
credential=AzureKeyCredential(self.settings.api_key),
|
credential=AzureKeyCredential(self.settings.api_key),
|
||||||
)
|
)
|
||||||
|
|
||||||
with file.open("rb") as f:
|
try:
|
||||||
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
|
with file.open("rb") as f:
|
||||||
poller = client.begin_analyze_document(
|
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
|
||||||
model_id="prebuilt-read",
|
poller = client.begin_analyze_document(
|
||||||
body=analyze_request,
|
model_id="prebuilt-read",
|
||||||
output_content_format=DocumentContentFormat.TEXT,
|
body=analyze_request,
|
||||||
output=[AnalyzeOutputOption.PDF], # request searchable PDF output
|
output_content_format=DocumentContentFormat.TEXT,
|
||||||
content_type="application/json",
|
output=[AnalyzeOutputOption.PDF], # request searchable PDF output
|
||||||
)
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
poller.wait()
|
poller.wait()
|
||||||
result_id = poller.details["operation_id"]
|
result_id = poller.details["operation_id"]
|
||||||
result = poller.result()
|
result = poller.result()
|
||||||
|
|
||||||
# Download the PDF with embedded text
|
# Download the PDF with embedded text
|
||||||
self.archive_path = self.tempdir / "archive.pdf"
|
self.archive_path = self.tempdir / "archive.pdf"
|
||||||
with self.archive_path.open("wb") as f:
|
with self.archive_path.open("wb") as f:
|
||||||
for chunk in client.get_analyze_result_pdf(
|
for chunk in client.get_analyze_result_pdf(
|
||||||
model_id="prebuilt-read",
|
model_id="prebuilt-read",
|
||||||
result_id=result_id,
|
result_id=result_id,
|
||||||
):
|
):
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
|
return result.content
|
||||||
|
except Exception as e:
|
||||||
|
self.log.error(f"Azure AI Vision parsing failed: {e}")
|
||||||
|
finally:
|
||||||
|
client.close()
|
||||||
|
|
||||||
client.close()
|
return None
|
||||||
return result.content
|
|
||||||
|
|
||||||
def parse(self, document_path: Path, mime_type, file_name=None):
|
def parse(self, document_path: Path, mime_type, file_name=None):
|
||||||
if not self.settings.engine_is_valid():
|
if not self.settings.engine_is_valid():
|
||||||
|
|||||||
@@ -68,6 +68,33 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
["This is a test document."],
|
["This is a test document."],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
|
||||||
|
def test_get_text_with_azure_error_logged_and_returns_none(self, mock_client_cls):
|
||||||
|
mock_client = mock.Mock()
|
||||||
|
mock_client.begin_analyze_document.side_effect = RuntimeError("fail")
|
||||||
|
mock_client_cls.return_value = mock_client
|
||||||
|
|
||||||
|
with override_settings(
|
||||||
|
REMOTE_OCR_ENGINE="azureai",
|
||||||
|
REMOTE_OCR_API_KEY="somekey",
|
||||||
|
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
|
||||||
|
):
|
||||||
|
parser = get_parser(uuid.uuid4())
|
||||||
|
with mock.patch.object(parser.log, "error") as mock_log_error:
|
||||||
|
parser.parse(
|
||||||
|
self.SAMPLE_FILES / "simple-digital.pdf",
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIsNone(parser.text)
|
||||||
|
mock_client.begin_analyze_document.assert_called_once()
|
||||||
|
mock_client.close.assert_called_once()
|
||||||
|
mock_log_error.assert_called_once()
|
||||||
|
self.assertIn(
|
||||||
|
"Azure AI Vision parsing failed",
|
||||||
|
mock_log_error.call_args[0][0],
|
||||||
|
)
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
REMOTE_OCR_ENGINE="azureai",
|
REMOTE_OCR_ENGINE="azureai",
|
||||||
REMOTE_OCR_API_KEY="key",
|
REMOTE_OCR_API_KEY="key",
|
||||||
|
|||||||
Reference in New Issue
Block a user