From 170654cc3c4d489e7598c93e6f32d48e453d8215 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Fri, 18 Apr 2025 13:08:10 -0700 Subject: [PATCH] Test --- src/paperless_remote/tests/test_parser.py | 74 ++++++++--------------- 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/src/paperless_remote/tests/test_parser.py b/src/paperless_remote/tests/test_parser.py index 0bc046037..0a7753449 100644 --- a/src/paperless_remote/tests/test_parser.py +++ b/src/paperless_remote/tests/test_parser.py @@ -1,9 +1,7 @@ -import sys import uuid from pathlib import Path from unittest import mock -import pytest from django.test import TestCase from django.test import override_settings @@ -25,59 +23,37 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.fail(f"'{s}' is not in '{content}'") self.assertListEqual(indices, sorted(indices)) - @pytest.mark.skipif( - sys.version_info > (3, 10), - reason="Fails on 3.11 only on CI, for some reason", - ) # TODO: investigate - @mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient") - def test_get_text_with_azure(self, mock_azure_client): - result = mock.Mock() - result.content = "This is a test document." - result.pages = [ - mock.Mock( - width=100, - height=100, - words=[ - mock.Mock( - content="This", - polygon=[ - mock.Mock(x=0, y=0), - ], - ), - mock.Mock( - content="is", - polygon=[ - mock.Mock(x=10, y=10), - ], - ), - mock.Mock( - content="a", - polygon=[ - mock.Mock(x=20, y=20), - ], - ), - mock.Mock( - content="test", - polygon=[ - mock.Mock(x=30, y=30), - ], - ), - mock.Mock( - content="document.", - polygon=[ - mock.Mock(x=40, y=40), - ], - ), - ], - ), + @mock.patch("paperless_remote.parsers.subprocess.run") + @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient") + def test_get_text_with_azure(self, mock_client_cls, mock_subprocess): + # Arrange mock Azure client + mock_client = mock.Mock() + mock_client_cls.return_value = mock_client + + # Simulate poller result and its `.details` + mock_poller = mock.Mock() + mock_poller.wait.return_value = None + mock_poller.details = {"operation_id": "fake-op-id"} + mock_client.begin_analyze_document.return_value = mock_poller + + # Return dummy PDF bytes + mock_client.get_analyze_result_pdf.return_value = [ + b"%PDF-", + b"1.7 ", + b"FAKEPDF", ] - mock_azure_client.return_value.begin_analyze_document.return_value.result.return_value = result + # Simulate pdftotext by writing dummy text to sidecar file + def fake_run(cmd, *args, **kwargs): + with Path(cmd[-1]).open("w", encoding="utf-8") as f: + f.write("This is a test document.") + + mock_subprocess.side_effect = fake_run with override_settings( REMOTE_OCR_ENGINE="azureai", REMOTE_OCR_API_KEY="somekey", - REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com/", + REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com", ): parser = RemoteDocumentParser(uuid.uuid4()) parser.parse(