From 55ef0d4a1b62c3abe8500cad97ddeecf9f746b84 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Sun, 4 Dec 2022 08:44:35 -0800 Subject: [PATCH] Fixes language code checks around two part languages --- src/paperless_tesseract/checks.py | 3 +- src/paperless_tesseract/tests/test_checks.py | 37 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/paperless_tesseract/checks.py b/src/paperless_tesseract/checks.py index c63761f31..ed5725d36 100644 --- a/src/paperless_tesseract/checks.py +++ b/src/paperless_tesseract/checks.py @@ -16,8 +16,7 @@ def get_tesseract_langs(): # Decode bytes to string, split on newlines, trim out the header proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:] - # Replace _ with - to convert two part languages to the expected code - return [x.replace("_", "-") for x in proc_lines] + return [x.strip() for x in proc_lines] @register() diff --git a/src/paperless_tesseract/tests/test_checks.py b/src/paperless_tesseract/tests/test_checks.py index cfac11d3c..4d46ad9a3 100644 --- a/src/paperless_tesseract/tests/test_checks.py +++ b/src/paperless_tesseract/tests/test_checks.py @@ -27,3 +27,40 @@ class TestChecks(TestCase): msgs = check_default_language_available(None) self.assertEqual(len(msgs), 1) self.assertEqual(msgs[0].level, ERROR) + + @override_settings(OCR_LANGUAGE="chi_sim") + @mock.patch("paperless_tesseract.checks.get_tesseract_langs") + def test_multi_part_language(self, m): + """ + GIVEN: + - An OCR language which is multi part (i.e. chi_sim) + - The language is correctly formatted + WHEN: + - Installed packages are checked + THEN: + - No errors are reported + """ + m.return_value = ["chi_sim", "eng"] + + msgs = check_default_language_available(None) + + self.assertEqual(len(msgs), 0) + + @override_settings(OCR_LANGUAGE="chi-sim") + @mock.patch("paperless_tesseract.checks.get_tesseract_langs") + def 
test_multi_part_language_bad_format(self, m): + """ + GIVEN: + - An OCR language which is multi part (i.e. chi-sim) + - The language is NOT correctly formatted + WHEN: + - Installed packages are checked + THEN: + - An error is reported + """ + m.return_value = ["chi_sim", "eng"] + + msgs = check_default_language_available(None) + + self.assertEqual(len(msgs), 1) + self.assertEqual(msgs[0].level, ERROR)