diff --git a/docs/changelog.rst b/docs/changelog.rst index 059691811..21cfa9339 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,6 +11,8 @@ Changelog pointed this out. `#423`_. * Updated dependencies to include (among other things) a security patch to requests. +* Fixed text in sample data for tests so that the language guesser stops thinking + that everything is in Catalan because we had *Lorem ipsum* in there. 2.5.0 diff --git a/src/paperless_tesseract/tests/samples/tests_date_3.pdf b/src/paperless_tesseract/tests/samples/tests_date_3.pdf index 0270ae097..1a91a1c71 100644 Binary files a/src/paperless_tesseract/tests/samples/tests_date_3.pdf and b/src/paperless_tesseract/tests/samples/tests_date_3.pdf differ diff --git a/src/paperless_tesseract/tests/samples/tests_date_3.png b/src/paperless_tesseract/tests/samples/tests_date_3.png index 7af752cbc..5bf781adf 100644 Binary files a/src/paperless_tesseract/tests/samples/tests_date_3.png and b/src/paperless_tesseract/tests/samples/tests_date_3.png differ diff --git a/src/paperless_tesseract/tests/samples/tests_date_4.pdf b/src/paperless_tesseract/tests/samples/tests_date_4.pdf index e235ad215..d89dd1692 100644 Binary files a/src/paperless_tesseract/tests/samples/tests_date_4.pdf and b/src/paperless_tesseract/tests/samples/tests_date_4.pdf differ diff --git a/src/paperless_tesseract/tests/samples/tests_date_4.png b/src/paperless_tesseract/tests/samples/tests_date_4.png index c76a55dfb..a82f21e88 100644 Binary files a/src/paperless_tesseract/tests/samples/tests_date_4.png and b/src/paperless_tesseract/tests/samples/tests_date_4.png differ diff --git a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py index 15fed1a37..3959ded31 100644 --- a/src/paperless_tesseract/tests/test_date.py +++ b/src/paperless_tesseract/tests/test_date.py @@ -5,7 +5,7 @@ from unittest import mock from uuid import uuid4 from dateutil import tz -from django.test import TestCase +from django.test 
import TestCase, override_settings from ..parsers import RasterisedDocumentParser @@ -211,6 +211,7 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", SCRATCH ) + @override_settings(OCR_LANGUAGE="deu") def test_get_text_3_pdf(self): input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf") document = RasterisedDocumentParser(input_file) @@ -225,6 +226,7 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", SCRATCH ) + @override_settings(OCR_LANGUAGE="deu") def test_get_text_3_png(self): input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png") document = RasterisedDocumentParser(input_file) @@ -239,6 +241,7 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", SCRATCH ) + @override_settings(OCR_LANGUAGE="eng") def test_get_text_4_pdf(self): input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf") document = RasterisedDocumentParser(input_file) @@ -253,6 +256,7 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", SCRATCH ) + @override_settings(OCR_LANGUAGE="eng") def test_get_text_4_png(self): input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png") document = RasterisedDocumentParser(input_file)