Fix language guesses in tests

It turns out that the Lorem ipsum text in the sample files was confusing the language guesser, causing it to think the file was in Catalan and not English or German.
2025-07-24 18:04:39 -05:00 · 2018-12-01 15:55:02 +00:00 · 2018-12-01 15:55:02 +00:00 · c1d18c1e83
commit c1d18c1e83
parent c5488dcb98
6 changed files with 7 additions and 1 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -11,6 +11,8 @@ Changelog
  pointed this out. `#423`_.
 * Updated dependencies to include (among other things) a security patch to
  requests.
+* Fix text in sample data for tests so that the language guesser stops thinking
+  that everything is in Catalan because we had *Lorem ipsum* in there.


 2.5.0
--- a/src/paperless_tesseract/tests/samples/tests_date_3.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_3.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.png
--- a/src/paperless_tesseract/tests/samples/tests_date_4.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_4.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.png
--- a/src/paperless_tesseract/tests/test_date.py
+++ b/src/paperless_tesseract/tests/test_date.py
@ -5,7 +5,7 @@ from unittest import mock
 from uuid import uuid4

 from dateutil import tz
-from django.test import TestCase
+from django.test import TestCase, override_settings

 from ..parsers import RasterisedDocumentParser

@ -211,6 +211,7 @@ class TestDate(TestCase):
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="deu")
    def test_get_text_3_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
        document = RasterisedDocumentParser(input_file)
@ -225,6 +226,7 @@ class TestDate(TestCase):
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="deu")
    def test_get_text_3_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
        document = RasterisedDocumentParser(input_file)
@ -239,6 +241,7 @@ class TestDate(TestCase):
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="eng")
    def test_get_text_4_pdf(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
        document = RasterisedDocumentParser(input_file)
@ -253,6 +256,7 @@ class TestDate(TestCase):
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
        SCRATCH
    )
+    @override_settings(OCR_LANGUAGE="eng")
    def test_get_text_4_png(self):
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
        document = RasterisedDocumentParser(input_file)