mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fix language guesses in tests
It turns out that the Lorem ipsum text in the sample files was confusing the language guesser, causing it to think the file was in Catalan and not English or German.
This commit is contained in:
parent
c5488dcb98
commit
c1d18c1e83
@ -11,6 +11,8 @@ Changelog
|
||||
pointed this out. `#423`_.
|
||||
* Updated dependencies to include (among other things) a security patch to
|
||||
requests.
|
||||
* Fix text in sample data for tests so that the language guesser stops thinking
|
||||
that everything is in Catalan because we had *Lorem ipsum* in there.
|
||||
|
||||
|
||||
2.5.0
|
||||
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 138 KiB After Width: | Height: | Size: 55 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 138 KiB After Width: | Height: | Size: 53 KiB |
@ -5,7 +5,7 @@ from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
from dateutil import tz
|
||||
from django.test import TestCase
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from ..parsers import RasterisedDocumentParser
|
||||
|
||||
@ -211,6 +211,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@override_settings(OCR_LANGUAGE="deu")
|
||||
def test_get_text_3_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@ -225,6 +226,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@override_settings(OCR_LANGUAGE="deu")
|
||||
def test_get_text_3_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@ -239,6 +241,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@override_settings(OCR_LANGUAGE="eng")
|
||||
def test_get_text_4_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@ -253,6 +256,7 @@ class TestDate(TestCase):
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@override_settings(OCR_LANGUAGE="eng")
|
||||
def test_get_text_4_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
|
Loading…
x
Reference in New Issue
Block a user