removed unused code, small fixes

This commit is contained in:
Jonas Winkler
2020-11-02 18:20:04 +01:00
parent 3bef72c717
commit 7d282a4e4e
10 changed files with 19 additions and 94 deletions

View File

@@ -160,6 +160,7 @@ class RasterisedDocumentParser(DocumentParser):
guess = langdetect.detect(text)
return guess
except Exception as e:
self.log('debug', "Language detection failed with: {}".format(e))
return None
def _ocr(self, imgs, lang):

View File

@@ -27,28 +27,28 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_1(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = "lorem ipsum 130218 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_2(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = "lorem ipsum 2018 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_3(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = "lorem ipsum 20180213 lorem ipsum"
self.assertEqual(document.get_date(), None)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_4(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
date = document.get_date()
self.assertEqual(
@@ -62,7 +62,7 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_5(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = (
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
"ipsum"
@@ -79,7 +79,7 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_6(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = (
"lorem ipsum\n"
"Wohnort\n"
@@ -96,7 +96,7 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_7(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = (
"lorem ipsum\n"
"März 2019\n"
@@ -114,7 +114,7 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_8(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = (
"lorem ipsum\n"
"Wohnort\n"
@@ -138,7 +138,7 @@ class TestDate(TestCase):
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_date_format_9(self):
input_file = os.path.join(self.SAMPLE_FILES, "")
document = RasterisedDocumentParser(input_file)
document = RasterisedDocumentParser(input_file, None)
document._text = (
"lorem ipsum\n"
"27. Nullmonth 2020\n"
@@ -159,7 +159,7 @@ class TestDate(TestCase):
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_past(self, *args):
document = RasterisedDocumentParser("/dev/null")
document = RasterisedDocumentParser("/dev/null", None)
document.get_text()
self.assertIsNone(document.get_date())
@@ -169,7 +169,7 @@ class TestDate(TestCase):
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_future(self, *args):
document = RasterisedDocumentParser("/dev/null")
document = RasterisedDocumentParser("/dev/null", None)
document.get_text()
self.assertIsNone(document.get_date())
@@ -179,7 +179,7 @@ class TestDate(TestCase):
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_crazy_date_with_spaces(self, *args):
document = RasterisedDocumentParser("/dev/null")
document = RasterisedDocumentParser("/dev/null", None)
document.get_text()
self.assertIsNone(document.get_date())
@@ -195,6 +195,6 @@ class TestDate(TestCase):
)
@mock.patch(MOCK_SCRATCH, SCRATCH)
def test_filename_date_parse_invalid(self, *args):
document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf")
document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf", None)
document.get_text()
self.assertIsNone(document.get_date())