mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	fixed the test cases
This commit is contained in:
		| @@ -364,35 +364,35 @@ class TestFieldPermutations(TestCase): | |||||||
|  |  | ||||||
| class DummyParser(DocumentParser): | class DummyParser(DocumentParser): | ||||||
|  |  | ||||||
|     def get_thumbnail(self): |     def get_thumbnail(self, document_path, mime_type): | ||||||
|         # not important during tests |         # not important during tests | ||||||
|         raise NotImplementedError() |         raise NotImplementedError() | ||||||
|  |  | ||||||
|     def __init__(self, path, logging_group, scratch_dir): |     def __init__(self, logging_group, scratch_dir): | ||||||
|         super(DummyParser, self).__init__(path, logging_group) |         super(DummyParser, self).__init__(logging_group) | ||||||
|         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) |         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) | ||||||
|  |  | ||||||
|     def get_optimised_thumbnail(self): |     def get_optimised_thumbnail(self, document_path, mime_type): | ||||||
|         return self.fake_thumb |         return self.fake_thumb | ||||||
|  |  | ||||||
|     def get_text(self): |     def parse(self, document_path, mime_type): | ||||||
|         return "The Text" |         self.text = "The Text" | ||||||
|  |  | ||||||
|  |  | ||||||
| class FaultyParser(DocumentParser): | class FaultyParser(DocumentParser): | ||||||
|  |  | ||||||
|     def get_thumbnail(self): |     def get_thumbnail(self, document_path, mime_type): | ||||||
|         # not important during tests |         # not important during tests | ||||||
|         raise NotImplementedError() |         raise NotImplementedError() | ||||||
|  |  | ||||||
|     def __init__(self, path, logging_group, scratch_dir): |     def __init__(self, logging_group, scratch_dir): | ||||||
|         super(FaultyParser, self).__init__(path, logging_group) |         super(FaultyParser, self).__init__(logging_group) | ||||||
|         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) |         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) | ||||||
|  |  | ||||||
|     def get_optimised_thumbnail(self): |     def get_optimised_thumbnail(self, document_path, mime_type): | ||||||
|         return self.fake_thumb |         return self.fake_thumb | ||||||
|  |  | ||||||
|     def get_text(self): |     def parse(self, document_path, mime_type): | ||||||
|         raise ParseError("Does not compute.") |         raise ParseError("Does not compute.") | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -410,11 +410,11 @@ def fake_magic_from_file(file, mime=False): | |||||||
| @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) | @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) | ||||||
| class TestConsumer(TestCase): | class TestConsumer(TestCase): | ||||||
|  |  | ||||||
|     def make_dummy_parser(self, path, logging_group): |     def make_dummy_parser(self, logging_group): | ||||||
|         return DummyParser(path, logging_group, self.scratch_dir) |         return DummyParser(logging_group, self.scratch_dir) | ||||||
|  |  | ||||||
|     def make_faulty_parser(self, path, logging_group): |     def make_faulty_parser(self, logging_group): | ||||||
|         return FaultyParser(path, logging_group, self.scratch_dir) |         return FaultyParser(logging_group, self.scratch_dir) | ||||||
|  |  | ||||||
|     def setUp(self): |     def setUp(self): | ||||||
|         self.scratch_dir = tempfile.mkdtemp() |         self.scratch_dir = tempfile.mkdtemp() | ||||||
|   | |||||||
							
								
								
									
										140
									
								
								src/documents/tests/test_date_parsing.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								src/documents/tests/test_date_parsing.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,140 @@ | |||||||
|  | import datetime | ||||||
|  | import os | ||||||
|  | import shutil | ||||||
|  | from unittest import mock | ||||||
|  | from uuid import uuid4 | ||||||
|  |  | ||||||
|  | from dateutil import tz | ||||||
|  | from django.conf import settings | ||||||
|  | from django.test import TestCase, override_settings | ||||||
|  |  | ||||||
|  | from documents.parsers import parse_date | ||||||
|  | from paperless_tesseract.parsers import RasterisedDocumentParser | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TestDate(TestCase): | ||||||
|  |  | ||||||
|  |     SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "../../paperless_tesseract/tests/samples") | ||||||
|  |     SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) | ||||||
|  |  | ||||||
|  |     def setUp(self): | ||||||
|  |         os.makedirs(self.SCRATCH, exist_ok=True) | ||||||
|  |  | ||||||
|  |     def tearDown(self): | ||||||
|  |         shutil.rmtree(self.SCRATCH) | ||||||
|  |  | ||||||
|  |     def test_date_format_1(self): | ||||||
|  |         text = "lorem ipsum 130218 lorem ipsum" | ||||||
|  |         self.assertEqual(parse_date("", text), None) | ||||||
|  |  | ||||||
|  |     def test_date_format_2(self): | ||||||
|  |         text = "lorem ipsum 2018 lorem ipsum" | ||||||
|  |         self.assertEqual(parse_date("", text), None) | ||||||
|  |  | ||||||
|  |     def test_date_format_3(self): | ||||||
|  |         text = "lorem ipsum 20180213 lorem ipsum" | ||||||
|  |         self.assertEqual(parse_date("", text), None) | ||||||
|  |  | ||||||
|  |     def test_date_format_4(self): | ||||||
|  |         text = "lorem ipsum 13.02.2018 lorem ipsum" | ||||||
|  |         date = parse_date("", text) | ||||||
|  |         self.assertEqual( | ||||||
|  |             date, | ||||||
|  |             datetime.datetime( | ||||||
|  |                 2018, 2, 13, 0, 0, | ||||||
|  |                 tzinfo=tz.gettz(settings.TIME_ZONE) | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_date_format_5(self): | ||||||
|  |         text = ( | ||||||
|  |             "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " | ||||||
|  |             "ipsum" | ||||||
|  |         ) | ||||||
|  |         date = parse_date("", text) | ||||||
|  |         self.assertEqual( | ||||||
|  |             date, | ||||||
|  |             datetime.datetime( | ||||||
|  |                 2018, 2, 13, 0, 0, | ||||||
|  |                 tzinfo=tz.gettz(settings.TIME_ZONE) | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_date_format_6(self): | ||||||
|  |         text = ( | ||||||
|  |             "lorem ipsum\n" | ||||||
|  |             "Wohnort\n" | ||||||
|  |             "3100\n" | ||||||
|  |             "IBAN\n" | ||||||
|  |             "AT87 4534\n" | ||||||
|  |             "1234\n" | ||||||
|  |             "1234 5678\n" | ||||||
|  |             "BIC\n" | ||||||
|  |             "lorem ipsum" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(parse_date("", text), None) | ||||||
|  |  | ||||||
|  |     def test_date_format_7(self): | ||||||
|  |         text = ( | ||||||
|  |             "lorem ipsum\n" | ||||||
|  |             "März 2019\n" | ||||||
|  |             "lorem ipsum" | ||||||
|  |         ) | ||||||
|  |         date = parse_date("", text) | ||||||
|  |         self.assertEqual( | ||||||
|  |             date, | ||||||
|  |             datetime.datetime( | ||||||
|  |                 2019, 3, 1, 0, 0, | ||||||
|  |                 tzinfo=tz.gettz(settings.TIME_ZONE) | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_date_format_8(self): | ||||||
|  |         text = ( | ||||||
|  |             "lorem ipsum\n" | ||||||
|  |             "Wohnort\n" | ||||||
|  |             "3100\n" | ||||||
|  |             "IBAN\n" | ||||||
|  |             "AT87 4534\n" | ||||||
|  |             "1234\n" | ||||||
|  |             "1234 5678\n" | ||||||
|  |             "BIC\n" | ||||||
|  |             "lorem ipsum\n" | ||||||
|  |             "März 2020" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             parse_date("", text), | ||||||
|  |             datetime.datetime( | ||||||
|  |                 2020, 3, 1, 0, 0, | ||||||
|  |                 tzinfo=tz.gettz(settings.TIME_ZONE) | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     @override_settings(SCRATCH_DIR=SCRATCH) | ||||||
|  |     def test_date_format_9(self): | ||||||
|  |         text = ( | ||||||
|  |             "lorem ipsum\n" | ||||||
|  |             "27. Nullmonth 2020\n" | ||||||
|  |             "März 2020\n" | ||||||
|  |             "lorem ipsum" | ||||||
|  |         ) | ||||||
|  |         self.assertEqual( | ||||||
|  |             parse_date("", text), | ||||||
|  |             datetime.datetime( | ||||||
|  |                 2020, 3, 1, 0, 0, | ||||||
|  |                 tzinfo=tz.gettz(settings.TIME_ZONE) | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def test_crazy_date_past(self, *args): | ||||||
|  |         self.assertIsNone(parse_date("", "01-07-0590 00:00:00")) | ||||||
|  |  | ||||||
|  |     def test_crazy_date_future(self, *args): | ||||||
|  |         self.assertIsNone(parse_date("", "01-07-2350 00:00:00")) | ||||||
|  |  | ||||||
|  |     def test_crazy_date_with_spaces(self, *args): | ||||||
|  |         self.assertIsNone(parse_date("", "20 408000l 2475")) | ||||||
|  |  | ||||||
|  |     @override_settings(FILENAME_DATE_ORDER="YMD") | ||||||
|  |     def test_filename_date_parse_invalid(self, *args): | ||||||
|  |         self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")) | ||||||
| @@ -1,193 +0,0 @@ | |||||||
| import datetime |  | ||||||
| import os |  | ||||||
| import shutil |  | ||||||
| from unittest import mock |  | ||||||
| from uuid import uuid4 |  | ||||||
|  |  | ||||||
| from dateutil import tz |  | ||||||
| from django.conf import settings |  | ||||||
| from django.test import TestCase, override_settings |  | ||||||
|  |  | ||||||
| from ..parsers import RasterisedDocumentParser |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestDate(TestCase): |  | ||||||
|  |  | ||||||
|     SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples") |  | ||||||
|     SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) |  | ||||||
|  |  | ||||||
|     def setUp(self): |  | ||||||
|         os.makedirs(self.SCRATCH, exist_ok=True) |  | ||||||
|  |  | ||||||
|     def tearDown(self): |  | ||||||
|         shutil.rmtree(self.SCRATCH) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_1(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = "lorem ipsum 130218 lorem ipsum" |  | ||||||
|         self.assertEqual(document.get_date(), None) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_2(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = "lorem ipsum 2018 lorem ipsum" |  | ||||||
|         self.assertEqual(document.get_date(), None) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_3(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = "lorem ipsum 20180213 lorem ipsum" |  | ||||||
|         self.assertEqual(document.get_date(), None) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_4(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = "lorem ipsum 13.02.2018 lorem ipsum" |  | ||||||
|         date = document.get_date() |  | ||||||
|         self.assertEqual( |  | ||||||
|             date, |  | ||||||
|             datetime.datetime( |  | ||||||
|                 2018, 2, 13, 0, 0, |  | ||||||
|                 tzinfo=tz.gettz(settings.TIME_ZONE) |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_5(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = ( |  | ||||||
|             "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " |  | ||||||
|             "ipsum" |  | ||||||
|         ) |  | ||||||
|         date = document.get_date() |  | ||||||
|         self.assertEqual( |  | ||||||
|             date, |  | ||||||
|             datetime.datetime( |  | ||||||
|                 2018, 2, 13, 0, 0, |  | ||||||
|                 tzinfo=tz.gettz(settings.TIME_ZONE) |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_6(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = ( |  | ||||||
|             "lorem ipsum\n" |  | ||||||
|             "Wohnort\n" |  | ||||||
|             "3100\n" |  | ||||||
|             "IBAN\n" |  | ||||||
|             "AT87 4534\n" |  | ||||||
|             "1234\n" |  | ||||||
|             "1234 5678\n" |  | ||||||
|             "BIC\n" |  | ||||||
|             "lorem ipsum" |  | ||||||
|         ) |  | ||||||
|         self.assertEqual(document.get_date(), None) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_7(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = ( |  | ||||||
|             "lorem ipsum\n" |  | ||||||
|             "März 2019\n" |  | ||||||
|             "lorem ipsum" |  | ||||||
|         ) |  | ||||||
|         date = document.get_date() |  | ||||||
|         self.assertEqual( |  | ||||||
|             date, |  | ||||||
|             datetime.datetime( |  | ||||||
|                 2019, 3, 1, 0, 0, |  | ||||||
|                 tzinfo=tz.gettz(settings.TIME_ZONE) |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_8(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = ( |  | ||||||
|             "lorem ipsum\n" |  | ||||||
|             "Wohnort\n" |  | ||||||
|             "3100\n" |  | ||||||
|             "IBAN\n" |  | ||||||
|             "AT87 4534\n" |  | ||||||
|             "1234\n" |  | ||||||
|             "1234 5678\n" |  | ||||||
|             "BIC\n" |  | ||||||
|             "lorem ipsum\n" |  | ||||||
|             "März 2020" |  | ||||||
|         ) |  | ||||||
|         self.assertEqual( |  | ||||||
|             document.get_date(), |  | ||||||
|             datetime.datetime( |  | ||||||
|                 2020, 3, 1, 0, 0, |  | ||||||
|                 tzinfo=tz.gettz(settings.TIME_ZONE) |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_date_format_9(self): |  | ||||||
|         input_file = os.path.join(self.SAMPLE_FILES, "") |  | ||||||
|         document = RasterisedDocumentParser(input_file, None) |  | ||||||
|         document._text = ( |  | ||||||
|             "lorem ipsum\n" |  | ||||||
|             "27. Nullmonth 2020\n" |  | ||||||
|             "März 2020\n" |  | ||||||
|             "lorem ipsum" |  | ||||||
|         ) |  | ||||||
|         self.assertEqual( |  | ||||||
|             document.get_date(), |  | ||||||
|             datetime.datetime( |  | ||||||
|                 2020, 3, 1, 0, 0, |  | ||||||
|                 tzinfo=tz.gettz(settings.TIME_ZONE) |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|     @mock.patch( |  | ||||||
|         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", |  | ||||||
|         return_value="01-07-0590 00:00:00" |  | ||||||
|     ) |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_crazy_date_past(self, *args): |  | ||||||
|         document = RasterisedDocumentParser("/dev/null", None) |  | ||||||
|         document.get_text() |  | ||||||
|         self.assertIsNone(document.get_date()) |  | ||||||
|  |  | ||||||
|     @mock.patch( |  | ||||||
|         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", |  | ||||||
|         return_value="01-07-2350 00:00:00" |  | ||||||
|     ) |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_crazy_date_future(self, *args): |  | ||||||
|         document = RasterisedDocumentParser("/dev/null", None) |  | ||||||
|         document.get_text() |  | ||||||
|         self.assertIsNone(document.get_date()) |  | ||||||
|  |  | ||||||
|     @mock.patch( |  | ||||||
|         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", |  | ||||||
|         return_value="20 408000l 2475" |  | ||||||
|     ) |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_crazy_date_with_spaces(self, *args): |  | ||||||
|         document = RasterisedDocumentParser("/dev/null", None) |  | ||||||
|         document.get_text() |  | ||||||
|         self.assertIsNone(document.get_date()) |  | ||||||
|  |  | ||||||
|     @mock.patch( |  | ||||||
|         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", |  | ||||||
|         return_value="No date in here" |  | ||||||
|     ) |  | ||||||
|     @override_settings(FILENAME_DATE_ORDER="YMD") |  | ||||||
|     @override_settings(SCRATCH_DIR=SCRATCH) |  | ||||||
|     def test_filename_date_parse_invalid(self, *args): |  | ||||||
|         document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf", None) |  | ||||||
|         document.get_text() |  | ||||||
|         self.assertIsNone(document.get_date()) |  | ||||||
| @@ -56,8 +56,8 @@ class TestAuxilliaryFunctions(TestCase): | |||||||
|         self.assertIsNone(text) |         self.assertIsNone(text) | ||||||
|  |  | ||||||
|     def test_thumbnail(self): |     def test_thumbnail(self): | ||||||
|         parser = RasterisedDocumentParser(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         parser.get_thumbnail() |         parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), "application/pdf") | ||||||
|         # don't really know how to test it, just call it and assert that it does not raise anything. |         # don't really know how to test it, just call it and assert that it does not raise anything. | ||||||
|  |  | ||||||
|     @mock.patch("paperless_tesseract.parsers.run_convert") |     @mock.patch("paperless_tesseract.parsers.run_convert") | ||||||
| @@ -71,6 +71,6 @@ class TestAuxilliaryFunctions(TestCase): | |||||||
|  |  | ||||||
|         m.side_effect = call_convert |         m.side_effect = call_convert | ||||||
|  |  | ||||||
|         parser = RasterisedDocumentParser(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         parser.get_thumbnail() |         parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple.pdf'), "application/pdf") | ||||||
|         # don't really know how to test it, just call it and assert that it does not raise anything. |         # don't really know how to test it, just call it and assert that it does not raise anything. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler