mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Handle dateparser ValueErrors
When parsing dates from the document text or filenames, correctly handle ValueErrors that indicate broken dates. Newly added tests ensure that this handling works properly.
This commit is contained in:
		| @@ -108,7 +108,7 @@ class DocumentParser: | |||||||
|  |  | ||||||
|                 try: |                 try: | ||||||
|                     date = __parser(date_string, self.FILENAME_DATE_ORDER) |                     date = __parser(date_string, self.FILENAME_DATE_ORDER) | ||||||
|                 except TypeError: |                 except (TypeError, ValueError): | ||||||
|                     # Skip all matches that do not parse to a proper date |                     # Skip all matches that do not parse to a proper date | ||||||
|                     continue |                     continue | ||||||
|  |  | ||||||
| @@ -134,7 +134,7 @@ class DocumentParser: | |||||||
|  |  | ||||||
|             try: |             try: | ||||||
|                 date = __parser(date_string, self.DATE_ORDER) |                 date = __parser(date_string, self.DATE_ORDER) | ||||||
|             except TypeError: |             except (TypeError, ValueError): | ||||||
|                 # Skip all matches that do not parse to a proper date |                 # Skip all matches that do not parse to a proper date | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|   | |||||||
| @@ -172,3 +172,29 @@ class TestDate(TestCase): | |||||||
|         document = RasterisedDocumentParser("/dev/null") |         document = RasterisedDocumentParser("/dev/null") | ||||||
|         document.get_text() |         document.get_text() | ||||||
|         self.assertIsNone(document.get_date()) |         self.assertIsNone(document.get_date()) | ||||||
|  |  | ||||||
|  |     @mock.patch( | ||||||
|  |         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", | ||||||
|  |         return_value="20 408000l 2475" | ||||||
|  |     ) | ||||||
|  |     @mock.patch(MOCK_SCRATCH, SCRATCH) | ||||||
|  |     def test_crazy_date_with_spaces(self, *args): | ||||||
|  |         document = RasterisedDocumentParser("/dev/null") | ||||||
|  |         document.get_text() | ||||||
|  |         self.assertIsNone(document.get_date()) | ||||||
|  |  | ||||||
|  |     @mock.patch( | ||||||
|  |         "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", | ||||||
|  |         return_value="No date in here" | ||||||
|  |     ) | ||||||
|  |     @mock.patch( | ||||||
|  |         "paperless_tesseract.parsers.RasterisedDocumentParser." | ||||||
|  |         "FILENAME_DATE_ORDER", | ||||||
|  |         new_callable=mock.PropertyMock, | ||||||
|  |         return_value="YMD" | ||||||
|  |     ) | ||||||
|  |     @mock.patch(MOCK_SCRATCH, SCRATCH) | ||||||
|  |     def test_filename_date_parse_invalid(self, *args): | ||||||
|  |         document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf") | ||||||
|  |         document.get_text() | ||||||
|  |         self.assertIsNone(document.get_date()) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Johannes Wienke
					Johannes Wienke