mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Sets the timezone of the creation date, if the date is known and naive
This commit is contained in:
		| @@ -4,7 +4,6 @@ from pathlib import Path | ||||
| import httpx | ||||
| from django.conf import settings | ||||
| from django.utils import timezone | ||||
|  | ||||
| from tika_client import TikaClient | ||||
|  | ||||
| from documents.parsers import DocumentParser | ||||
| @@ -53,9 +52,7 @@ class TikaDocumentParser(DocumentParser): | ||||
|  | ||||
|         try: | ||||
|             with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client: | ||||
|                 with open(document_path, "rb") as f: | ||||
|                     content = f.read() | ||||
|                     parsed = client.tika.as_text.from_buffer(content, mime_type) | ||||
|                 parsed = client.tika.as_text.from_file(document_path, mime_type) | ||||
|         except Exception as err: | ||||
|             raise ParseError( | ||||
|                 f"Could not parse {document_path} with tika server at " | ||||
| @@ -66,9 +63,10 @@ class TikaDocumentParser(DocumentParser): | ||||
|         if self.text is not None: | ||||
|             self.text = self.text.strip() | ||||
|  | ||||
|         tz = timezone.get_current_timezone() | ||||
|         self.date = parsed.created | ||||
|         if self.date is not None and timezone.is_naive(self.date): | ||||
|             self.date = timezone.make_aware(self.date) | ||||
|  | ||||
|         self.date = timezone.make_aware(parsed.created, tz) | ||||
|         self.archive_path = self.convert_to_pdf(document_path, file_name) | ||||
|  | ||||
|     def convert_to_pdf(self, document_path, file_name): | ||||
|   | ||||
| @@ -3,6 +3,11 @@ import os | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| try: | ||||
|     import zoneinfo | ||||
| except ImportError: | ||||
|     from backports import zoneinfo | ||||
|  | ||||
| from django.test import TestCase | ||||
| from django.test import override_settings | ||||
| from httpx import Request | ||||
| @@ -21,6 +26,7 @@ class TestTikaParser(HttpxMockMixin, TestCase): | ||||
|     def tearDown(self) -> None: | ||||
|         self.parser.cleanup() | ||||
|  | ||||
|     @override_settings(TIME_ZONE="America/Chicago") | ||||
|     def test_parse(self): | ||||
|         # Pretend parse response | ||||
|         self.httpx_mock.add_response( | ||||
| @@ -44,7 +50,15 @@ class TestTikaParser(HttpxMockMixin, TestCase): | ||||
|         with open(self.parser.archive_path, "rb") as f: | ||||
|             self.assertEqual(f.read(), b"PDF document") | ||||
|  | ||||
|         self.assertEqual(self.parser.date, datetime.datetime(2020, 11, 21)) | ||||
|         self.assertEqual( | ||||
|             self.parser.date, | ||||
|             datetime.datetime( | ||||
|                 2020, | ||||
|                 11, | ||||
|                 21, | ||||
|                 tzinfo=zoneinfo.ZoneInfo("America/Chicago"), | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|     def test_metadata(self): | ||||
|         self.httpx_mock.add_response( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Trenton Holmes
					Trenton Holmes