mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Sets the timezone of creation, if the date is known and naive
This commit is contained in:
parent
56fcb3fee1
commit
6bcc26b487
@ -4,7 +4,6 @@ from pathlib import Path
|
||||
import httpx
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from tika_client import TikaClient
|
||||
|
||||
from documents.parsers import DocumentParser
|
||||
@ -53,9 +52,7 @@ class TikaDocumentParser(DocumentParser):
|
||||
|
||||
try:
|
||||
with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
|
||||
with open(document_path, "rb") as f:
|
||||
content = f.read()
|
||||
parsed = client.tika.as_text.from_buffer(content, mime_type)
|
||||
parsed = client.tika.as_text.from_file(document_path, mime_type)
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse {document_path} with tika server at "
|
||||
@ -66,9 +63,10 @@ class TikaDocumentParser(DocumentParser):
|
||||
if self.text is not None:
|
||||
self.text = self.text.strip()
|
||||
|
||||
tz = timezone.get_current_timezone()
|
||||
self.date = parsed.created
|
||||
if self.date is not None and timezone.is_naive(self.date):
|
||||
self.date = timezone.make_aware(self.date)
|
||||
|
||||
self.date = timezone.make_aware(parsed.created, tz)
|
||||
self.archive_path = self.convert_to_pdf(document_path, file_name)
|
||||
|
||||
def convert_to_pdf(self, document_path, file_name):
|
||||
|
@ -3,6 +3,11 @@ import os
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
try:
|
||||
import zoneinfo
|
||||
except ImportError:
|
||||
from backports import zoneinfo
|
||||
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from httpx import Request
|
||||
@ -21,6 +26,7 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
||||
def tearDown(self) -> None:
|
||||
self.parser.cleanup()
|
||||
|
||||
@override_settings(TIME_ZONE="America/Chicago")
|
||||
def test_parse(self):
|
||||
# Pretend parse response
|
||||
self.httpx_mock.add_response(
|
||||
@ -44,7 +50,15 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
||||
with open(self.parser.archive_path, "rb") as f:
|
||||
self.assertEqual(f.read(), b"PDF document")
|
||||
|
||||
self.assertEqual(self.parser.date, datetime.datetime(2020, 11, 21))
|
||||
self.assertEqual(
|
||||
self.parser.date,
|
||||
datetime.datetime(
|
||||
2020,
|
||||
11,
|
||||
21,
|
||||
tzinfo=zoneinfo.ZoneInfo("America/Chicago"),
|
||||
),
|
||||
)
|
||||
|
||||
def test_metadata(self):
|
||||
self.httpx_mock.add_response(
|
||||
|
Loading…
x
Reference in New Issue
Block a user