mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-05 11:39:29 -05:00
53 lines
1.4 KiB
Python
53 lines
1.4 KiB
Python
from pathlib import Path
|
||
|
||
from django.test import TestCase
|
||
|
||
from documents.tests.utils import DirectoriesMixin
|
||
from documents.tests.utils import FileSystemAssertsMixin
|
||
from paperless_text.parsers import TextDocumentParser
|
||
|
||
|
||
class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """Tests for TextDocumentParser using the files in the local samples directory."""

    # Directory holding the sample documents, located next to this test module.
    SAMPLE_DIR = Path(__file__).resolve().parent / "samples"

    def test_thumbnail(self):
        """
        GIVEN:
            - A plain text sample file
        WHEN:
            - A thumbnail is requested for it
        THEN:
            - The returned path points to an existing file
        """
        parser = TextDocumentParser(None)

        # just make sure that it does not crash
        thumbnail_path = parser.get_thumbnail(
            self.SAMPLE_DIR / "test.txt",
            "text/plain",
        )

        self.assertIsFile(thumbnail_path)

    def test_parse(self):
        """
        GIVEN:
            - A plain text sample file
        WHEN:
            - The file is parsed
        THEN:
            - The extracted text matches the file content
            - No archive file is produced
        """
        parser = TextDocumentParser(None)

        parser.parse(
            self.SAMPLE_DIR / "test.txt",
            "text/plain",
        )

        self.assertEqual(parser.get_text(), "This is a test file.\n")
        self.assertIsNone(parser.get_archive_path())

    def test_parse_invalid_bytes(self):
        """
        GIVEN:
            - Text file which contains invalid UTF bytes
        WHEN:
            - The file is parsed
        THEN:
            - Parsing continues
            - Invalid bytes are replaced with the Unicode replacement
              character (U+FFFD)
        """
        parser = TextDocumentParser(None)

        parser.parse(
            self.SAMPLE_DIR / "decode_error.txt",
            "text/plain",
        )

        # The replacement character is spelled as an escape so that the
        # expectation cannot be corrupted by editors or tools that re-encode
        # this source file.
        self.assertEqual(parser.get_text(), "Pantothens\ufffdure\n")
        self.assertIsNone(parser.get_archive_path())