"""Tests for the plain-text document parser (``paperless_text.parsers``)."""

from pathlib import Path
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless_text.parsers import TextDocumentParser
class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """Exercise TextDocumentParser against the plain-text files in ``samples``."""

    # Directory of sample documents, resolved relative to this test module so
    # the tests do not depend on the current working directory.
    SAMPLE_DIR = Path(__file__).resolve().parent / "samples"

    def test_thumbnail(self):
        """
        GIVEN:
            - A plain text sample file
        WHEN:
            - A thumbnail is requested for the file
        THEN:
            - No error is raised
            - The returned value passes ``assertIsFile``
        """
        parser = TextDocumentParser(None)

        # Smoke test only: the thumbnail content itself is not inspected.
        thumbnail_path = parser.get_thumbnail(
            self.SAMPLE_DIR / "test.txt",
            "text/plain",
        )

        self.assertIsFile(thumbnail_path)

    def test_parse(self):
        """
        GIVEN:
            - A plain text sample file
        WHEN:
            - The file is parsed
        THEN:
            - The extracted text equals the expected file content
            - No archive path is reported
        """
        parser = TextDocumentParser(None)

        parser.parse(
            self.SAMPLE_DIR / "test.txt",
            "text/plain",
        )

        self.assertEqual(parser.get_text(), "This is a test file.\n")
        self.assertIsNone(parser.get_archive_path())

    def test_parse_invalid_bytes(self):
        """
        GIVEN:
            - Text file which contains invalid UTF bytes
        WHEN:
            - The file is parsed
        THEN:
            - Parsing continues
            - Invalid bytes are replaced by U+FFFD (the Unicode
              replacement character)
            - No archive path is reported
        """
        parser = TextDocumentParser(None)

        parser.parse(
            self.SAMPLE_DIR / "decode_error.txt",
            "text/plain",
        )

        # The replacement character is written as an escape so the expected
        # value survives editors and tools that mangle non-ASCII source text.
        self.assertEqual(parser.get_text(), "Pantothens\ufffdure\n")
        self.assertIsNone(parser.get_archive_path())