mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Creates a mix-in for asserting file system states
This commit is contained in:

committed by
Trenton H

parent
1718cf6504
commit
0df91c31f1
@@ -10,6 +10,7 @@ from django.test import TestCase
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import run_convert
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless_tesseract.parsers import post_process_text
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
|
||||
@@ -36,7 +37,7 @@ class FakeImageFile(ContextManager):
|
||||
return os.path.basename(self.fname)
|
||||
|
||||
|
||||
class TestParser(DirectoriesMixin, TestCase):
|
||||
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
@@ -88,7 +89,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(thumb))
|
||||
self.assertIsFile(thumb)
|
||||
|
||||
@mock.patch("documents.parsers.run_convert")
|
||||
def test_thumbnail_fallback(self, m):
|
||||
@@ -105,7 +106,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(thumb))
|
||||
self.assertIsFile(thumb)
|
||||
|
||||
def test_thumbnail_encrypted(self):
|
||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||
@@ -113,7 +114,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(thumb))
|
||||
self.assertIsFile(thumb)
|
||||
|
||||
def test_get_dpi(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
@@ -132,7 +133,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
"application/pdf",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
|
||||
self.assertContainsStrings(parser.get_text(), ["This is a test document."])
|
||||
|
||||
@@ -144,7 +145,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
"application/pdf",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
|
||||
self.assertContainsStrings(
|
||||
parser.get_text(),
|
||||
@@ -225,7 +226,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.png"), "image/png")
|
||||
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
|
||||
self.assertContainsStrings(parser.get_text(), ["This is a test document."])
|
||||
|
||||
@@ -241,7 +242,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
|
||||
parser.parse(dest_file, "image/png")
|
||||
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
|
||||
self.assertContainsStrings(parser.get_text(), ["This is a test document."])
|
||||
|
||||
@@ -273,7 +274,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png")
|
||||
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
@@ -286,7 +287,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -299,7 +300,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -312,7 +313,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -325,7 +326,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -338,7 +339,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -362,7 +363,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
|
||||
self.assertNotIn("page 3", parser.get_text().lower())
|
||||
|
||||
@@ -384,7 +385,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
|
||||
self.assertNotIn("page 2", parser.get_text().lower())
|
||||
self.assertNotIn("page 3", parser.get_text().lower())
|
||||
@@ -455,7 +456,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertIsNotNone(parser.archive_path)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3", "page 4", "page 5", "page 6"],
|
||||
@@ -486,7 +487,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertIsNotNone(parser.archive_path)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
[
|
||||
@@ -556,7 +557,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-images.tiff"),
|
||||
"image/tiff",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -580,7 +581,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
tmp_file.name,
|
||||
"image/tiff",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -608,7 +609,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
tmp_file.name,
|
||||
"image/tiff",
|
||||
)
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertContainsStrings(
|
||||
parser.get_text().lower(),
|
||||
["page 1", "page 2", "page 3"],
|
||||
@@ -689,40 +690,40 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text())
|
||||
|
||||
|
||||
class TestParserFileTypes(DirectoriesMixin, TestCase):
|
||||
class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
def test_bmp(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.bmp"), "image/bmp")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertIn("this is a test document", parser.get_text().lower())
|
||||
|
||||
def test_jpg(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.jpg"), "image/jpeg")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertIn("this is a test document", parser.get_text().lower())
|
||||
|
||||
@override_settings(OCR_IMAGE_DPI=200)
|
||||
def test_gif(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.gif"), "image/gif")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertIn("this is a test document", parser.get_text().lower())
|
||||
|
||||
def test_tiff(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.tif"), "image/tiff")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
self.assertIn("this is a test document", parser.get_text().lower())
|
||||
|
||||
@override_settings(OCR_IMAGE_DPI=72)
|
||||
def test_webp(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "document.webp"), "image/webp")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertIsFile(parser.archive_path)
|
||||
# OCR consistently mangles this space, oh well
|
||||
self.assertIn(
|
||||
"this is awebp document, created 11/14/2022.",
|
||||
|
Reference in New Issue
Block a user