mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into feature-permissions
This commit is contained in:
@@ -325,11 +325,10 @@ def save_to_dir(
|
||||
Optionally rename the file.
|
||||
"""
|
||||
if os.path.isfile(filepath) and os.path.isdir(target_dir):
|
||||
dst = shutil.copy(filepath, target_dir)
|
||||
logging.debug(f"saved {str(filepath)} to {str(dst)}")
|
||||
if newname:
|
||||
dst_new = os.path.join(target_dir, newname)
|
||||
logger.debug(f"moving {str(dst)} to {str(dst_new)}")
|
||||
os.rename(dst, dst_new)
|
||||
dest = target_dir
|
||||
if newname is not None:
|
||||
dest = os.path.join(dest, newname)
|
||||
shutil.copy(filepath, dest)
|
||||
logging.debug(f"saved {str(filepath)} to {str(dest)}")
|
||||
else:
|
||||
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
||||
|
@@ -346,6 +346,7 @@ class Consumer(LoggingMixin):
|
||||
mime_type,
|
||||
)
|
||||
if not parser_class:
|
||||
tempdir.cleanup()
|
||||
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
|
||||
|
||||
# Notify all listeners that we're going to do some work.
|
||||
@@ -404,6 +405,7 @@ class Consumer(LoggingMixin):
|
||||
|
||||
except ParseError as e:
|
||||
document_parser.cleanup()
|
||||
tempdir.cleanup()
|
||||
self._fail(
|
||||
str(e),
|
||||
f"Error while consuming document {self.filename}: {e}",
|
||||
|
@@ -779,11 +779,17 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
|
||||
document_type="document_type",
|
||||
created="created",
|
||||
created_year="created_year",
|
||||
created_year_short="created_year_short",
|
||||
created_month="created_month",
|
||||
created_month_name="created_month_name",
|
||||
created_month_name_short="created_month_name_short",
|
||||
created_day="created_day",
|
||||
added="added",
|
||||
added_year="added_year",
|
||||
added_year_short="added_year_short",
|
||||
added_month="added_month",
|
||||
added_month_name="added_month_name",
|
||||
added_month_name_short="added_month_name_short",
|
||||
added_day="added_day",
|
||||
asn="asn",
|
||||
tags="tags",
|
||||
|
@@ -130,6 +130,18 @@ def consume_file(
|
||||
)
|
||||
|
||||
if document_list:
|
||||
|
||||
# If the file is an upload, it's in the scratch directory
|
||||
# Move it to consume directory to be picked up
|
||||
# Otherwise, use the current parent to keep possible tags
|
||||
# from subdirectories
|
||||
try:
|
||||
# is_relative_to would be nicer, but new in 3.9
|
||||
_ = path.relative_to(settings.SCRATCH_DIR)
|
||||
save_to_dir = settings.CONSUMPTION_DIR
|
||||
except ValueError:
|
||||
save_to_dir = path.parent
|
||||
|
||||
for n, document in enumerate(document_list):
|
||||
# save to consumption dir
|
||||
# rename it to the original filename with number prefix
|
||||
@@ -138,23 +150,18 @@ def consume_file(
|
||||
else:
|
||||
newname = None
|
||||
|
||||
# If the file is an upload, it's in the scratch directory
|
||||
# Move it to consume directory to be picked up
|
||||
# Otherwise, use the current parent to keep possible tags
|
||||
# from subdirectories
|
||||
try:
|
||||
# is_relative_to would be nicer, but new in 3.9
|
||||
_ = path.relative_to(settings.SCRATCH_DIR)
|
||||
save_to_dir = settings.CONSUMPTION_DIR
|
||||
except ValueError:
|
||||
save_to_dir = path.parent
|
||||
|
||||
barcodes.save_to_dir(
|
||||
document,
|
||||
newname=newname,
|
||||
target_dir=save_to_dir,
|
||||
)
|
||||
|
||||
# Split file has been copied safely, remove it
|
||||
os.remove(document)
|
||||
|
||||
# And clean up the directory as well, now it's empty
|
||||
shutil.rmtree(os.path.dirname(document_list[0]))
|
||||
|
||||
# Delete the PDF file which was split
|
||||
os.remove(doc_barcode_info.pdf_path)
|
||||
|
||||
|
@@ -125,28 +125,28 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
response = self.client.get("/api/documents/", format="json")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
results_full = response.data["results"]
|
||||
self.assertTrue("content" in results_full[0])
|
||||
self.assertTrue("id" in results_full[0])
|
||||
self.assertIn("content", results_full[0])
|
||||
self.assertIn("id", results_full[0])
|
||||
|
||||
response = self.client.get("/api/documents/?fields=id", format="json")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
results = response.data["results"]
|
||||
self.assertFalse("content" in results[0])
|
||||
self.assertTrue("id" in results[0])
|
||||
self.assertIn("id", results[0])
|
||||
self.assertEqual(len(results[0]), 1)
|
||||
|
||||
response = self.client.get("/api/documents/?fields=content", format="json")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
results = response.data["results"]
|
||||
self.assertTrue("content" in results[0])
|
||||
self.assertIn("content", results[0])
|
||||
self.assertFalse("id" in results[0])
|
||||
self.assertEqual(len(results[0]), 1)
|
||||
|
||||
response = self.client.get("/api/documents/?fields=id,content", format="json")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
results = response.data["results"]
|
||||
self.assertTrue("content" in results[0])
|
||||
self.assertTrue("id" in results[0])
|
||||
self.assertIn("content", results[0])
|
||||
self.assertIn("id", results[0])
|
||||
self.assertEqual(len(results[0]), 2)
|
||||
|
||||
response = self.client.get(
|
||||
@@ -156,7 +156,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 200)
|
||||
results = response.data["results"]
|
||||
self.assertFalse("content" in results[0])
|
||||
self.assertTrue("id" in results[0])
|
||||
self.assertIn("id", results[0])
|
||||
self.assertEqual(len(results[0]), 1)
|
||||
|
||||
response = self.client.get("/api/documents/?fields=", format="json")
|
||||
@@ -3291,8 +3291,32 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertEqual(StoragePath.objects.count(), 1)
|
||||
|
||||
def test_api_storage_path_placeholders(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- API request to create a storage path with placeholders
|
||||
- Storage path is valid
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- Correct HTTP response
|
||||
- New storage path is created
|
||||
"""
|
||||
response = self.client.post(
|
||||
self.ENDPOINT,
|
||||
json.dumps(
|
||||
{
|
||||
"name": "Storage path with placeholders",
|
||||
"path": "{title}/{correspondent}/{document_type}/{created}/{created_year}/{created_year_short}/{created_month}/{created_month_name}/{created_month_name_short}/{created_day}/{added}/{added_year}/{added_year_short}/{added_month}/{added_month_name}/{added_month_name_short}/{added_day}/{asn}/{tags}/{tag_list}/",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, 201)
|
||||
self.assertEqual(StoragePath.objects.count(), 2)
|
||||
|
||||
class TestTasks(APITestCase):
|
||||
|
||||
class TestTasks(DirectoriesMixin, APITestCase):
|
||||
ENDPOINT = "/api/tasks/"
|
||||
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
|
||||
|
||||
|
@@ -847,13 +847,11 @@ class PreConsumeTestCase(TestCase):
|
||||
self.assertEqual(command[0], script.name)
|
||||
self.assertEqual(command[1], "path-to-file")
|
||||
|
||||
self.assertDictContainsSubset(
|
||||
{
|
||||
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||
"DOCUMENT_WORKING_PATH": c.path,
|
||||
},
|
||||
environment,
|
||||
)
|
||||
subset = {
|
||||
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||
"DOCUMENT_WORKING_PATH": c.path,
|
||||
}
|
||||
self.assertDictEqual(environment, {**environment, **subset})
|
||||
|
||||
@mock.patch("documents.consumer.Consumer.log")
|
||||
def test_script_with_output(self, mocked_log):
|
||||
@@ -983,16 +981,15 @@ class PostConsumeTestCase(TestCase):
|
||||
self.assertEqual(command[7], "my_bank")
|
||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||
|
||||
self.assertDictContainsSubset(
|
||||
{
|
||||
"DOCUMENT_ID": str(doc.pk),
|
||||
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||
"DOCUMENT_TAGS": "a,b",
|
||||
},
|
||||
environment,
|
||||
)
|
||||
subset = {
|
||||
"DOCUMENT_ID": str(doc.pk),
|
||||
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||
"DOCUMENT_TAGS": "a,b",
|
||||
}
|
||||
|
||||
self.assertDictEqual(environment, {**environment, **subset})
|
||||
|
||||
def test_script_exit_non_zero(self):
|
||||
"""
|
||||
|
@@ -25,7 +25,7 @@ class TestImporter(TestCase):
|
||||
cmd.manifest = [{"model": "documents.document"}]
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd._check_manifest()
|
||||
self.assertTrue("The manifest file contains a record" in str(cm.exception))
|
||||
self.assertIn("The manifest file contains a record", str(cm.exception))
|
||||
|
||||
cmd.manifest = [
|
||||
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
|
||||
@@ -33,6 +33,7 @@ class TestImporter(TestCase):
|
||||
# self.assertRaises(CommandError, cmd._check_manifest)
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd._check_manifest()
|
||||
self.assertTrue(
|
||||
'The manifest file refers to "noexist.pdf"' in str(cm.exception),
|
||||
self.assertIn(
|
||||
'The manifest file refers to "noexist.pdf"',
|
||||
str(cm.exception),
|
||||
)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
from tempfile import TemporaryDirectory
|
||||
from unittest import mock
|
||||
|
||||
from django.apps import apps
|
||||
from django.test import override_settings
|
||||
from django.test import TestCase
|
||||
from documents.parsers import get_default_file_extension
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
@@ -8,6 +10,7 @@ from documents.parsers import get_supported_file_extensions
|
||||
from documents.parsers import is_file_ext_supported
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
|
||||
|
||||
class TestParserDiscovery(TestCase):
|
||||
@@ -124,14 +127,43 @@ class TestParserDiscovery(TestCase):
|
||||
|
||||
|
||||
class TestParserAvailability(TestCase):
|
||||
def test_file_extensions(self):
|
||||
|
||||
def test_tesseract_parser(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Various mime types
|
||||
WHEN:
|
||||
- The parser class is instantiated
|
||||
THEN:
|
||||
- The Tesseract based parser is returned
|
||||
"""
|
||||
supported_mimes_and_exts = [
|
||||
("application/pdf", ".pdf"),
|
||||
("image/png", ".png"),
|
||||
("image/jpeg", ".jpg"),
|
||||
("image/tiff", ".tif"),
|
||||
("image/webp", ".webp"),
|
||||
]
|
||||
|
||||
supported_exts = get_supported_file_extensions()
|
||||
|
||||
for mime_type, ext in supported_mimes_and_exts:
|
||||
self.assertIn(ext, supported_exts)
|
||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||
self.assertIsInstance(
|
||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||
RasterisedDocumentParser,
|
||||
)
|
||||
|
||||
def test_text_parser(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Various mime types of a text form
|
||||
WHEN:
|
||||
- The parser class is instantiated
|
||||
THEN:
|
||||
- The text based parser is returned
|
||||
"""
|
||||
supported_mimes_and_exts = [
|
||||
("text/plain", ".txt"),
|
||||
("text/csv", ".csv"),
|
||||
]
|
||||
@@ -141,23 +173,55 @@ class TestParserAvailability(TestCase):
|
||||
for mime_type, ext in supported_mimes_and_exts:
|
||||
self.assertIn(ext, supported_exts)
|
||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||
self.assertIsInstance(
|
||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||
TextDocumentParser,
|
||||
)
|
||||
|
||||
def test_tika_parser(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Various mime types of an office document form
|
||||
WHEN:
|
||||
- The parser class is instantiated
|
||||
THEN:
|
||||
- The Tika/Gotenberg based parser is returned
|
||||
"""
|
||||
supported_mimes_and_exts = [
|
||||
("application/vnd.oasis.opendocument.text", ".odt"),
|
||||
("text/rtf", ".rtf"),
|
||||
("application/msword", ".doc"),
|
||||
(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".docx",
|
||||
),
|
||||
]
|
||||
|
||||
# Force the app ready to notice the settings override
|
||||
with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
|
||||
app = apps.get_app_config("paperless_tika")
|
||||
app.ready()
|
||||
supported_exts = get_supported_file_extensions()
|
||||
|
||||
for mime_type, ext in supported_mimes_and_exts:
|
||||
self.assertIn(ext, supported_exts)
|
||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||
self.assertIsInstance(
|
||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||
TikaDocumentParser,
|
||||
)
|
||||
|
||||
def test_no_parser_for_mime(self):
|
||||
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
||||
|
||||
def test_default_extension(self):
|
||||
# Test no parser declared still returns an extension
|
||||
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
|
||||
|
||||
# Test invalid mimetype returns no extension
|
||||
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
|
||||
|
||||
self.assertIsInstance(
|
||||
get_parser_class_for_mime_type("application/pdf")(logging_group=None),
|
||||
RasterisedDocumentParser,
|
||||
)
|
||||
self.assertIsInstance(
|
||||
get_parser_class_for_mime_type("text/plain")(logging_group=None),
|
||||
TextDocumentParser,
|
||||
)
|
||||
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
||||
|
||||
def test_file_extension_support(self):
|
||||
self.assertTrue(is_file_ext_supported(".pdf"))
|
||||
self.assertFalse(is_file_ext_supported(".hsdfh"))
|
||||
self.assertFalse(is_file_ext_supported(""))
|
||||
|
Reference in New Issue
Block a user