mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'dev' into feature-bulk-edit
This commit is contained in:
		| @@ -247,7 +247,6 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         with open(self.path, "rb") as f: | ||||
|             document = Document.objects.create( | ||||
|                 correspondent=file_info.correspondent, | ||||
|                 title=(self.override_title or file_info.title)[:127], | ||||
|                 content=text, | ||||
|                 mime_type=mime_type, | ||||
| @@ -257,12 +256,6 @@ class Consumer(LoggingMixin): | ||||
|                 storage_type=storage_type | ||||
|             ) | ||||
|  | ||||
|         relevant_tags = set(file_info.tags) | ||||
|         if relevant_tags: | ||||
|             tag_names = ", ".join([t.name for t in relevant_tags]) | ||||
|             self.log("debug", "Tagging with {}".format(tag_names)) | ||||
|             document.tags.add(*relevant_tags) | ||||
|  | ||||
|         self.apply_overrides(document) | ||||
|  | ||||
|         document.save() | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| from contextlib import contextmanager | ||||
|  | ||||
| from django.conf import settings | ||||
| from whoosh import highlight | ||||
| from whoosh import highlight, classify, query | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME | ||||
| from whoosh.highlight import Formatter, get_text | ||||
| from whoosh.index import create_in, exists_in, open_dir | ||||
| @@ -20,32 +20,37 @@ class JsonFormatter(Formatter): | ||||
|         self.seen = {} | ||||
|  | ||||
|     def format_token(self, text, token, replace=False): | ||||
|         seen = self.seen | ||||
|         ttext = self._text(get_text(text, token, replace)) | ||||
|         if ttext in seen: | ||||
|             termnum = seen[ttext] | ||||
|         else: | ||||
|             termnum = len(seen) | ||||
|             seen[ttext] = termnum | ||||
|  | ||||
|         return {'text': ttext, 'term': termnum} | ||||
|         return {'text': ttext, 'highlight': 'true'} | ||||
|  | ||||
|     def format_fragment(self, fragment, replace=False): | ||||
|         output = [] | ||||
|         index = fragment.startchar | ||||
|         text = fragment.text | ||||
|  | ||||
|         amend_token = None | ||||
|         for t in fragment.matches: | ||||
|             if t.startchar is None: | ||||
|                 continue | ||||
|             if t.startchar < index: | ||||
|                 continue | ||||
|             if t.startchar > index: | ||||
|                 output.append({'text': text[index:t.startchar]}) | ||||
|             output.append(self.format_token(text, t, replace)) | ||||
|                 text_inbetween = text[index:t.startchar] | ||||
|                 if amend_token and t.startchar - index < 10: | ||||
|                     amend_token['text'] += text_inbetween | ||||
|                 else: | ||||
|                     output.append({'text': text_inbetween, | ||||
|                                    'highlight': False}) | ||||
|                     amend_token = None | ||||
|             token = self.format_token(text, t, replace) | ||||
|             if amend_token: | ||||
|                 amend_token['text'] += token['text'] | ||||
|             else: | ||||
|                 output.append(token) | ||||
|                 amend_token = token | ||||
|             index = t.endchar | ||||
|         if index < fragment.endchar: | ||||
|             output.append({'text': text[index:fragment.endchar]}) | ||||
|             output.append({'text': text[index:fragment.endchar], | ||||
|                            'highlight': False}) | ||||
|         return output | ||||
|  | ||||
|     def format(self, fragments, replace=False): | ||||
| @@ -120,22 +125,42 @@ def remove_document_from_index(document): | ||||
|  | ||||
|  | ||||
| @contextmanager | ||||
| def query_page(ix, querystring, page): | ||||
| def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content): | ||||
|     searcher = ix.searcher() | ||||
|     try: | ||||
|         qp = MultifieldParser( | ||||
|             ["content", "title", "correspondent", "tag", "type"], | ||||
|             ix.schema) | ||||
|         qp.add_plugin(DateParserPlugin()) | ||||
|         if querystring: | ||||
|             qp = MultifieldParser( | ||||
|                 ["content", "title", "correspondent", "tag", "type"], | ||||
|                 ix.schema) | ||||
|             qp.add_plugin(DateParserPlugin()) | ||||
|             str_q = qp.parse(querystring) | ||||
|             corrected = searcher.correct_query(str_q, querystring) | ||||
|         else: | ||||
|             str_q = None | ||||
|             corrected = None | ||||
|  | ||||
|         if more_like_doc_id: | ||||
|             docnum = searcher.document_number(id=more_like_doc_id) | ||||
|             kts = searcher.key_terms_from_text( | ||||
|                 'content', more_like_doc_content, numterms=20, | ||||
|                 model=classify.Bo1Model, normalize=False) | ||||
|             more_like_q = query.Or( | ||||
|                 [query.Term('content', word, boost=weight) | ||||
|                  for word, weight in kts]) | ||||
|             result_page = searcher.search_page( | ||||
|                 more_like_q, page, filter=str_q, mask={docnum}) | ||||
|         elif str_q: | ||||
|             result_page = searcher.search_page(str_q, page) | ||||
|         else: | ||||
|             raise ValueError( | ||||
|                 "Either querystring or more_like_doc_id is required." | ||||
|             ) | ||||
|  | ||||
|         q = qp.parse(querystring) | ||||
|         result_page = searcher.search_page(q, page) | ||||
|         result_page.results.fragmenter = highlight.ContextFragmenter( | ||||
|             surround=50) | ||||
|         result_page.results.formatter = JsonFormatter() | ||||
|  | ||||
|         corrected = searcher.correct_query(q, querystring) | ||||
|         if corrected.query != q: | ||||
|         if corrected and corrected.query != str_q: | ||||
|             corrected_query = corrected.string | ||||
|         else: | ||||
|             corrected_query = None | ||||
|   | ||||
| @@ -1,18 +1,29 @@ | ||||
| import json | ||||
| import os | ||||
| import shutil | ||||
| from contextlib import contextmanager | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management import call_command | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.db.models.signals import post_save, m2m_changed | ||||
| from filelock import FileLock | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \ | ||||
|     EXPORTER_ARCHIVE_NAME | ||||
| from ...file_handling import create_source_path_directory, \ | ||||
|     generate_unique_filename | ||||
| from ...file_handling import create_source_path_directory | ||||
| from ...mixins import Renderable | ||||
| from ...signals.handlers import update_filename_and_move_files | ||||
|  | ||||
|  | ||||
| @contextmanager | ||||
| def disable_signal(sig, receiver, sender): | ||||
|     try: | ||||
|         sig.disconnect(receiver=receiver, sender=sender) | ||||
|         yield | ||||
|     finally: | ||||
|         sig.connect(receiver=receiver, sender=sender) | ||||
|  | ||||
|  | ||||
| class Command(Renderable, BaseCommand): | ||||
| @@ -47,11 +58,16 @@ class Command(Renderable, BaseCommand): | ||||
|             self.manifest = json.load(f) | ||||
|  | ||||
|         self._check_manifest() | ||||
|         with disable_signal(post_save, | ||||
|                             receiver=update_filename_and_move_files, | ||||
|                             sender=Document): | ||||
|             with disable_signal(m2m_changed, | ||||
|                                 receiver=update_filename_and_move_files, | ||||
|                                 sender=Document.tags.through): | ||||
|                 # Fill up the database with whatever is in the manifest | ||||
|                 call_command("loaddata", manifest_path) | ||||
|  | ||||
|         # Fill up the database with whatever is in the manifest | ||||
|         call_command("loaddata", manifest_path) | ||||
|  | ||||
|         self._import_files_from_manifest() | ||||
|                 self._import_files_from_manifest() | ||||
|  | ||||
|     @staticmethod | ||||
|     def _check_manifest_exists(path): | ||||
| @@ -117,9 +133,6 @@ class Command(Renderable, BaseCommand): | ||||
|             document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED | ||||
|  | ||||
|             with FileLock(settings.MEDIA_LOCK): | ||||
|                 document.filename = generate_unique_filename( | ||||
|                     document, settings.ORIGINALS_DIR) | ||||
|  | ||||
|                 if os.path.isfile(document.source_path): | ||||
|                     raise FileExistsError(document.source_path) | ||||
|  | ||||
|   | ||||
| @@ -11,6 +11,7 @@ from paperless.db import GnuPG | ||||
| STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
|  | ||||
|  | ||||
| def source_path(self): | ||||
|     if self.filename: | ||||
|         fname = str(self.filename) | ||||
|   | ||||
| @@ -357,54 +357,12 @@ class SavedViewFilterRule(models.Model): | ||||
| # TODO: why is this in the models file? | ||||
| class FileInfo: | ||||
|  | ||||
|     # This epic regex *almost* worked for our needs, so I'm keeping it here for | ||||
|     # posterity, in the hopes that we might find a way to make it work one day. | ||||
|     ALMOST_REGEX = re.compile( | ||||
|         r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?" | ||||
|         r"((?P<correspondent>{non_separated_word}+){separator})??" | ||||
|         r"(?P<title>{non_separated_word}+)" | ||||
|         r"({separator}(?P<tags>[a-z,0-9-]+))?" | ||||
|         r"\.(?P<extension>[a-zA-Z.-]+)$".format( | ||||
|             separator=r"\s+-\s+", | ||||
|             non_separated_word=r"([\w,. ]|([^\s]-))" | ||||
|         ) | ||||
|     ) | ||||
|     REGEXES = OrderedDict([ | ||||
|         ("created-correspondent-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-correspondent-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title-tags", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)?$", | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("title", re.compile( | ||||
|             r"(?P<title>.*)$", | ||||
|             flags=re.IGNORECASE | ||||
| @@ -427,23 +385,10 @@ class FileInfo: | ||||
|         except ValueError: | ||||
|             return None | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_correspondent(cls, name): | ||||
|         if not name: | ||||
|             return None | ||||
|         return Correspondent.objects.get_or_create(name=name)[0] | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_title(cls, title): | ||||
|         return title | ||||
|  | ||||
|     @classmethod | ||||
|     def _get_tags(cls, tags): | ||||
|         r = [] | ||||
|         for t in tags.split(","): | ||||
|             r.append(Tag.objects.get_or_create(name=t)[0]) | ||||
|         return tuple(r) | ||||
|  | ||||
|     @classmethod | ||||
|     def _mangle_property(cls, properties, name): | ||||
|         if name in properties: | ||||
| @@ -453,15 +398,6 @@ class FileInfo: | ||||
|  | ||||
|     @classmethod | ||||
|     def from_filename(cls, filename): | ||||
|         """ | ||||
|         We use a crude naming convention to make handling the correspondent, | ||||
|         title, and tags easier: | ||||
|           "<date> - <correspondent> - <title> - <tags>" | ||||
|           "<correspondent> - <title> - <tags>" | ||||
|           "<correspondent> - <title>" | ||||
|           "<title>" | ||||
|         """ | ||||
|  | ||||
|         # Mutate filename in-place before parsing its components | ||||
|         # by applying at most one of the configured transformations. | ||||
|         for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS: | ||||
| @@ -492,7 +428,5 @@ class FileInfo: | ||||
|             if m: | ||||
|                 properties = m.groupdict() | ||||
|                 cls._mangle_property(properties, "created") | ||||
|                 cls._mangle_property(properties, "correspondent") | ||||
|                 cls._mangle_property(properties, "title") | ||||
|                 cls._mangle_property(properties, "tags") | ||||
|                 return cls(**properties) | ||||
|   | ||||
| @@ -5,9 +5,11 @@ | ||||
| <html lang="en"> | ||||
| <head> | ||||
|   <meta charset="utf-8"> | ||||
|   <title>PaperlessUi</title> | ||||
|   <title>Paperless-ng</title> | ||||
|   <base href="/"> | ||||
|   <meta name="viewport" content="width=device-width, initial-scale=1"> | ||||
| 	<meta name="username" content="{{username}}"> | ||||
| 	<meta name="full_name" content="{{full_name}}"> | ||||
| 	<meta name="cookie_prefix" content="{{cookie_prefix}}"> | ||||
|   <link rel="icon" type="image/x-icon" href="favicon.ico"> | ||||
| <link rel="stylesheet" href="{% static 'frontend/styles.css' %}"></head> | ||||
|   | ||||
							
								
								
									
										57
									
								
								src/documents/tests/test_admin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								src/documents/tests/test_admin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| from unittest import mock | ||||
|  | ||||
| from django.contrib.admin.sites import AdminSite | ||||
| from django.test import TestCase | ||||
| from django.utils import timezone | ||||
|  | ||||
| from documents.admin import DocumentAdmin | ||||
| from documents.models import Document, Tag | ||||
|  | ||||
|  | ||||
| class TestDocumentAdmin(TestCase): | ||||
|  | ||||
|     def setUp(self) -> None: | ||||
|         self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite()) | ||||
|  | ||||
|     @mock.patch("documents.admin.index.add_or_update_document") | ||||
|     def test_save_model(self, m): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         doc.title = "new title" | ||||
|         self.doc_admin.save_model(None, doc, None, None) | ||||
|         self.assertEqual(Document.objects.get(id=doc.id).title, "new title") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     def test_tags(self): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         doc.tags.create(name="t1") | ||||
|         doc.tags.create(name="t2") | ||||
|  | ||||
|         self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>") | ||||
|  | ||||
|     def test_tags_empty(self): | ||||
|         doc = Document.objects.create(title="test") | ||||
|  | ||||
|         self.assertEqual(self.doc_admin.tags_(doc), "") | ||||
|  | ||||
|     @mock.patch("documents.admin.index.remove_document") | ||||
|     def test_delete_model(self, m): | ||||
|         doc = Document.objects.create(title="test") | ||||
|         self.doc_admin.delete_model(None, doc) | ||||
|         self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id) | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     @mock.patch("documents.admin.index.remove_document") | ||||
|     def test_delete_queryset(self, m): | ||||
|         for i in range(42): | ||||
|             Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}") | ||||
|  | ||||
|         self.assertEqual(Document.objects.count(), 42) | ||||
|  | ||||
|         self.doc_admin.delete_queryset(None, Document.objects.all()) | ||||
|  | ||||
|         self.assertEqual(m.call_count, 42) | ||||
|         self.assertEqual(Document.objects.count(), 0) | ||||
|  | ||||
|     def test_created(self): | ||||
|         doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12)) | ||||
|         self.assertEqual(self.doc_admin.created_(doc), "2020-04-12") | ||||
| @@ -352,6 +352,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|         self.assertEqual(correction, None) | ||||
|  | ||||
|     def test_search_more_like(self): | ||||
|         d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1) | ||||
|         d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B") | ||||
|         d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C") | ||||
|         with AsyncWriter(index.open_index()) as writer: | ||||
|             index.update_document(writer, d1) | ||||
|             index.update_document(writer, d2) | ||||
|             index.update_document(writer, d3) | ||||
|  | ||||
|         response = self.client.get(f"/api/search/?more_like={d2.id}") | ||||
|  | ||||
|         self.assertEqual(response.status_code, 200) | ||||
|  | ||||
|         results = response.data['results'] | ||||
|  | ||||
|         self.assertEqual(len(results), 2) | ||||
|         self.assertEqual(results[0]['id'], d3.id) | ||||
|         self.assertEqual(results[1]['id'], d1.id) | ||||
|  | ||||
|     def test_statistics(self): | ||||
|  | ||||
|         doc1 = Document.objects.create(title="none1", checksum="A") | ||||
|   | ||||
| @@ -29,81 +29,6 @@ class TestAttributes(TestCase): | ||||
|  | ||||
|         self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename) | ||||
|  | ||||
|     def test_guess_attributes_from_name0(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Title.pdf", "Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name1(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Spaced Sender - Title.pdf", "Spaced Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name2(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Spaced Title.pdf", "Sender", "Spaced Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name3(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name4(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ()) | ||||
|  | ||||
|     def test_guess_attributes_from_name5(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name6(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Spaced Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Spaced Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name7(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Spaced Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Spaced Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name8(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Dashed-Sender - Title - tag1,tag2,tag3.pdf", | ||||
|             "Dashed-Sender", | ||||
|             "Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name9(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Sender - Dashed-Title - tag1,tag2,tag3.pdf", | ||||
|             "Sender", | ||||
|             "Dashed-Title", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name10(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             "Σενδερ - Τιτλε - tag1,tag2,tag3.pdf", | ||||
|             "Σενδερ", | ||||
|             "Τιτλε", | ||||
|             self.TAGS | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_correspondent_empty(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             ' - weird empty correspondent but should not break.pdf', | ||||
|             None, | ||||
|             'weird empty correspondent but should not break', | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_starts_with_dash(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
| @@ -121,28 +46,6 @@ class TestAttributes(TestCase): | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_guess_attributes_from_name_when_title_is_empty(self): | ||||
|         self._test_guess_attributes_from_name( | ||||
|             'weird correspondent but should not break - .pdf', | ||||
|             'weird correspondent but should not break', | ||||
|             '', | ||||
|             () | ||||
|         ) | ||||
|  | ||||
|     def test_case_insensitive_tag_creation(self): | ||||
|         """ | ||||
|         Tags should be detected and created as lower case. | ||||
|         :return: | ||||
|         """ | ||||
|  | ||||
|         filename = "Title - Correspondent - tAg1,TAG2.pdf" | ||||
|         self.assertEqual(len(FileInfo.from_filename(filename).tags), 2) | ||||
|  | ||||
|         path = "Title - Correspondent - tag1,tag2.pdf" | ||||
|         self.assertEqual(len(FileInfo.from_filename(filename).tags), 2) | ||||
|  | ||||
|         self.assertEqual(Tag.objects.all().count(), 2) | ||||
|  | ||||
|  | ||||
| class TestFieldPermutations(TestCase): | ||||
|  | ||||
| @@ -199,69 +102,7 @@ class TestFieldPermutations(TestCase): | ||||
|             filename = template.format(**spec) | ||||
|             self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_title_and_correspondent(self): | ||||
|         template = '{correspondent} - {title}.pdf' | ||||
|         for correspondent in self.valid_correspondents: | ||||
|             for title in self.valid_titles: | ||||
|                 spec = dict(correspondent=correspondent, title=title) | ||||
|                 filename = template.format(**spec) | ||||
|                 self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_title_and_correspondent_and_tags(self): | ||||
|         template = '{correspondent} - {title} - {tags}.pdf' | ||||
|         for correspondent in self.valid_correspondents: | ||||
|             for title in self.valid_titles: | ||||
|                 for tags in self.valid_tags: | ||||
|                     spec = dict(correspondent=correspondent, title=title, | ||||
|                                 tags=tags) | ||||
|                     filename = template.format(**spec) | ||||
|                     self._test_guessed_attributes(filename, **spec) | ||||
|  | ||||
|     def test_created_and_correspondent_and_title_and_tags(self): | ||||
|  | ||||
|         template = ( | ||||
|             "{created} - " | ||||
|             "{correspondent} - " | ||||
|             "{title} - " | ||||
|             "{tags}.pdf" | ||||
|         ) | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for correspondent in self.valid_correspondents: | ||||
|                 for title in self.valid_titles: | ||||
|                     for tags in self.valid_tags: | ||||
|                         spec = { | ||||
|                             "created": created, | ||||
|                             "correspondent": correspondent, | ||||
|                             "title": title, | ||||
|                             "tags": tags, | ||||
|                         } | ||||
|                         self._test_guessed_attributes( | ||||
|                             template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_correspondent_and_title(self): | ||||
|  | ||||
|         template = "{created} - {correspondent} - {title}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for correspondent in self.valid_correspondents: | ||||
|                 for title in self.valid_titles: | ||||
|  | ||||
|                     # Skip cases where title looks like a tag as we can't | ||||
|                     # accommodate such cases. | ||||
|                     if title.lower() == title: | ||||
|                         continue | ||||
|  | ||||
|                     spec = { | ||||
|                         "created": created, | ||||
|                         "correspondent": correspondent, | ||||
|                         "title": title | ||||
|                     } | ||||
|                     self._test_guessed_attributes( | ||||
|                         template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_title(self): | ||||
|  | ||||
|         template = "{created} - {title}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
| @@ -273,21 +114,6 @@ class TestFieldPermutations(TestCase): | ||||
|                 self._test_guessed_attributes( | ||||
|                     template.format(**spec), **spec) | ||||
|  | ||||
|     def test_created_and_title_and_tags(self): | ||||
|  | ||||
|         template = "{created} - {title} - {tags}.pdf" | ||||
|  | ||||
|         for created in self.valid_dates: | ||||
|             for title in self.valid_titles: | ||||
|                 for tags in self.valid_tags: | ||||
|                     spec = { | ||||
|                         "created": created, | ||||
|                         "title": title, | ||||
|                         "tags": tags | ||||
|                     } | ||||
|                     self._test_guessed_attributes( | ||||
|                         template.format(**spec), **spec) | ||||
|  | ||||
|     def test_invalid_date_format(self): | ||||
|         info = FileInfo.from_filename("06112017Z - title.pdf") | ||||
|         self.assertEqual(info.title, "title") | ||||
| @@ -336,32 +162,6 @@ class TestFieldPermutations(TestCase): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "anotherall") | ||||
|  | ||||
|         # Complex transformation without date in replacement string | ||||
|         with self.settings( | ||||
|                 FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "0001") | ||||
|             self.assertEqual(len(info.tags), 2) | ||||
|             self.assertEqual(info.tags[0].name, "tag1") | ||||
|             self.assertEqual(info.tags[1].name, "tag2") | ||||
|             self.assertIsNone(info.created) | ||||
|  | ||||
|         # Complex transformation with date in replacement string | ||||
|         with self.settings( | ||||
|             FILENAME_PARSE_TRANSFORMS=[ | ||||
|                 (none_patt, "none.gif"), | ||||
|                 (exact_patt, repl2),    # <-- matches | ||||
|                 (exact_patt, repl1), | ||||
|                 (all_patt, "all.gif")]): | ||||
|             info = FileInfo.from_filename(filename) | ||||
|             self.assertEqual(info.title, "0001") | ||||
|             self.assertEqual(len(info.tags), 2) | ||||
|             self.assertEqual(info.tags[0].name, "tag1") | ||||
|             self.assertEqual(info.tags[1].name, "tag2") | ||||
|             self.assertEqual(info.created.year, 2019) | ||||
|             self.assertEqual(info.created.month, 9) | ||||
|             self.assertEqual(info.created.day, 8) | ||||
|  | ||||
|  | ||||
| class DummyParser(DocumentParser): | ||||
|  | ||||
| @@ -476,15 +276,13 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def testOverrideFilename(self): | ||||
|         filename = self.get_test_file() | ||||
|         override_filename = "My Bank - Statement for November.pdf" | ||||
|         override_filename = "Statement for November.pdf" | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename=override_filename) | ||||
|  | ||||
|         self.assertEqual(document.correspondent.name, "My Bank") | ||||
|         self.assertEqual(document.title, "Statement for November") | ||||
|  | ||||
|     def testOverrideTitle(self): | ||||
|  | ||||
|         document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title") | ||||
|         self.assertEqual(document.title, "Override Title") | ||||
|  | ||||
| @@ -594,11 +392,10 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|     def testFilenameHandling(self): | ||||
|         filename = self.get_test_file() | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs") | ||||
|         document = self.consumer.try_consume_file(filename, override_title="new docs") | ||||
|  | ||||
|         self.assertEqual(document.title, "new docs") | ||||
|         self.assertEqual(document.correspondent.name, "Bank") | ||||
|         self.assertEqual(document.filename, "Bank/new docs.pdf") | ||||
|         self.assertEqual(document.filename, "none/new docs.pdf") | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") | ||||
|     @mock.patch("documents.signals.handlers.generate_unique_filename") | ||||
| @@ -617,10 +414,9 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         Tag.objects.create(name="test", is_inbox_tag=True) | ||||
|  | ||||
|         document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs") | ||||
|         document = self.consumer.try_consume_file(filename, override_title="new docs") | ||||
|  | ||||
|         self.assertEqual(document.title, "new docs") | ||||
|         self.assertEqual(document.correspondent.name, "Bank") | ||||
|         self.assertIsNotNone(os.path.isfile(document.title)) | ||||
|         self.assertTrue(os.path.isfile(document.source_path)) | ||||
|  | ||||
| @@ -642,3 +438,31 @@ class TestConsumer(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(document.document_type, dtype) | ||||
|         self.assertIn(t1, document.tags.all()) | ||||
|         self.assertNotIn(t2, document.tags.all()) | ||||
|  | ||||
|     @override_settings(CONSUMER_DELETE_DUPLICATES=True) | ||||
|     def test_delete_duplicate(self): | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         doc = self.consumer.try_consume_file(dst) | ||||
|  | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|         self.assertIsNotNone(doc) | ||||
|  | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst) | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|  | ||||
|     @override_settings(CONSUMER_DELETE_DUPLICATES=False) | ||||
|     def test_no_delete_duplicate(self): | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         doc = self.consumer.try_consume_file(dst) | ||||
|  | ||||
|         self.assertFalse(os.path.isfile(dst)) | ||||
|         self.assertIsNotNone(doc) | ||||
|  | ||||
|         dst = self.get_test_file() | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|         self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst) | ||||
|         self.assertTrue(os.path.isfile(dst)) | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from django.utils import timezone | ||||
| from .utils import DirectoriesMixin | ||||
| from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \ | ||||
|     generate_unique_filename | ||||
| from ..models import Document, Correspondent, Tag | ||||
| from ..models import Document, Correspondent, Tag, DocumentType | ||||
|  | ||||
|  | ||||
| class TestFileHandling(DirectoriesMixin, TestCase): | ||||
| @@ -190,6 +190,17 @@ class TestFileHandling(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True) | ||||
|         self.assertTrue(os.path.isfile(important_file)) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}") | ||||
|     def test_document_type(self): | ||||
|         dt = DocumentType.objects.create(name="my_doc_type") | ||||
|         d = Document.objects.create(title="the_doc", mime_type="application/pdf") | ||||
|  | ||||
|         self.assertEqual(generate_filename(d), "none - the_doc.pdf") | ||||
|  | ||||
|         d.document_type = dt | ||||
|  | ||||
|         self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf") | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_with_underscore(self): | ||||
|         document = Document() | ||||
|   | ||||
							
								
								
									
										135
									
								
								src/documents/tests/test_management.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								src/documents/tests/test_management.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| import hashlib | ||||
| import tempfile | ||||
| import filecmp | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
|  | ||||
| from django.core.management import call_command | ||||
|  | ||||
| from documents.file_handling import generate_filename | ||||
| from documents.management.commands.document_archiver import handle_document | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|  | ||||
|  | ||||
| class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_models(self): | ||||
|         return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf") | ||||
|  | ||||
|     def test_archiver(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         call_command('document_archiver') | ||||
|  | ||||
|     def test_handle_document(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         handle_document(doc.pk) | ||||
|  | ||||
|         doc = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertIsNotNone(doc.checksum) | ||||
|         self.assertTrue(os.path.isfile(doc.archive_path)) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(filecmp.cmp(sample_file, doc.source_path)) | ||||
|  | ||||
|  | ||||
| class TestDecryptDocuments(TestCase): | ||||
|  | ||||
|     @override_settings( | ||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), | ||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), | ||||
|         PASSPHRASE="test", | ||||
|         PAPERLESS_FILENAME_FORMAT=None | ||||
|     ) | ||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||
|     def test_decrypt(self, m): | ||||
|  | ||||
|         media_dir = tempfile.mkdtemp() | ||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") | ||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") | ||||
|         os.makedirs(originals_dir, exist_ok=True) | ||||
|         os.makedirs(thumb_dir, exist_ok=True) | ||||
|  | ||||
|         override_settings( | ||||
|             ORIGINALS_DIR=originals_dir, | ||||
|             THUMBNAIL_DIR=thumb_dir, | ||||
|             PASSPHRASE="test" | ||||
|         ).enable() | ||||
|  | ||||
|         doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg",  mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|  | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg")) | ||||
|  | ||||
|         call_command('decrypt_documents') | ||||
|  | ||||
|         doc.refresh_from_db() | ||||
|  | ||||
|         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED) | ||||
|         self.assertEqual(doc.filename, "0000002.pdf") | ||||
|         self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf"))) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png"))) | ||||
|         self.assertTrue(os.path.isfile(doc.thumbnail_path)) | ||||
|  | ||||
|         with doc.source_file as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
|             self.assertEqual(checksum, doc.checksum) | ||||
|  | ||||
|  | ||||
| class TestMakeIndex(TestCase): | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_index.index_reindex") | ||||
|     def test_reindex(self, m): | ||||
|         call_command("document_index", "reindex") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_index.index_optimize") | ||||
|     def test_optimize(self, m): | ||||
|         call_command("document_index", "optimize") | ||||
|         m.assert_called_once() | ||||
|  | ||||
|  | ||||
| class TestRenamer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_rename(self): | ||||
|         doc = Document.objects.create(title="test", mime_type="application/pdf") | ||||
|         doc.filename = generate_filename(doc) | ||||
|         doc.save() | ||||
|  | ||||
|         Path(doc.source_path).touch() | ||||
|  | ||||
|         old_source_path = doc.source_path | ||||
|  | ||||
|         with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"): | ||||
|             call_command("document_renamer") | ||||
|  | ||||
|         doc2 = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertEqual(doc2.filename, "test.pdf") | ||||
|         self.assertFalse(os.path.isfile(old_source_path)) | ||||
|         self.assertFalse(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(doc2.source_path)) | ||||
|  | ||||
|  | ||||
| class TestCreateClassifier(TestCase): | ||||
|  | ||||
|     @mock.patch("documents.management.commands.document_create_classifier.train_classifier") | ||||
|     def test_create_classifier(self, m): | ||||
|         call_command("document_create_classifier") | ||||
|  | ||||
|         m.assert_called_once() | ||||
| @@ -1,40 +0,0 @@ | ||||
| import filecmp | ||||
| import os | ||||
| import shutil | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase | ||||
|  | ||||
| from documents.management.commands.document_archiver import handle_document | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|  | ||||
|  | ||||
| class TestArchiver(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_models(self): | ||||
|         return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf") | ||||
|  | ||||
|     def test_archiver(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         call_command('document_archiver') | ||||
|  | ||||
|     def test_handle_document(self): | ||||
|  | ||||
|         doc = self.make_models() | ||||
|         shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) | ||||
|  | ||||
|         handle_document(doc.pk) | ||||
|  | ||||
|         doc = Document.objects.get(id=doc.id) | ||||
|  | ||||
|         self.assertIsNotNone(doc.checksum) | ||||
|         self.assertTrue(os.path.isfile(doc.archive_path)) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(filecmp.cmp(sample_file, doc.source_path)) | ||||
| @@ -1,57 +0,0 @@ | ||||
| import hashlib | ||||
| import json | ||||
| import os | ||||
| import shutil | ||||
| import tempfile | ||||
| from unittest import mock | ||||
|  | ||||
| from django.core.management import call_command | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from documents.management.commands import document_exporter | ||||
| from documents.models import Document, Tag, DocumentType, Correspondent | ||||
|  | ||||
|  | ||||
| class TestDecryptDocuments(TestCase): | ||||
|  | ||||
|     @override_settings( | ||||
|         ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"), | ||||
|         THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"), | ||||
|         PASSPHRASE="test", | ||||
|         PAPERLESS_FILENAME_FORMAT=None | ||||
|     ) | ||||
|     @mock.patch("documents.management.commands.decrypt_documents.input") | ||||
|     def test_decrypt(self, m): | ||||
|  | ||||
|         media_dir = tempfile.mkdtemp() | ||||
|         originals_dir = os.path.join(media_dir, "documents", "originals") | ||||
|         thumb_dir = os.path.join(media_dir, "documents", "thumbnails") | ||||
|         os.makedirs(originals_dir, exist_ok=True) | ||||
|         os.makedirs(thumb_dir, exist_ok=True) | ||||
|  | ||||
|         override_settings( | ||||
|             ORIGINALS_DIR=originals_dir, | ||||
|             THUMBNAIL_DIR=thumb_dir, | ||||
|             PASSPHRASE="test" | ||||
|         ).enable() | ||||
|  | ||||
|         doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg",  mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|  | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg")) | ||||
|  | ||||
|         call_command('decrypt_documents') | ||||
|  | ||||
|         doc.refresh_from_db() | ||||
|  | ||||
|         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED) | ||||
|         self.assertEqual(doc.filename, "0000002.pdf") | ||||
|         self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf"))) | ||||
|         self.assertTrue(os.path.isfile(doc.source_path)) | ||||
|         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png"))) | ||||
|         self.assertTrue(os.path.isfile(doc.thumbnail_path)) | ||||
|  | ||||
|         with doc.source_file as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
|             self.assertEqual(checksum, doc.checksum) | ||||
|  | ||||
| @@ -24,11 +24,17 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         file = os.path.join(self.dirs.originals_dir, "0000001.pdf") | ||||
|  | ||||
|         Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf") | ||||
|         Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|         Tag.objects.create(name="t") | ||||
|         DocumentType.objects.create(name="dt") | ||||
|         Correspondent.objects.create(name="c") | ||||
|         d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf") | ||||
|         d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) | ||||
|         t1 = Tag.objects.create(name="t") | ||||
|         dt1 = DocumentType.objects.create(name="dt") | ||||
|         c1 = Correspondent.objects.create(name="c") | ||||
|  | ||||
|         d1.tags.add(t1) | ||||
|         d1.correspondents = c1 | ||||
|         d1.document_type = dt1 | ||||
|         d1.save() | ||||
|         d2.save() | ||||
|  | ||||
|         target = tempfile.mkdtemp() | ||||
|         self.addCleanup(shutil.rmtree, target) | ||||
| @@ -59,11 +65,25 @@ class TestExportImport(DirectoriesMixin, TestCase): | ||||
|                     self.assertEqual(checksum, element['fields']['archive_checksum']) | ||||
|  | ||||
|         with paperless_environment() as dirs: | ||||
|             self.assertEqual(Document.objects.count(), 2) | ||||
|             Document.objects.all().delete() | ||||
|             Correspondent.objects.all().delete() | ||||
|             DocumentType.objects.all().delete() | ||||
|             Tag.objects.all().delete() | ||||
|             self.assertEqual(Document.objects.count(), 0) | ||||
|  | ||||
|             call_command('document_importer', target) | ||||
|             self.assertEqual(Document.objects.count(), 2) | ||||
|             messages = check_sanity() | ||||
|             # everything is alright after the test | ||||
|             self.assertEqual(len(messages), 0, str([str(m) for m in messages])) | ||||
|  | ||||
|     @override_settings( | ||||
|         PAPERLESS_FILENAME_FORMAT="{title}" | ||||
|     ) | ||||
|     def test_exporter_with_filename_format(self): | ||||
|         self.test_exporter() | ||||
|  | ||||
|     def test_export_missing_files(self): | ||||
|  | ||||
|         target = tempfile.mkdtemp() | ||||
|   | ||||
							
								
								
									
										129
									
								
								src/documents/tests/test_migrations.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								src/documents/tests/test_migrations.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,129 @@ | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
|  | ||||
| from django.apps import apps | ||||
| from django.conf import settings | ||||
| from django.db import connection | ||||
| from django.db.migrations.executor import MigrationExecutor | ||||
| from django.test import TestCase, TransactionTestCase, override_settings | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.parsers import get_default_file_extension | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class TestMigrations(TransactionTestCase): | ||||
|  | ||||
|     @property | ||||
|     def app(self): | ||||
|         return apps.get_containing_app_config(type(self).__module__).name | ||||
|  | ||||
|     migrate_from = None | ||||
|     migrate_to = None | ||||
|  | ||||
|     def setUp(self): | ||||
|         super(TestMigrations, self).setUp() | ||||
|  | ||||
|         assert self.migrate_from and self.migrate_to, \ | ||||
|             "TestCase '{}' must define migrate_from and migrate_to     properties".format(type(self).__name__) | ||||
|         self.migrate_from = [(self.app, self.migrate_from)] | ||||
|         self.migrate_to = [(self.app, self.migrate_to)] | ||||
|         executor = MigrationExecutor(connection) | ||||
|         old_apps = executor.loader.project_state(self.migrate_from).apps | ||||
|  | ||||
|         # Reverse to the original migration | ||||
|         executor.migrate(self.migrate_from) | ||||
|  | ||||
|         self.setUpBeforeMigration(old_apps) | ||||
|  | ||||
|         # Run the migration to test | ||||
|         executor = MigrationExecutor(connection) | ||||
|         executor.loader.build_graph()  # reload. | ||||
|         executor.migrate(self.migrate_to) | ||||
|  | ||||
|         self.apps = executor.loader.project_state(self.migrate_to).apps | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
| STORAGE_TYPE_GPG = "gpg" | ||||
|  | ||||
|  | ||||
| def source_path_before(self): | ||||
|     if self.filename: | ||||
|         fname = str(self.filename) | ||||
|     else: | ||||
|         fname = "{:07}.{}".format(self.pk, self.file_type) | ||||
|         if self.storage_type == STORAGE_TYPE_GPG: | ||||
|             fname += ".gpg" | ||||
|  | ||||
|     return os.path.join( | ||||
|         settings.ORIGINALS_DIR, | ||||
|         fname | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def file_type_after(self): | ||||
|     return get_default_file_extension(self.mime_type) | ||||
|  | ||||
|  | ||||
| def source_path_after(doc): | ||||
|     if doc.filename: | ||||
|         fname = str(doc.filename) | ||||
|     else: | ||||
|         fname = "{:07}{}".format(doc.pk, file_type_after(doc)) | ||||
|         if doc.storage_type == STORAGE_TYPE_GPG: | ||||
|             fname += ".gpg"  # pragma: no cover | ||||
|  | ||||
|     return os.path.join( | ||||
|         settings.ORIGINALS_DIR, | ||||
|         fname | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @override_settings(PASSPHRASE="test") | ||||
| class TestMigrateMimeType(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
|     migrate_from = '1002_auto_20201111_1105' | ||||
|     migrate_to = '1003_mime_types' | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf") | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc)) | ||||
|  | ||||
|         doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG) | ||||
|         self.doc2_id = doc2.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2)) | ||||
|  | ||||
|     def testMimeTypesMigrated(self): | ||||
|         Document = self.apps.get_model('documents', 'Document') | ||||
|  | ||||
|         doc = Document.objects.get(id=self.doc_id) | ||||
|         self.assertEqual(doc.mime_type, "application/pdf") | ||||
|  | ||||
|         doc2 = Document.objects.get(id=self.doc2_id) | ||||
|         self.assertEqual(doc2.mime_type, "application/pdf") | ||||
|  | ||||
|  | ||||
| @override_settings(PASSPHRASE="test") | ||||
| class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations): | ||||
|  | ||||
|     migrate_from = '1003_mime_types' | ||||
|     migrate_to = '1002_auto_20201111_1105' | ||||
|  | ||||
|     def setUpBeforeMigration(self, apps): | ||||
|         Document = apps.get_model("documents", "Document") | ||||
|         doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf") | ||||
|         self.doc_id = doc.id | ||||
|         shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc)) | ||||
|  | ||||
|     def testMimeTypesReverted(self): | ||||
|         Document = self.apps.get_model('documents', 'Document') | ||||
|  | ||||
|         doc = Document.objects.get(id=self.doc_id) | ||||
|         self.assertEqual(doc.file_type, "pdf") | ||||
| @@ -58,6 +58,8 @@ class IndexView(TemplateView): | ||||
|     def get_context_data(self, **kwargs): | ||||
|         context = super().get_context_data(**kwargs) | ||||
|         context['cookie_prefix'] = settings.COOKIE_PREFIX | ||||
|         context['username'] = self.request.user.username | ||||
|         context['full_name'] = self.request.user.get_full_name() | ||||
|         return context | ||||
|  | ||||
|  | ||||
| @@ -389,14 +391,27 @@ class SearchView(APIView): | ||||
|                 } | ||||
|  | ||||
|     def get(self, request, format=None): | ||||
|         if 'query' not in request.query_params: | ||||
|  | ||||
|         if 'query' in request.query_params: | ||||
|             query = request.query_params['query'] | ||||
|         else: | ||||
|             query = None | ||||
|  | ||||
|         if 'more_like' in request.query_params: | ||||
|             more_like_id = request.query_params['more_like'] | ||||
|             more_like_content = Document.objects.get(id=more_like_id).content | ||||
|         else: | ||||
|             more_like_id = None | ||||
|             more_like_content = None | ||||
|  | ||||
|         if not query and not more_like_id: | ||||
|             return Response({ | ||||
|                 'count': 0, | ||||
|                 'page': 0, | ||||
|                 'page_count': 0, | ||||
|                 'corrected_query': None, | ||||
|                 'results': []}) | ||||
|  | ||||
|         query = request.query_params['query'] | ||||
|         try: | ||||
|             page = int(request.query_params.get('page', 1)) | ||||
|         except (ValueError, TypeError): | ||||
| @@ -406,8 +421,7 @@ class SearchView(APIView): | ||||
|             page = 1 | ||||
|  | ||||
|         try: | ||||
|             with index.query_page(self.ix, query, page) as (result_page, | ||||
|                                                             corrected_query): | ||||
|             with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query):  # NOQA: E501 | ||||
|                 return Response( | ||||
|                     {'count': len(result_page), | ||||
|                      'page': result_page.pagenum, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler