From 4ba717b301925f21a0509f0d12f76a5e0e216971 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 12:54:04 +0100 Subject: [PATCH 01/26] updated bootstrap --- src-ui/package-lock.json | 6 +++--- src-ui/package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src-ui/package-lock.json b/src-ui/package-lock.json index 1d73a856f..45b1d2d6d 100644 --- a/src-ui/package-lock.json +++ b/src-ui/package-lock.json @@ -2049,9 +2049,9 @@ } }, "@ng-bootstrap/ng-bootstrap": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/@ng-bootstrap/ng-bootstrap/-/ng-bootstrap-7.0.0.tgz", - "integrity": "sha512-SxUaptGWJmCxM0d2Zy1mx7K7p/YBwGZ69NmmBQVY4BE6p5av0hWrVmv9rzzfBz0rhxU7RPZLor2Jpaoq8Xyl4w==", + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@ng-bootstrap/ng-bootstrap/-/ng-bootstrap-8.0.0.tgz", + "integrity": "sha512-v77Gfd8xHH+exq0WqIqVRlxbUEHdA/2+RUJenUP2IDTQN9E1rWl7O461/kosr+0XPuxPArHQJxhh/WsCYckcNg==", "requires": { "tslib": "^2.0.0" } diff --git a/src-ui/package.json b/src-ui/package.json index b2da7eabe..a9e909155 100644 --- a/src-ui/package.json +++ b/src-ui/package.json @@ -20,7 +20,7 @@ "@angular/platform-browser": "~10.1.5", "@angular/platform-browser-dynamic": "~10.1.5", "@angular/router": "~10.1.5", - "@ng-bootstrap/ng-bootstrap": "^7.0.0", + "@ng-bootstrap/ng-bootstrap": "^8.0.0", "bootstrap": "^4.5.0", "ng-bootstrap": "^1.6.3", "ngx-file-drop": "^10.0.0", From 26bb6cba1a6dc08bb592a19eacc7ff0087d01db8 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 23:22:44 +0100 Subject: [PATCH 02/26] combined migrations --- .../migrations/1000_update_paperless.py | 73 --------- .../migrations/1000_update_paperless_all.py | 148 ++++++++++++++++++ .../migrations/1001_workflow_improvements.py | 23 --- .../migrations/1002_auto_20180823_1155.py | 33 ---- .../migrations/1003_auto_20201028_1751.py | 32 ---- .../migrations/1004_auto_20201029_1331.py | 18 --- .../migrations/1005_auto_20201102_0007.py | 26 --- 7 files changed, 148 insertions(+), 205 deletions(-) delete mode 100644 src/documents/migrations/1000_update_paperless.py create mode 100644 src/documents/migrations/1000_update_paperless_all.py delete mode 100755 src/documents/migrations/1001_workflow_improvements.py delete mode 100644 src/documents/migrations/1002_auto_20180823_1155.py delete mode 100644 src/documents/migrations/1003_auto_20201028_1751.py delete mode 100644 src/documents/migrations/1004_auto_20201029_1331.py delete mode 100644 src/documents/migrations/1005_auto_20201102_0007.py diff --git a/src/documents/migrations/1000_update_paperless.py b/src/documents/migrations/1000_update_paperless.py deleted file mode 100644 index 900510c72..000000000 --- a/src/documents/migrations/1000_update_paperless.py +++ /dev/null @@ -1,73 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-29 14:29 -import os - -from django.db import migrations - -from django.conf import settings - - -def make_index(apps, schema_editor): - Document = apps.get_model("documents", "Document") - documents = Document.objects.all() - print() - try: - print(" --> Creating document index...") - from whoosh.writing import AsyncWriter - from documents import index - ix = index.open_index(recreate=True) - with AsyncWriter(ix) as writer: - for document in documents: - index.update_document(writer, document) - except ImportError: - # index may not be relevant anymore - print(" --> Cannot create document index.") - - -def restore_filenames(apps, schema_editor): - Document = apps.get_model("documents", "Document") 
- for doc in Document.objects.all(): - file_name = "{:07}.{}".format(doc.pk, doc.file_type) - if doc.storage_type == "gpg": - file_name += ".gpg" - - if not doc.filename == file_name: - try: - print("file was renamed, restoring {} to {}".format(doc.filename, file_name)) - os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename), - os.path.join(settings.ORIGINALS_DIR, file_name)) - except PermissionError: - pass - except FileNotFoundError: - pass - - -def initialize_document_classifier(apps, schema_editor): - try: - print("Initalizing document classifier...") - from documents.classifier import DocumentClassifier - classifier = DocumentClassifier() - try: - classifier.train() - classifier.save_classifier() - except Exception as e: - print("Classifier error: {}".format(e)) - except ImportError: - print("Document classifier not found, skipping") - - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '0023_document_current_filename'), - ] - - operations = [ - migrations.RunPython(make_index, migrations.RunPython.noop), - migrations.RunPython(restore_filenames), - migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop), - migrations.RemoveField( - model_name='document', - name='filename', - ), - ] diff --git a/src/documents/migrations/1000_update_paperless_all.py b/src/documents/migrations/1000_update_paperless_all.py new file mode 100644 index 000000000..79e2b3668 --- /dev/null +++ b/src/documents/migrations/1000_update_paperless_all.py @@ -0,0 +1,148 @@ +# Generated by Django 3.1.3 on 2020-11-07 12:35 +import os + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +def make_index(apps, schema_editor): + Document = apps.get_model("documents", "Document") + documents = Document.objects.all() + print() + try: + print(" --> Creating document index...") + from whoosh.writing import AsyncWriter + from documents import index + ix = index.open_index(recreate=True) + with AsyncWriter(ix) as writer: + for document in documents: + index.update_document(writer, document) + except ImportError: + # index may not be relevant anymore + print(" --> Cannot create document index.") + + +def restore_filenames(apps, schema_editor): + Document = apps.get_model("documents", "Document") + + rename_operations = [] + + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + if not doc.filename == file_name: + try: + src = os.path.join(settings.ORIGINALS_DIR, doc.filename) + dst = os.path.join(settings.ORIGINALS_DIR, file_name) + if os.path.exists(dst): + raise Exception("Cannot move {}, {} already exists!".format(src, dst)) + if not os.path.exists(src): + raise Exception("Cannot move {}, file does not exist! 
(this is bad, one of your documents is missing".format(src)) + + rename_operations.append( (src,dst) ) + except (PermissionError, FileNotFoundError) as e: + raise Exception(e) + + for (src, dst) in rename_operations: + print("file was renamed, restoring {} to {}".format(src, dst)) + os.rename(src, dst) + + +def initialize_document_classifier(apps, schema_editor): + try: + print("Initalizing document classifier...") + from documents.classifier import DocumentClassifier + classifier = DocumentClassifier() + try: + classifier.train() + classifier.save_classifier() + except Exception as e: + print("Classifier error: {}".format(e)) + except ImportError: + print("Document classifier not found, skipping") + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0023_document_current_filename'), + ] + + operations = [ + migrations.RunPython( + code=restore_filenames, + ), + migrations.RemoveField( + model_name='document', + name='filename', + ), + migrations.AddField( + model_name='document', + name='archive_serial_number', + field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True), + ), + migrations.AddField( + model_name='tag', + name='is_inbox_tag', + field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'), + ), + migrations.CreateModel( + name='DocumentType', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128, unique=True)), + ('slug', models.SlugField(blank=True, editable=False)), + ('match', models.CharField(blank=True, max_length=256)), + ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')), + ('is_insensitive', models.BooleanField(default=True)), + ], + options={ + 'abstract': False, + 'ordering': ('name',), + }, + ), + migrations.AddField( + model_name='document', + name='document_type', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'), + ), + migrations.AlterField( + model_name='correspondent', + name='matching_algorithm', + field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. 
A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), + ), + migrations.AlterField( + model_name='tag', + name='matching_algorithm', + field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), + ), + migrations.AlterField( + model_name='document', + name='content', + field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'), + ), + migrations.AlterModelOptions( + name='log', + options={'ordering': ('-created',)}, + ), + migrations.RemoveField( + model_name='log', + name='modified', + ), + migrations.AlterField( + model_name='log', + name='group', + field=models.UUIDField(blank=True, null=True), + ), + migrations.RunPython( + code=make_index, + reverse_code=django.db.migrations.operations.special.RunPython.noop, + ), + migrations.RunPython( + code=initialize_document_classifier, + reverse_code=django.db.migrations.operations.special.RunPython.noop, + ), + ] diff --git a/src/documents/migrations/1001_workflow_improvements.py b/src/documents/migrations/1001_workflow_improvements.py deleted file mode 100755 index 94ad8135d..000000000 --- a/src/documents/migrations/1001_workflow_improvements.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 2.0.7 on 2018-07-12 09:52 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1000_update_paperless'), - ] - - operations = [ - migrations.AddField( - model_name='document', - name='archive_serial_number', - field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True), - ), - migrations.AddField( - model_name='tag', - name='is_inbox_tag', - field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'), - ), - ] diff --git a/src/documents/migrations/1002_auto_20180823_1155.py b/src/documents/migrations/1002_auto_20180823_1155.py deleted file mode 100644 index d44a131ab..000000000 --- a/src/documents/migrations/1002_auto_20180823_1155.py +++ /dev/null @@ -1,33 +0,0 @@ -# Generated by Django 2.0.7 on 2018-08-23 11:55 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', 
'1001_workflow_improvements'), - ] - - operations = [ - migrations.CreateModel( - name='DocumentType', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=128, unique=True)), - ('slug', models.SlugField(blank=True, editable=False)), - ('match', models.CharField(blank=True, max_length=256)), - ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')), - ('is_insensitive', models.BooleanField(default=True)), - ], - options={ - 'abstract': False, - }, - ), - migrations.AddField( - model_name='document', - name='document_type', - field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'), - ), - ] diff --git a/src/documents/migrations/1003_auto_20201028_1751.py b/src/documents/migrations/1003_auto_20201028_1751.py deleted file mode 100644 index 66dd329e1..000000000 --- a/src/documents/migrations/1003_auto_20201028_1751.py +++ /dev/null @@ -1,32 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-28 17:51 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1002_auto_20180823_1155'), - ] - - operations = [ - migrations.AlterModelOptions( - name='documenttype', - options={'ordering': ('name',)}, - ), - migrations.AlterField( - model_name='correspondent', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - migrations.AlterField( - model_name='documenttype', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. 
Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - migrations.AlterField( - model_name='tag', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - ] diff --git a/src/documents/migrations/1004_auto_20201029_1331.py b/src/documents/migrations/1004_auto_20201029_1331.py deleted file mode 100644 index b845bde33..000000000 --- a/src/documents/migrations/1004_auto_20201029_1331.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-29 13:31 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1003_auto_20201028_1751'), - ] - - operations = [ - migrations.AlterField( - model_name='document', - name='content', - field=models.TextField(blank=True, help_text='The raw, text-only data of the document. 
This field is primarily used for searching.'), - ), - ] diff --git a/src/documents/migrations/1005_auto_20201102_0007.py b/src/documents/migrations/1005_auto_20201102_0007.py deleted file mode 100644 index 146cc0b5a..000000000 --- a/src/documents/migrations/1005_auto_20201102_0007.py +++ /dev/null @@ -1,26 +0,0 @@ -# Generated by Django 3.1.2 on 2020-11-02 00:07 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1004_auto_20201029_1331'), - ] - - operations = [ - migrations.AlterModelOptions( - name='log', - options={'ordering': ('-created',)}, - ), - migrations.RemoveField( - model_name='log', - name='modified', - ), - migrations.AlterField( - model_name='log', - name='group', - field=models.UUIDField(blank=True, null=True), - ), - ] From 51a43989695c9dae1645387a5b893b3d00420f0c Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 23:25:50 +0100 Subject: [PATCH 03/26] updated document upload form --- src/documents/forms.py | 89 +++++++++--------------------------------- 1 file changed, 18 insertions(+), 71 deletions(-) diff --git a/src/documents/forms.py b/src/documents/forms.py index 3cdef448e..e6c7bbf41 100644 --- a/src/documents/forms.py +++ b/src/documents/forms.py @@ -1,4 +1,3 @@ -import magic import os from datetime import datetime @@ -6,77 +5,25 @@ from time import mktime from django import forms from django.conf import settings - -from .models import Document, Correspondent +from pathvalidate import validate_filename, ValidationError class UploadForm(forms.Form): - TYPE_LOOKUP = { - "application/pdf": Document.TYPE_PDF, - "image/png": Document.TYPE_PNG, - "image/jpeg": Document.TYPE_JPG, - "image/gif": Document.TYPE_GIF, - "image/tiff": Document.TYPE_TIF, - } - - correspondent = forms.CharField( - max_length=Correspondent._meta.get_field("name").max_length, - required=False - ) - title = forms.CharField( - max_length=Document._meta.get_field("title").max_length, - required=False - ) document = forms.FileField() - def __init__(self, *args, **kwargs): - forms.Form.__init__(self, *args, **kwargs) - self._file_type = None - - def clean_correspondent(self): - """ - I suppose it might look cleaner to use .get_or_create() here, but that - would also allow someone to fill up the db with bogus correspondents - before all validation was met. 
- """ - - corresp = self.cleaned_data.get("correspondent") - - if not corresp: - return None - - if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp: - raise forms.ValidationError( - "That correspondent name is suspicious.") - - return corresp - - def clean_title(self): - - title = self.cleaned_data.get("title") - - if not title: - return None - - if not Correspondent.SAFE_REGEX.match(title) or " - " in title: - raise forms.ValidationError("That title is suspicious.") - - return title - def clean_document(self): + try: + validate_filename(self.cleaned_data.get("document").name) + except ValidationError: + raise forms.ValidationError("That filename is suspicious.") + return self.cleaned_data.get("document") - document = self.cleaned_data.get("document").read() - - with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m: - file_type = m.id_buffer(document) - - if file_type not in self.TYPE_LOOKUP: - raise forms.ValidationError("The file type is invalid.") - - self._file_type = self.TYPE_LOOKUP[file_type] - - return document + def get_filename(self, i=None): + return os.path.join( + settings.CONSUMPTION_DIR, + "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name + ) def save(self): """ @@ -85,15 +32,15 @@ class UploadForm(forms.Form): form do that as well. Think of it as a poor-man's queue server. """ - correspondent = self.cleaned_data.get("correspondent") - title = self.cleaned_data.get("title") - document = self.cleaned_data.get("document") + document = self.cleaned_data.get("document").read() t = int(mktime(datetime.now().timetuple())) - file_name = os.path.join( - settings.CONSUMPTION_DIR, - "{} - {}.{}".format(correspondent, title, self._file_type) - ) + + file_name = self.get_filename() + i = 0 + while os.path.exists(file_name): + i += 1 + file_name = self.get_filename(i) with open(file_name, "wb") as f: f.write(document) From 164be3cfed18ad2d0ff81f0b7080a399d1117287 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 23:26:18 +0100 Subject: [PATCH 04/26] removed logging from index since it was causing issues with migrations and missing logging groups --- src/documents/index.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index 3fa8bd353..e08d016d2 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -68,7 +68,6 @@ def open_index(recreate=False): def update_document(writer, doc): - logging.getLogger(__name__).debug("Updating index with document{}".format(str(doc))) writer.update_document( id=doc.pk, title=doc.title, @@ -86,7 +85,6 @@ def add_document_to_index(sender, instance, **kwargs): @receiver(models.signals.post_delete, sender=Document) def remove_document_from_index(sender, instance, **kwargs): - logging.getLogger(__name__).debug("Removing document {} from index".format(str(instance))) ix = open_index() with AsyncWriter(ix) as writer: writer.delete_by_term('id', instance.pk) From 8b36bc7801cd9ffa3624bd9e32fadcb59d99f403 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 23:26:33 +0100 Subject: [PATCH 05/26] added dependency for filename checking --- Pipfile | 1 + Pipfile.lock | 279 ++++++++++++++++++++++++++++----------------------- 2 files changed, 156 insertions(+), 124 deletions(-) diff --git a/Pipfile b/Pipfile index e8f862578..beb252591 100644 --- a/Pipfile +++ b/Pipfile @@ -26,6 +26,7 @@ fuzzywuzzy = "*" python-Levenshtein = "*" django-extensions = "" watchdog = "*" +pathvalidate = "*" [dev-packages] 
coveralls = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 8b3bf705a..40d92fa59 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "2c1558fe7df0aee1ee20b095c2102f802470bf4a4ae09a7749ac487f8bfab8b6" + "sha256": "9b05b0a30fedd4192cd81df4fe96e7ae6e55facd557607cc1f9f66c173b4cdb1" }, "pipfile-spec": 6, "requires": {}, @@ -16,11 +16,11 @@ "default": { "asgiref": { "hashes": [ - "sha256:7e51911ee147dd685c3c8b805c0ad0cb58d360987b56953878f8c06d2d1c6f1a", - "sha256:9fc6fb5d39b8af147ba40765234fa822b39818b12cc80b35ad9b0cef3a476aed" + "sha256:a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e", + "sha256:cd88907ecaec59d78e4ac00ea665b03e571cb37e3a0e37b3702af1a9e86c365a" ], "markers": "python_version >= '3.5'", - "version": "==3.2.10" + "version": "==3.3.0" }, "dateparser": { "hashes": [ @@ -32,11 +32,11 @@ }, "django": { "hashes": [ - "sha256:a2127ad0150ec6966655bedf15dbbff9697cc86d61653db2da1afa506c0b04cc", - "sha256:c93c28ccf1d094cbd00d860e83128a39e45d2c571d3b54361713aaaf9a94cac4" + "sha256:14a4b7cd77297fba516fc0d92444cc2e2e388aa9de32d7a68d4a83d58f5a4927", + "sha256:14b87775ffedab2ef6299b73343d1b4b41e5d4e2aa58c6581f114dbec01e3f8f" ], "index": "pypi", - "version": "==3.1.2" + "version": "==3.1.3" }, "django-cors-headers": { "hashes": [ @@ -65,11 +65,10 @@ }, "djangorestframework": { "hashes": [ - "sha256:5c5071fcbad6dce16f566d492015c829ddb0df42965d488b878594aabc3aed21", - "sha256:d54452aedebb4b650254ca092f9f4f5df947cb1de6ab245d817b08b4f4156249" + "sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7" ], "index": "pypi", - "version": "==3.12.1" + "version": "==3.12.2" }, "filemagic": { "hashes": [ @@ -112,43 +111,43 @@ }, "numpy": { "hashes": [ - "sha256:0ee77786eebbfa37f2141fd106b549d37c89207a0d01d8852fde1c82e9bfc0e7", - "sha256:199bebc296bd8a5fc31c16f256ac873dd4d5b4928dfd50e6c4995570fc71a8f3", - "sha256:1a307bdd3dd444b1d0daa356b5f4c7de2e24d63bdc33ea13ff718b8ec4c6a268", - "sha256:1ea7e859f16e72ab81ef20aae69216cfea870676347510da9244805ff9670170", - "sha256:271139653e8b7a046d11a78c0d33bafbddd5c443a5b9119618d0652a4eb3a09f", - "sha256:35bf5316af8dc7c7db1ad45bec603e5fb28671beb98ebd1d65e8059efcfd3b72", - "sha256:463792a249a81b9eb2b63676347f996d3f0082c2666fd0604f4180d2e5445996", - "sha256:50d3513469acf5b2c0406e822d3f314d7ac5788c2b438c24e5dd54d5a81ef522", - "sha256:50f68ebc439821b826823a8da6caa79cd080dee2a6d5ab9f1163465a060495ed", - "sha256:51e8d2ae7c7e985c7bebf218e56f72fa93c900ad0c8a7d9fbbbf362f45710f69", - "sha256:522053b731e11329dd52d258ddf7de5288cae7418b55e4b7d32f0b7e31787e9d", - "sha256:5ea4401ada0d3988c263df85feb33818dc995abc85b8125f6ccb762009e7bc68", - "sha256:604d2e5a31482a3ad2c88206efd43d6fcf666ada1f3188fd779b4917e49b7a98", - "sha256:6ff88bcf1872b79002569c63fe26cd2cda614e573c553c4d5b814fb5eb3d2822", - "sha256:7197ee0a25629ed782c7bd01871ee40702ffeef35bc48004bc2fdcc71e29ba9d", - "sha256:741d95eb2b505bb7a99fbf4be05fa69f466e240c2b4f2d3ddead4f1b5f82a5a5", - "sha256:83af653bb92d1e248ccf5fdb05ccc934c14b936bcfe9b917dc180d3f00250ac6", - "sha256:8802d23e4895e0c65e418abe67cdf518aa5cbb976d97f42fd591f921d6dffad0", - "sha256:8edc4d687a74d0a5f8b9b26532e860f4f85f56c400b3a98899fc44acb5e27add", - "sha256:942d2cdcb362739908c26ce8dd88db6e139d3fa829dd7452dd9ff02cba6b58b2", - "sha256:9a0669787ba8c9d3bb5de5d9429208882fb47764aa79123af25c5edc4f5966b9", - "sha256:9d08d84bb4128abb9fbd9f073e5c69f70e5dab991a9c42e5b4081ea5b01b5db0", - "sha256:9f7f56b5e85b08774939622b7d45a5d00ff511466522c44fc0756ac7692c00f2", - 
"sha256:a2daea1cba83210c620e359de2861316f49cc7aea8e9a6979d6cb2ddab6dda8c", - "sha256:b9074d062d30c2779d8af587924f178a539edde5285d961d2dfbecbac9c4c931", - "sha256:c4aa79993f5d856765819a3651117520e41ac3f89c3fc1cb6dee11aa562df6da", - "sha256:d78294f1c20f366cde8a75167f822538a7252b6e8b9d6dbfb3bdab34e7c1929e", - "sha256:dfdc8b53aa9838b9d44ed785431ca47aa3efaa51d0d5dd9c412ab5247151a7c4", - "sha256:dffed17848e8b968d8d3692604e61881aa6ef1f8074c99e81647ac84f6038535", - "sha256:e080087148fd70469aade2abfeadee194357defd759f9b59b349c6192aba994c", - "sha256:e983cbabe10a8989333684c98fdc5dd2f28b236216981e0c26ed359aaa676772", - "sha256:ea6171d2d8d648dee717457d0f75db49ad8c2f13100680e284d7becf3dc311a6", - "sha256:eefc13863bf01583a85e8c1121a901cc7cb8f059b960c4eba30901e2e6aba95f", - "sha256:efd656893171bbf1331beca4ec9f2e74358fc732a2084f664fd149cc4b3441d2" + "sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db", + "sha256:09c12096d843b90eafd01ea1b3307e78ddd47a55855ad402b157b6c4862197ce", + "sha256:13d166f77d6dc02c0a73c1101dd87fdf01339febec1030bd810dcd53fff3b0f1", + "sha256:141ec3a3300ab89c7f2b0775289954d193cc8edb621ea05f99db9cb181530512", + "sha256:16c1b388cc31a9baa06d91a19366fb99ddbe1c7b205293ed072211ee5bac1ed2", + "sha256:18bed2bcb39e3f758296584337966e68d2d5ba6aab7e038688ad53c8f889f757", + "sha256:1aeef46a13e51931c0b1cf8ae1168b4a55ecd282e6688fdb0a948cc5a1d5afb9", + "sha256:27d3f3b9e3406579a8af3a9f262f5339005dd25e0ecf3cf1559ff8a49ed5cbf2", + "sha256:2a2740aa9733d2e5b2dfb33639d98a64c3b0f24765fed86b0fd2aec07f6a0a08", + "sha256:4377e10b874e653fe96985c05feed2225c912e328c8a26541f7fc600fb9c637b", + "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb", + "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc", + "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac", + "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83", + "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36", + "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387", + "sha256:8cac8790a6b1ddf88640a9267ee67b1aee7a57dfa2d2dd33999d080bc8ee3a0f", + "sha256:8ece138c3a16db8c1ad38f52eb32be6086cc72f403150a79336eb2045723a1ad", + "sha256:9eeb7d1d04b117ac0d38719915ae169aa6b61fca227b0b7d198d43728f0c879c", + "sha256:a09f98011236a419ee3f49cedc9ef27d7a1651df07810ae430a6b06576e0b414", + "sha256:a5d897c14513590a85774180be713f692df6fa8ecf6483e561a6d47309566f37", + "sha256:ad6f2ff5b1989a4899bf89800a671d71b1612e5ff40866d1f4d8bcf48d4e5764", + "sha256:c42c4b73121caf0ed6cd795512c9c09c52a7287b04d105d112068c1736d7c753", + "sha256:cb1017eec5257e9ac6209ac172058c430e834d5d2bc21961dceeb79d111e5909", + "sha256:d6c7bb82883680e168b55b49c70af29b84b84abb161cbac2800e8fcb6f2109b6", + "sha256:e452dc66e08a4ce642a961f134814258a082832c78c90351b75c41ad16f79f63", + "sha256:e5b6ed0f0b42317050c88022349d994fe72bfe35f5908617512cd8c8ef9da2a9", + "sha256:e9b30d4bd69498fc0c3fe9db5f62fffbb06b8eb9321f92cc970f2969be5e3949", + "sha256:ec149b90019852266fec2341ce1db513b843e496d5a8e8cdb5ced1923a92faab", + "sha256:edb01671b3caae1ca00881686003d16c2209e07b7ef8b7639f1867852b948f7c", + "sha256:f0d3929fe88ee1c155129ecd82f981b8856c5d97bcb0d5f23e9b4242e79d1de3", + "sha256:f29454410db6ef8126c83bd3c968d143304633d45dc57b51252afbd79d700893", + "sha256:fe45becb4c2f72a0907c1d0246ea6449fe7a9e2293bb0e11c4e9a32bb0930a15", + "sha256:fedbd128668ead37f33917820b704784aff695e0019309ad446a6d0b065b57e4" ], "markers": "python_version >= '3.6'", - "version": "==1.19.3" + "version": "==1.19.4" }, 
"pathtools": { "hashes": [ @@ -156,6 +155,14 @@ ], "version": "==0.1.2" }, + "pathvalidate": { + "hashes": [ + "sha256:1697c8ea71ff4c48e7aa0eda72fe4581404be8f41e51a17363ef682dd6824d35", + "sha256:32d30dbacb711c16bb188b12ce7e9a46b41785f50a12f64500f747480a4b6ee3" + ], + "index": "pypi", + "version": "==2.3.0" + }, "pdftotext": { "hashes": [ "sha256:98aeb8b07a4127e1a30223bd933ef080bbd29aa88f801717ca6c5618380b8aa6" @@ -199,39 +206,41 @@ }, "psycopg2-binary": { "hashes": [ - "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", - "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", - "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", - "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", - "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", - "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", - "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", - "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", - "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", - "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", - "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", - "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", - "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", + "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", - "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", - "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", - "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", - "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", - "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", - "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", - "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + 
"sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5", + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", + "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", + "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", + "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", - "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", - "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" + "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056" ], "index": "pypi", "version": "==2.8.6" @@ -276,40 +285,56 @@ }, "pytz": { "hashes": [ - "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", - "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + "sha256:3e6b7dd2d1e0a59084bcee14a17af60c5c562cdc16d828e8eba2e683d3a7e268", + "sha256:5c55e189b682d420be27c6995ba6edce0c0a77dd67bfbe2ae6607134d5851ffd" ], - "version": "==2020.1" + "version": "==2020.4" }, "regex": { "hashes": [ - "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", - "sha256:06b52815d4ad38d6524666e0d50fe9173533c9cc145a5779b89733284e6f688f", - "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", - "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", - "sha256:1ec66700a10e3c75f1f92cbde36cca0d3aaee4c73dfa26699495a3a30b09093c", - "sha256:2dc522e25e57e88b4980d2bdd334825dbf6fa55f28a922fc3bfa60cc09e5ef53", - "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", - "sha256:49461446b783945597c4076aea3f49aee4b4ce922bd241e4fcf62a3e7c61794c", - "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", - "sha256:4b5a9bcb56cc146c3932c648603b24514447eafa6ce9295234767bf92f69b504", - "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", - "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", - "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582", - "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", - "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", - "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", + "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", - "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", + "sha256:297116e79074ec2a2f885d22db00ce6e88b15f75162c5e8b38f66ea734e73c64", + "sha256:2564def9ce0710d510b1fc7e5178ce2d20f75571f788b5197b3c8134c366f50c", + "sha256:06b52815d4ad38d6524666e0d50fe9173533c9cc145a5779b89733284e6f688f", + "sha256:b45bab9f224de276b7bc916f6306b86283f6aa8afe7ed4133423efb42015a898", + 
"sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", + "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", + "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", + "sha256:52e83a5f28acd621ba8e71c2b816f6541af7144b69cc5859d17da76c436a5427", + "sha256:b8a686a6c98872007aa41fdbb2e86dc03b287d951ff4a7f1da77fb7f14113e4d", "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", + "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", - "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", + "sha256:3dfca201fa6b326239e1bccb00b915e058707028809b8ecc0cf6819ad233a740", + "sha256:127a9e0c0d91af572fbb9e56d00a504dbd4c65e574ddda3d45b55722462210de", + "sha256:1ec66700a10e3c75f1f92cbde36cca0d3aaee4c73dfa26699495a3a30b09093c", + "sha256:bf4f896c42c63d1f22039ad57de2644c72587756c0cfb3cc3b7530cfe228277f", + "sha256:bd904c0dec29bbd0769887a816657491721d5f545c29e30fd9d7a1a275dc80ab", + "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", + "sha256:9b6305295b6591e45f069d3553c54d50cc47629eb5c218aac99e0f7fafbf90a1", + "sha256:c32c91a0f1ac779cbd73e62430de3d3502bbc45ffe5bb6c376015acfa848144b", + "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", + "sha256:227a8d2e5282c2b8346e7f68aa759e0331a0b4a890b55a5cfbb28bd0261b84c0", + "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", + "sha256:c454ad88e56e80e44f824ef8366bb7e4c3def12999151fd5c0ea76a18fe9aa3e", + "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:2dc522e25e57e88b4980d2bdd334825dbf6fa55f28a922fc3bfa60cc09e5ef53", + "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:49461446b783945597c4076aea3f49aee4b4ce922bd241e4fcf62a3e7c61794c", + "sha256:de7fd57765398d141949946c84f3590a68cf5887dac3fc52388df0639b01eda4", + "sha256:4b5a9bcb56cc146c3932c648603b24514447eafa6ce9295234767bf92f69b504", "sha256:cb905f3d2e290a8b8f1579d3984f2cfa7c3a29cc7cba608540ceeed18513f520", "sha256:cfcf28ed4ce9ced47b9b9670a4f0d3d3c0e4d4779ad4dadb1ad468b097f808aa", - "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", - "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", - "sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", - "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26" + "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", + "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", + "sha256:aacc8623ffe7999a97935eeabbd24b1ae701d08ea8f874a6ff050e93c3e658cf", + "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", + "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", + "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", + "sha256:96f99219dddb33e235a37283306834700b63170d7bb2a1ee17e41c6d589c8eb9", + "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26", + "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582" ], "version": "==2020.10.28" }, @@ -337,28 +362,34 @@ }, "scipy": { "hashes": [ - "sha256:07b083128beae040f1129bd8a82b01804f5e716a7fd2962c1053fa683433e4ab", - "sha256:0edd67e8a00903aaf7a29c968555a2e27c5a69fea9d1dcfffda80614281a884f", - "sha256:12fdcbfa56cac926a0a9364a30cbf4ad03c2c7b59f75b14234656a5e4fd52bf3", - 
"sha256:1fee28b6641ecbff6e80fe7788e50f50c5576157d278fa40f36c851940eb0aff", - "sha256:33e6a7439f43f37d4c1135bc95bcd490ffeac6ef4b374892c7005ce2c729cf4a", - "sha256:5163200ab14fd2b83aba8f0c4ddcc1fa982a43192867264ab0f4c8065fd10d17", - "sha256:66ec29348444ed6e8a14c9adc2de65e74a8fc526dc2c770741725464488ede1f", - "sha256:8cc5c39ed287a8b52a5509cd6680af078a40b0e010e2657eca01ffbfec929468", - "sha256:a1a13858b10d41beb0413c4378462b43eafef88a1948d286cb357eadc0aec024", - "sha256:a3db1fe7c6cb29ca02b14c9141151ebafd11e06ffb6da8ecd330eee5c8283a8a", - "sha256:aebb69bcdec209d874fc4b0c7ac36f509d50418a431c1422465fa34c2c0143ea", - "sha256:b9751b39c52a3fa59312bd2e1f40144ee26b51404db5d2f0d5259c511ff6f614", - "sha256:bc0e63daf43bf052aefbbd6c5424bc03f629d115ece828e87303a0bcc04a37e4", - "sha256:d5e3cc60868f396b78fc881d2c76460febccfe90f6d2f082b9952265c79a8788", - "sha256:ddae76784574cc4c172f3d5edd7308be16078dd3b977e8746860c76c195fa707", - "sha256:e2602f79c85924e4486f684aa9bbab74afff90606100db88d0785a0088be7edb", - "sha256:e527c9221b6494bcd06a17f9f16874406b32121385f9ab353b8a9545be458f0b", - "sha256:f574558f1b774864516f3c3fe072ebc90a29186f49b720f60ed339294b7f32ac", - "sha256:ffcbd331f1ffa82e22f1d408e93c37463c9a83088243158635baec61983aaacf" + "sha256:168c45c0c32e23f613db7c9e4e780bc61982d71dcd406ead746c7c7c2f2004ce", + "sha256:213bc59191da2f479984ad4ec39406bf949a99aba70e9237b916ce7547b6ef42", + "sha256:25b241034215247481f53355e05f9e25462682b13bd9191359075682adcd9554", + "sha256:2c872de0c69ed20fb1a9b9cf6f77298b04a26f0b8720a5457be08be254366c6e", + "sha256:3397c129b479846d7eaa18f999369a24322d008fac0782e7828fa567358c36ce", + "sha256:368c0f69f93186309e1b4beb8e26d51dd6f5010b79264c0f1e9ca00cd92ea8c9", + "sha256:3d5db5d815370c28d938cf9b0809dade4acf7aba57eaf7ef733bfedc9b2474c4", + "sha256:4598cf03136067000855d6b44d7a1f4f46994164bcd450fb2c3d481afc25dd06", + "sha256:4a453d5e5689de62e5d38edf40af3f17560bfd63c9c5bd228c18c1f99afa155b", + "sha256:4f12d13ffbc16e988fa40809cbbd7a8b45bc05ff6ea0ba8e3e41f6f4db3a9e47", + "sha256:634568a3018bc16a83cda28d4f7aed0d803dd5618facb36e977e53b2df868443", + "sha256:65923bc3809524e46fb7eb4d6346552cbb6a1ffc41be748535aa502a2e3d3389", + "sha256:6b0ceb23560f46dd236a8ad4378fc40bad1783e997604ba845e131d6c680963e", + "sha256:8c8d6ca19c8497344b810b0b0344f8375af5f6bb9c98bd42e33f747417ab3f57", + "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62", + "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d", + "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437", + "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2", + "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54", + "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474", + "sha256:e360cb2299028d0b0d0f65a5c5e51fc16a335f1603aa2357c25766c8dab56938", + "sha256:e98d49a5717369d8241d6cf33ecb0ca72deee392414118198a8e5b4c35c56340", + "sha256:ed572470af2438b526ea574ff8f05e7f39b44ac37f712105e57fc4d53a6fb660", + "sha256:f87b39f4d69cf7d7529d7b1098cb712033b17ea7714aed831b95628f483fd012", + "sha256:fa789583fc94a7689b45834453fec095245c7e69c58561dc159b5d5277057e4c" ], "markers": "python_version >= '3.6'", - "version": "==1.5.3" + "version": "==1.5.4" }, "six": { "hashes": [ @@ -441,11 +472,11 @@ }, "attrs": { "hashes": [ - "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594", - "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc" + "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", + 
"sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.2.0" + "version": "==20.3.0" }, "babel": { "hashes": [ @@ -556,11 +587,11 @@ }, "faker": { "hashes": [ - "sha256:30afa8f564350770373f299d2d267bff42aaba699a7ae0a3b6f378b2a8170569", - "sha256:a7a36c3c657f06bd1e3e3821b9480f2a92017d8a26e150e464ab6b97743cbc92" + "sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca", + "sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026" ], "markers": "python_version >= '3.5'", - "version": "==4.14.0" + "version": "==4.14.2" }, "filelock": { "hashes": [ @@ -751,10 +782,10 @@ }, "pytz": { "hashes": [ - "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", - "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + "sha256:3e6b7dd2d1e0a59084bcee14a17af60c5c562cdc16d828e8eba2e683d3a7e268", + "sha256:5c55e189b682d420be27c6995ba6edce0c0a77dd67bfbe2ae6607134d5851ffd" ], - "version": "==2020.1" + "version": "==2020.4" }, "requests": { "hashes": [ @@ -781,11 +812,11 @@ }, "sphinx": { "hashes": [ - "sha256:321d6d9b16fa381a5306e5a0b76cd48ffbc588e6340059a729c6fdd66087e0e8", - "sha256:ce6fd7ff5b215af39e2fcd44d4a321f6694b4530b6f2b2109b64d120773faea0" + "sha256:1c21e7c5481a31b531e6cbf59c3292852ccde175b504b00ce2ff0b8f4adc3649", + "sha256:3abdb2c57a65afaaa4f8573cbabd5465078eb6fd282c1e4f87f006875a7ec0c7" ], "index": "pypi", - "version": "==3.2.1" + "version": "==3.3.0" }, "sphinxcontrib-applehelp": { "hashes": [ From d3b1a20a99cf167521dccbb1ff31fc2841ccdac2 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 00:07:31 +0100 Subject: [PATCH 06/26] current view persists during session, code cleanup --- .../dashboard/dashboard.component.html | 14 --- .../document-detail.component.ts | 4 +- .../document-list.component.html | 12 +-- .../document-list/document-list.component.ts | 27 +++--- src-ui/src/app/data/saved-view-config.ts | 6 +- src-ui/src/app/data/storage-keys.ts | 4 + .../services/document-list-view.service.ts | 92 ++++++++++++++----- .../app/services/saved-view-config.service.ts | 6 +- 8 files changed, 99 insertions(+), 66 deletions(-) diff --git a/src-ui/src/app/components/dashboard/dashboard.component.html b/src-ui/src/app/components/dashboard/dashboard.component.html index 694b431c4..a6d7b3d1b 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.html +++ b/src-ui/src/app/components/dashboard/dashboard.component.html @@ -46,19 +46,5 @@ -
-        Document consumer status
-        This is what it might look like in the future.
-        Filename.pdf: Running tesseract on page 4/8...
-        Filename2.pdf: Completed.
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index ad9a3ac07..802a3b212 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -134,8 +134,8 @@ export class DocumentDetailComponent implements OnInit { close() { this.openDocumentService.closeDocument(this.document) - if (this.documentListViewService.viewConfig) { - this.router.navigate(['view', this.documentListViewService.viewConfig.id]) + if (this.documentListViewService.viewId) { + this.router.navigate(['view', this.documentListViewService.viewId]) } else { this.router.navigate(['documents']) } diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index 9d275854e..6eafb7b80 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -1,4 +1,4 @@ - +
@@ -21,14 +21,13 @@
+            [class.active]="docs.sortField == f.field">{{f.name}}
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index be83bf0bf..2ea2c9e3e 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -26,13 +26,16 @@ export class DocumentListComponent implements OnInit { filterRules: FilterRule[] = [] showFilter = false + getTitle() { + return this.docs.viewConfigOverride ? this.docs.viewConfigOverride.title : "Documents" + } + getSortFields() { return DOCUMENT_SORT_FIELDS } setSort(field: string) { - this.docs.currentSortField = field - this.reload() + this.docs.sortField = field } saveDisplayMode() { @@ -45,11 +48,11 @@ export class DocumentListComponent implements OnInit { } this.route.paramMap.subscribe(params => { if (params.has('id')) { - this.docs.viewConfig = this.savedViewConfigService.getConfig(params.get('id')) + this.docs.viewConfigOverride = this.savedViewConfigService.getConfig(params.get('id')) } else { - this.filterRules = cloneFilterRules(this.docs.currentFilterRules) + this.filterRules = this.docs.filterRules this.showFilter = this.filterRules.length > 0 - this.docs.viewConfig = null + this.docs.viewConfigOverride = null } this.reload() }) @@ -60,28 +63,24 @@ export class DocumentListComponent implements OnInit { } applyFilterRules() { - this.docs.setFilterRules(this.filterRules) - this.reload() + this.docs.filterRules = this.filterRules } loadViewConfig(config: SavedViewConfig) { this.filterRules = cloneFilterRules(config.filterRules) - this.docs.setFilterRules(config.filterRules) - this.docs.currentSortField = config.sortField - this.docs.currentSortDirection = config.sortDirection - this.reload() + this.docs.loadViewConfig(config) } saveViewConfig() { let modal = this.modalService.open(SaveViewConfigDialogComponent, {backdrop: 'static'}) modal.componentInstance.saveClicked.subscribe(formValue => { this.savedViewConfigService.saveConfig({ - filterRules: cloneFilterRules(this.filterRules), title: formValue.title, showInDashboard: formValue.showInDashboard, showInSideBar: formValue.showInSideBar, - sortDirection: this.docs.currentSortDirection, - sortField: this.docs.currentSortField + filterRules: this.docs.filterRules, + sortDirection: this.docs.sortDirection, + sortField: this.docs.sortField }) modal.close() }) diff --git a/src-ui/src/app/data/saved-view-config.ts b/src-ui/src/app/data/saved-view-config.ts index 29d881510..9d7076215 100644 --- a/src-ui/src/app/data/saved-view-config.ts +++ b/src-ui/src/app/data/saved-view-config.ts @@ -10,10 +10,10 @@ export interface SavedViewConfig { sortDirection: string - title: string + title?: string - showInSideBar: boolean + showInSideBar?: boolean - showInDashboard: boolean + showInDashboard?: boolean } \ No newline at end of file diff --git a/src-ui/src/app/data/storage-keys.ts b/src-ui/src/app/data/storage-keys.ts index cc4a05ec2..13b41d4a7 100644 --- a/src-ui/src/app/data/storage-keys.ts +++ b/src-ui/src/app/data/storage-keys.ts @@ -2,6 +2,10 @@ export const OPEN_DOCUMENT_SERVICE = { DOCUMENTS: 'open-documents-service:openDocuments' } +export const DOCUMENT_LIST_SERVICE = { + CURRENT_VIEW_CONFIG: 'document-list-service:currentViewConfig' +} + export const GENERAL_SETTINGS = { DOCUMENT_LIST_SIZE: 'general-settings:documentListSize', DOCUMENT_LIST_SIZE_DEFAULT: 50 diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index 
e554f2c8f..39a8661b9 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -3,8 +3,8 @@ import { Observable } from 'rxjs'; import { cloneFilterRules, FilterRule } from '../data/filter-rule'; import { PaperlessDocument } from '../data/paperless-document'; import { SavedViewConfig } from '../data/saved-view-config'; -import { GENERAL_SETTINGS } from '../data/storage-keys'; -import { DocumentService, SORT_DIRECTION_DESCENDING } from './rest/document.service'; +import { DOCUMENT_LIST_SERVICE, GENERAL_SETTINGS } from '../data/storage-keys'; +import { DocumentService } from './rest/document.service'; @Injectable({ @@ -18,33 +18,24 @@ export class DocumentListViewService { currentPage = 1 currentPageSize: number = +localStorage.getItem(GENERAL_SETTINGS.DOCUMENT_LIST_SIZE) || GENERAL_SETTINGS.DOCUMENT_LIST_SIZE_DEFAULT collectionSize: number - - currentFilterRules: FilterRule[] = [] - currentSortDirection = SORT_DIRECTION_DESCENDING - currentSortField = DocumentListViewService.DEFAULT_SORT_FIELD - viewConfig: SavedViewConfig + private currentViewConfig: SavedViewConfig + //TODO: make private + viewConfigOverride: SavedViewConfig + + get viewId() { + return this.viewConfigOverride?.id + } reload(onFinish?) { - let sortField: string - let sortDirection: string - let filterRules: FilterRule[] - if (this.viewConfig) { - sortField = this.viewConfig.sortField - sortDirection = this.viewConfig.sortDirection - filterRules = this.viewConfig.filterRules - } else { - sortField = this.currentSortField - sortDirection = this.currentSortDirection - filterRules = this.currentFilterRules - } + let viewConfig = this.viewConfigOverride || this.currentViewConfig this.documentService.list( this.currentPage, this.currentPageSize, - sortField, - sortDirection, - filterRules).subscribe( + viewConfig.sortField, + viewConfig.sortDirection, + viewConfig.filterRules).subscribe( result => { this.collectionSize = result.count this.documents = result.results @@ -60,9 +51,43 @@ export class DocumentListViewService { }) } + set filterRules(filterRules: FilterRule[]) { + this.currentViewConfig.filterRules = cloneFilterRules(filterRules) + this.saveCurrentViewConfig() + this.reload() + } - setFilterRules(filterRules: FilterRule[]) { - this.currentFilterRules = cloneFilterRules(filterRules) + get filterRules(): FilterRule[] { + return cloneFilterRules(this.currentViewConfig.filterRules) + } + + set sortField(field: string) { + this.currentViewConfig.sortField = field + this.saveCurrentViewConfig() + this.reload() + } + + get sortField(): string { + return this.currentViewConfig.sortField + } + + set sortDirection(direction: string) { + this.currentViewConfig.sortDirection = direction + this.saveCurrentViewConfig() + this.reload() + } + + get sortDirection(): string { + return this.currentViewConfig.sortDirection + } + + loadViewConfig(config: SavedViewConfig) { + Object.assign(this.currentViewConfig, config) + this.reload() + } + + private saveCurrentViewConfig() { + sessionStorage.setItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG, JSON.stringify(this.currentViewConfig)) } getLastPage(): number { @@ -108,5 +133,22 @@ export class DocumentListViewService { } } - constructor(private documentService: DocumentService) { } + constructor(private documentService: DocumentService) { + let currentViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG) + if (currentViewConfigJson) { + try { + this.currentViewConfig = 
JSON.parse(currentViewConfigJson) + } catch (e) { + sessionStorage.removeItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG) + this.currentViewConfig = null + } + } + if (!this.currentViewConfig) { + this.currentViewConfig = { + filterRules: [], + sortDirection: 'des', + sortField: 'created' + } + } + } } diff --git a/src-ui/src/app/services/saved-view-config.service.ts b/src-ui/src/app/services/saved-view-config.service.ts index a6b538b0d..d69791209 100644 --- a/src-ui/src/app/services/saved-view-config.service.ts +++ b/src-ui/src/app/services/saved-view-config.service.ts @@ -10,7 +10,11 @@ export class SavedViewConfigService { constructor() { let savedConfigs = localStorage.getItem('saved-view-config-service:savedConfigs') if (savedConfigs) { - this.configs = JSON.parse(savedConfigs) + try { + this.configs = JSON.parse(savedConfigs) + } catch (e) { + this.configs = [] + } } } From 3378ac14870fbfebecb8498664c073f7a967f541 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 00:42:39 +0100 Subject: [PATCH 07/26] forgot a sort field --- src-ui/src/app/services/rest/document.service.ts | 1 + src/documents/views.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index 7328b380e..bc1afb419 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -10,6 +10,7 @@ import { FilterRule } from 'src/app/data/filter-rule'; export const DOCUMENT_SORT_FIELDS = [ { field: "correspondent__name", name: "Correspondent" }, + { field: "document_type__name", name: "Document type" }, { field: 'title', name: 'Title' }, { field: 'archive_serial_number', name: 'ASN' }, { field: 'created', name: 'Created' }, diff --git a/src/documents/views.py b/src/documents/views.py index 6818551f0..63f0e0bb9 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -97,7 +97,7 @@ class DocumentViewSet(RetrieveModelMixin, filter_class = DocumentFilterSet search_fields = ("title", "correspondent__name", "content") ordering_fields = ( - "id", "title", "correspondent__name", "created", "modified", "added", "archive_serial_number") + "id", "title", "correspondent__name", "document_type__name", "created", "modified", "added", "archive_serial_number") def file_response(self, pk, disposition): #TODO: this should not be necessary here. From 942fab72984e2d6e1f280b981044b92ba174fa65 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 11:24:57 +0100 Subject: [PATCH 08/26] I removed the model save/delete hooks for index updates since they were causing too much trouble with migrations --- src/documents/admin.py | 17 +++++++++++++++++ src/documents/apps.py | 4 +++- src/documents/index.py | 23 +++++++++++++++-------- src/documents/signals/handlers.py | 4 ++++ src/documents/views.py | 9 +++++++++ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/src/documents/admin.py b/src/documents/admin.py index 74a152c68..6ac949a45 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -2,7 +2,9 @@ from django.contrib import admin from django.contrib.auth.models import Group, User from django.utils.html import format_html, format_html_join from django.utils.safestring import mark_safe +from whoosh.writing import AsyncWriter +from . 
import index from .models import Correspondent, Document, DocumentType, Log, Tag @@ -71,6 +73,21 @@ class DocumentAdmin(admin.ModelAdmin): return obj.created.date().strftime("%Y-%m-%d") created_.short_description = "Created" + def delete_queryset(self, request, queryset): + ix = index.open_index() + with AsyncWriter(ix) as writer: + for o in queryset: + index.remove_document(writer, o) + super(DocumentAdmin, self).delete_queryset(request, queryset) + + def delete_model(self, request, obj): + index.remove_document_from_index(obj) + super(DocumentAdmin, self).delete_model(request, obj) + + def save_model(self, request, obj, form, change): + index.add_or_update_document(obj) + super(DocumentAdmin, self).save_model(request, obj, form, change) + @mark_safe def tags_(self, obj): r = "" diff --git a/src/documents/apps.py b/src/documents/apps.py index ca278e2e3..83e671d07 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -18,7 +18,8 @@ class DocumentsConfig(AppConfig): set_log_entry, set_correspondent, set_document_type, - set_tags + set_tags, + add_to_index ) @@ -29,6 +30,7 @@ class DocumentsConfig(AppConfig): document_consumption_finished.connect(set_document_type) document_consumption_finished.connect(set_tags) document_consumption_finished.connect(set_log_entry) + document_consumption_finished.connect(add_to_index) document_consumption_finished.connect(run_post_consume_script) post_delete.connect(cleanup_document_deletion) diff --git a/src/documents/index.py b/src/documents/index.py index e08d016d2..c55402e10 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -11,6 +11,9 @@ from documents.models import Document from paperless import settings +logger = logging.getLogger(__name__) + + class JsonFormatter(Formatter): def __init__(self): self.seen = {} @@ -68,6 +71,7 @@ def open_index(recreate=False): def update_document(writer, doc): + logger.debug("Indexing {}...".format(doc)) writer.update_document( id=doc.pk, title=doc.title, @@ -76,18 +80,21 @@ def update_document(writer, doc): ) -@receiver(models.signals.post_save, sender=Document) -def add_document_to_index(sender, instance, **kwargs): - ix = open_index() - with AsyncWriter(ix) as writer: - update_document(writer, instance) +def remove_document(writer, doc): + logger.debug("Removing {} from index...".format(doc)) + writer.delete_by_term('id', doc.pk) -@receiver(models.signals.post_delete, sender=Document) -def remove_document_from_index(sender, instance, **kwargs): +def add_or_update_document(document): ix = open_index() with AsyncWriter(ix) as writer: - writer.delete_by_term('id', instance.pk) + update_document(writer, document) + + +def remove_document_from_index(document): + ix = open_index() + with AsyncWriter(ix) as writer: + remove_document(writer, document) def autocomplete(ix, term, limit=10): diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 231a39e0d..cee1e042b 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -166,3 +166,7 @@ def set_log_entry(sender, document=None, logging_group=None, **kwargs): user=user, object_repr=document.__str__(), ) + + +def add_to_index(sender, document, **kwargs): + index.add_or_update_document(document) diff --git a/src/documents/views.py b/src/documents/views.py index 63f0e0bb9..28ac7ae26 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -99,6 +99,15 @@ class DocumentViewSet(RetrieveModelMixin, ordering_fields = ( "id", "title", "correspondent__name", "document_type__name", 
"created", "modified", "added", "archive_serial_number") + def update(self, request, *args, **kwargs): + response = super(DocumentViewSet, self).update(request, *args, **kwargs) + index.add_or_update_document(self.get_object()) + return response + + def destroy(self, request, *args, **kwargs): + index.remove_document_from_index(self.get_object()) + return super(DocumentViewSet, self).destroy(request, *args, **kwargs) + def file_response(self, pk, disposition): #TODO: this should not be necessary here. content_types = { From 7747e6512a065f3a192d08dee8c9dbb1aa40db1a Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 11:30:16 +0100 Subject: [PATCH 09/26] moved some code --- src/documents/index.py | 13 +++++++++++++ src/documents/views.py | 20 ++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index c55402e10..a099f670c 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -2,9 +2,11 @@ import logging from django.db import models from django.dispatch import receiver +from whoosh import highlight from whoosh.fields import Schema, TEXT, NUMERIC from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir +from whoosh.qparser import MultifieldParser from whoosh.writing import AsyncWriter from documents.models import Document @@ -97,6 +99,17 @@ def remove_document_from_index(document): remove_document(writer, document) +def query_page(ix, query, page): + with ix.searcher() as searcher: + query_parser = MultifieldParser(["content", "title", "correspondent"], + ix.schema).parse(query) + result_page = searcher.search_page(query_parser, page) + result_page.results.fragmenter = highlight.ContextFragmenter( + surround=50) + result_page.results.formatter = JsonFormatter() + return result_page + + def autocomplete(ix, term, limit=10): with ix.reader() as reader: terms = [] diff --git a/src/documents/views.py b/src/documents/views.py index 28ac7ae26..b3d6012f1 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -6,9 +6,6 @@ from django_filters.rest_framework import DjangoFilterBackend from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.views import APIView -from whoosh import highlight -from whoosh.qparser import QueryParser -from whoosh.query import terms from paperless.db import GnuPG from paperless.views import StandardPagination @@ -194,18 +191,13 @@ class SearchView(APIView): except (ValueError, TypeError): page = 1 - with self.ix.searcher() as searcher: - query_parser = QueryParser("content", self.ix.schema).parse(query) - result_page = searcher.search_page(query_parser, page) - result_page.results.fragmenter = highlight.ContextFragmenter( - surround=50) - result_page.results.formatter = index.JsonFormatter() + result_page = index.query_page(self.ix, query, page) - return Response( - {'count': len(result_page), - 'page': result_page.pagenum, - 'page_count': result_page.pagecount, - 'results': list(map(self.add_infos_to_hit, result_page))}) + return Response( + {'count': len(result_page), + 'page': result_page.pagenum, + 'page_count': result_page.pagecount, + 'results': list(map(self.add_infos_to_hit, result_page))}) else: return Response({ From 9067a4f28833a0398fd2283157b5acd9df3d8de3 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 13:00:45 +0100 Subject: [PATCH 10/26] added the filename handling back into the code --- README.md | 4 +- 
paperless.conf.example | 14 +- .../management/commands/document_renamer.py | 24 + .../migrations/1000_update_paperless_all.py | 53 -- src/documents/models.py | 246 +++++++- src/documents/tests/test_file_handling.py | 559 ++++++++++++++++++ src/paperless/settings.py | 3 + 7 files changed, 841 insertions(+), 62 deletions(-) create mode 100644 src/documents/management/commands/document_renamer.py create mode 100644 src/documents/tests/test_file_handling.py diff --git a/README.md b/README.md index b925fa89b..6f013a913 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ This is a list of changes that have been made to the original project. ## Added - **A new single page UI** built with bootstrap and Angular. Its much more responsive than the django admin pages. It features the follwing improvements over the old django admin interface: + - *Dashboard.* The landing page shows some useful information, such as statistics, recently scanned documents, file uploading, and possibly more in the future. - *Document uploading on the web page.* This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it. - *Full text search* with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future. - *Saveable filters.* Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dash board. @@ -49,21 +50,18 @@ This is a list of changes that have been made to the original project. These features were removed each due to two reasons. First, I did not feel these features contributed all that much to the over project, and second, I don't want to maintain these features. - **(BREAKING) Reminders.** I have no idea what they were used for and thus removed them from the project. -- **Filename handling (I'm sorry).** The master branch of the paperless project has seen some changes regarding the filename handling of stored documents. These changes allow you to change the filename of stored documents from their default form ‘{id}.pdf’. These changes have not made it into this project, since the whole point of paperless is that you don't have to access your documents on the disk anymore. If you are using version 2.7.0, this does not affect you. If you are on the most recent push on the master branch, the provided migration will revert these changes and rename all your files to their original file name. - **Every customization made to the admin interface.** Since this is not the primary interface for the application anymore, there is no need to keep and maintain these. Besides, some changes were incompatible with the most recent versions of django. The interface is completely usable, though. ## Planned These features will make it into the application at some point, sorted by priority. -- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer eventually. - **More search.** The search backend is incredibly versatile and customizable. 
Searching is the most important feature of this project and thus, I want to implement things like: - Group and limit search results by correspondent, show “more from this” links in the results. - Ability to search for “Similar documents” in the search results - Provide corrections for mispelled queries - **More robust consumer** that shows its progress on the web page. - **Arbitrary tag colors**. Allow the selection of any color with a color picker. -- **Dashboard**. The landing page is a little bleak right now but will feature status updates about the consumer, previews of saved filters and database statistics in the future. ## On the chopping block. diff --git a/paperless.conf.example b/paperless.conf.example index 41ac778e7..bd9f715f2 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -63,7 +63,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # Any email sent to the target account that does not contain this text will be # ignored. -#PAPERLESS_EMAIL_SECRET="" +PAPERLESS_EMAIL_SECRET="" + +# Specify a filename format for the document (directories are supported) +# Use the following placeholders: +# * {correspondent} +# * {title} +# * {created} +# * {added} +# * {tags[KEY]} If your tags conform to key_value or key-value +# * {tags[INDEX]} If your tags are strings, select the tag by index +# Uniqueness of filenames is ensured, as an incrementing counter is attached +# to each filename. +#PAPERLESS_FILENAME_FORMAT="" ############################################################################### #### Security #### diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py new file mode 100644 index 000000000..d7d77a111 --- /dev/null +++ b/src/documents/management/commands/document_renamer.py @@ -0,0 +1,24 @@ +from django.core.management.base import BaseCommand + +from documents.models import Document, Tag + +from ...mixins import Renderable + + +class Command(Renderable, BaseCommand): + + help = """ + This will rename all documents to match the latest filename format. + """.replace(" ", "") + + def __init__(self, *args, **kwargs): + self.verbosity = 0 + BaseCommand.__init__(self, *args, **kwargs) + + def handle(self, *args, **options): + + self.verbosity = options["verbosity"] + + for document in Document.objects.all(): + # Saving the document again will generate a new filename and rename + document.save() diff --git a/src/documents/migrations/1000_update_paperless_all.py b/src/documents/migrations/1000_update_paperless_all.py index 79e2b3668..8c1bd52af 100644 --- a/src/documents/migrations/1000_update_paperless_all.py +++ b/src/documents/migrations/1000_update_paperless_all.py @@ -23,48 +23,6 @@ def make_index(apps, schema_editor): print(" --> Cannot create document index.") -def restore_filenames(apps, schema_editor): - Document = apps.get_model("documents", "Document") - - rename_operations = [] - - for doc in Document.objects.all(): - file_name = "{:07}.{}".format(doc.pk, doc.file_type) - if doc.storage_type == "gpg": - file_name += ".gpg" - - if not doc.filename == file_name: - try: - src = os.path.join(settings.ORIGINALS_DIR, doc.filename) - dst = os.path.join(settings.ORIGINALS_DIR, file_name) - if os.path.exists(dst): - raise Exception("Cannot move {}, {} already exists!".format(src, dst)) - if not os.path.exists(src): - raise Exception("Cannot move {}, file does not exist! 
(this is bad, one of your documents is missing".format(src)) - - rename_operations.append( (src,dst) ) - except (PermissionError, FileNotFoundError) as e: - raise Exception(e) - - for (src, dst) in rename_operations: - print("file was renamed, restoring {} to {}".format(src, dst)) - os.rename(src, dst) - - -def initialize_document_classifier(apps, schema_editor): - try: - print("Initalizing document classifier...") - from documents.classifier import DocumentClassifier - classifier = DocumentClassifier() - try: - classifier.train() - classifier.save_classifier() - except Exception as e: - print("Classifier error: {}".format(e)) - except ImportError: - print("Document classifier not found, skipping") - - class Migration(migrations.Migration): dependencies = [ @@ -72,13 +30,6 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython( - code=restore_filenames, - ), - migrations.RemoveField( - model_name='document', - name='filename', - ), migrations.AddField( model_name='document', name='archive_serial_number', @@ -141,8 +92,4 @@ class Migration(migrations.Migration): code=make_index, reverse_code=django.db.migrations.operations.special.RunPython.noop, ), - migrations.RunPython( - code=initialize_document_classifier, - reverse_code=django.db.migrations.operations.special.RunPython.noop, - ), ] diff --git a/src/documents/models.py b/src/documents/models.py index 436f5163a..88598b5f6 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -3,11 +3,12 @@ import logging import os import re -from collections import OrderedDict +from collections import OrderedDict, defaultdict import dateutil.parser from django.conf import settings from django.db import models +from django.dispatch import receiver from django.template.defaultfilters import slugify from django.utils import timezone from django.utils.text import slugify @@ -190,6 +191,14 @@ class Document(models.Model): added = models.DateTimeField( default=timezone.now, editable=False, db_index=True) + filename = models.FilePathField( + max_length=256, + editable=False, + default=None, + null=True, + help_text="Current filename in storage" + ) + archive_serial_number = models.IntegerField( blank=True, null=True, @@ -211,15 +220,123 @@ class Document(models.Model): return "{}: {}".format(created, self.correspondent or self.title) return str(created) + def find_renamed_document(self, subdirectory=""): + suffix = "%07i.%s" % (self.pk, self.file_type) + + # Append .gpg for encrypted files + if self.storage_type == self.STORAGE_TYPE_GPG: + suffix += ".gpg" + + # Go up in the directory hierarchy and try to delete all directories + root = os.path.normpath(Document.filename_to_path(subdirectory)) + + for filename in os.listdir(root): + if filename.endswith(suffix): + return os.path.join(subdirectory, filename) + + fullname = os.path.join(subdirectory, filename) + if os.path.isdir(Document.filename_to_path(fullname)): + return self.find_renamed_document(fullname) + + return None + + @property + def source_filename(self): + # Initial filename generation (for new documents) + if self.filename is None: + self.filename = self.generate_source_filename() + + # Check if document is still available under filename + elif not os.path.isfile(Document.filename_to_path(self.filename)): + recovered_filename = self.find_renamed_document() + + # If we have found the file so update the filename + if recovered_filename is not None: + logger = logging.getLogger(__name__) + logger.warning("Filename of document " + str(self.id) + + " has 
changed and was successfully updated") + self.filename = recovered_filename + + # Remove all empty subdirectories from MEDIA_ROOT + Document.delete_all_empty_subdirectories( + Document.filename_to_path("")) + else: + logger = logging.getLogger(__name__) + logger.error("File of document " + str(self.id) + " has " + + "gone and could not be recovered") + + return self.filename + + @staticmethod + def many_to_dictionary(field): + # Converts ManyToManyField to dictionary by assuming, that field + # entries contain an _ or - which will be used as a delimiter + mydictionary = dict() + + for index, t in enumerate(field.all()): + # Populate tag names by index + mydictionary[index] = slugify(t.name) + + # Find delimiter + delimiter = t.name.find('_') + + if delimiter == -1: + delimiter = t.name.find('-') + + if delimiter == -1: + continue + + key = t.name[:delimiter] + value = t.name[delimiter+1:] + + mydictionary[slugify(key)] = slugify(value) + + return mydictionary + + def generate_source_filename(self): + # Create filename based on configured format + if settings.PAPERLESS_FILENAME_FORMAT is not None: + tags = defaultdict(lambda: slugify(None), + self.many_to_dictionary(self.tags)) + path = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=slugify(self.correspondent), + title=slugify(self.title), + created=slugify(self.created), + added=slugify(self.added), + tags=tags) + else: + path = "" + + # Always append the primary key to guarantee uniqueness of filename + if len(path) > 0: + filename = "%s-%07i.%s" % (path, self.pk, self.file_type) + else: + filename = "%07i.%s" % (self.pk, self.file_type) + + # Append .gpg for encrypted files + if self.storage_type == self.STORAGE_TYPE_GPG: + filename += ".gpg" + + return filename + + def create_source_directory(self): + new_filename = self.generate_source_filename() + + # Determine the full "target" path + dir_new = Document.filename_to_path(os.path.dirname(new_filename)) + + # Create new path + os.makedirs(dir_new, exist_ok=True) + @property def source_path(self): - file_name = "{:07}.{}".format(self.pk, self.file_type) - if self.storage_type == self.STORAGE_TYPE_GPG: - file_name += ".gpg" + return Document.filename_to_path(self.source_filename) + @staticmethod + def filename_to_path(filename): return os.path.join( settings.ORIGINALS_DIR, - file_name + filename ) @property @@ -245,6 +362,125 @@ class Document(models.Model): def thumbnail_file(self): return open(self.thumbnail_path, "rb") + def set_filename(self, filename): + if os.path.isfile(Document.filename_to_path(filename)): + self.filename = filename + + @staticmethod + def try_delete_empty_directories(directory): + # Go up in the directory hierarchy and try to delete all directories + directory = os.path.normpath(directory) + root = os.path.normpath(Document.filename_to_path("")) + + while directory != root: + # Try to delete the current directory + try: + os.rmdir(directory) + except os.error: + # Directory not empty, no need to go further up + return + + # Cut off actual directory and go one level up + directory, _ = os.path.split(directory) + directory = os.path.normpath(directory) + + @staticmethod + def delete_all_empty_subdirectories(directory): + # Go through all folders and try to delete all directories + root = os.path.normpath(Document.filename_to_path(directory)) + + for filename in os.listdir(root): + fullname = os.path.join(directory, filename) + + if not os.path.isdir(Document.filename_to_path(fullname)): + continue + + # Go into subdirectory to see, if there is more to 
delete + Document.delete_all_empty_subdirectories( + os.path.join(directory, filename)) + + # Try to delete the directory + try: + os.rmdir(Document.filename_to_path(fullname)) + continue + except os.error: + # Directory not empty, no need to go further up + continue + + +@receiver(models.signals.m2m_changed, sender=Document.tags.through) +@receiver(models.signals.post_save, sender=Document) +def update_filename(sender, instance, **kwargs): + # Skip if document has not been saved yet + if instance.filename is None: + return + + # Check is file exists and update filename otherwise + if not os.path.isfile(Document.filename_to_path(instance.filename)): + instance.filename = instance.source_filename + + # Build the new filename + new_filename = instance.generate_source_filename() + + # If the filename is the same, then nothing needs to be done + if instance.filename == new_filename: + return + + # Determine the full "target" path + path_new = instance.filename_to_path(new_filename) + dir_new = instance.filename_to_path(os.path.dirname(new_filename)) + + # Create new path + instance.create_source_directory() + + # Determine the full "current" path + path_current = instance.filename_to_path(instance.source_filename) + + # Move file + try: + os.rename(path_current, path_new) + except PermissionError: + # Do not update filename in object + return + except FileNotFoundError: + logger = logging.getLogger(__name__) + logger.error("Renaming of document " + str(instance.id) + " failed " + + "as file " + instance.filename + " was no longer present") + return + + # Delete empty directory + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + Document.try_delete_empty_directories(old_path) + + instance.filename = new_filename + + # Save instance + # This will not cause a cascade of post_save signals, as next time + # nothing needs to be renamed + instance.save() + + +@receiver(models.signals.post_delete, sender=Document) +def delete_files(sender, instance, **kwargs): + if instance.filename is None: + return + + # Remove the document + old_file = instance.filename_to_path(instance.filename) + + try: + os.remove(old_file) + except FileNotFoundError: + logger = logging.getLogger(__name__) + logger.warning("Deleted document " + str(instance.id) + " but file " + + old_file + " was no longer present") + + # And remove the directory (if applicable) + old_dir = os.path.dirname(instance.filename) + old_path = instance.filename_to_path(old_dir) + Document.try_delete_empty_directories(old_path) + class Log(models.Model): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py new file mode 100644 index 000000000..3b7c757d4 --- /dev/null +++ b/src/documents/tests/test_file_handling.py @@ -0,0 +1,559 @@ +import datetime +import os +import shutil +from unittest import mock +from uuid import uuid4 +from pathlib import Path +from shutil import rmtree + +from dateutil import tz +from django.test import TestCase, override_settings + +from django.utils.text import slugify +from ..models import Tag, Document, Correspondent +from django.conf import settings + + +class TestDate(TestCase): + deletion_list = [] + + def add_to_deletion_list(self, dirname): + self.deletion_list.append(dirname) + + def setUp(self): + folder = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(folder + "/documents/originals") + override_settings(MEDIA_ROOT=folder).enable() + override_settings(ORIGINALS_DIR=folder + "/documents/originals").enable() + 
self.add_to_deletion_list(folder) + + def tearDown(self): + for dirname in self.deletion_list: + shutil.rmtree(dirname, ignore_errors=True) + + @override_settings(PAPERLESS_FILENAME_FORMAT="") + def test_source_filename(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.source_filename, "0000001.pdf") + + document.filename = "test.pdf" + self.assertEqual(document.source_filename, "test.pdf") + + @override_settings(PAPERLESS_FILENAME_FORMAT="") + def test_generate_source_filename(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.generate_source_filename(), "0000001.pdf") + + document.storage_type = Document.STORAGE_TYPE_GPG + self.assertEqual(document.generate_source_filename(), + "0000001.pdf.gpg") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_file_renaming(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Enable encryption and check again + document.storage_type = Document.STORAGE_TYPE_GPG + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf.gpg") + document.save() + + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf.gpg"), True) + self.assertEqual(document.generate_source_filename(), + "test/test-0000001.pdf.gpg") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_file_renaming_missing_permissions(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Make the folder read- and execute-only (no writing and no renaming) + os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555) + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + 
"originals/none/none-0000001.pdf"), True) + self.assertEqual(document.source_filename, + "none/none-0000001.pdf") + + os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777) + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_delete(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Ensure file deletion after delete + document.delete() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_delete_nofile(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_directory_not_empty(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + Path(document.source_path + "test").touch() + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + + # Cleanup + os.remove(settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdftest") + os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") + def test_tags_with_underscore(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type_demo") + document.tags.create(name="foo_bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") + def test_tags_with_dash(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type-demo") + document.tags.create(name="foo-bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + 
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") + def test_tags_malformed(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="type:demo") + document.tags.create(name="foo:bar") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") + def test_tags_all(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Add tag to document + document.tags.create(name="demo") + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "demo-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") + def test_tags_out_of_bounds_0(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}") + def test_tags_out_of_bounds_10000000(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}") + def test_tags_out_of_bounds_99(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + document.delete() + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}/{correspondent}") + def test_nested_directory_cleanup(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none/none"), True) + + document.delete() + + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + + "/documents/originals/none/none/none-0000001.pdf"), + False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none/none"), False) + 
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals"), True) + + @override_settings(PAPERLESS_FILENAME_FORMAT=None) + def test_format_none(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + self.assertEqual(document.generate_source_filename(), "0000001.pdf") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_renamed(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Rename the document "illegaly" + os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") + os.rename(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf", + settings.MEDIA_ROOT + "/documents/originals/" + + "test/test-0000001.pdf") + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf"), True) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-0000001.pdf"), False) + + # Set new correspondent and expect document to be saved properly + document.correspondent = Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/foo/foo-0000001.pdf"), True) + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/foo"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), False) + self.assertEqual(document.generate_source_filename(), + "foo/foo-0000001.pdf") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_renamed_encrypted(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_GPG + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf.gpg") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf.gpg") + + # Rename the document "illegaly" + os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") + os.rename(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf.gpg", + settings.MEDIA_ROOT + "/documents/originals/" + + "test/test-0000001.pdf.gpg") + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/test/test-0000001.pdf.gpg"), True) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-0000001.pdf"), False) + + # Set new correspondent and expect document to be saved properly + document.correspondent = 
Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/foo/foo-0000001.pdf.gpg"), True) + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/foo"), True) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), False) + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/test"), False) + self.assertEqual(document.generate_source_filename(), + "foo/foo-0000001.pdf.gpg") + + def test_delete_all_empty_subdirectories(self): + # Create our working directory + tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(tmp) + self.add_to_deletion_list(tmp) + + os.makedirs(os.path.join(tmp, "empty")) + os.makedirs(os.path.join(tmp, "empty", "subdirectory")) + + os.makedirs(os.path.join(tmp, "notempty")) + Path(os.path.join(tmp, "notempty", "file")).touch() + + Document.delete_all_empty_subdirectories(tmp) + + self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) + self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False) + self.assertEqual(os.path.isfile( + os.path.join(tmp, "notempty", "file")), True) + + def test_try_delete_empty_directories(self): + # Create our working directory + tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + os.makedirs(tmp) + self.add_to_deletion_list(tmp) + + os.makedirs(os.path.join(tmp, "notempty")) + Path(os.path.join(tmp, "notempty", "file")).touch() + os.makedirs(os.path.join(tmp, "notempty", "empty")) + + Document.try_delete_empty_directories( + os.path.join(tmp, "notempty", "empty")) + self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) + self.assertEqual(os.path.isfile( + os.path.join(tmp, "notempty", "file")), True) + self.assertEqual(os.path.isdir( + os.path.join(tmp, "notempty", "empty")), False) + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_document_accidentally_deleted(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + Path(document.source_path).touch() + + # Test source_path + self.assertEqual(document.source_path, settings.MEDIA_ROOT + + "/documents/originals/none/none-0000001.pdf") + + # Delete the document "illegaly" + os.remove(settings.MEDIA_ROOT + "/documents/originals/" + + "none/none-0000001.pdf") + + # Set new correspondent and expect document to be saved properly + document.correspondent = Correspondent.objects.get_or_create( + name="foo")[0] + document.save() + + # Check proper handling of files + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + + "/documents/originals/none"), True) + self.assertEqual(document.source_filename, + "none/none-0000001.pdf") + + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_set_filename(self): + document = Document() + document.file_type = "pdf" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + tmp = document.source_filename + self.assertEqual(document.generate_source_filename(), + "none/none-0000001.pdf") + document.create_source_directory() + 
Path(document.source_path).touch() + + # Set existing filename + document.set_filename(tmp) + self.assertEqual(document.source_filename, "none/none-0000001.pdf") + + # Set non-existing filename + document.set_filename("doesnotexist") + self.assertEqual(document.source_filename, "none/none-0000001.pdf") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index bb71e4764..fd772577b 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -299,3 +299,6 @@ FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER") FILENAME_PARSE_TRANSFORMS = [] for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")): FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"])) + +# Specify the filename format for out files +PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT") From 6d14e111b673a8640eb3e1a44f022dae978be533 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 13:49:15 +0100 Subject: [PATCH 11/26] fixed most of the test cases --- src/documents/tests/test_consumer.py | 56 ---------------------------- src/documents/tests/test_parsers.py | 50 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 56 deletions(-) create mode 100644 src/documents/tests/test_parsers.py diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 512447741..4d5360c7b 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -1,66 +1,10 @@ import re from django.test import TestCase -from unittest import mock -from tempfile import TemporaryDirectory -from ..consumer import Consumer from ..models import FileInfo, Tag -class TestConsumer(TestCase): - - class DummyParser(object): - pass - - def test__get_parser_class_1_parser(self): - self.assertEqual( - self._get_consumer()._get_parser_class("doc.pdf"), - self.DummyParser - ) - - @mock.patch("documents.consumer.os.makedirs") - @mock.patch("documents.consumer.os.path.exists", return_value=True) - @mock.patch("documents.consumer.document_consumer_declaration.send") - def test__get_parser_class_n_parsers(self, m, *args): - - class DummyParser1(object): - pass - - class DummyParser2(object): - pass - - m.return_value = ( - (None, lambda _: {"weight": 0, "parser": DummyParser1}), - (None, lambda _: {"weight": 1, "parser": DummyParser2}), - ) - with TemporaryDirectory() as tmpdir: - self.assertEqual( - Consumer(consume=tmpdir)._get_parser_class("doc.pdf"), - DummyParser2 - ) - - @mock.patch("documents.consumer.os.makedirs") - @mock.patch("documents.consumer.os.path.exists", return_value=True) - @mock.patch("documents.consumer.document_consumer_declaration.send") - def test__get_parser_class_0_parsers(self, m, *args): - m.return_value = ((None, lambda _: None),) - with TemporaryDirectory() as tmpdir: - self.assertIsNone( - Consumer(consume=tmpdir)._get_parser_class("doc.pdf") - ) - - @mock.patch("documents.consumer.os.makedirs") - @mock.patch("documents.consumer.os.path.exists", return_value=True) - @mock.patch("documents.consumer.document_consumer_declaration.send") - def _get_consumer(self, m, *args): - m.return_value = ( - (None, lambda _: {"weight": 0, "parser": self.DummyParser}), - ) - with TemporaryDirectory() as tmpdir: - return Consumer(consume=tmpdir) - - class TestAttributes(TestCase): TAGS = ("tag1", "tag2", "tag3") diff --git a/src/documents/tests/test_parsers.py b/src/documents/tests/test_parsers.py new file mode 100644 index 000000000..f49d6ca4d --- /dev/null +++ 
b/src/documents/tests/test_parsers.py @@ -0,0 +1,50 @@ +from tempfile import TemporaryDirectory +from unittest import mock + +from django.test import TestCase + +from documents.parsers import get_parser_class + + +class TestParserDiscovery(TestCase): + + @mock.patch("documents.parsers.document_consumer_declaration.send") + def test__get_parser_class_1_parser(self, m, *args): + class DummyParser(object): + pass + + m.return_value = ( + (None, lambda _: {"weight": 0, "parser": DummyParser}), + ) + + self.assertEqual( + get_parser_class("doc.pdf"), + DummyParser + ) + + @mock.patch("documents.parsers.document_consumer_declaration.send") + def test__get_parser_class_n_parsers(self, m, *args): + + class DummyParser1(object): + pass + + class DummyParser2(object): + pass + + m.return_value = ( + (None, lambda _: {"weight": 0, "parser": DummyParser1}), + (None, lambda _: {"weight": 1, "parser": DummyParser2}), + ) + + self.assertEqual( + get_parser_class("doc.pdf"), + DummyParser2 + ) + + @mock.patch("documents.parsers.document_consumer_declaration.send") + def test__get_parser_class_0_parsers(self, m, *args): + m.return_value = ((None, lambda _: None),) + with TemporaryDirectory() as tmpdir: + self.assertIsNone( + get_parser_class("doc.pdf") + ) From 2c054621fd84f8e8ed5df04a99575d5ffae9c200 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 13:52:35 +0100 Subject: [PATCH 12/26] updated docker-ignore --- .dockerignore | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index 7534368f0..c00be4161 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,7 @@ -src-ui/node_modules -src-ui/dist +/src-ui/node_modules +/src-ui/dist .git +/export +/consume +/media +/data From 4acea53fae28124b9b9f424d9153cc28e5f81333 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 16:58:06 +0100 Subject: [PATCH 13/26] added sorting to management pages --- src-ui/src/app/app.module.ts | 4 ++- .../correspondent-list.component.html | 8 ++--- .../document-type-list.component.html | 6 ++-- .../generic-list/generic-list.component.ts | 29 ++++++++++++++++-- .../manage/tag-list/tag-list.component.html | 6 ++-- .../app/directives/sortable.directive.spec.ts | 8 +++++ .../src/app/directives/sortable.directive.ts | 30 +++++++++++++++++++ .../rest/abstract-paperless-service.ts | 13 +++++++- .../src/app/services/rest/document.service.ts | 10 +------ src-ui/src/styles.css | 30 +++++++++++++++++++ 10 files changed, 121 insertions(+), 23 deletions(-) create mode 100644 src-ui/src/app/directives/sortable.directive.spec.ts create mode 100644 src-ui/src/app/directives/sortable.directive.ts diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index e10bdbd0c..584b7bc7a 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -40,6 +40,7 @@ import { SaveViewConfigDialogComponent } from './components/document-list/save-v import { InfiniteScrollModule } from 'ngx-infinite-scroll'; import { DateTimeComponent } from './components/common/input/date-time/date-time.component'; import { TagsComponent } from './components/common/input/tags/tags.component'; +import { SortableDirective } from './directives/sortable.directive'; @NgModule({ declarations: [ @@ -73,7 +74,8 @@ import { TagsComponent } from './components/common/input/tags/tags.component'; CheckComponent, SaveViewConfigDialogComponent, DateTimeComponent, - TagsComponent + TagsComponent, + SortableDirective ], imports: [ BrowserModule, diff --git 
a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.html b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.html index fc01471d7..a790a18b3 100644 --- a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.html +++ b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.html @@ -9,10 +9,10 @@ - - - - + + + + diff --git a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.html b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.html index 94e7aa3b7..a07f6c7e4 100644 --- a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.html +++ b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.html @@ -10,9 +10,9 @@
NameMatchingDocument countLast correspondenceNameMatchingDocument countLast correspondence Actions
- - - + + + diff --git a/src-ui/src/app/components/manage/generic-list/generic-list.component.ts b/src-ui/src/app/components/manage/generic-list/generic-list.component.ts index 12cf08ea9..d5477d010 100644 --- a/src-ui/src/app/components/manage/generic-list/generic-list.component.ts +++ b/src-ui/src/app/components/manage/generic-list/generic-list.component.ts @@ -1,7 +1,8 @@ -import { Directive, OnInit } from '@angular/core'; +import { Directive, OnInit, QueryList, ViewChildren } from '@angular/core'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { MatchingModel, MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'; import { ObjectWithId } from 'src/app/data/object-with-id'; +import { SortableDirective, SortEvent } from 'src/app/directives/sortable.directive'; import { AbstractPaperlessService } from 'src/app/services/rest/abstract-paperless-service'; import { DeleteDialogComponent } from '../../common/delete-dialog/delete-dialog.component'; @@ -14,12 +15,17 @@ export abstract class GenericListComponent implements On private editDialogComponent: any) { } + @ViewChildren(SortableDirective) headers: QueryList; + public data: T[] = [] public page = 1 public collectionSize = 0 + public sortField: string + public sortDirection: string + getMatching(o: MatchingModel) { if (o.matching_algorithm == MATCH_AUTO) { return "Automatic" @@ -30,12 +36,31 @@ export abstract class GenericListComponent implements On } } + onSort(event: SortEvent) { + + if (event.direction && event.direction.length > 0) { + this.sortField = event.column + this.sortDirection = event.direction + } else { + this.sortField = null + this.sortDirection = null + } + + this.headers.forEach(header => { + if (header.sortable !== this.sortField) { + header.direction = ''; + } + }); + + this.reloadData() + } + ngOnInit(): void { this.reloadData() } reloadData() { - this.service.list(this.page).subscribe(c => { + this.service.list(this.page, null, this.sortField, this.sortDirection).subscribe(c => { this.data = c.results this.collectionSize = c.count }); diff --git a/src-ui/src/app/components/manage/tag-list/tag-list.component.html b/src-ui/src/app/components/manage/tag-list/tag-list.component.html index 76ae9fb93..d06748cec 100644 --- a/src-ui/src/app/components/manage/tag-list/tag-list.component.html +++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.html @@ -9,10 +9,10 @@
NameMatchingDocument countNameMatchingDocument count Actions
- + - - + + diff --git a/src-ui/src/app/directives/sortable.directive.spec.ts b/src-ui/src/app/directives/sortable.directive.spec.ts new file mode 100644 index 000000000..f77b499de --- /dev/null +++ b/src-ui/src/app/directives/sortable.directive.spec.ts @@ -0,0 +1,8 @@ +import { SortableDirective } from './sortable.directive'; + +describe('SortableDirective', () => { + it('should create an instance', () => { + const directive = new SortableDirective(); + expect(directive).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/directives/sortable.directive.ts b/src-ui/src/app/directives/sortable.directive.ts new file mode 100644 index 000000000..11c474dbb --- /dev/null +++ b/src-ui/src/app/directives/sortable.directive.ts @@ -0,0 +1,30 @@ +import { Directive, EventEmitter, Input, Output } from '@angular/core'; + +export interface SortEvent { + column: string; + direction: string; +} + +const rotate: {[key: string]: string} = { 'asc': 'des', 'des': '', '': 'asc' }; + +@Directive({ + selector: 'th[sortable]', + host: { + '[class.asc]': 'direction === "asc"', + '[class.des]': 'direction === "des"', + '(click)': 'rotate()' + } +}) +export class SortableDirective { + + constructor() { } + + @Input() sortable: string = ''; + @Input() direction: string = ''; + @Output() sort = new EventEmitter(); + + rotate() { + this.direction = rotate[this.direction]; + this.sort.emit({column: this.sortable, direction: this.direction}); + } +} diff --git a/src-ui/src/app/services/rest/abstract-paperless-service.ts b/src-ui/src/app/services/rest/abstract-paperless-service.ts index cdf157aaa..16064c702 100644 --- a/src-ui/src/app/services/rest/abstract-paperless-service.ts +++ b/src-ui/src/app/services/rest/abstract-paperless-service.ts @@ -21,7 +21,17 @@ export abstract class AbstractPaperlessService { return url } - list(page?: number, pageSize?: number, ordering?: string, extraParams?): Observable> { + private getOrderingQueryParam(sortField: string, sortDirection: string) { + if (sortField && sortDirection) { + return (sortDirection == 'des' ? '-' : '') + sortField + } else if (sortField) { + return sortField + } else { + return null + } + } + + list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, extraParams?): Observable> { let httpParams = new HttpParams() if (page) { httpParams = httpParams.set('page', page.toString()) @@ -29,6 +39,7 @@ export abstract class AbstractPaperlessService { if (pageSize) { httpParams = httpParams.set('page_size', pageSize.toString()) } + let ordering = this.getOrderingQueryParam(sortField, sortDirection) if (ordering) { httpParams = httpParams.set('ordering', ordering) } diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index bc1afb419..88a52ee04 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -47,16 +47,8 @@ export class DocumentService extends AbstractPaperlessService } } - private getOrderingQueryParam(sortField: string, sortDirection: string) { - if (DOCUMENT_SORT_FIELDS.find(f => f.field == sortField)) { - return (sortDirection == SORT_DIRECTION_DESCENDING ? 
'-' : '') + sortField - } else { - return null - } - } - list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, filterRules?: FilterRule[]): Observable> { - return super.list(page, pageSize, this.getOrderingQueryParam(sortField, sortDirection), this.filterRulesToQueryParams(filterRules)) + return super.list(page, pageSize, sortField, sortDirection, this.filterRulesToQueryParams(filterRules)) } getPreviewUrl(id: number): string { diff --git a/src-ui/src/styles.css b/src-ui/src/styles.css index 0f16a68f0..c7849912e 100644 --- a/src-ui/src/styles.css +++ b/src-ui/src/styles.css @@ -28,4 +28,34 @@ body { .form-control-dark:focus { border-color: transparent; box-shadow: 0 0 0 3px rgba(255, 255, 255, .25); +} + + +.asc { + background-color: #f8f9fa!important; +} + +.asc:after { + content: ''; + transform: rotate(180deg); + background: url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAAmxJREFUeAHtmksrRVEUx72fH8CIGQNJkpGUUmakDEiZSJRIZsRQmCkTJRmZmJgQE0kpX0D5DJKJgff7v+ru2u3O3vvc67TOvsdatdrnnP1Y///v7HvvubdbUiIhBISAEBACQkAICAEhIAQ4CXSh2DnyDfmCPEG2Iv9F9MPlM/LHyAecdyMzHYNwR3fdNK/OH9HXl1UCozD24TCvILxizEDWIEzA0FcM8woCgRrJCoS5PIwrANQSMAJX1LEI9bqpQo4JYNFFKRSvIgsxHDVnqZgIkPnNBM0rIGtYk9YOOsqgbgepRCfdbmFtqhFkVEDVPjJp0+Z6e6hRHhqBKgg6ZDCvYBygVmUoEGoh5JTRvIJwhJo1aUOoh4CLPMyvxxi7EWOMgnCGsXXI1GIXlZUYX7ucU+kbR8NW8lh3O7cue0Pk32MKndfUxQFAwxdirk3fHappAnc0oqDPzDfGTBrCfHP04dM4oTV8cxr0SVzH9FF07xD3ib6xCDE+M+aUcVygtWzzbtGX2rPBrEUYfecfQkaFzYi6HjVnGBdtL7epqAlc1+jRdAap74RrnPc4BCijttY2tRcdN0g17w7HqZrXhdJTYAuS3hd8z+vKgK3V1zWPae0mZDMykadBn1hTQBLnZNwVrJpSe/NwEeDsEwCctEOsJTsgxLvCqUl2ACftEGvJDgjxrnBqkh3ASTvEWrIDQrwrnJpkB3DSDrGW7IAQ7wqnJtkBnLRztejXXVu4+mxz/nQ9jR1w5VB86ejLTFcnnDwhzV+F6T+CHZlx6THSjn76eyyBIOPHyDakhBAQAkJACAgBISAEhIAQYCLwC8JxpAmsEGt6AAAAAElFTkSuQmCC") no-repeat; + height: 1rem; + width: 1rem; + display: block; + background-size: 1rem; + float: right; +} + +.des { + background-color: #f8f9fa!important; +} + +.des:after { + content: ''; + background: url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAAmxJREFUeAHtmksrRVEUx72fH8CIGQNJkpGUUmakDEiZSJRIZsRQmCkTJRmZmJgQE0kpX0D5DJKJgff7v+ru2u3O3vvc67TOvsdatdrnnP1Y///v7HvvubdbUiIhBISAEBACQkAICAEhIAQ4CXSh2DnyDfmCPEG2Iv9F9MPlM/LHyAecdyMzHYNwR3fdNK/OH9HXl1UCozD24TCvILxizEDWIEzA0FcM8woCgRrJCoS5PIwrANQSMAJX1LEI9bqpQo4JYNFFKRSvIgsxHDVnqZgIkPnNBM0rIGtYk9YOOsqgbgepRCfdbmFtqhFkVEDVPjJp0+Z6e6hRHhqBKgg6ZDCvYBygVmUoEGoh5JTRvIJwhJo1aUOoh4CLPMyvxxi7EWOMgnCGsXXI1GIXlZUYX7ucU+kbR8NW8lh3O7cue0Pk32MKndfUxQFAwxdirk3fHappAnc0oqDPzDfGTBrCfHP04dM4oTV8cxr0SVzH9FF07xD3ib6xCDE+M+aUcVygtWzzbtGX2rPBrEUYfecfQkaFzYi6HjVnGBdtL7epqAlc1+jRdAap74RrnPc4BCijttY2tRcdN0g17w7HqZrXhdJTYAuS3hd8z+vKgK3V1zWPae0mZDMykadBn1hTQBLnZNwVrJpSe/NwEeDsEwCctEOsJTsgxLvCqUl2ACftEGvJDgjxrnBqkh3ASTvEWrIDQrwrnJpkB3DSDrGW7IAQ7wqnJtkBnLRztejXXVu4+mxz/nQ9jR1w5VB86ejLTFcnnDwhzV+F6T+CHZlx6THSjn76eyyBIOPHyDakhBAQAkJACAgBISAEhIAQYCLwC8JxpAmsEGt6AAAAAElFTkSuQmCC") no-repeat; + height: 1rem; + width: 1rem; + display: block; + background-size: 1rem; + float: right; } \ No newline at end of file From e81f7e0430524b4cdcaac1a8c86527f149cd7929 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sun, 8 Nov 2020 17:11:18 +0100 Subject: [PATCH 14/26] fix logs --- src-ui/src/app/components/manage/logs/logs.component.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src-ui/src/app/components/manage/logs/logs.component.ts b/src-ui/src/app/components/manage/logs/logs.component.ts index 0550e8151..da507cbe5 100644 --- 
a/src-ui/src/app/components/manage/logs/logs.component.ts +++ b/src-ui/src/app/components/manage/logs/logs.component.ts @@ -20,7 +20,7 @@ export class LogsComponent implements OnInit { } reload() { - this.logService.list(1, 50, null, {'level__gte': this.level}).subscribe(result => this.logs = result.results) + this.logService.list(1, 50, 'created', 'des', {'level__gte': this.level}).subscribe(result => this.logs = result.results) } getLevelText(level: number) { @@ -32,7 +32,7 @@ export class LogsComponent implements OnInit { if (this.logs.length > 0) { lastCreated = this.logs[this.logs.length-1].created } - this.logService.list(1, 25, null, {'created__lt': lastCreated, 'level__gte': this.level}).subscribe(result => { + this.logService.list(1, 25, 'created', 'des', {'created__lt': lastCreated, 'level__gte': this.level}).subscribe(result => { this.logs.push(...result.results) }) } From 7bd843283ddd5a3e4a40277dc1e4d42e31f65c45 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Mon, 9 Nov 2020 15:28:12 +0100 Subject: [PATCH 15/26] use django authentication instead of auth tokens. --- Dockerfile | 2 +- src-ui/angular.json | 3 +- src-ui/src/app/app-routing.module.ts | 23 +++--- src-ui/src/app/app.module.ts | 10 +-- .../app-frame/app-frame.component.html | 2 +- .../app-frame/app-frame.component.ts | 8 +-- .../app/components/login/login.component.html | 17 ----- .../components/login/login.component.spec.ts | 25 ------- .../app/components/login/login.component.ts | 34 --------- .../app/services/auth-guard.service.spec.ts | 16 ----- src-ui/src/app/services/auth-guard.service.ts | 20 ------ .../src/app/services/auth.interceptor.spec.ts | 16 ----- src-ui/src/app/services/auth.interceptor.ts | 37 ---------- src-ui/src/app/services/auth.service.spec.ts | 16 ----- src-ui/src/app/services/auth.service.ts | 72 ------------------- .../src/app/services/rest/document.service.ts | 9 ++- src/documents/static/bootstrap.min.css | 7 ++ .../documents/static/signin.css | 30 ++++---- src/documents/templates/index.html | 8 +-- .../templates/registration/logged_out.html | 44 ++++++++++++ .../templates/registration/login.html | 54 ++++++++++++++ src/paperless/auth.py | 20 ++++-- src/paperless/settings.py | 17 ++--- src/paperless/urls.py | 10 +-- 24 files changed, 172 insertions(+), 328 deletions(-) delete mode 100644 src-ui/src/app/components/login/login.component.html delete mode 100644 src-ui/src/app/components/login/login.component.spec.ts delete mode 100644 src-ui/src/app/components/login/login.component.ts delete mode 100644 src-ui/src/app/services/auth-guard.service.spec.ts delete mode 100644 src-ui/src/app/services/auth-guard.service.ts delete mode 100644 src-ui/src/app/services/auth.interceptor.spec.ts delete mode 100644 src-ui/src/app/services/auth.interceptor.ts delete mode 100644 src-ui/src/app/services/auth.service.spec.ts delete mode 100644 src-ui/src/app/services/auth.service.ts create mode 100644 src/documents/static/bootstrap.min.css rename src-ui/src/app/components/login/login.component.css => src/documents/static/signin.css (72%) create mode 100644 src/documents/templates/registration/logged_out.html create mode 100644 src/documents/templates/registration/login.html diff --git a/Dockerfile b/Dockerfile index 015d511d4..05fd430aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -65,7 +65,7 @@ COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh # copy app COPY src/ ./src/ -COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/ +COPY --from=frontend 
/usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/ # add users, setup scripts RUN addgroup --gid 1000 paperless \ diff --git a/src-ui/angular.json b/src-ui/angular.json index 6135ffa91..aca54b8e0 100644 --- a/src-ui/angular.json +++ b/src-ui/angular.json @@ -14,6 +14,7 @@ "builder": "@angular-devkit/build-angular:browser", "options": { "outputPath": "dist/paperless-ui", + "outputHashing": "none", "index": "src/index.html", "main": "src/main.ts", "polyfills": "src/polyfills.ts", @@ -38,7 +39,7 @@ } ], "optimization": true, - "outputHashing": "all", + "outputHashing": "none", "sourceMap": false, "extractCss": true, "namedChunks": false, diff --git a/src-ui/src/app/app-routing.module.ts b/src-ui/src/app/app-routing.module.ts index fde8fd31f..27f0629b4 100644 --- a/src-ui/src/app/app-routing.module.ts +++ b/src-ui/src/app/app-routing.module.ts @@ -4,7 +4,6 @@ import { AppFrameComponent } from './components/app-frame/app-frame.component'; import { DashboardComponent } from './components/dashboard/dashboard.component'; import { DocumentDetailComponent } from './components/document-detail/document-detail.component'; import { DocumentListComponent } from './components/document-list/document-list.component'; -import { LoginComponent } from './components/login/login.component'; import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component'; import { DocumentTypeListComponent } from './components/manage/document-type-list/document-type-list.component'; import { LogsComponent } from './components/manage/logs/logs.component'; @@ -12,25 +11,23 @@ import { SettingsComponent } from './components/manage/settings/settings.compone import { TagListComponent } from './components/manage/tag-list/tag-list.component'; import { NotFoundComponent } from './components/not-found/not-found.component'; import { SearchComponent } from './components/search/search.component'; -import { AuthGuardService } from './services/auth-guard.service'; const routes: Routes = [ {path: '', redirectTo: 'dashboard', pathMatch: 'full'}, {path: '', component: AppFrameComponent, children: [ - {path: 'dashboard', component: DashboardComponent, canActivate: [AuthGuardService] }, - {path: 'documents', component: DocumentListComponent, canActivate: [AuthGuardService] }, - {path: 'view/:id', component: DocumentListComponent, canActivate: [AuthGuardService] }, - {path: 'search', component: SearchComponent, canActivate: [AuthGuardService] }, - {path: 'documents/:id', component: DocumentDetailComponent, canActivate: [AuthGuardService] }, + {path: 'dashboard', component: DashboardComponent }, + {path: 'documents', component: DocumentListComponent }, + {path: 'view/:id', component: DocumentListComponent }, + {path: 'search', component: SearchComponent }, + {path: 'documents/:id', component: DocumentDetailComponent }, - {path: 'tags', component: TagListComponent, canActivate: [AuthGuardService] }, - {path: 'documenttypes', component: DocumentTypeListComponent, canActivate: [AuthGuardService] }, - {path: 'correspondents', component: CorrespondentListComponent, canActivate: [AuthGuardService] }, - {path: 'logs', component: LogsComponent, canActivate: [AuthGuardService] }, - {path: 'settings', component: SettingsComponent, canActivate: [AuthGuardService] }, + {path: 'tags', component: TagListComponent }, + {path: 'documenttypes', component: DocumentTypeListComponent }, + {path: 'correspondents', component: CorrespondentListComponent }, + {path: 'logs', component: LogsComponent 
}, + {path: 'settings', component: SettingsComponent }, ]}, - {path: 'login', component: LoginComponent }, {path: '404', component: NotFoundComponent}, {path: '**', redirectTo: '/404', pathMatch: 'full'} ]; diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index 584b7bc7a..dad57280d 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -12,7 +12,6 @@ import { TagListComponent } from './components/manage/tag-list/tag-list.componen import { DocumentTypeListComponent } from './components/manage/document-type-list/document-type-list.component'; import { LogsComponent } from './components/manage/logs/logs.component'; import { SettingsComponent } from './components/manage/settings/settings.component'; -import { LoginComponent } from './components/login/login.component'; import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { DatePipe } from '@angular/common'; import { SafePipe } from './pipes/safe.pipe'; @@ -29,7 +28,6 @@ import { PageHeaderComponent } from './components/common/page-header/page-header import { AppFrameComponent } from './components/app-frame/app-frame.component'; import { ToastsComponent } from './components/common/toasts/toasts.component'; import { FilterEditorComponent } from './components/filter-editor/filter-editor.component'; -import { AuthInterceptor } from './services/auth.interceptor'; import { DocumentCardLargeComponent } from './components/document-list/document-card-large/document-card-large.component'; import { DocumentCardSmallComponent } from './components/document-list/document-card-small/document-card-small.component'; import { NgxFileDropModule } from 'ngx-file-drop'; @@ -53,7 +51,6 @@ import { SortableDirective } from './directives/sortable.directive'; DocumentTypeListComponent, LogsComponent, SettingsComponent, - LoginComponent, SafePipe, NotFoundComponent, CorrespondentEditDialogComponent, @@ -88,12 +85,7 @@ import { SortableDirective } from './directives/sortable.directive'; InfiniteScrollModule ], providers: [ - DatePipe, - { - provide: HTTP_INTERCEPTORS, - useClass: AuthInterceptor, - multi: true - } + DatePipe ], bootstrap: [AppComponent] }) diff --git a/src-ui/src/app/components/app-frame/app-frame.component.html b/src-ui/src/app/components/app-frame/app-frame.component.html index ad12a9d43..0b18777ef 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.html +++ b/src-ui/src/app/components/app-frame/app-frame.component.html @@ -10,7 +10,7 @@
-        <th scope="col">Name</th>
+        <th scope="col" sortable="name" (sort)="onSort($event)">Name</th>
         <th scope="col">Colour</th>
-        <th scope="col">Matching</th>
-        <th scope="col">Document count</th>
+        <th scope="col" sortable="matching_algorithm" (sort)="onSort($event)">Matching</th>
+        <th scope="col" sortable="document_count" (sort)="onSort($event)">Document count</th>
         <th scope="col">Actions</th>
+
@@ -57,7 +57,7 @@ -
+
\ No newline at end of file From 9d22d9c9b1835c7f755278338721c4dd764dc162 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Mon, 9 Nov 2020 20:29:02 +0100 Subject: [PATCH 19/26] added a task scheduler for recurring tasks --- .gitignore | 1 - Dockerfile | 8 +- Pipfile | 4 +- Pipfile.lock | 183 +++++++++++------- docker-compose.env.example | 4 - ...-compose.yml.example => docker-compose.yml | 12 +- paperless.conf.example | 10 + scripts/paperless-cron | 5 - scripts/supervisord.conf | 5 +- .../commands/document_create_classifier.py | 31 +-- .../management/commands/document_index.py | 15 +- .../management/commands/document_rerun_ocr.py | 60 ------ .../migrations/1001_auto_20201109_1636.py | 28 +++ src/documents/tasks.py | 57 ++++++ src/paperless/settings.py | 12 ++ 15 files changed, 243 insertions(+), 192 deletions(-) rename docker-compose.yml.example => docker-compose.yml (79%) delete mode 100644 scripts/paperless-cron delete mode 100644 src/documents/management/commands/document_rerun_ocr.py create mode 100644 src/documents/migrations/1001_auto_20201109_1636.py create mode 100644 src/documents/tasks.py diff --git a/.gitignore b/.gitignore index 871a7bd08..25c7c421a 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,6 @@ target/ .virtualenv virtualenv /venv -docker-compose.yml docker-compose.env # Used for development diff --git a/Dockerfile b/Dockerfile index 05fd430aa..bb96305f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,6 @@ COPY Pipfile* ./ #Dependencies RUN apt-get update \ && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ - anacron \ build-essential \ curl \ ghostscript \ @@ -60,7 +59,6 @@ RUN apt-get update \ COPY scripts/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml COPY scripts/gunicorn.conf.py ./ COPY scripts/supervisord.conf /etc/supervisord.conf -COPY scripts/paperless-cron /etc/cron.daily/ COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh # copy app @@ -71,9 +69,7 @@ COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/document RUN addgroup --gid 1000 paperless \ && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ && chown -R paperless:paperless . 
\ - && chmod 755 /sbin/docker-entrypoint.sh \ - && chmod +x /etc/cron.daily/paperless-cron \ - && rm /etc/cron.daily/apt-compat /etc/cron.daily/dpkg + && chmod 755 /sbin/docker-entrypoint.sh WORKDIR /usr/src/paperless/src/ @@ -81,6 +77,6 @@ RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/consume", "/usr/src/paperless/export"] ENTRYPOINT ["/sbin/docker-entrypoint.sh"] -CMD ["python3", "manage.py", "--help"] +CMD ["supervisord", "-c", "/etc/supervisord.conf"] LABEL maintainer="Jonas Winkler " diff --git a/Pipfile b/Pipfile index beb252591..7cd663ac4 100644 --- a/Pipfile +++ b/Pipfile @@ -24,9 +24,11 @@ gunicorn = "*" whitenoise = "*" fuzzywuzzy = "*" python-Levenshtein = "*" -django-extensions = "" +django-extensions = "*" watchdog = "*" pathvalidate = "*" +django-q = "*" +redis = "*" [dev-packages] coveralls = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 40d92fa59..122750db1 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9b05b0a30fedd4192cd81df4fe96e7ae6e55facd557607cc1f9f66c173b4cdb1" + "sha256": "135aa8778c31854db426652dfa7abf813cdfab1b08bfc16c8cd82e627db7565e" }, "pipfile-spec": 6, "requires": {}, @@ -14,13 +14,28 @@ ] }, "default": { + "arrow": { + "hashes": [ + "sha256:e098abbd9af3665aea81bdd6c869e93af4feb078e98468dd351c383af187aac5", + "sha256:ff08d10cda1d36c68657d6ad20d74fbea493d980f8b2d45344e00d6ed2bf6ed4" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.17.0" + }, "asgiref": { "hashes": [ - "sha256:a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e", - "sha256:cd88907ecaec59d78e4ac00ea665b03e571cb37e3a0e37b3702af1a9e86c365a" + "sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17", + "sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0" ], "markers": "python_version >= '3.5'", - "version": "==3.3.0" + "version": "==3.3.1" + }, + "blessed": { + "hashes": [ + "sha256:7d4914079a6e8e14fbe080dcaf14dee596a088057cdc598561080e3266123b48", + "sha256:81125aa5b84cb9dfc09ff451886f64b4b923b75c5eaf51fde9d1c48a135eb797" + ], + "version": "==1.17.11" }, "dateparser": { "hashes": [ @@ -52,7 +67,6 @@ "sha256:dc663652ac9460fd06580a973576820430c6d428720e874ae46b041fa63e0efa" ], "index": "pypi", - "markers": "python_version >= '3.5'", "version": "==3.0.9" }, "django-filter": { @@ -63,6 +77,22 @@ "index": "pypi", "version": "==2.4.0" }, + "django-picklefield": { + "hashes": [ + "sha256:15ccba592ca953b9edf9532e64640329cd47b136b7f8f10f2939caa5f9ce4287", + "sha256:3c702a54fde2d322fe5b2f39b8f78d9f655b8f77944ab26f703be6c0ed335a35" + ], + "markers": "python_version >= '3'", + "version": "==3.0.1" + }, + "django-q": { + "hashes": [ + "sha256:523d54dcf1b66152c1b658f914f00ed3b518a3432a9decd4898738ca8dbbe10f", + "sha256:7e5c5c021a15cff6807044a3aa48f5757789ccfef839d71c575f5512931a3e33" + ], + "index": "pypi", + "version": "==1.3.4" + }, "djangorestframework": { "hashes": [ "sha256:0209bafcb7b5010fdfec784034f059d512256424de2a0f084cb82b096d6dd6a7" @@ -206,41 +236,41 @@ }, "psycopg2-binary": { "hashes": [ - "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", - "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", - "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", - "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", - 
"sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", - "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", - "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", - "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", - "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", - "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", - "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", - "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", - "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", - "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", - "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", - "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", - "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", - "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", - "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", - "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5", - "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", - "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", - "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", - "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", - "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", - "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", + "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", + "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", + "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", + "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", + "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", + "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", + 
"sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", + "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", - "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056" + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", + "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" ], "index": "pypi", "version": "==2.8.6" @@ -290,51 +320,59 @@ ], "version": "==2020.4" }, + "redis": { + "hashes": [ + "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", + "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" + ], + "index": "pypi", + "version": "==3.5.3" + }, "regex": { "hashes": [ - "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", - "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", - "sha256:297116e79074ec2a2f885d22db00ce6e88b15f75162c5e8b38f66ea734e73c64", - "sha256:2564def9ce0710d510b1fc7e5178ce2d20f75571f788b5197b3c8134c366f50c", + "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", "sha256:06b52815d4ad38d6524666e0d50fe9173533c9cc145a5779b89733284e6f688f", - "sha256:b45bab9f224de276b7bc916f6306b86283f6aa8afe7ed4133423efb42015a898", - "sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", - "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", - "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", - "sha256:52e83a5f28acd621ba8e71c2b816f6541af7144b69cc5859d17da76c436a5427", - "sha256:b8a686a6c98872007aa41fdbb2e86dc03b287d951ff4a7f1da77fb7f14113e4d", - "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", - "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", - "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", - "sha256:3dfca201fa6b326239e1bccb00b915e058707028809b8ecc0cf6819ad233a740", + "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", "sha256:127a9e0c0d91af572fbb9e56d00a504dbd4c65e574ddda3d45b55722462210de", "sha256:1ec66700a10e3c75f1f92cbde36cca0d3aaee4c73dfa26699495a3a30b09093c", - "sha256:bf4f896c42c63d1f22039ad57de2644c72587756c0cfb3cc3b7530cfe228277f", - "sha256:bd904c0dec29bbd0769887a816657491721d5f545c29e30fd9d7a1a275dc80ab", - "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", - "sha256:9b6305295b6591e45f069d3553c54d50cc47629eb5c218aac99e0f7fafbf90a1", - "sha256:c32c91a0f1ac779cbd73e62430de3d3502bbc45ffe5bb6c376015acfa848144b", - "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", "sha256:227a8d2e5282c2b8346e7f68aa759e0331a0b4a890b55a5cfbb28bd0261b84c0", - "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", - "sha256:c454ad88e56e80e44f824ef8366bb7e4c3def12999151fd5c0ea76a18fe9aa3e", - 
"sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:2564def9ce0710d510b1fc7e5178ce2d20f75571f788b5197b3c8134c366f50c", + "sha256:297116e79074ec2a2f885d22db00ce6e88b15f75162c5e8b38f66ea734e73c64", "sha256:2dc522e25e57e88b4980d2bdd334825dbf6fa55f28a922fc3bfa60cc09e5ef53", - "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", + "sha256:3dfca201fa6b326239e1bccb00b915e058707028809b8ecc0cf6819ad233a740", "sha256:49461446b783945597c4076aea3f49aee4b4ce922bd241e4fcf62a3e7c61794c", - "sha256:de7fd57765398d141949946c84f3590a68cf5887dac3fc52388df0639b01eda4", + "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", "sha256:4b5a9bcb56cc146c3932c648603b24514447eafa6ce9295234767bf92f69b504", + "sha256:52e83a5f28acd621ba8e71c2b816f6541af7144b69cc5859d17da76c436a5427", + "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", + "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582", + "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", + "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", + "sha256:96f99219dddb33e235a37283306834700b63170d7bb2a1ee17e41c6d589c8eb9", + "sha256:9b6305295b6591e45f069d3553c54d50cc47629eb5c218aac99e0f7fafbf90a1", + "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", + "sha256:aacc8623ffe7999a97935eeabbd24b1ae701d08ea8f874a6ff050e93c3e658cf", + "sha256:b45bab9f224de276b7bc916f6306b86283f6aa8afe7ed4133423efb42015a898", + "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", + "sha256:b8a686a6c98872007aa41fdbb2e86dc03b287d951ff4a7f1da77fb7f14113e4d", + "sha256:bd904c0dec29bbd0769887a816657491721d5f545c29e30fd9d7a1a275dc80ab", + "sha256:bf4f896c42c63d1f22039ad57de2644c72587756c0cfb3cc3b7530cfe228277f", + "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", + "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", + "sha256:c32c91a0f1ac779cbd73e62430de3d3502bbc45ffe5bb6c376015acfa848144b", + "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", + "sha256:c454ad88e56e80e44f824ef8366bb7e4c3def12999151fd5c0ea76a18fe9aa3e", + "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", "sha256:cb905f3d2e290a8b8f1579d3984f2cfa7c3a29cc7cba608540ceeed18513f520", "sha256:cfcf28ed4ce9ced47b9b9670a4f0d3d3c0e4d4779ad4dadb1ad468b097f808aa", - "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", - "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", - "sha256:aacc8623ffe7999a97935eeabbd24b1ae701d08ea8f874a6ff050e93c3e658cf", - "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", - "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", - "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", - "sha256:96f99219dddb33e235a37283306834700b63170d7bb2a1ee17e41c6d589c8eb9", - "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26", - "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582" + "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", + "sha256:de7fd57765398d141949946c84f3590a68cf5887dac3fc52388df0639b01eda4", + "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", + 
"sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", + "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26" ], "version": "==2020.10.28" }, @@ -429,6 +467,13 @@ "index": "pypi", "version": "==0.10.3" }, + "wcwidth": { + "hashes": [ + "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", + "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" + ], + "version": "==0.2.5" + }, "whitenoise": { "hashes": [ "sha256:05ce0be39ad85740a78750c86a93485c40f08ad8c62a6006de0233765996e5c7", @@ -488,10 +533,10 @@ }, "certifi": { "hashes": [ - "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", - "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" + "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", + "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" ], - "version": "==2020.6.20" + "version": "==2020.11.8" }, "chardet": { "hashes": [ diff --git a/docker-compose.env.example b/docker-compose.env.example index cc2a1d3ec..fb529898a 100644 --- a/docker-compose.env.example +++ b/docker-compose.env.example @@ -1,7 +1,3 @@ -# Database settings for paperless -# If you want to use sqlite instead, remove this setting. -PAPERLESS_DBHOST="db" - # The UID and GID of the user used to run paperless in the container. Set this # to your UID and GID on the host so that you have write access to the # consumption directory. diff --git a/docker-compose.yml.example b/docker-compose.yml similarity index 79% rename from docker-compose.yml.example rename to docker-compose.yml index 1130e26a3..f9b4d6c33 100644 --- a/docker-compose.yml.example +++ b/docker-compose.yml @@ -1,5 +1,9 @@ version: "3.4" services: + broker: + image: redis:latest + #restart: always + db: image: postgres:13 #restart: always @@ -11,13 +15,12 @@ services: POSTGRES_PASSWORD: paperless webserver: - build: . - image: paperless-ng + image: paperless-ng:latest #restart: always depends_on: - db ports: - - "8000:8000" + - 8000:8000 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000"] interval: 30s @@ -29,6 +32,9 @@ services: - ./export:/usr/src/paperless/export - ./consume:/usr/src/paperless/consume env_file: docker-compose.env + environment: + PAPERLESS_REDIS: redis://broker:6379 + PAPERLESS_DBHOST: db command: ["supervisord", "-c", "/etc/supervisord.conf"] diff --git a/paperless.conf.example b/paperless.conf.example index 9c0b57250..48df40ab2 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -3,6 +3,16 @@ # As this file contains passwords it should only be readable by the user # running paperless. +############################################################################### +#### Message Broker #### +############################################################################### + +# This is required for processing scheduled tasks such as email fetching, index +# optimization and for training the automatic document matcher. +# Defaults to localhost:6379. 
+#PAPERLESS_REDIS="redis://localhost:6379" + + ############################################################################### #### Database Settings #### ############################################################################### diff --git a/scripts/paperless-cron b/scripts/paperless-cron deleted file mode 100644 index 238857227..000000000 --- a/scripts/paperless-cron +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -cd /usr/src/paperless/src - -sudo -HEu paperless python3 manage.py document_create_classifier diff --git a/scripts/supervisord.conf b/scripts/supervisord.conf index d3ff288de..0ac416d3e 100644 --- a/scripts/supervisord.conf +++ b/scripts/supervisord.conf @@ -24,8 +24,9 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 -[program:anacron] -command=anacron -d +[program:scheduler] +command=python3 manage.py qcluster +user=paperless stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 diff --git a/src/documents/management/commands/document_create_classifier.py b/src/documents/management/commands/document_create_classifier.py index 85cb3b446..839044700 100755 --- a/src/documents/management/commands/document_create_classifier.py +++ b/src/documents/management/commands/document_create_classifier.py @@ -1,10 +1,6 @@ -import logging - from django.core.management.base import BaseCommand -from documents.classifier import DocumentClassifier, \ - IncompatibleClassifierVersionError -from paperless import settings from ...mixins import Renderable +from ...tasks import train_classifier class Command(Renderable, BaseCommand): @@ -18,27 +14,4 @@ class Command(Renderable, BaseCommand): BaseCommand.__init__(self, *args, **kwargs) def handle(self, *args, **options): - classifier = DocumentClassifier() - - try: - # load the classifier, since we might not have to train it again. - classifier.reload() - except (FileNotFoundError, IncompatibleClassifierVersionError): - # This is what we're going to fix here. - pass - - try: - if classifier.train(): - logging.getLogger(__name__).info( - "Saving updated classifier model to {}...".format(settings.MODEL_FILE) - ) - classifier.save_classifier() - else: - logging.getLogger(__name__).debug( - "Training data unchanged." 
- ) - - except Exception as e: - logging.getLogger(__name__).error( - "Classifier error: " + str(e) - ) + train_classifier() diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 5a136d6a5..7dfdbaa42 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -1,9 +1,7 @@ from django.core.management import BaseCommand -from whoosh.writing import AsyncWriter -import documents.index as index from documents.mixins import Renderable -from documents.models import Document +from documents.tasks import index_reindex, index_optimize class Command(Renderable, BaseCommand): @@ -22,13 +20,6 @@ class Command(Renderable, BaseCommand): self.verbosity = options["verbosity"] if options['command'] == 'reindex': - documents = Document.objects.all() - - ix = index.open_index(recreate=True) - - with AsyncWriter(ix) as writer: - for document in documents: - index.update_document(writer, document) - + index_reindex() elif options['command'] == 'optimize': - index.open_index().optimize() + index_optimize() diff --git a/src/documents/management/commands/document_rerun_ocr.py b/src/documents/management/commands/document_rerun_ocr.py deleted file mode 100644 index 794357420..000000000 --- a/src/documents/management/commands/document_rerun_ocr.py +++ /dev/null @@ -1,60 +0,0 @@ -import argparse -import threading -from multiprocessing import Pool -from multiprocessing.pool import ThreadPool - -from django.core.management.base import BaseCommand - -from documents.consumer import Consumer -from documents.models import Log, Document -from documents.parsers import get_parser_class - - -def process_document(doc): - parser_class = get_parser_class(doc.file_name) - if not parser_class: - print("no parser available") - else: - print("Parser: {}".format(parser_class.__name__)) - parser = parser_class(doc.source_path, None) - try: - text = parser.get_text() - doc.content = text - doc.save() - finally: - parser.cleanup() - - -def document_index(value): - ivalue = int(value) - if not (1 <= ivalue <= Document.objects.count()): - raise argparse.ArgumentTypeError( - "{} is not a valid document index (out of range)".format(value)) - - return ivalue - - -class Command(BaseCommand): - - help = "Performs OCR on all documents again!" 
- - - def add_arguments(self, parser): - parser.add_argument( - "-s", "--start_index", - default=None, - type=document_index - ) - - def handle(self, *args, **options): - - docs = Document.objects.all().order_by("added") - - indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs)) - - for i in indices: - doc = docs[i] - print("==================================") - print("{} out of {}: {}".format(i+1, len(docs), doc.file_name)) - print("==================================") - process_document(doc) diff --git a/src/documents/migrations/1001_auto_20201109_1636.py b/src/documents/migrations/1001_auto_20201109_1636.py new file mode 100644 index 000000000..8d6a0f584 --- /dev/null +++ b/src/documents/migrations/1001_auto_20201109_1636.py @@ -0,0 +1,28 @@ +# Generated by Django 3.1.3 on 2020-11-09 16:36 + +from django.db import migrations +from django.db.migrations import RunPython +from django_q.models import Schedule +from django_q.tasks import schedule + + +def add_schedules(apps, schema_editor): + schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY) + schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY) + schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10) + + +def remove_schedules(apps, schema_editor): + Schedule.objects.all().delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1000_update_paperless_all'), + ('django_q', '0013_task_attempt_count'), + ] + + operations = [ + RunPython(add_schedules, remove_schedules) + ] diff --git a/src/documents/tasks.py b/src/documents/tasks.py new file mode 100644 index 000000000..aaf466bd2 --- /dev/null +++ b/src/documents/tasks.py @@ -0,0 +1,57 @@ +import logging + +from django.conf import settings +from django_q.tasks import async_task, result +from whoosh.writing import AsyncWriter + +from documents import index +from documents.classifier import DocumentClassifier, \ + IncompatibleClassifierVersionError +from documents.mail import MailFetcher +from documents.models import Document + + +def consume_mail(): + MailFetcher().pull() + + +def index_optimize(): + index.open_index().optimize() + + +def index_reindex(): + documents = Document.objects.all() + + ix = index.open_index(recreate=True) + + with AsyncWriter(ix) as writer: + for document in documents: + index.update_document(writer, document) + + +def train_classifier(): + classifier = DocumentClassifier() + + try: + # load the classifier, since we might not have to train it again. + classifier.reload() + except (FileNotFoundError, IncompatibleClassifierVersionError): + # This is what we're going to fix here. + pass + + try: + if classifier.train(): + logging.getLogger(__name__).info( + "Saving updated classifier model to {}...".format( + settings.MODEL_FILE) + ) + classifier.save_classifier() + else: + logging.getLogger(__name__).debug( + "Training data unchanged." 
+ ) + + except Exception as e: + logging.getLogger(__name__).error( + "Classifier error: " + str(e) + ) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 65da01218..2c96350dc 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -71,6 +71,8 @@ INSTALLED_APPS = [ "rest_framework", "django_filters", + "django_q", + ] REST_FRAMEWORK = { @@ -242,6 +244,16 @@ LOGGING = { }, } +############################################################################### +# Task queue # +############################################################################### + +Q_CLUSTER = { + 'name': 'paperless', + 'catch_up': False, + 'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379") +} + ############################################################################### # Paperless Specific Settings # ############################################################################### From 54f04650d16278f23e9239af196493dbf52f92e1 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 10 Nov 2020 01:47:35 +0100 Subject: [PATCH 20/26] fixed an issue with the searcher. --- src/documents/index.py | 9 +++++++-- src/documents/views.py | 13 ++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index a099f670c..82a35a63e 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -1,4 +1,5 @@ import logging +from contextlib import contextmanager from django.db import models from django.dispatch import receiver @@ -99,15 +100,19 @@ def remove_document_from_index(document): remove_document(writer, document) +@contextmanager def query_page(ix, query, page): - with ix.searcher() as searcher: + searcher = ix.searcher() + try: query_parser = MultifieldParser(["content", "title", "correspondent"], ix.schema).parse(query) result_page = searcher.search_page(query_parser, page) result_page.results.fragmenter = highlight.ContextFragmenter( surround=50) result_page.results.formatter = JsonFormatter() - return result_page + yield result_page + finally: + searcher.close() def autocomplete(ix, term, limit=10): diff --git a/src/documents/views.py b/src/documents/views.py index b3d6012f1..8cc330141 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -191,13 +191,12 @@ class SearchView(APIView): except (ValueError, TypeError): page = 1 - result_page = index.query_page(self.ix, query, page) - - return Response( - {'count': len(result_page), - 'page': result_page.pagenum, - 'page_count': result_page.pagecount, - 'results': list(map(self.add_infos_to_hit, result_page))}) + with index.query_page(self.ix, query, page) as result_page: + return Response( + {'count': len(result_page), + 'page': result_page.pagenum, + 'page_count': result_page.pagecount, + 'results': list(map(self.add_infos_to_hit, result_page))}) else: return Response({ From 83f82f3caf6ec7bc68624d36c96bb220e7cf3ab6 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Tue, 10 Nov 2020 01:47:58 +0100 Subject: [PATCH 21/26] added a setting: delete duplicate documents --- paperless.conf.example | 4 ++++ src/documents/consumer.py | 2 ++ src/paperless/settings.py | 2 ++ 3 files changed, 8 insertions(+) diff --git a/paperless.conf.example b/paperless.conf.example index 48df40ab2..1c62256ab 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -143,6 +143,10 @@ PAPERLESS_EMAIL_SECRET="" #### Software Tweaks #### ############################################################################### +# When the consumer detects a duplicate 
document, it will not touch the +# original document. This default behavior can be changed here. +#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false" + # After a document is consumed, Paperless can trigger an arbitrary script if # you like. This script will be passed a number of arguments for you to work # with. The default is blank, which means nothing will be executed. For more diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f61d11136..75e6f6120 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -84,6 +84,8 @@ class Consumer: "warning", "Skipping {} as it appears to be a duplicate".format(doc) ) + if settings.CONSUMER_DELETE_DUPLICATES: + self._cleanup_doc(doc) return False self.log("info", "Consuming {}".format(doc)) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 2c96350dc..06dfdcd84 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -258,6 +258,8 @@ Q_CLUSTER = { # Paperless Specific Settings # ############################################################################### +CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") + # The default language that tesseract will attempt to use when parsing # documents. It should be a 3-letter language code consistent with ISO 639. OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") From 6795580739f26931f9ea1ee27ceaac3e65c8edce Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 11 Nov 2020 14:13:54 +0100 Subject: [PATCH 22/26] remove only automatically created schedules (almost) --- src/documents/migrations/1001_auto_20201109_1636.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/documents/migrations/1001_auto_20201109_1636.py b/src/documents/migrations/1001_auto_20201109_1636.py index 8d6a0f584..138de6f91 100644 --- a/src/documents/migrations/1001_auto_20201109_1636.py +++ b/src/documents/migrations/1001_auto_20201109_1636.py @@ -13,7 +13,9 @@ def add_schedules(apps, schema_editor): def remove_schedules(apps, schema_editor): - Schedule.objects.all().delete() + Schedule.objects.filter(func='documents.tasks.train_classifier').delete() + Schedule.objects.filter(func='documents.tasks.index_optimize').delete() + Schedule.objects.filter(func='documents.tasks.consume_mail').delete() class Migration(migrations.Migration): From 02ef7cb0388970d894f85000563fe2249a967f9e Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 11 Nov 2020 14:14:21 +0100 Subject: [PATCH 23/26] small consumer fixes --- src/documents/consumer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 75e6f6120..96aad7d49 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -60,7 +60,6 @@ class Consumer: raise ConsumerError( "Consumption directory {} does not exist".format(self.consume)) - def log(self, level, message): getattr(self.logger, level)(message, extra={ "group": self.logging_group @@ -98,7 +97,6 @@ class Consumer: else: self.log("info", "Parser: {}".format(parser_class.__name__)) - document_consumption_started.send( sender=self.__class__, filename=doc, @@ -110,9 +108,10 @@ class Consumer: try: self.log("info", "Generating thumbnail for {}...".format(doc)) thumbnail = document_parser.get_optimised_thumbnail() + text = document_parser.get_text() date = document_parser.get_date() document = self._store( - document_parser.get_text(), + text, doc, thumbnail, date From 734da28b69a1e402ab6823efbf8f2c12c91b4d26 Mon Sep 17 
00:00:00 2001 From: Jonas Winkler Date: Wed, 11 Nov 2020 14:21:33 +0100 Subject: [PATCH 24/26] fixed the file handling implementation. The feature is cool, but the original implementation had so many small flaws it wasn't even funny. --- src/documents/apps.py | 3 - src/documents/consumer.py | 8 +- src/documents/file_handling.py | 92 ++++ .../management/commands/document_importer.py | 27 +- .../migrations/1002_auto_20201111_1105.py | 18 + src/documents/models.py | 245 +---------- src/documents/signals/handlers.py | 60 ++- src/documents/tests/test_file_handling.py | 411 ++++-------------- 8 files changed, 287 insertions(+), 577 deletions(-) create mode 100644 src/documents/file_handling.py create mode 100644 src/documents/migrations/1002_auto_20201111_1105.py diff --git a/src/documents/apps.py b/src/documents/apps.py index 83e671d07..6cf815122 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -14,7 +14,6 @@ class DocumentsConfig(AppConfig): add_inbox_tags, run_pre_consume_script, run_post_consume_script, - cleanup_document_deletion, set_log_entry, set_correspondent, set_document_type, @@ -33,6 +32,4 @@ class DocumentsConfig(AppConfig): document_consumption_finished.connect(add_to_index) document_consumption_finished.connect(run_post_consume_script) - post_delete.connect(cleanup_document_deletion) - AppConfig.ready(self) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 96aad7d49..2e8c5493f 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -11,6 +11,7 @@ from django.utils import timezone from paperless.db import GnuPG from .classifier import DocumentClassifier, IncompatibleClassifierVersionError +from .file_handling import generate_filename, create_source_path_directory from .models import Document, FileInfo from .parsers import ParseError, get_parser_class from .signals import ( @@ -174,10 +175,15 @@ class Consumer: self.log("debug", "Tagging with {}".format(tag_names)) document.tags.add(*relevant_tags) + document.filename = generate_filename(document) + + create_source_path_directory(document.source_path) + self._write(document, doc, document.source_path) self._write(document, thumbnail, document.thumbnail_path) - #TODO: why do we need to save the document again? + # We need to save the document twice, since we need the PK of the + # document in order to create its filename above. document.save() return document diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py new file mode 100644 index 000000000..cac317d4c --- /dev/null +++ b/src/documents/file_handling.py @@ -0,0 +1,92 @@ +import os +from collections import defaultdict + +from django.conf import settings +from django.template.defaultfilters import slugify + + +def create_source_path_directory(source_path): + os.makedirs(os.path.dirname(source_path), exist_ok=True) + + +def delete_empty_directories(directory): + # Go up in the directory hierarchy and try to delete all directories + directory = os.path.normpath(directory) + root = os.path.normpath(settings.ORIGINALS_DIR) + + if not directory.startswith(root + os.path.sep): + # don't do anything outside our originals folder. + + # append os.path.set so that we avoid these cases: + # directory = /home/originals2/test + # root = /home/originals ("/" gets appended and startswith fails) + return + + while directory != root: + if not os.listdir(directory): + # it's empty + try: + os.rmdir(directory) + except OSError: + # whatever. empty directories aren't that bad anyway. 
+ return + else: + # it's not empty. + return + + # go one level up + directory = os.path.normpath(os.path.dirname(directory)) + + +def many_to_dictionary(field): + # Converts ManyToManyField to dictionary by assuming, that field + # entries contain an _ or - which will be used as a delimiter + mydictionary = dict() + + for index, t in enumerate(field.all()): + # Populate tag names by index + mydictionary[index] = slugify(t.name) + + # Find delimiter + delimiter = t.name.find('_') + + if delimiter == -1: + delimiter = t.name.find('-') + + if delimiter == -1: + continue + + key = t.name[:delimiter] + value = t.name[delimiter + 1:] + + mydictionary[slugify(key)] = slugify(value) + + return mydictionary + + +def generate_filename(document): + # Create filename based on configured format + if settings.PAPERLESS_FILENAME_FORMAT is not None: + tags = defaultdict(lambda: slugify(None), + many_to_dictionary(document.tags)) + path = settings.PAPERLESS_FILENAME_FORMAT.format( + correspondent=slugify(document.correspondent), + title=slugify(document.title), + created=document.created.date(), + added=slugify(document.added), + tags=tags, + ) + else: + path = "" + + # Always append the primary key to guarantee uniqueness of filename + if len(path) > 0: + filename = "%s-%07i.%s" % (path, document.pk, document.file_type) + else: + filename = "%07i.%s" % (document.pk, document.file_type) + + # Append .gpg for encrypted files + if document.storage_type == document.STORAGE_TYPE_GPG: + filename += ".gpg" + + return filename diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index ae5c1853f..ef3eaafc0 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -8,6 +8,7 @@ from django.core.management import call_command from documents.models import Document from paperless.db import GnuPG +from ...file_handling import generate_filename, create_source_path_directory from ...mixins import Renderable @@ -82,6 +83,10 @@ class Command(Renderable, BaseCommand): def _import_files_from_manifest(self): + storage_type = Document.STORAGE_TYPE_UNENCRYPTED + if settings.PASSPHRASE: + storage_type = Document.STORAGE_TYPE_GPG + for record in self.manifest: if not record["model"] == "documents.document": @@ -94,6 +99,14 @@ class Command(Renderable, BaseCommand): document_path = os.path.join(self.source, doc_file) thumbnail_path = os.path.join(self.source, thumb_file) + document.storage_type = storage_type + document.filename = generate_filename(document) + + if os.path.isfile(document.source_path): + raise FileExistsError(document.source_path) + + create_source_path_directory(document.source_path) + if settings.PASSPHRASE: with open(document_path, "rb") as unencrypted: @@ -109,18 +122,8 @@ class Command(Renderable, BaseCommand): encrypted.write(GnuPG.encrypted(unencrypted)) else: - + print("Moving {} to {}".format(document_path, document.source_path)) shutil.copy(document_path, document.source_path) shutil.copy(thumbnail_path, document.thumbnail_path) - # Reset the storage type to whatever we've used while importing - - storage_type = Document.STORAGE_TYPE_UNENCRYPTED - if settings.PASSPHRASE: - storage_type = Document.STORAGE_TYPE_GPG - - Document.objects.filter( - pk__in=[r["pk"] for r in self.manifest] - ).update( - storage_type=storage_type - ) + document.save() diff --git a/src/documents/migrations/1002_auto_20201111_1105.py b/src/documents/migrations/1002_auto_20201111_1105.py 
new file mode 100644 index 000000000..7f6bae50b --- /dev/null +++ b/src/documents/migrations/1002_auto_20201111_1105.py @@ -0,0 +1,18 @@ +# Generated by Django 3.1.3 on 2020-11-11 11:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1001_auto_20201109_1636'), + ] + + operations = [ + migrations.AlterField( + model_name='document', + name='filename', + field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 88598b5f6..ab3262fb5 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -3,18 +3,15 @@ import logging import os import re -from collections import OrderedDict, defaultdict +from collections import OrderedDict import dateutil.parser from django.conf import settings from django.db import models -from django.dispatch import receiver -from django.template.defaultfilters import slugify from django.utils import timezone from django.utils.text import slugify - class MatchingModel(models.Model): MATCH_ANY = 1 @@ -192,7 +189,7 @@ class Document(models.Model): default=timezone.now, editable=False, db_index=True) filename = models.FilePathField( - max_length=256, + max_length=1024, editable=False, default=None, null=True, @@ -220,123 +217,18 @@ class Document(models.Model): return "{}: {}".format(created, self.correspondent or self.title) return str(created) - def find_renamed_document(self, subdirectory=""): - suffix = "%07i.%s" % (self.pk, self.file_type) - - # Append .gpg for encrypted files - if self.storage_type == self.STORAGE_TYPE_GPG: - suffix += ".gpg" - - # Go up in the directory hierarchy and try to delete all directories - root = os.path.normpath(Document.filename_to_path(subdirectory)) - - for filename in os.listdir(root): - if filename.endswith(suffix): - return os.path.join(subdirectory, filename) - - fullname = os.path.join(subdirectory, filename) - if os.path.isdir(Document.filename_to_path(fullname)): - return self.find_renamed_document(fullname) - - return None - - @property - def source_filename(self): - # Initial filename generation (for new documents) - if self.filename is None: - self.filename = self.generate_source_filename() - - # Check if document is still available under filename - elif not os.path.isfile(Document.filename_to_path(self.filename)): - recovered_filename = self.find_renamed_document() - - # If we have found the file so update the filename - if recovered_filename is not None: - logger = logging.getLogger(__name__) - logger.warning("Filename of document " + str(self.id) + - " has changed and was successfully updated") - self.filename = recovered_filename - - # Remove all empty subdirectories from MEDIA_ROOT - Document.delete_all_empty_subdirectories( - Document.filename_to_path("")) - else: - logger = logging.getLogger(__name__) - logger.error("File of document " + str(self.id) + " has " + - "gone and could not be recovered") - - return self.filename - - @staticmethod - def many_to_dictionary(field): - # Converts ManyToManyField to dictionary by assuming, that field - # entries contain an _ or - which will be used as a delimiter - mydictionary = dict() - - for index, t in enumerate(field.all()): - # Populate tag names by index - mydictionary[index] = slugify(t.name) - - # Find delimiter - delimiter = t.name.find('_') - - if delimiter == -1: - delimiter = t.name.find('-') - - if delimiter == -1: - continue - - key = 
t.name[:delimiter] - value = t.name[delimiter+1:] - - mydictionary[slugify(key)] = slugify(value) - - return mydictionary - - def generate_source_filename(self): - # Create filename based on configured format - if settings.PAPERLESS_FILENAME_FORMAT is not None: - tags = defaultdict(lambda: slugify(None), - self.many_to_dictionary(self.tags)) - path = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=slugify(self.correspondent), - title=slugify(self.title), - created=slugify(self.created), - added=slugify(self.added), - tags=tags) - else: - path = "" - - # Always append the primary key to guarantee uniqueness of filename - if len(path) > 0: - filename = "%s-%07i.%s" % (path, self.pk, self.file_type) - else: - filename = "%07i.%s" % (self.pk, self.file_type) - - # Append .gpg for encrypted files - if self.storage_type == self.STORAGE_TYPE_GPG: - filename += ".gpg" - - return filename - - def create_source_directory(self): - new_filename = self.generate_source_filename() - - # Determine the full "target" path - dir_new = Document.filename_to_path(os.path.dirname(new_filename)) - - # Create new path - os.makedirs(dir_new, exist_ok=True) - @property def source_path(self): - return Document.filename_to_path(self.source_filename) + if self.filename: + fname = str(self.filename) + else: + fname = "{:07}.{}".format(self.pk, self.file_type) + if self.storage_type == self.STORAGE_TYPE_GPG: + fname += ".gpg" - @staticmethod - def filename_to_path(filename): return os.path.join( settings.ORIGINALS_DIR, - filename + fname ) @property @@ -362,125 +254,6 @@ class Document(models.Model): def thumbnail_file(self): return open(self.thumbnail_path, "rb") - def set_filename(self, filename): - if os.path.isfile(Document.filename_to_path(filename)): - self.filename = filename - - @staticmethod - def try_delete_empty_directories(directory): - # Go up in the directory hierarchy and try to delete all directories - directory = os.path.normpath(directory) - root = os.path.normpath(Document.filename_to_path("")) - - while directory != root: - # Try to delete the current directory - try: - os.rmdir(directory) - except os.error: - # Directory not empty, no need to go further up - return - - # Cut off actual directory and go one level up - directory, _ = os.path.split(directory) - directory = os.path.normpath(directory) - - @staticmethod - def delete_all_empty_subdirectories(directory): - # Go through all folders and try to delete all directories - root = os.path.normpath(Document.filename_to_path(directory)) - - for filename in os.listdir(root): - fullname = os.path.join(directory, filename) - - if not os.path.isdir(Document.filename_to_path(fullname)): - continue - - # Go into subdirectory to see, if there is more to delete - Document.delete_all_empty_subdirectories( - os.path.join(directory, filename)) - - # Try to delete the directory - try: - os.rmdir(Document.filename_to_path(fullname)) - continue - except os.error: - # Directory not empty, no need to go further up - continue - - -@receiver(models.signals.m2m_changed, sender=Document.tags.through) -@receiver(models.signals.post_save, sender=Document) -def update_filename(sender, instance, **kwargs): - # Skip if document has not been saved yet - if instance.filename is None: - return - - # Check is file exists and update filename otherwise - if not os.path.isfile(Document.filename_to_path(instance.filename)): - instance.filename = instance.source_filename - - # Build the new filename - new_filename = instance.generate_source_filename() - - # If the filename 
is the same, then nothing needs to be done - if instance.filename == new_filename: - return - - # Determine the full "target" path - path_new = instance.filename_to_path(new_filename) - dir_new = instance.filename_to_path(os.path.dirname(new_filename)) - - # Create new path - instance.create_source_directory() - - # Determine the full "current" path - path_current = instance.filename_to_path(instance.source_filename) - - # Move file - try: - os.rename(path_current, path_new) - except PermissionError: - # Do not update filename in object - return - except FileNotFoundError: - logger = logging.getLogger(__name__) - logger.error("Renaming of document " + str(instance.id) + " failed " + - "as file " + instance.filename + " was no longer present") - return - - # Delete empty directory - old_dir = os.path.dirname(instance.filename) - old_path = instance.filename_to_path(old_dir) - Document.try_delete_empty_directories(old_path) - - instance.filename = new_filename - - # Save instance - # This will not cause a cascade of post_save signals, as next time - # nothing needs to be renamed - instance.save() - - -@receiver(models.signals.post_delete, sender=Document) -def delete_files(sender, instance, **kwargs): - if instance.filename is None: - return - - # Remove the document - old_file = instance.filename_to_path(instance.filename) - - try: - os.remove(old_file) - except FileNotFoundError: - logger = logging.getLogger(__name__) - logger.warning("Deleted document " + str(instance.id) + " but file " + - old_file + " was no longer present") - - # And remove the directory (if applicable) - old_dir = os.path.dirname(instance.filename) - old_path = instance.filename_to_path(old_dir) - Document.try_delete_empty_directories(old_path) - class Log(models.Model): diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index cee1e042b..671cdb104 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -6,9 +6,13 @@ from django.conf import settings from django.contrib.admin.models import ADDITION, LogEntry from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType +from django.db import models, DatabaseError +from django.dispatch import receiver from django.utils import timezone from .. import index, matching +from ..file_handling import delete_empty_directories, generate_filename, \ + create_source_path_directory from ..models import Document, Tag @@ -141,17 +145,65 @@ def run_post_consume_script(sender, document, **kwargs): )).wait() +@receiver(models.signals.post_delete, sender=Document) def cleanup_document_deletion(sender, instance, using, **kwargs): - - if not isinstance(instance, Document): - return - for f in (instance.source_path, instance.thumbnail_path): try: os.unlink(f) except FileNotFoundError: pass # The file's already gone, so we're cool with it. + delete_empty_directories(os.path.dirname(instance.source_path)) + + +@receiver(models.signals.m2m_changed, sender=Document.tags.through) +@receiver(models.signals.post_save, sender=Document) +def update_filename_and_move_files(sender, instance, **kwargs): + + if not instance.filename: + # Can't update the filename if there is not filename to begin with + # This happens after the consumer creates a new document. + # The PK needs to be set first by saving the document once. When this + # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be + # renamed anyway. In all other cases, instance.filename will be set. 
+ return + + old_filename = instance.filename + old_path = instance.source_path + new_filename = generate_filename(instance) + + if new_filename == instance.filename: + # Don't do anything if its the same. + return + + new_path = os.path.join(settings.ORIGINALS_DIR, new_filename) + + if not os.path.isfile(old_path): + # Can't do anything if the old file does not exist anymore. + logging.getLogger(__name__).fatal('Document {}: File {} has gone.'.format(str(instance), old_path)) + return + + if os.path.isfile(new_path): + # Can't do anything if the new file already exists. Skip updating file. + logging.getLogger(__name__).warning('Document {}: Cannot rename file since target path {} already exists.'.format(str(instance), new_path)) + return + + create_source_path_directory(new_path) + + try: + os.rename(old_path, new_path) + instance.filename = new_filename + instance.save() + + except OSError as e: + instance.filename = old_filename + except DatabaseError as e: + os.rename(new_path, old_path) + instance.filename = old_filename + + if not os.path.isfile(old_path): + delete_empty_directories(os.path.dirname(old_path)) + def set_log_entry(sender, document=None, logging_group=None, **kwargs): diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 3b7c757d4..e228acabb 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -10,6 +10,8 @@ from dateutil import tz from django.test import TestCase, override_settings from django.utils.text import slugify + +from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories from ..models import Tag, Document, Correspondent from django.conf import settings @@ -31,18 +33,6 @@ class TestDate(TestCase): for dirname in self.deletion_list: shutil.rmtree(dirname, ignore_errors=True) - @override_settings(PAPERLESS_FILENAME_FORMAT="") - def test_source_filename(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - - self.assertEqual(document.source_filename, "0000001.pdf") - - document.filename = "test.pdf" - self.assertEqual(document.source_filename, "test.pdf") - @override_settings(PAPERLESS_FILENAME_FORMAT="") def test_generate_source_filename(self): document = Document() @@ -50,40 +40,40 @@ class TestDate(TestCase): document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.save() - self.assertEqual(document.generate_source_filename(), "0000001.pdf") + self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk)) document.storage_type = Document.STORAGE_TYPE_GPG - self.assertEqual(document.generate_source_filename(), - "0000001.pdf.gpg") + self.assertEqual(generate_filename(document), + "{:07d}.pdf.gpg".format(document.pk)) - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + - "{correspondent}") + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_file_renaming(self): document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.save() - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() + # Test default source_path + self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk)) - # Test 
source_path - self.assertEqual(document.source_path, settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf") + document.filename = generate_filename(document) + + # Ensure that filename is properly generated + self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) # Enable encryption and check again document.storage_type = Document.STORAGE_TYPE_GPG - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf.gpg") + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf.gpg".format(document.pk)) + document.save() - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), True) + # test that creating dirs for the source_path creates the correct directory + create_source_path_directory(document.source_path) + Path(document.source_path).touch() + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none"), True) # Set a correspondent and save the document document.correspondent = Correspondent.objects.get_or_create( @@ -91,14 +81,12 @@ class TestDate(TestCase): document.save() # Check proper handling of files - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/test"), True) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), False) - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/test/test-0000001.pdf.gpg"), True) - self.assertEqual(document.generate_source_filename(), - "test/test-0000001.pdf.gpg") + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/test"), True) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none"), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") @@ -109,18 +97,18 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) + create_source_path_directory(document.source_path) Path(document.source_path).touch() # Test source_path - self.assertEqual(document.source_path, settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf") + self.assertEqual(document.source_path, settings.ORIGINALS_DIR + + "/none/none-{:07d}.pdf".format(document.pk)) # Make the folder read- and execute-only (no writing and no renaming) - os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555) + os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) # Set a correspondent and save the document document.correspondent = Correspondent.objects.get_or_create( @@ -129,11 +117,12 @@ class TestDate(TestCase): # Check proper handling of files self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/none/none-0000001.pdf"), True) - self.assertEqual(document.source_filename, - "none/none-0000001.pdf") + "originals/none/none-{:07d}.pdf".format(document.pk)), True) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) + + os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) - os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777) 
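# A minimal, standalone sketch (in plain Python, outside the patch hunks) of the
# rename-and-rollback pattern that the new update_filename_and_move_files()
# handler follows and that the renaming tests above and below exercise.
# ROOT and persist() are illustrative placeholders only, not project API;
# the real handler works on a Document instance and Django's signals.
import os

ROOT = "/tmp/originals-example"  # stand-in for settings.ORIGINALS_DIR

def rename_with_rollback(old_name, new_name, persist):
    """Move a stored file to its newly generated name; if persisting the new
    name fails, move the file back so disk and database stay consistent."""
    old_path = os.path.join(ROOT, old_name)
    new_path = os.path.join(ROOT, new_name)

    if new_name == old_name or not os.path.isfile(old_path) or os.path.isfile(new_path):
        # Nothing to do, the source is gone, or the target name is already taken.
        return old_name

    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    try:
        os.rename(old_path, new_path)
    except OSError:
        return old_name  # move failed; the file never left its old place
    try:
        persist(new_name)  # e.g. document.filename = new_name; document.save()
    except Exception:
        os.rename(new_path, old_path)  # database write failed; undo the move
        return old_name
    return new_name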
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") @@ -144,18 +133,20 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) + + create_source_path_directory(document.source_path) Path(document.source_path).touch() # Ensure file deletion after delete + pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf"), False) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + + "/none/none-{:07d}.pdf".format(pk)), False) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none"), False) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") @@ -176,12 +167,15 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) + + create_source_path_directory(document.source_path) + Path(document.source_path).touch() - Path(document.source_path + "test").touch() + important_file = document.source_path + "test" + Path(important_file).touch() # Set a correspondent and save the document document.correspondent = Correspondent.objects.get_or_create( @@ -193,11 +187,8 @@ class TestDate(TestCase): "/documents/originals/test"), True) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True) + self.assertTrue(os.path.isfile(important_file)) - # Cleanup - os.remove(settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdftest") - os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_underscore(self): @@ -212,13 +203,8 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "demo-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - document.delete() + self.assertEqual(generate_filename(document), + "demo-{:07d}.pdf".format(document.pk)) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): @@ -233,13 +219,8 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "demo-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - document.delete() + self.assertEqual(generate_filename(document), + "demo-{:07d}.pdf".format(document.pk)) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): @@ -254,13 +235,8 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none-0000001.pdf") - 
document.create_source_directory() - Path(document.source_path).touch() - - document.delete() + self.assertEqual(generate_filename(document), + "none-{:07d}.pdf".format(document.pk)) @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_all(self): @@ -274,61 +250,24 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "demo-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() + self.assertEqual(generate_filename(document), + "demo-{:07d}.pdf".format(document.pk)) - document.delete() - - @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") - def test_tags_out_of_bounds_0(self): + @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") + def test_tags_out_of_bounds(self): document = Document() document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED document.save() - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - document.delete() - - @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}") - def test_tags_out_of_bounds_10000000(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + # Add tag to document + document.tags.create(name="demo") document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() + self.assertEqual(generate_filename(document), + "none-{:07d}.pdf".format(document.pk)) - document.delete() - - @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}") - def test_tags_out_of_bounds_99(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - document.delete() @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}/{correspondent}") @@ -339,153 +278,40 @@ class TestDate(TestCase): document.save() # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none/none-0000001.pdf") - document.create_source_directory() + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none/none-{:07d}.pdf".format(document.pk)) + create_source_path_directory(document.source_path) Path(document.source_path).touch() # Check proper handling of files - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none/none"), True) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none/none"), True) + pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + - "/documents/originals/none/none/none-0000001.pdf"), + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + + "/none/none/none-{:07d}.pdf".format(pk)), False) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none/none"), False) - 
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), False) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals"), True) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none/none"), False) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + + "/none"), False) + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) @override_settings(PAPERLESS_FILENAME_FORMAT=None) def test_format_none(self): document = Document() + document.pk = 1 document.file_type = "pdf" document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - self.assertEqual(document.generate_source_filename(), "0000001.pdf") - - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + - "{correspondent}") - def test_document_renamed(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - # Test source_path - self.assertEqual(document.source_path, settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf") - - # Rename the document "illegaly" - os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") - os.rename(settings.MEDIA_ROOT + "/documents/originals/" + - "none/none-0000001.pdf", - settings.MEDIA_ROOT + "/documents/originals/" + - "test/test-0000001.pdf") - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/test/test-0000001.pdf"), True) - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/none/none-0000001.pdf"), False) - - # Set new correspondent and expect document to be saved properly - document.correspondent = Correspondent.objects.get_or_create( - name="foo")[0] - document.save() - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/foo/foo-0000001.pdf"), True) - - # Check proper handling of files - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/foo"), True) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), False) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/test"), False) - self.assertEqual(document.generate_source_filename(), - "foo/foo-0000001.pdf") - - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + - "{correspondent}") - def test_document_renamed_encrypted(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_GPG - document.save() - - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf.gpg") - document.create_source_directory() - Path(document.source_path).touch() - - # Test source_path - self.assertEqual(document.source_path, settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf.gpg") - - # Rename the document "illegaly" - os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test") - os.rename(settings.MEDIA_ROOT + "/documents/originals/" + - "none/none-0000001.pdf.gpg", - settings.MEDIA_ROOT + "/documents/originals/" + - "test/test-0000001.pdf.gpg") - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/test/test-0000001.pdf.gpg"), True) - 
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/none/none-0000001.pdf"), False) - - # Set new correspondent and expect document to be saved properly - document.correspondent = Correspondent.objects.get_or_create( - name="foo")[0] - document.save() - self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + - "originals/foo/foo-0000001.pdf.gpg"), True) - - # Check proper handling of files - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/foo"), True) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), False) - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/test"), False) - self.assertEqual(document.generate_source_filename(), - "foo/foo-0000001.pdf.gpg") - - def test_delete_all_empty_subdirectories(self): - # Create our working directory - tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) - os.makedirs(tmp) - self.add_to_deletion_list(tmp) - - os.makedirs(os.path.join(tmp, "empty")) - os.makedirs(os.path.join(tmp, "empty", "subdirectory")) - - os.makedirs(os.path.join(tmp, "notempty")) - Path(os.path.join(tmp, "notempty", "file")).touch() - - Document.delete_all_empty_subdirectories(tmp) - - self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) - self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False) - self.assertEqual(os.path.isfile( - os.path.join(tmp, "notempty", "file")), True) + self.assertEqual(generate_filename(document), "0000001.pdf") def test_try_delete_empty_directories(self): # Create our working directory - tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) + tmp = os.path.join(settings.ORIGINALS_DIR, "test_delete_empty") os.makedirs(tmp) self.add_to_deletion_list(tmp) @@ -493,67 +319,10 @@ class TestDate(TestCase): Path(os.path.join(tmp, "notempty", "file")).touch() os.makedirs(os.path.join(tmp, "notempty", "empty")) - Document.try_delete_empty_directories( + delete_empty_directories( os.path.join(tmp, "notempty", "empty")) self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) self.assertEqual(os.path.isfile( os.path.join(tmp, "notempty", "file")), True) self.assertEqual(os.path.isdir( os.path.join(tmp, "notempty", "empty")), False) - - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + - "{correspondent}") - def test_document_accidentally_deleted(self): - document = Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - # Test source_path - self.assertEqual(document.source_path, settings.MEDIA_ROOT + - "/documents/originals/none/none-0000001.pdf") - - # Delete the document "illegaly" - os.remove(settings.MEDIA_ROOT + "/documents/originals/" + - "none/none-0000001.pdf") - - # Set new correspondent and expect document to be saved properly - document.correspondent = Correspondent.objects.get_or_create( - name="foo")[0] - document.save() - - # Check proper handling of files - self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + - "/documents/originals/none"), True) - self.assertEqual(document.source_filename, - "none/none-0000001.pdf") - - @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + - "{correspondent}") - def test_set_filename(self): - document = 
Document() - document.file_type = "pdf" - document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.save() - - # Ensure that filename is properly generated - tmp = document.source_filename - self.assertEqual(document.generate_source_filename(), - "none/none-0000001.pdf") - document.create_source_directory() - Path(document.source_path).touch() - - # Set existing filename - document.set_filename(tmp) - self.assertEqual(document.source_filename, "none/none-0000001.pdf") - - # Set non-existing filename - document.set_filename("doesnotexist") - self.assertEqual(document.source_filename, "none/none-0000001.pdf") From 312b0034bd21cafbfbb61d5c45453751d0478b33 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 11 Nov 2020 14:38:41 +0100 Subject: [PATCH 25/26] test database errors. --- src/documents/tests/test_file_handling.py | 50 +++++++++++++++++++---- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index e228acabb..18fd327b1 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -1,20 +1,16 @@ -import datetime import os import shutil -from unittest import mock from uuid import uuid4 from pathlib import Path -from shutil import rmtree -from dateutil import tz from django.test import TestCase, override_settings -from django.utils.text import slugify - from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories -from ..models import Tag, Document, Correspondent +from ..models import Document, Correspondent from django.conf import settings +from ..signals.handlers import update_filename_and_move_files + class TestDate(TestCase): deletion_list = [] @@ -123,6 +119,46 @@ class TestDate(TestCase): os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + + "{correspondent}") + def test_file_renaming_database_error(self): + + document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") + + document = Document() + document.file_type = "pdf" + document.checksum = "BBBBB" + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + document.save() + + # Ensure that filename is properly generated + document.filename = generate_filename(document) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) + create_source_path_directory(document.source_path) + Path(document.source_path).touch() + + # Test source_path + self.assertTrue(os.path.isfile(document.source_path)) + + # Set a correspondent and save the document + document.correspondent = Correspondent.objects.get_or_create( + name="test")[0] + + # This will cause save() to fail. + document.checksum = document1.checksum + + # Assume saving the document initially works, this gets called. + # After renaming, an error occurs, and filename is not saved: + # document should still be available at document.filename. 
+ update_filename_and_move_files(None, document) + + # Check proper handling of files + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + + "originals/none/none-{:07d}.pdf".format(document.pk)), True) + self.assertEqual(document.filename, + "none/none-{:07d}.pdf".format(document.pk)) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + "{correspondent}") From ee6942989843ad71311ec968de7be68b3ee2aea4 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 11 Nov 2020 15:58:29 +0100 Subject: [PATCH 26/26] show the filename in the admin. --- src/documents/admin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/documents/admin.py b/src/documents/admin.py index 6ac949a45..0f63253ce 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -32,7 +32,7 @@ class TagAdmin(admin.ModelAdmin): list_filter = ("colour", "matching_algorithm") list_editable = ("colour", "match", "matching_algorithm") - readonly_fields = ("slug",) + readonly_fields = ("slug", ) class DocumentTypeAdmin(admin.ModelAdmin): @@ -51,9 +51,9 @@ class DocumentTypeAdmin(admin.ModelAdmin): class DocumentAdmin(admin.ModelAdmin): search_fields = ("correspondent__name", "title", "content", "tags__name") - readonly_fields = ("added", "file_type", "storage_type",) + readonly_fields = ("added", "file_type", "storage_type", "filename") list_display = ("title", "created", "added", "correspondent", - "tags_", "archive_serial_number", "document_type") + "tags_", "archive_serial_number", "document_type", "filename") list_filter = ( "document_type", "tags",
Title