mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-26 03:36:08 -05:00 
			
		
		
		
	code style fixes
This commit is contained in:
		| @@ -1,5 +1,4 @@ | ||||
| from django.contrib import admin | ||||
| from django.contrib.auth.models import Group, User | ||||
| from django.utils.html import format_html, format_html_join | ||||
| from django.utils.safestring import mark_safe | ||||
| from whoosh.writing import AsyncWriter | ||||
| @@ -52,8 +51,16 @@ class DocumentAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     search_fields = ("correspondent__name", "title", "content", "tags__name") | ||||
|     readonly_fields = ("added", "file_type", "storage_type", "filename") | ||||
|     list_display = ("title", "created", "added", "correspondent", | ||||
|                     "tags_", "archive_serial_number", "document_type", "filename") | ||||
|     list_display = ( | ||||
|         "title", | ||||
|         "created", | ||||
|         "added", | ||||
|         "correspondent", | ||||
|         "tags_", | ||||
|         "archive_serial_number", | ||||
|         "document_type", | ||||
|         "filename" | ||||
|     ) | ||||
|     list_filter = ( | ||||
|         "document_type", | ||||
|         "tags", | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from django.apps import AppConfig | ||||
| from django.db.models.signals import post_delete | ||||
|  | ||||
|  | ||||
| class DocumentsConfig(AppConfig): | ||||
|   | ||||
| @@ -3,7 +3,6 @@ import logging | ||||
| import os | ||||
| import pickle | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from sklearn.feature_extraction.text import CountVectorizer | ||||
| from sklearn.neural_network import MLPClassifier | ||||
| @@ -64,7 +63,7 @@ class DocumentClassifier(object): | ||||
|  | ||||
|     def save_classifier(self): | ||||
|         with open(settings.MODEL_FILE, "wb") as f: | ||||
|             pickle.dump(self.FORMAT_VERSION, f) # Version | ||||
|             pickle.dump(self.FORMAT_VERSION, f) | ||||
|             pickle.dump(self.data_hash, f) | ||||
|             pickle.dump(self.data_vectorizer, f) | ||||
|  | ||||
| @@ -89,15 +88,13 @@ class DocumentClassifier(object): | ||||
|             data.append(preprocessed_content) | ||||
|  | ||||
|             y = -1 | ||||
|             if doc.document_type: | ||||
|                 if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||
|             if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||
|                 y = doc.document_type.pk | ||||
|             m.update(y.to_bytes(4, 'little', signed=True)) | ||||
|             labels_document_type.append(y) | ||||
|  | ||||
|             y = -1 | ||||
|             if doc.correspondent: | ||||
|                 if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||
|             if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||
|                 y = doc.correspondent.pk | ||||
|             m.update(y.to_bytes(4, 'little', signed=True)) | ||||
|             labels_correspondent.append(y) | ||||
| @@ -137,7 +134,7 @@ class DocumentClassifier(object): | ||||
|         logging.getLogger(__name__).debug("Vectorizing data...") | ||||
|         self.data_vectorizer = CountVectorizer( | ||||
|             analyzer="word", | ||||
|             ngram_range=(1,2), | ||||
|             ngram_range=(1, 2), | ||||
|             min_df=0.01 | ||||
|         ) | ||||
|         data_vectorized = self.data_vectorizer.fit_transform(data) | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import os | ||||
|  | ||||
| from datetime import datetime | ||||
| from time import mktime | ||||
|  | ||||
| @@ -22,7 +21,10 @@ class UploadForm(forms.Form): | ||||
|     def get_filename(self, i=None): | ||||
|         return os.path.join( | ||||
|             settings.CONSUMPTION_DIR, | ||||
|             "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name | ||||
|             "{}_{}".format( | ||||
|                 str(i), | ||||
|                 self.cleaned_data.get("document").name | ||||
|             ) if i else self.cleaned_data.get("document").name | ||||
|         ) | ||||
|  | ||||
|     def save(self): | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| import logging | ||||
| from contextlib import contextmanager | ||||
|  | ||||
| from django.db import models | ||||
| from django.dispatch import receiver | ||||
| from whoosh import highlight | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC | ||||
| from whoosh.highlight import Formatter, get_text | ||||
| @@ -10,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir | ||||
| from whoosh.qparser import MultifieldParser | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents.models import Document | ||||
| from paperless import settings | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -5,12 +5,11 @@ import os | ||||
| import re | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| from base64 import b64decode | ||||
| from email import policy | ||||
| from email.parser import BytesParser | ||||
| from dateutil import parser | ||||
|  | ||||
| from dateutil import parser | ||||
| from django.conf import settings | ||||
|  | ||||
| from .models import Correspondent | ||||
|   | ||||
| @@ -3,9 +3,8 @@ import os | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from watchdog.observers import Observer | ||||
| from watchdog.events import FileSystemEventHandler | ||||
| from watchdog.observers import Observer | ||||
|  | ||||
| from documents.consumer import Consumer | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from ...mixins import Renderable | ||||
| from ...tasks import train_classifier | ||||
|  | ||||
|   | ||||
| @@ -1,16 +1,15 @@ | ||||
| import json | ||||
| import os | ||||
| import time | ||||
| import shutil | ||||
| import time | ||||
|  | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.core import serializers | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
|  | ||||
| from documents.models import Document, Correspondent, Tag, DocumentType | ||||
| from paperless.db import GnuPG | ||||
|  | ||||
| from ...mixins import Renderable | ||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | ||||
| from paperless.db import GnuPG | ||||
| from ...mixins import Renderable | ||||
|  | ||||
|  | ||||
| class Command(Renderable, BaseCommand): | ||||
|   | ||||
| @@ -3,17 +3,15 @@ import os | ||||
| import shutil | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django.core.management import call_command | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
|  | ||||
| from documents.models import Document | ||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | ||||
| from paperless.db import GnuPG | ||||
| from ...file_handling import generate_filename, create_source_path_directory | ||||
|  | ||||
| from ...mixins import Renderable | ||||
|  | ||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | ||||
|  | ||||
|  | ||||
| class Command(Renderable, BaseCommand): | ||||
|  | ||||
|   | ||||
| @@ -8,5 +8,5 @@ class Command(BaseCommand): | ||||
|     help = "A quick & dirty way to see what's in the logs" | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|         for l in Log.objects.order_by("pk"): | ||||
|             print(l) | ||||
|         for log in Log.objects.order_by("pk"): | ||||
|             print(log) | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| from django.core.management.base import BaseCommand | ||||
|  | ||||
| from documents.models import Document, Tag | ||||
|  | ||||
| from documents.models import Document | ||||
| from ...mixins import Renderable | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier): | ||||
|     correspondents = Correspondent.objects.all() | ||||
|     predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None | ||||
|  | ||||
|     matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] | ||||
|     return matched_correspondents | ||||
|     return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] | ||||
|  | ||||
|  | ||||
| def match_document_types(document_content, classifier): | ||||
|     document_types = DocumentType.objects.all() | ||||
|     predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None | ||||
|  | ||||
|     matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] | ||||
|     return matched_document_types | ||||
|     return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] | ||||
|  | ||||
|  | ||||
| def match_tags(document_content, classifier): | ||||
|   | ||||
| @@ -22,11 +22,13 @@ from django.utils import timezone | ||||
| # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | ||||
| from documents.signals import document_consumer_declaration | ||||
|  | ||||
| # TODO: isn't there a date parsing library for this? | ||||
|  | ||||
| DATE_REGEX = re.compile( | ||||
|     r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' + | ||||
|     r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|'   # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|'   # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|'   # NOQA: E501 | ||||
|     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' | ||||
|     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))' | ||||
| ) | ||||
|  | ||||
| @@ -43,7 +45,7 @@ def get_parser_class(doc): | ||||
|     for response in document_consumer_declaration.send(None): | ||||
|         parsers.append(response[1]) | ||||
|  | ||||
|     #TODO: add a check that checks parser availability. | ||||
|     # TODO: add a check that checks parser availability. | ||||
|  | ||||
|     options = [] | ||||
|     for parser in parsers: | ||||
| @@ -59,7 +61,7 @@ def get_parser_class(doc): | ||||
|         options, key=lambda _: _["weight"], reverse=True)[0]["parser"] | ||||
|  | ||||
|  | ||||
| def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): | ||||
| def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): | ||||
|     environment = os.environ.copy() | ||||
|     if settings.CONVERT_MEMORY_LIMIT: | ||||
|         environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT | ||||
| @@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False | ||||
|     args += ['-trim'] if trim else [] | ||||
|     args += ['-type', str(type)] if type else [] | ||||
|     args += ['-depth', str(depth)] if depth else [] | ||||
|     args += [input, output] | ||||
|     args += [input_file, output_file] | ||||
|  | ||||
|     logger.debug("Execute: " + " ".join(args), extra={'group': logging_group}) | ||||
|  | ||||
|   | ||||
| @@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer): | ||||
|  | ||||
| class LogSerializer(serializers.ModelSerializer): | ||||
|  | ||||
|  | ||||
|     class Meta: | ||||
|         model = Log | ||||
|         fields = ( | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| import logging | ||||
|  | ||||
| from django.conf import settings | ||||
| from django_q.tasks import async_task, result | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents import index | ||||
|   | ||||
| @@ -2,9 +2,9 @@ import unittest | ||||
|  | ||||
| from django.test import TestCase | ||||
|  | ||||
| from .factories import DocumentFactory | ||||
| from ..checks import changed_password_check | ||||
| from ..models import Document | ||||
| from .factories import DocumentFactory | ||||
|  | ||||
|  | ||||
| class ChecksTestCase(TestCase): | ||||
|   | ||||
| @@ -1,14 +1,13 @@ | ||||
| import os | ||||
| import shutil | ||||
| from uuid import uuid4 | ||||
| from pathlib import Path | ||||
| from uuid import uuid4 | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories | ||||
| from ..models import Document, Correspondent | ||||
| from django.conf import settings | ||||
|  | ||||
| from ..signals.handlers import update_filename_and_move_files | ||||
|  | ||||
|  | ||||
| @@ -68,24 +67,18 @@ class TestDate(TestCase): | ||||
|         # test that creating dirs for the source_path creates the correct directory | ||||
|         create_source_path_directory(document.source_path) | ||||
|         Path(document.source_path).touch() | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True) | ||||
|  | ||||
|         # Set a correspondent and save the document | ||||
|         document.correspondent = Correspondent.objects.get_or_create( | ||||
|                 name="test")[0] | ||||
|         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||
|         document.save() | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/test"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none"), False) | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + | ||||
|                          "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_file_renaming_missing_permissions(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
| @@ -100,27 +93,22 @@ class TestDate(TestCase): | ||||
|         Path(document.source_path).touch() | ||||
|  | ||||
|         # Test source_path | ||||
|         self.assertEqual(document.source_path, settings.ORIGINALS_DIR + | ||||
|                          "/none/none-{:07d}.pdf".format(document.pk)) | ||||
|         self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) | ||||
|  | ||||
|         # Make the folder read- and execute-only (no writing and no renaming) | ||||
|         os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) | ||||
|  | ||||
|         # Set a correspondent and save the document | ||||
|         document.correspondent = Correspondent.objects.get_or_create( | ||||
|                 name="test")[0] | ||||
|         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||
|         document.save() | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + | ||||
|                          "originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||
|         self.assertEqual(document.filename, | ||||
|                          "none/none-{:07d}.pdf".format(document.pk)) | ||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||
|         self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) | ||||
|  | ||||
|         os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_file_renaming_database_error(self): | ||||
|  | ||||
|         document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") | ||||
| @@ -155,13 +143,10 @@ class TestDate(TestCase): | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertTrue(os.path.isfile(document.source_path)) | ||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + | ||||
|                                         "originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||
|         self.assertEqual(document.filename, | ||||
|                          "none/none-{:07d}.pdf".format(document.pk)) | ||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||
|         self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_document_delete(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
| @@ -179,13 +164,10 @@ class TestDate(TestCase): | ||||
|         # Ensure file deletion after delete | ||||
|         pk = document.pk | ||||
|         document.delete() | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + | ||||
|                          "/none/none-{:07d}.pdf".format(pk)), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none"), False) | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_document_delete_nofile(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
| @@ -194,8 +176,7 @@ class TestDate(TestCase): | ||||
|  | ||||
|         document.delete() | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||
|     def test_directory_not_empty(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
| @@ -214,18 +195,14 @@ class TestDate(TestCase): | ||||
|         Path(important_file).touch() | ||||
|  | ||||
|         # Set a correspondent and save the document | ||||
|         document.correspondent = Correspondent.objects.get_or_create( | ||||
|                 name="test")[0] | ||||
|         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||
|         document.save() | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + | ||||
|                          "/documents/originals/test"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + | ||||
|                          "/documents/originals/none"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True) | ||||
|         self.assertTrue(os.path.isfile(important_file)) | ||||
|  | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||
|     def test_tags_with_underscore(self): | ||||
|         document = Document() | ||||
| @@ -304,9 +281,7 @@ class TestDate(TestCase): | ||||
|         self.assertEqual(generate_filename(document), | ||||
|                          "none-{:07d}.pdf".format(document.pk)) | ||||
|  | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + | ||||
|                        "{correspondent}/{correspondent}") | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") | ||||
|     def test_nested_directory_cleanup(self): | ||||
|         document = Document() | ||||
|         document.file_type = "pdf" | ||||
| @@ -315,25 +290,19 @@ class TestDate(TestCase): | ||||
|  | ||||
|         # Ensure that filename is properly generated | ||||
|         document.filename = generate_filename(document) | ||||
|         self.assertEqual(document.filename, | ||||
|                          "none/none/none-{:07d}.pdf".format(document.pk)) | ||||
|         self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) | ||||
|         create_source_path_directory(document.source_path) | ||||
|         Path(document.source_path).touch() | ||||
|  | ||||
|         # Check proper handling of files | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none/none"), True) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True) | ||||
|  | ||||
|         pk = document.pk | ||||
|         document.delete() | ||||
|  | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + | ||||
|                          "/none/none/none-{:07d}.pdf".format(pk)), | ||||
|                          False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none/none"), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + | ||||
|                          "/none"), False) | ||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) | ||||
|  | ||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT=None) | ||||
| @@ -355,8 +324,7 @@ class TestDate(TestCase): | ||||
|         Path(os.path.join(tmp, "notempty", "file")).touch() | ||||
|         os.makedirs(os.path.join(tmp, "notempty", "empty")) | ||||
|  | ||||
|         delete_empty_directories( | ||||
|                 os.path.join(tmp, "notempty", "empty")) | ||||
|         delete_empty_directories(os.path.join(tmp, "notempty", "empty")) | ||||
|         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) | ||||
|         self.assertEqual(os.path.isfile( | ||||
|             os.path.join(tmp, "notempty", "file")), True) | ||||
|   | ||||
| @@ -1,9 +1,8 @@ | ||||
| from django.core.management.base import CommandError | ||||
| from django.test import TestCase | ||||
|  | ||||
| from ..management.commands.document_importer import Command | ||||
|  | ||||
| from documents.settings import EXPORTER_FILE_NAME | ||||
| from ..management.commands.document_importer import Command | ||||
|  | ||||
|  | ||||
| class TestImporter(TestCase): | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| import logging | ||||
| import uuid | ||||
|  | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase | ||||
|   | ||||
| @@ -1,10 +1,9 @@ | ||||
| import base64 | ||||
| import os | ||||
| import magic | ||||
|  | ||||
| from hashlib import md5 | ||||
| from unittest import mock | ||||
|  | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from django.test import TestCase | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from django.test import TestCase | ||||
|  | ||||
| from ..models import Document, Correspondent | ||||
| from .factories import DocumentFactory, CorrespondentFactory | ||||
| from ..models import Document, Correspondent | ||||
|  | ||||
|  | ||||
| class CorrespondentTestCase(TestCase): | ||||
|   | ||||
| @@ -4,11 +4,6 @@ from django.views.decorators.cache import cache_control | ||||
| from django.views.generic import TemplateView | ||||
| from django_filters.rest_framework import DjangoFilterBackend | ||||
| from rest_framework.decorators import action | ||||
| from rest_framework.response import Response | ||||
| from rest_framework.views import APIView | ||||
|  | ||||
| from paperless.db import GnuPG | ||||
| from paperless.views import StandardPagination | ||||
| from rest_framework.filters import OrderingFilter, SearchFilter | ||||
| from rest_framework.mixins import ( | ||||
|     DestroyModelMixin, | ||||
| @@ -17,12 +12,17 @@ from rest_framework.mixins import ( | ||||
|     UpdateModelMixin | ||||
| ) | ||||
| from rest_framework.permissions import IsAuthenticated | ||||
| from rest_framework.response import Response | ||||
| from rest_framework.views import APIView | ||||
| from rest_framework.viewsets import ( | ||||
|     GenericViewSet, | ||||
|     ModelViewSet, | ||||
|     ReadOnlyModelViewSet | ||||
| ) | ||||
|  | ||||
| import documents.index as index | ||||
| from paperless.db import GnuPG | ||||
| from paperless.views import StandardPagination | ||||
| from .filters import ( | ||||
|     CorrespondentFilterSet, | ||||
|     DocumentFilterSet, | ||||
| @@ -30,8 +30,6 @@ from .filters import ( | ||||
|     DocumentTypeFilterSet, | ||||
|     LogFilterSet | ||||
| ) | ||||
|  | ||||
| import documents.index as index | ||||
| from .forms import UploadForm | ||||
| from .models import Correspondent, Document, Log, Tag, DocumentType | ||||
| from .serialisers import ( | ||||
| @@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin, | ||||
|         return super(DocumentViewSet, self).destroy(request, *args, **kwargs) | ||||
|  | ||||
|     def file_response(self, pk, disposition): | ||||
|         #TODO: this should not be necessary here. | ||||
|         # TODO: this should not be necessary here. | ||||
|         content_types = { | ||||
|             Document.TYPE_PDF: "application/pdf", | ||||
|             Document.TYPE_PNG: "image/png", | ||||
| @@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin, | ||||
|  | ||||
|     @action(methods=['post'], detail=False) | ||||
|     def post_document(self, request, pk=None): | ||||
|         #TODO: is this a good implementation? | ||||
|         # TODO: is this a good implementation? | ||||
|         form = UploadForm(data=request.POST, files=request.FILES) | ||||
|         if form.is_valid(): | ||||
|             form.save() | ||||
|   | ||||
| @@ -11,6 +11,8 @@ writeable_hint = ( | ||||
|     "Set the permissions of {} to be writeable by the user running the " | ||||
|     "Paperless services" | ||||
| ) | ||||
|  | ||||
|  | ||||
| def path_check(env_var): | ||||
|     messages = [] | ||||
|     directory = os.getenv(env_var) | ||||
| @@ -27,6 +29,7 @@ def path_check(env_var): | ||||
|             )) | ||||
|     return messages | ||||
|  | ||||
|  | ||||
| @register() | ||||
| def paths_check(app_configs, **kwargs): | ||||
|     """ | ||||
|   | ||||
| @@ -25,6 +25,7 @@ elif os.path.exists("/usr/local/etc/paperless.conf"): | ||||
| # Tesseract process to one thread. | ||||
| os.environ['OMP_THREAD_LIMIT'] = "1" | ||||
|  | ||||
|  | ||||
| def __get_boolean(key, default="NO"): | ||||
|     """ | ||||
|     Return a boolean value based on whatever the user has supplied in the | ||||
| @@ -32,9 +33,11 @@ def __get_boolean(key, default="NO"): | ||||
|     """ | ||||
|     return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) | ||||
|  | ||||
|  | ||||
| # NEVER RUN WITH DEBUG IN PRODUCTION. | ||||
| DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # Directories                                                                 # | ||||
| ############################################################################### | ||||
|   | ||||
| @@ -6,7 +6,6 @@ from django.views.decorators.csrf import csrf_exempt | ||||
| from django.views.generic import RedirectView | ||||
| from rest_framework.routers import DefaultRouter | ||||
|  | ||||
| from paperless.views import FaviconView | ||||
| from documents.views import ( | ||||
|     CorrespondentViewSet, | ||||
|     DocumentViewSet, | ||||
| @@ -18,6 +17,7 @@ from documents.views import ( | ||||
|     SearchAutoCompleteView, | ||||
|     StatisticsView | ||||
| ) | ||||
| from paperless.views import FaviconView | ||||
|  | ||||
| api_router = DefaultRouter() | ||||
| api_router.register(r"correspondents", CorrespondentViewSet) | ||||
| @@ -30,7 +30,7 @@ api_router.register(r"tags", TagViewSet) | ||||
| urlpatterns = [ | ||||
|  | ||||
|     # API | ||||
|     url(r"^api/auth/",include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")), | ||||
|     url(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")), | ||||
|     url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"), | ||||
|     url(r"^api/search/", SearchView.as_view(), name="search"), | ||||
|     url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"), | ||||
|   | ||||
| @@ -5,15 +5,14 @@ import subprocess | ||||
| from multiprocessing.pool import Pool | ||||
|  | ||||
| import langdetect | ||||
| import pdftotext | ||||
| import pyocr | ||||
| from django.conf import settings | ||||
| from PIL import Image | ||||
| from django.conf import settings | ||||
| from pyocr import PyocrException | ||||
|  | ||||
| import pdftotext | ||||
| from documents.parsers import DocumentParser, ParseError, run_unpaper, \ | ||||
|     run_convert | ||||
|  | ||||
| from .languages import ISO639 | ||||
|  | ||||
|  | ||||
| @@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                         alpha="remove", | ||||
|                         strip=True, | ||||
|                         trim=True, | ||||
|                         input="{}[0]".format(self.document_path), | ||||
|                         output=out_path, | ||||
|                         input_file="{}[0]".format(self.document_path), | ||||
|                         output_file=out_path, | ||||
|                         logging_group=self.logging_group) | ||||
|         except ParseError: | ||||
|             # if convert fails, fall back to extracting | ||||
| @@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                         alpha="remove", | ||||
|                         strip=True, | ||||
|                         trim=True, | ||||
|                         input=gs_out_path, | ||||
|                         output=out_path, | ||||
|                         input_file=gs_out_path, | ||||
|                         output_file=out_path, | ||||
|                         logging_group=self.logging_group) | ||||
|  | ||||
|         return out_path | ||||
| @@ -99,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|         try: | ||||
|  | ||||
|             sample_page_index = int(len(images) / 2) | ||||
|             self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index+1, len(images))) | ||||
|             self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) | ||||
|             sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] | ||||
|             guessed_language = self._guess_language(sample_page_text) | ||||
|  | ||||
| @@ -139,8 +138,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|         run_convert(density=settings.CONVERT_DENSITY, | ||||
|                     depth="8", | ||||
|                     type="grayscale", | ||||
|                     input=self.document_path, | ||||
|                     output=pnm, | ||||
|                     input_file=self.document_path, | ||||
|                     output_file=pnm, | ||||
|                     logging_group=self.logging_group) | ||||
|  | ||||
|         # Get a list of converted images | ||||
| @@ -189,7 +188,6 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|             return [sample_page] | ||||
|  | ||||
|  | ||||
|  | ||||
| def strip_excess_whitespace(text): | ||||
|     collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) | ||||
|     no_leading_whitespace = re.sub( | ||||
|   | ||||
| @@ -5,10 +5,10 @@ from unittest import mock | ||||
| from uuid import uuid4 | ||||
|  | ||||
| from dateutil import tz | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from ..parsers import RasterisedDocumentParser | ||||
| from django.conf import settings | ||||
|  | ||||
|  | ||||
| class TestDate(TestCase): | ||||
|   | ||||
| @@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser): | ||||
|  | ||||
|         def read_text(): | ||||
|             with open(self.document_path, 'r') as src: | ||||
|                 lines = [l.strip() for l in src.readlines()] | ||||
|                 text = "\n".join([l for l in lines[:n_lines]]) | ||||
|                 lines = [line.strip() for line in src.readlines()] | ||||
|                 text = "\n".join([line for line in lines[:n_lines]]) | ||||
|                 return text.replace('"', "'") | ||||
|  | ||||
|         def create_txlayer(): | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| [pycodestyle] | ||||
| exclude = migrations, paperless/settings.py, .tox | ||||
|  | ||||
| ignore = E501 | ||||
|  | ||||
| [tool:pytest] | ||||
| DJANGO_SETTINGS_MODULE=paperless.settings | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler