code style fixes

This commit is contained in:
Jonas Winkler 2020-11-12 21:09:45 +01:00
parent 9c4cf5d7bd
commit 2e04ba1c04
31 changed files with 110 additions and 149 deletions

View File

@ -1,5 +1,4 @@
from django.contrib import admin from django.contrib import admin
from django.contrib.auth.models import Group, User
from django.utils.html import format_html, format_html_join from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
@ -52,8 +51,16 @@ class DocumentAdmin(admin.ModelAdmin):
search_fields = ("correspondent__name", "title", "content", "tags__name") search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added", "file_type", "storage_type", "filename") readonly_fields = ("added", "file_type", "storage_type", "filename")
list_display = ("title", "created", "added", "correspondent", list_display = (
"tags_", "archive_serial_number", "document_type", "filename") "title",
"created",
"added",
"correspondent",
"tags_",
"archive_serial_number",
"document_type",
"filename"
)
list_filter = ( list_filter = (
"document_type", "document_type",
"tags", "tags",

View File

@ -1,5 +1,4 @@
from django.apps import AppConfig from django.apps import AppConfig
from django.db.models.signals import post_delete
class DocumentsConfig(AppConfig): class DocumentsConfig(AppConfig):

View File

@ -3,7 +3,6 @@ import logging
import os import os
import pickle import pickle
import re import re
import time
from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPClassifier
@ -64,7 +63,7 @@ class DocumentClassifier(object):
def save_classifier(self): def save_classifier(self):
with open(settings.MODEL_FILE, "wb") as f: with open(settings.MODEL_FILE, "wb") as f:
pickle.dump(self.FORMAT_VERSION, f) # Version pickle.dump(self.FORMAT_VERSION, f)
pickle.dump(self.data_hash, f) pickle.dump(self.data_hash, f)
pickle.dump(self.data_vectorizer, f) pickle.dump(self.data_vectorizer, f)
@ -89,15 +88,13 @@ class DocumentClassifier(object):
data.append(preprocessed_content) data.append(preprocessed_content)
y = -1 y = -1
if doc.document_type: if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
y = doc.document_type.pk y = doc.document_type.pk
m.update(y.to_bytes(4, 'little', signed=True)) m.update(y.to_bytes(4, 'little', signed=True))
labels_document_type.append(y) labels_document_type.append(y)
y = -1 y = -1
if doc.correspondent: if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
y = doc.correspondent.pk y = doc.correspondent.pk
m.update(y.to_bytes(4, 'little', signed=True)) m.update(y.to_bytes(4, 'little', signed=True))
labels_correspondent.append(y) labels_correspondent.append(y)

View File

@ -1,5 +1,4 @@
import os import os
from datetime import datetime from datetime import datetime
from time import mktime from time import mktime
@ -22,7 +21,10 @@ class UploadForm(forms.Form):
def get_filename(self, i=None): def get_filename(self, i=None):
return os.path.join( return os.path.join(
settings.CONSUMPTION_DIR, settings.CONSUMPTION_DIR,
"{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name "{}_{}".format(
str(i),
self.cleaned_data.get("document").name
) if i else self.cleaned_data.get("document").name
) )
def save(self): def save(self):

View File

@ -1,8 +1,6 @@
import logging import logging
from contextlib import contextmanager from contextlib import contextmanager
from django.db import models
from django.dispatch import receiver
from whoosh import highlight from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text from whoosh.highlight import Formatter, get_text
@ -10,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents.models import Document
from paperless import settings from paperless import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@ -5,12 +5,11 @@ import os
import re import re
import time import time
import uuid import uuid
from base64 import b64decode from base64 import b64decode
from email import policy from email import policy
from email.parser import BytesParser from email.parser import BytesParser
from dateutil import parser
from dateutil import parser
from django.conf import settings from django.conf import settings
from .models import Correspondent from .models import Correspondent

View File

@ -3,9 +3,8 @@ import os
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from documents.consumer import Consumer from documents.consumer import Consumer

View File

@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from ...mixins import Renderable from ...mixins import Renderable
from ...tasks import train_classifier from ...tasks import train_classifier

View File

@ -1,16 +1,15 @@
import json import json
import os import os
import time
import shutil import shutil
import time
from django.core.management.base import BaseCommand, CommandError
from django.core import serializers from django.core import serializers
from django.core.management.base import BaseCommand, CommandError
from documents.models import Document, Correspondent, Tag, DocumentType from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG
from ...mixins import Renderable
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...mixins import Renderable
class Command(Renderable, BaseCommand): class Command(Renderable, BaseCommand):

View File

@ -3,17 +3,15 @@ import os
import shutil import shutil
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from documents.models import Document from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG from paperless.db import GnuPG
from ...file_handling import generate_filename, create_source_path_directory from ...file_handling import generate_filename, create_source_path_directory
from ...mixins import Renderable from ...mixins import Renderable
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
class Command(Renderable, BaseCommand): class Command(Renderable, BaseCommand):

View File

@ -8,5 +8,5 @@ class Command(BaseCommand):
help = "A quick & dirty way to see what's in the logs" help = "A quick & dirty way to see what's in the logs"
def handle(self, *args, **options): def handle(self, *args, **options):
for l in Log.objects.order_by("pk"): for log in Log.objects.order_by("pk"):
print(l) print(log)

View File

@ -1,7 +1,6 @@
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from documents.models import Document, Tag from documents.models import Document
from ...mixins import Renderable from ...mixins import Renderable

View File

@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
correspondents = Correspondent.objects.all() correspondents = Correspondent.objects.all()
predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None
matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
return matched_correspondents
def match_document_types(document_content, classifier): def match_document_types(document_content, classifier):
document_types = DocumentType.objects.all() document_types = DocumentType.objects.all()
predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None
matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
return matched_document_types
def match_tags(document_content, classifier): def match_tags(document_content, classifier):

View File

@ -22,11 +22,13 @@ from django.utils import timezone
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.signals import document_consumer_declaration from documents.signals import document_consumer_declaration
# TODO: isn't there a date parsing library for this?
DATE_REGEX = re.compile( DATE_REGEX = re.compile(
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501 r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501 r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501 r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' # NOQA: E501
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' + r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))' r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
) )
@ -59,7 +61,7 @@ def get_parser_class(doc):
options, key=lambda _: _["weight"], reverse=True)[0]["parser"] options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
environment = os.environ.copy() environment = os.environ.copy()
if settings.CONVERT_MEMORY_LIMIT: if settings.CONVERT_MEMORY_LIMIT:
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
args += ['-trim'] if trim else [] args += ['-trim'] if trim else []
args += ['-type', str(type)] if type else [] args += ['-type', str(type)] if type else []
args += ['-depth', str(depth)] if depth else [] args += ['-depth', str(depth)] if depth else []
args += [input, output] args += [input_file, output_file]
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group}) logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})

View File

@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):
class LogSerializer(serializers.ModelSerializer): class LogSerializer(serializers.ModelSerializer):
class Meta: class Meta:
model = Log model = Log
fields = ( fields = (

View File

@ -1,7 +1,6 @@
import logging import logging
from django.conf import settings from django.conf import settings
from django_q.tasks import async_task, result
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents import index from documents import index

View File

@ -2,9 +2,9 @@ import unittest
from django.test import TestCase from django.test import TestCase
from .factories import DocumentFactory
from ..checks import changed_password_check from ..checks import changed_password_check
from ..models import Document from ..models import Document
from .factories import DocumentFactory
class ChecksTestCase(TestCase): class ChecksTestCase(TestCase):

View File

@ -1,14 +1,13 @@
import os import os
import shutil import shutil
from uuid import uuid4
from pathlib import Path from pathlib import Path
from uuid import uuid4
from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent from ..models import Document, Correspondent
from django.conf import settings
from ..signals.handlers import update_filename_and_move_files from ..signals.handlers import update_filename_and_move_files
@ -68,24 +67,18 @@ class TestDate(TestCase):
# test that creating dirs for the source_path creates the correct directory # test that creating dirs for the source_path creates the correct directory
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
"/none"), True)
# Set a correspondent and save the document # Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create( document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
name="test")[0]
document.save() document.save()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
"/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
"/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
"/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
"{correspondent}")
def test_file_renaming_missing_permissions(self): def test_file_renaming_missing_permissions(self):
document = Document() document = Document()
document.file_type = "pdf" document.file_type = "pdf"
@ -100,27 +93,22 @@ class TestDate(TestCase):
Path(document.source_path).touch() Path(document.source_path).touch()
# Test source_path # Test source_path
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
"/none/none-{:07d}.pdf".format(document.pk))
# Make the folder read- and execute-only (no writing and no renaming) # Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
# Set a correspondent and save the document # Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create( document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
name="test")[0]
document.save() document.save()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
"originals/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
"{correspondent}")
def test_file_renaming_database_error(self): def test_file_renaming_database_error(self):
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
@ -155,13 +143,10 @@ class TestDate(TestCase):
# Check proper handling of files # Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path)) self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
"originals/none/none-{:07d}.pdf".format(document.pk)), True) self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
"{correspondent}")
def test_document_delete(self): def test_document_delete(self):
document = Document() document = Document()
document.file_type = "pdf" document.file_type = "pdf"
@ -179,13 +164,10 @@ class TestDate(TestCase):
# Ensure file deletion after delete # Ensure file deletion after delete
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
"/none/none-{:07d}.pdf".format(pk)), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
"/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
"{correspondent}")
def test_document_delete_nofile(self): def test_document_delete_nofile(self):
document = Document() document = Document()
document.file_type = "pdf" document.file_type = "pdf"
@ -194,8 +176,7 @@ class TestDate(TestCase):
document.delete() document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
"{correspondent}")
def test_directory_not_empty(self): def test_directory_not_empty(self):
document = Document() document = Document()
document.file_type = "pdf" document.file_type = "pdf"
@ -214,18 +195,14 @@ class TestDate(TestCase):
Path(important_file).touch() Path(important_file).touch()
# Set a correspondent and save the document # Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create( document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
name="test")[0]
document.save() document.save()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
"/documents/originals/test"), True) self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertTrue(os.path.isfile(important_file)) self.assertTrue(os.path.isfile(important_file))
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self): def test_tags_with_underscore(self):
document = Document() document = Document()
@ -304,9 +281,7 @@ class TestDate(TestCase):
self.assertEqual(generate_filename(document), self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk)) "none-{:07d}.pdf".format(document.pk))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self): def test_nested_directory_cleanup(self):
document = Document() document = Document()
document.file_type = "pdf" document.file_type = "pdf"
@ -315,25 +290,19 @@ class TestDate(TestCase):
# Ensure that filename is properly generated # Ensure that filename is properly generated
document.filename = generate_filename(document) document.filename = generate_filename(document)
self.assertEqual(document.filename, self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
"none/none/none-{:07d}.pdf".format(document.pk))
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
Path(document.source_path).touch() Path(document.source_path).touch()
# Check proper handling of files # Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)
"/none/none"), True)
pk = document.pk pk = document.pk
document.delete() document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
"/none/none/none-{:07d}.pdf".format(pk)), self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
"/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
"/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None) @override_settings(PAPERLESS_FILENAME_FORMAT=None)
@ -355,8 +324,7 @@ class TestDate(TestCase):
Path(os.path.join(tmp, "notempty", "file")).touch() Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty")) os.makedirs(os.path.join(tmp, "notempty", "empty"))
delete_empty_directories( delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
os.path.join(tmp, "notempty", "empty"))
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile( self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True) os.path.join(tmp, "notempty", "file")), True)

View File

@ -1,9 +1,8 @@
from django.core.management.base import CommandError from django.core.management.base import CommandError
from django.test import TestCase from django.test import TestCase
from ..management.commands.document_importer import Command
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
from ..management.commands.document_importer import Command
class TestImporter(TestCase): class TestImporter(TestCase):

View File

@ -1,6 +1,5 @@
import logging import logging
import uuid import uuid
from unittest import mock from unittest import mock
from django.test import TestCase from django.test import TestCase

View File

@ -1,10 +1,9 @@
import base64 import base64
import os import os
import magic
from hashlib import md5 from hashlib import md5
from unittest import mock from unittest import mock
import magic
from django.conf import settings from django.conf import settings
from django.test import TestCase from django.test import TestCase

View File

@ -1,7 +1,7 @@
from django.test import TestCase from django.test import TestCase
from ..models import Document, Correspondent
from .factories import DocumentFactory, CorrespondentFactory from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent
class CorrespondentTestCase(TestCase): class CorrespondentTestCase(TestCase):

View File

@ -4,11 +4,6 @@ from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView
from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework.filters import OrderingFilter, SearchFilter from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import ( from rest_framework.mixins import (
DestroyModelMixin, DestroyModelMixin,
@ -17,12 +12,17 @@ from rest_framework.mixins import (
UpdateModelMixin UpdateModelMixin
) )
from rest_framework.permissions import IsAuthenticated from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import ( from rest_framework.viewsets import (
GenericViewSet, GenericViewSet,
ModelViewSet, ModelViewSet,
ReadOnlyModelViewSet ReadOnlyModelViewSet
) )
import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .filters import ( from .filters import (
CorrespondentFilterSet, CorrespondentFilterSet,
DocumentFilterSet, DocumentFilterSet,
@ -30,8 +30,6 @@ from .filters import (
DocumentTypeFilterSet, DocumentTypeFilterSet,
LogFilterSet LogFilterSet
) )
import documents.index as index
from .forms import UploadForm from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import ( from .serialisers import (

View File

@ -11,6 +11,8 @@ writeable_hint = (
"Set the permissions of {} to be writeable by the user running the " "Set the permissions of {} to be writeable by the user running the "
"Paperless services" "Paperless services"
) )
def path_check(env_var): def path_check(env_var):
messages = [] messages = []
directory = os.getenv(env_var) directory = os.getenv(env_var)
@ -27,6 +29,7 @@ def path_check(env_var):
)) ))
return messages return messages
@register() @register()
def paths_check(app_configs, **kwargs): def paths_check(app_configs, **kwargs):
""" """

View File

@ -25,6 +25,7 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
# Tesseract process to one thread. # Tesseract process to one thread.
os.environ['OMP_THREAD_LIMIT'] = "1" os.environ['OMP_THREAD_LIMIT'] = "1"
def __get_boolean(key, default="NO"): def __get_boolean(key, default="NO"):
""" """
Return a boolean value based on whatever the user has supplied in the Return a boolean value based on whatever the user has supplied in the
@ -32,9 +33,11 @@ def __get_boolean(key, default="NO"):
""" """
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
# NEVER RUN WITH DEBUG IN PRODUCTION. # NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
############################################################################### ###############################################################################
# Directories # # Directories #
############################################################################### ###############################################################################

View File

@ -6,7 +6,6 @@ from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView from django.views.generic import RedirectView
from rest_framework.routers import DefaultRouter from rest_framework.routers import DefaultRouter
from paperless.views import FaviconView
from documents.views import ( from documents.views import (
CorrespondentViewSet, CorrespondentViewSet,
DocumentViewSet, DocumentViewSet,
@ -18,6 +17,7 @@ from documents.views import (
SearchAutoCompleteView, SearchAutoCompleteView,
StatisticsView StatisticsView
) )
from paperless.views import FaviconView
api_router = DefaultRouter() api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet) api_router.register(r"correspondents", CorrespondentViewSet)

View File

@ -5,15 +5,14 @@ import subprocess
from multiprocessing.pool import Pool from multiprocessing.pool import Pool
import langdetect import langdetect
import pdftotext
import pyocr import pyocr
from django.conf import settings
from PIL import Image from PIL import Image
from django.conf import settings
from pyocr import PyocrException from pyocr import PyocrException
import pdftotext
from documents.parsers import DocumentParser, ParseError, run_unpaper, \ from documents.parsers import DocumentParser, ParseError, run_unpaper, \
run_convert run_convert
from .languages import ISO639 from .languages import ISO639
@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser):
alpha="remove", alpha="remove",
strip=True, strip=True,
trim=True, trim=True,
input="{}[0]".format(self.document_path), input_file="{}[0]".format(self.document_path),
output=out_path, output_file=out_path,
logging_group=self.logging_group) logging_group=self.logging_group)
except ParseError: except ParseError:
# if convert fails, fall back to extracting # if convert fails, fall back to extracting
@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser):
alpha="remove", alpha="remove",
strip=True, strip=True,
trim=True, trim=True,
input=gs_out_path, input_file=gs_out_path,
output=out_path, output_file=out_path,
logging_group=self.logging_group) logging_group=self.logging_group)
return out_path return out_path
@ -139,8 +138,8 @@ class RasterisedDocumentParser(DocumentParser):
run_convert(density=settings.CONVERT_DENSITY, run_convert(density=settings.CONVERT_DENSITY,
depth="8", depth="8",
type="grayscale", type="grayscale",
input=self.document_path, input_file=self.document_path,
output=pnm, output_file=pnm,
logging_group=self.logging_group) logging_group=self.logging_group)
# Get a list of converted images # Get a list of converted images
@ -189,7 +188,6 @@ class RasterisedDocumentParser(DocumentParser):
return [sample_page] return [sample_page]
def strip_excess_whitespace(text): def strip_excess_whitespace(text):
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
no_leading_whitespace = re.sub( no_leading_whitespace = re.sub(

View File

@ -5,10 +5,10 @@ from unittest import mock
from uuid import uuid4 from uuid import uuid4
from dateutil import tz from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from ..parsers import RasterisedDocumentParser from ..parsers import RasterisedDocumentParser
from django.conf import settings
class TestDate(TestCase): class TestDate(TestCase):

View File

@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser):
def read_text(): def read_text():
with open(self.document_path, 'r') as src: with open(self.document_path, 'r') as src:
lines = [l.strip() for l in src.readlines()] lines = [line.strip() for line in src.readlines()]
text = "\n".join([l for l in lines[:n_lines]]) text = "\n".join([line for line in lines[:n_lines]])
return text.replace('"', "'") return text.replace('"', "'")
def create_txlayer(): def create_txlayer():

View File

@ -1,6 +1,6 @@
[pycodestyle] [pycodestyle]
exclude = migrations, paperless/settings.py, .tox exclude = migrations, paperless/settings.py, .tox
ignore = E501
[tool:pytest] [tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings DJANGO_SETTINGS_MODULE=paperless.settings