mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-19 10:19:27 -05:00
code style fixes
This commit is contained in:
parent
9c4cf5d7bd
commit
2e04ba1c04
@ -1,5 +1,4 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from django.contrib.auth.models import Group, User
|
|
||||||
from django.utils.html import format_html, format_html_join
|
from django.utils.html import format_html, format_html_join
|
||||||
from django.utils.safestring import mark_safe
|
from django.utils.safestring import mark_safe
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
@ -52,8 +51,16 @@ class DocumentAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||||
readonly_fields = ("added", "file_type", "storage_type", "filename")
|
readonly_fields = ("added", "file_type", "storage_type", "filename")
|
||||||
list_display = ("title", "created", "added", "correspondent",
|
list_display = (
|
||||||
"tags_", "archive_serial_number", "document_type", "filename")
|
"title",
|
||||||
|
"created",
|
||||||
|
"added",
|
||||||
|
"correspondent",
|
||||||
|
"tags_",
|
||||||
|
"archive_serial_number",
|
||||||
|
"document_type",
|
||||||
|
"filename"
|
||||||
|
)
|
||||||
list_filter = (
|
list_filter = (
|
||||||
"document_type",
|
"document_type",
|
||||||
"tags",
|
"tags",
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
from django.db.models.signals import post_delete
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentsConfig(AppConfig):
|
class DocumentsConfig(AppConfig):
|
||||||
|
@ -3,7 +3,6 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from sklearn.feature_extraction.text import CountVectorizer
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
from sklearn.neural_network import MLPClassifier
|
from sklearn.neural_network import MLPClassifier
|
||||||
@ -64,7 +63,7 @@ class DocumentClassifier(object):
|
|||||||
|
|
||||||
def save_classifier(self):
|
def save_classifier(self):
|
||||||
with open(settings.MODEL_FILE, "wb") as f:
|
with open(settings.MODEL_FILE, "wb") as f:
|
||||||
pickle.dump(self.FORMAT_VERSION, f) # Version
|
pickle.dump(self.FORMAT_VERSION, f)
|
||||||
pickle.dump(self.data_hash, f)
|
pickle.dump(self.data_hash, f)
|
||||||
pickle.dump(self.data_vectorizer, f)
|
pickle.dump(self.data_vectorizer, f)
|
||||||
|
|
||||||
@ -89,15 +88,13 @@ class DocumentClassifier(object):
|
|||||||
data.append(preprocessed_content)
|
data.append(preprocessed_content)
|
||||||
|
|
||||||
y = -1
|
y = -1
|
||||||
if doc.document_type:
|
if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||||
if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
|
|
||||||
y = doc.document_type.pk
|
y = doc.document_type.pk
|
||||||
m.update(y.to_bytes(4, 'little', signed=True))
|
m.update(y.to_bytes(4, 'little', signed=True))
|
||||||
labels_document_type.append(y)
|
labels_document_type.append(y)
|
||||||
|
|
||||||
y = -1
|
y = -1
|
||||||
if doc.correspondent:
|
if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||||
if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
|
|
||||||
y = doc.correspondent.pk
|
y = doc.correspondent.pk
|
||||||
m.update(y.to_bytes(4, 'little', signed=True))
|
m.update(y.to_bytes(4, 'little', signed=True))
|
||||||
labels_correspondent.append(y)
|
labels_correspondent.append(y)
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from time import mktime
|
from time import mktime
|
||||||
|
|
||||||
@ -22,7 +21,10 @@ class UploadForm(forms.Form):
|
|||||||
def get_filename(self, i=None):
|
def get_filename(self, i=None):
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
settings.CONSUMPTION_DIR,
|
settings.CONSUMPTION_DIR,
|
||||||
"{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
|
"{}_{}".format(
|
||||||
|
str(i),
|
||||||
|
self.cleaned_data.get("document").name
|
||||||
|
) if i else self.cleaned_data.get("document").name
|
||||||
)
|
)
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
|
||||||
from django.db import models
|
|
||||||
from django.dispatch import receiver
|
|
||||||
from whoosh import highlight
|
from whoosh import highlight
|
||||||
from whoosh.fields import Schema, TEXT, NUMERIC
|
from whoosh.fields import Schema, TEXT, NUMERIC
|
||||||
from whoosh.highlight import Formatter, get_text
|
from whoosh.highlight import Formatter, get_text
|
||||||
@ -10,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
|
|||||||
from whoosh.qparser import MultifieldParser
|
from whoosh.qparser import MultifieldParser
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
from documents.models import Document
|
|
||||||
from paperless import settings
|
from paperless import settings
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,12 +5,11 @@ import os
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from base64 import b64decode
|
from base64 import b64decode
|
||||||
from email import policy
|
from email import policy
|
||||||
from email.parser import BytesParser
|
from email.parser import BytesParser
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
|
from dateutil import parser
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from .models import Correspondent
|
from .models import Correspondent
|
||||||
|
@ -3,9 +3,8 @@ import os
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from watchdog.observers import Observer
|
|
||||||
from watchdog.events import FileSystemEventHandler
|
from watchdog.events import FileSystemEventHandler
|
||||||
|
from watchdog.observers import Observer
|
||||||
|
|
||||||
from documents.consumer import Consumer
|
from documents.consumer import Consumer
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from ...mixins import Renderable
|
from ...mixins import Renderable
|
||||||
from ...tasks import train_classifier
|
from ...tasks import train_classifier
|
||||||
|
|
||||||
|
@ -1,16 +1,15 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
|
||||||
import shutil
|
import shutil
|
||||||
|
import time
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand, CommandError
|
|
||||||
from django.core import serializers
|
from django.core import serializers
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
from documents.models import Document, Correspondent, Tag, DocumentType
|
from documents.models import Document, Correspondent, Tag, DocumentType
|
||||||
from paperless.db import GnuPG
|
|
||||||
|
|
||||||
from ...mixins import Renderable
|
|
||||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
||||||
|
from paperless.db import GnuPG
|
||||||
|
from ...mixins import Renderable
|
||||||
|
|
||||||
|
|
||||||
class Command(Renderable, BaseCommand):
|
class Command(Renderable, BaseCommand):
|
||||||
|
@ -3,17 +3,15 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand, CommandError
|
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
from ...file_handling import generate_filename, create_source_path_directory
|
from ...file_handling import generate_filename, create_source_path_directory
|
||||||
|
|
||||||
from ...mixins import Renderable
|
from ...mixins import Renderable
|
||||||
|
|
||||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
|
||||||
|
|
||||||
|
|
||||||
class Command(Renderable, BaseCommand):
|
class Command(Renderable, BaseCommand):
|
||||||
|
|
||||||
|
@ -8,5 +8,5 @@ class Command(BaseCommand):
|
|||||||
help = "A quick & dirty way to see what's in the logs"
|
help = "A quick & dirty way to see what's in the logs"
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
for l in Log.objects.order_by("pk"):
|
for log in Log.objects.order_by("pk"):
|
||||||
print(l)
|
print(log)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from documents.models import Document, Tag
|
from documents.models import Document
|
||||||
|
|
||||||
from ...mixins import Renderable
|
from ...mixins import Renderable
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
|
|||||||
correspondents = Correspondent.objects.all()
|
correspondents = Correspondent.objects.all()
|
||||||
predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None
|
predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None
|
||||||
|
|
||||||
matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
|
return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
|
||||||
return matched_correspondents
|
|
||||||
|
|
||||||
|
|
||||||
def match_document_types(document_content, classifier):
|
def match_document_types(document_content, classifier):
|
||||||
document_types = DocumentType.objects.all()
|
document_types = DocumentType.objects.all()
|
||||||
predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None
|
predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None
|
||||||
|
|
||||||
matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
|
return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
|
||||||
return matched_document_types
|
|
||||||
|
|
||||||
|
|
||||||
def match_tags(document_content, classifier):
|
def match_tags(document_content, classifier):
|
||||||
|
@ -22,11 +22,13 @@ from django.utils import timezone
|
|||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||||
from documents.signals import document_consumer_declaration
|
from documents.signals import document_consumer_declaration
|
||||||
|
|
||||||
|
# TODO: isnt there a date parsing library for this?
|
||||||
|
|
||||||
DATE_REGEX = re.compile(
|
DATE_REGEX = re.compile(
|
||||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' # NOQA: E501
|
||||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' # NOQA: E501
|
||||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' # NOQA: E501
|
||||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
|
||||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -59,7 +61,7 @@ def get_parser_class(doc):
|
|||||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||||
|
|
||||||
|
|
||||||
def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
|
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
|
||||||
environment = os.environ.copy()
|
environment = os.environ.copy()
|
||||||
if settings.CONVERT_MEMORY_LIMIT:
|
if settings.CONVERT_MEMORY_LIMIT:
|
||||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
||||||
@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
|
|||||||
args += ['-trim'] if trim else []
|
args += ['-trim'] if trim else []
|
||||||
args += ['-type', str(type)] if type else []
|
args += ['-type', str(type)] if type else []
|
||||||
args += ['-depth', str(depth)] if depth else []
|
args += ['-depth', str(depth)] if depth else []
|
||||||
args += [input, output]
|
args += [input_file, output_file]
|
||||||
|
|
||||||
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
|
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
|
||||||
|
|
||||||
|
@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):
|
|||||||
|
|
||||||
class LogSerializer(serializers.ModelSerializer):
|
class LogSerializer(serializers.ModelSerializer):
|
||||||
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Log
|
model = Log
|
||||||
fields = (
|
fields = (
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django_q.tasks import async_task, result
|
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
from documents import index
|
from documents import index
|
||||||
|
@ -2,9 +2,9 @@ import unittest
|
|||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
|
from .factories import DocumentFactory
|
||||||
from ..checks import changed_password_check
|
from ..checks import changed_password_check
|
||||||
from ..models import Document
|
from ..models import Document
|
||||||
from .factories import DocumentFactory
|
|
||||||
|
|
||||||
|
|
||||||
class ChecksTestCase(TestCase):
|
class ChecksTestCase(TestCase):
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from uuid import uuid4
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
|
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
|
||||||
from ..models import Document, Correspondent
|
from ..models import Document, Correspondent
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
from ..signals.handlers import update_filename_and_move_files
|
from ..signals.handlers import update_filename_and_move_files
|
||||||
|
|
||||||
|
|
||||||
@ -68,24 +67,18 @@ class TestDate(TestCase):
|
|||||||
# test that creating dirs for the source_path creates the correct directory
|
# test that creating dirs for the source_path creates the correct directory
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
||||||
"/none"), True)
|
|
||||||
|
|
||||||
# Set a correspondent and save the document
|
# Set a correspondent and save the document
|
||||||
document.correspondent = Correspondent.objects.get_or_create(
|
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||||
name="test")[0]
|
|
||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
|
||||||
"/test"), True)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
|
||||||
"/none"), False)
|
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
|
||||||
"/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
"{correspondent}")
|
|
||||||
def test_file_renaming_missing_permissions(self):
|
def test_file_renaming_missing_permissions(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.file_type = "pdf"
|
document.file_type = "pdf"
|
||||||
@ -100,27 +93,22 @@ class TestDate(TestCase):
|
|||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
|
|
||||||
# Test source_path
|
# Test source_path
|
||||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR +
|
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
|
||||||
"/none/none-{:07d}.pdf".format(document.pk))
|
|
||||||
|
|
||||||
# Make the folder read- and execute-only (no writing and no renaming)
|
# Make the folder read- and execute-only (no writing and no renaming)
|
||||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||||
|
|
||||||
# Set a correspondent and save the document
|
# Set a correspondent and save the document
|
||||||
document.correspondent = Correspondent.objects.get_or_create(
|
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||||
name="test")[0]
|
|
||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||||
"originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||||
self.assertEqual(document.filename,
|
|
||||||
"none/none-{:07d}.pdf".format(document.pk))
|
|
||||||
|
|
||||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
"{correspondent}")
|
|
||||||
def test_file_renaming_database_error(self):
|
def test_file_renaming_database_error(self):
|
||||||
|
|
||||||
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
||||||
@ -155,13 +143,10 @@ class TestDate(TestCase):
|
|||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertTrue(os.path.isfile(document.source_path))
|
self.assertTrue(os.path.isfile(document.source_path))
|
||||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||||
"originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||||
self.assertEqual(document.filename,
|
|
||||||
"none/none-{:07d}.pdf".format(document.pk))
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
"{correspondent}")
|
|
||||||
def test_document_delete(self):
|
def test_document_delete(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.file_type = "pdf"
|
document.file_type = "pdf"
|
||||||
@ -179,13 +164,10 @@ class TestDate(TestCase):
|
|||||||
# Ensure file deletion after delete
|
# Ensure file deletion after delete
|
||||||
pk = document.pk
|
pk = document.pk
|
||||||
document.delete()
|
document.delete()
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
|
||||||
"/none/none-{:07d}.pdf".format(pk)), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
|
||||||
"/none"), False)
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
"{correspondent}")
|
|
||||||
def test_document_delete_nofile(self):
|
def test_document_delete_nofile(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.file_type = "pdf"
|
document.file_type = "pdf"
|
||||||
@ -194,8 +176,7 @@ class TestDate(TestCase):
|
|||||||
|
|
||||||
document.delete()
|
document.delete()
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
"{correspondent}")
|
|
||||||
def test_directory_not_empty(self):
|
def test_directory_not_empty(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.file_type = "pdf"
|
document.file_type = "pdf"
|
||||||
@ -214,18 +195,14 @@ class TestDate(TestCase):
|
|||||||
Path(important_file).touch()
|
Path(important_file).touch()
|
||||||
|
|
||||||
# Set a correspondent and save the document
|
# Set a correspondent and save the document
|
||||||
document.correspondent = Correspondent.objects.get_or_create(
|
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||||
name="test")[0]
|
|
||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
|
||||||
"/documents/originals/test"), True)
|
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
|
||||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
|
||||||
"/documents/originals/none"), True)
|
|
||||||
self.assertTrue(os.path.isfile(important_file))
|
self.assertTrue(os.path.isfile(important_file))
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||||
def test_tags_with_underscore(self):
|
def test_tags_with_underscore(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
@ -304,9 +281,7 @@ class TestDate(TestCase):
|
|||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document),
|
||||||
"none-{:07d}.pdf".format(document.pk))
|
"none-{:07d}.pdf".format(document.pk))
|
||||||
|
|
||||||
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
|
||||||
"{correspondent}/{correspondent}")
|
|
||||||
def test_nested_directory_cleanup(self):
|
def test_nested_directory_cleanup(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.file_type = "pdf"
|
document.file_type = "pdf"
|
||||||
@ -315,25 +290,19 @@ class TestDate(TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
|
||||||
"none/none/none-{:07d}.pdf".format(document.pk))
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)
|
||||||
"/none/none"), True)
|
|
||||||
|
|
||||||
pk = document.pk
|
pk = document.pk
|
||||||
document.delete()
|
document.delete()
|
||||||
|
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
|
||||||
"/none/none/none-{:07d}.pdf".format(pk)),
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
|
||||||
False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
|
||||||
"/none/none"), False)
|
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
|
||||||
"/none"), False)
|
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
||||||
@ -355,8 +324,7 @@ class TestDate(TestCase):
|
|||||||
Path(os.path.join(tmp, "notempty", "file")).touch()
|
Path(os.path.join(tmp, "notempty", "file")).touch()
|
||||||
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
||||||
|
|
||||||
delete_empty_directories(
|
delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
|
||||||
os.path.join(tmp, "notempty", "empty"))
|
|
||||||
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
||||||
self.assertEqual(os.path.isfile(
|
self.assertEqual(os.path.isfile(
|
||||||
os.path.join(tmp, "notempty", "file")), True)
|
os.path.join(tmp, "notempty", "file")), True)
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
from django.core.management.base import CommandError
|
from django.core.management.base import CommandError
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
from ..management.commands.document_importer import Command
|
|
||||||
|
|
||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
|
from ..management.commands.document_importer import Command
|
||||||
|
|
||||||
|
|
||||||
class TestImporter(TestCase):
|
class TestImporter(TestCase):
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import base64
|
import base64
|
||||||
import os
|
import os
|
||||||
import magic
|
|
||||||
|
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
import magic
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
from ..models import Document, Correspondent
|
|
||||||
from .factories import DocumentFactory, CorrespondentFactory
|
from .factories import DocumentFactory, CorrespondentFactory
|
||||||
|
from ..models import Document, Correspondent
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentTestCase(TestCase):
|
class CorrespondentTestCase(TestCase):
|
||||||
|
@ -4,11 +4,6 @@ from django.views.decorators.cache import cache_control
|
|||||||
from django.views.generic import TemplateView
|
from django.views.generic import TemplateView
|
||||||
from django_filters.rest_framework import DjangoFilterBackend
|
from django_filters.rest_framework import DjangoFilterBackend
|
||||||
from rest_framework.decorators import action
|
from rest_framework.decorators import action
|
||||||
from rest_framework.response import Response
|
|
||||||
from rest_framework.views import APIView
|
|
||||||
|
|
||||||
from paperless.db import GnuPG
|
|
||||||
from paperless.views import StandardPagination
|
|
||||||
from rest_framework.filters import OrderingFilter, SearchFilter
|
from rest_framework.filters import OrderingFilter, SearchFilter
|
||||||
from rest_framework.mixins import (
|
from rest_framework.mixins import (
|
||||||
DestroyModelMixin,
|
DestroyModelMixin,
|
||||||
@ -17,12 +12,17 @@ from rest_framework.mixins import (
|
|||||||
UpdateModelMixin
|
UpdateModelMixin
|
||||||
)
|
)
|
||||||
from rest_framework.permissions import IsAuthenticated
|
from rest_framework.permissions import IsAuthenticated
|
||||||
|
from rest_framework.response import Response
|
||||||
|
from rest_framework.views import APIView
|
||||||
from rest_framework.viewsets import (
|
from rest_framework.viewsets import (
|
||||||
GenericViewSet,
|
GenericViewSet,
|
||||||
ModelViewSet,
|
ModelViewSet,
|
||||||
ReadOnlyModelViewSet
|
ReadOnlyModelViewSet
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import documents.index as index
|
||||||
|
from paperless.db import GnuPG
|
||||||
|
from paperless.views import StandardPagination
|
||||||
from .filters import (
|
from .filters import (
|
||||||
CorrespondentFilterSet,
|
CorrespondentFilterSet,
|
||||||
DocumentFilterSet,
|
DocumentFilterSet,
|
||||||
@ -30,8 +30,6 @@ from .filters import (
|
|||||||
DocumentTypeFilterSet,
|
DocumentTypeFilterSet,
|
||||||
LogFilterSet
|
LogFilterSet
|
||||||
)
|
)
|
||||||
|
|
||||||
import documents.index as index
|
|
||||||
from .forms import UploadForm
|
from .forms import UploadForm
|
||||||
from .models import Correspondent, Document, Log, Tag, DocumentType
|
from .models import Correspondent, Document, Log, Tag, DocumentType
|
||||||
from .serialisers import (
|
from .serialisers import (
|
||||||
|
@ -11,6 +11,8 @@ writeable_hint = (
|
|||||||
"Set the permissions of {} to be writeable by the user running the "
|
"Set the permissions of {} to be writeable by the user running the "
|
||||||
"Paperless services"
|
"Paperless services"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def path_check(env_var):
|
def path_check(env_var):
|
||||||
messages = []
|
messages = []
|
||||||
directory = os.getenv(env_var)
|
directory = os.getenv(env_var)
|
||||||
@ -27,6 +29,7 @@ def path_check(env_var):
|
|||||||
))
|
))
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
def paths_check(app_configs, **kwargs):
|
def paths_check(app_configs, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
@ -25,6 +25,7 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
|
|||||||
# Tesseract process to one thread.
|
# Tesseract process to one thread.
|
||||||
os.environ['OMP_THREAD_LIMIT'] = "1"
|
os.environ['OMP_THREAD_LIMIT'] = "1"
|
||||||
|
|
||||||
|
|
||||||
def __get_boolean(key, default="NO"):
|
def __get_boolean(key, default="NO"):
|
||||||
"""
|
"""
|
||||||
Return a boolean value based on whatever the user has supplied in the
|
Return a boolean value based on whatever the user has supplied in the
|
||||||
@ -32,9 +33,11 @@ def __get_boolean(key, default="NO"):
|
|||||||
"""
|
"""
|
||||||
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
||||||
|
|
||||||
|
|
||||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Directories #
|
# Directories #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -6,7 +6,6 @@ from django.views.decorators.csrf import csrf_exempt
|
|||||||
from django.views.generic import RedirectView
|
from django.views.generic import RedirectView
|
||||||
from rest_framework.routers import DefaultRouter
|
from rest_framework.routers import DefaultRouter
|
||||||
|
|
||||||
from paperless.views import FaviconView
|
|
||||||
from documents.views import (
|
from documents.views import (
|
||||||
CorrespondentViewSet,
|
CorrespondentViewSet,
|
||||||
DocumentViewSet,
|
DocumentViewSet,
|
||||||
@ -18,6 +17,7 @@ from documents.views import (
|
|||||||
SearchAutoCompleteView,
|
SearchAutoCompleteView,
|
||||||
StatisticsView
|
StatisticsView
|
||||||
)
|
)
|
||||||
|
from paperless.views import FaviconView
|
||||||
|
|
||||||
api_router = DefaultRouter()
|
api_router = DefaultRouter()
|
||||||
api_router.register(r"correspondents", CorrespondentViewSet)
|
api_router.register(r"correspondents", CorrespondentViewSet)
|
||||||
|
@ -5,15 +5,14 @@ import subprocess
|
|||||||
from multiprocessing.pool import Pool
|
from multiprocessing.pool import Pool
|
||||||
|
|
||||||
import langdetect
|
import langdetect
|
||||||
|
import pdftotext
|
||||||
import pyocr
|
import pyocr
|
||||||
from django.conf import settings
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
from django.conf import settings
|
||||||
from pyocr import PyocrException
|
from pyocr import PyocrException
|
||||||
|
|
||||||
import pdftotext
|
|
||||||
from documents.parsers import DocumentParser, ParseError, run_unpaper, \
|
from documents.parsers import DocumentParser, ParseError, run_unpaper, \
|
||||||
run_convert
|
run_convert
|
||||||
|
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
|
||||||
|
|
||||||
@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
alpha="remove",
|
alpha="remove",
|
||||||
strip=True,
|
strip=True,
|
||||||
trim=True,
|
trim=True,
|
||||||
input="{}[0]".format(self.document_path),
|
input_file="{}[0]".format(self.document_path),
|
||||||
output=out_path,
|
output_file=out_path,
|
||||||
logging_group=self.logging_group)
|
logging_group=self.logging_group)
|
||||||
except ParseError:
|
except ParseError:
|
||||||
# if convert fails, fall back to extracting
|
# if convert fails, fall back to extracting
|
||||||
@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
alpha="remove",
|
alpha="remove",
|
||||||
strip=True,
|
strip=True,
|
||||||
trim=True,
|
trim=True,
|
||||||
input=gs_out_path,
|
input_file=gs_out_path,
|
||||||
output=out_path,
|
output_file=out_path,
|
||||||
logging_group=self.logging_group)
|
logging_group=self.logging_group)
|
||||||
|
|
||||||
return out_path
|
return out_path
|
||||||
@ -139,8 +138,8 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
run_convert(density=settings.CONVERT_DENSITY,
|
run_convert(density=settings.CONVERT_DENSITY,
|
||||||
depth="8",
|
depth="8",
|
||||||
type="grayscale",
|
type="grayscale",
|
||||||
input=self.document_path,
|
input_file=self.document_path,
|
||||||
output=pnm,
|
output_file=pnm,
|
||||||
logging_group=self.logging_group)
|
logging_group=self.logging_group)
|
||||||
|
|
||||||
# Get a list of converted images
|
# Get a list of converted images
|
||||||
@ -189,7 +188,6 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
return [sample_page]
|
return [sample_page]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def strip_excess_whitespace(text):
|
def strip_excess_whitespace(text):
|
||||||
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
|
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
|
||||||
no_leading_whitespace = re.sub(
|
no_leading_whitespace = re.sub(
|
||||||
|
@ -5,10 +5,10 @@ from unittest import mock
|
|||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
|
from django.conf import settings
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from ..parsers import RasterisedDocumentParser
|
from ..parsers import RasterisedDocumentParser
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
|
|
||||||
class TestDate(TestCase):
|
class TestDate(TestCase):
|
||||||
|
@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def read_text():
|
def read_text():
|
||||||
with open(self.document_path, 'r') as src:
|
with open(self.document_path, 'r') as src:
|
||||||
lines = [l.strip() for l in src.readlines()]
|
lines = [line.strip() for line in src.readlines()]
|
||||||
text = "\n".join([l for l in lines[:n_lines]])
|
text = "\n".join([line for line in lines[:n_lines]])
|
||||||
return text.replace('"', "'")
|
return text.replace('"', "'")
|
||||||
|
|
||||||
def create_txlayer():
|
def create_txlayer():
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[pycodestyle]
|
[pycodestyle]
|
||||||
exclude = migrations, paperless/settings.py, .tox
|
exclude = migrations, paperless/settings.py, .tox
|
||||||
|
ignore = E501
|
||||||
|
|
||||||
[tool:pytest]
|
[tool:pytest]
|
||||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||||
|
Loading…
x
Reference in New Issue
Block a user