mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
code style fixes
This commit is contained in:
parent
9c4cf5d7bd
commit
2e04ba1c04
@ -1,5 +1,4 @@
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.models import Group, User
|
||||
from django.utils.html import format_html, format_html_join
|
||||
from django.utils.safestring import mark_safe
|
||||
from whoosh.writing import AsyncWriter
|
||||
@ -52,8 +51,16 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
|
||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||
readonly_fields = ("added", "file_type", "storage_type", "filename")
|
||||
list_display = ("title", "created", "added", "correspondent",
|
||||
"tags_", "archive_serial_number", "document_type", "filename")
|
||||
list_display = (
|
||||
"title",
|
||||
"created",
|
||||
"added",
|
||||
"correspondent",
|
||||
"tags_",
|
||||
"archive_serial_number",
|
||||
"document_type",
|
||||
"filename"
|
||||
)
|
||||
list_filter = (
|
||||
"document_type",
|
||||
"tags",
|
||||
|
@ -1,5 +1,4 @@
|
||||
from django.apps import AppConfig
|
||||
from django.db.models.signals import post_delete
|
||||
|
||||
|
||||
class DocumentsConfig(AppConfig):
|
||||
|
@ -3,7 +3,6 @@ import logging
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import time
|
||||
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
@ -64,7 +63,7 @@ class DocumentClassifier(object):
|
||||
|
||||
def save_classifier(self):
|
||||
with open(settings.MODEL_FILE, "wb") as f:
|
||||
pickle.dump(self.FORMAT_VERSION, f) # Version
|
||||
pickle.dump(self.FORMAT_VERSION, f)
|
||||
pickle.dump(self.data_hash, f)
|
||||
pickle.dump(self.data_vectorizer, f)
|
||||
|
||||
@ -89,16 +88,14 @@ class DocumentClassifier(object):
|
||||
data.append(preprocessed_content)
|
||||
|
||||
y = -1
|
||||
if doc.document_type:
|
||||
if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = doc.document_type.pk
|
||||
if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = doc.document_type.pk
|
||||
m.update(y.to_bytes(4, 'little', signed=True))
|
||||
labels_document_type.append(y)
|
||||
|
||||
y = -1
|
||||
if doc.correspondent:
|
||||
if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = doc.correspondent.pk
|
||||
if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = doc.correspondent.pk
|
||||
m.update(y.to_bytes(4, 'little', signed=True))
|
||||
labels_correspondent.append(y)
|
||||
|
||||
@ -137,7 +134,7 @@ class DocumentClassifier(object):
|
||||
logging.getLogger(__name__).debug("Vectorizing data...")
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
analyzer="word",
|
||||
ngram_range=(1,2),
|
||||
ngram_range=(1, 2),
|
||||
min_df=0.01
|
||||
)
|
||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||
|
@ -155,7 +155,7 @@ class Consumer:
|
||||
self.log("debug", "Saving record to database")
|
||||
|
||||
created = file_info.created or date or timezone.make_aware(
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
|
||||
with open(doc, "rb") as f:
|
||||
document = Document.objects.create(
|
||||
|
@ -1,5 +1,4 @@
|
||||
import os
|
||||
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
|
||||
@ -22,7 +21,10 @@ class UploadForm(forms.Form):
|
||||
def get_filename(self, i=None):
|
||||
return os.path.join(
|
||||
settings.CONSUMPTION_DIR,
|
||||
"{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
|
||||
"{}_{}".format(
|
||||
str(i),
|
||||
self.cleaned_data.get("document").name
|
||||
) if i else self.cleaned_data.get("document").name
|
||||
)
|
||||
|
||||
def save(self):
|
||||
|
@ -1,8 +1,6 @@
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
|
||||
from django.db import models
|
||||
from django.dispatch import receiver
|
||||
from whoosh import highlight
|
||||
from whoosh.fields import Schema, TEXT, NUMERIC
|
||||
from whoosh.highlight import Formatter, get_text
|
||||
@ -10,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
|
||||
from whoosh.qparser import MultifieldParser
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents.models import Document
|
||||
from paperless import settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -5,12 +5,11 @@ import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from base64 import b64decode
|
||||
from email import policy
|
||||
from email.parser import BytesParser
|
||||
from dateutil import parser
|
||||
|
||||
from dateutil import parser
|
||||
from django.conf import settings
|
||||
|
||||
from .models import Correspondent
|
||||
|
@ -3,9 +3,8 @@ import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from watchdog.observers import Observer
|
||||
|
||||
from documents.consumer import Consumer
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from ...mixins import Renderable
|
||||
from ...tasks import train_classifier
|
||||
|
||||
|
@ -1,16 +1,15 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import shutil
|
||||
import time
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.core import serializers
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from documents.models import Document, Correspondent, Tag, DocumentType
|
||||
from paperless.db import GnuPG
|
||||
|
||||
from ...mixins import Renderable
|
||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
||||
from paperless.db import GnuPG
|
||||
from ...mixins import Renderable
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
|
||||
|
@ -3,17 +3,15 @@ import os
|
||||
import shutil
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from documents.models import Document
|
||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
||||
from paperless.db import GnuPG
|
||||
from ...file_handling import generate_filename, create_source_path_directory
|
||||
|
||||
from ...mixins import Renderable
|
||||
|
||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
|
||||
|
||||
|
@ -8,5 +8,5 @@ class Command(BaseCommand):
|
||||
help = "A quick & dirty way to see what's in the logs"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
for l in Log.objects.order_by("pk"):
|
||||
print(l)
|
||||
for log in Log.objects.order_by("pk"):
|
||||
print(log)
|
||||
|
@ -1,7 +1,6 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.models import Document, Tag
|
||||
|
||||
from documents.models import Document
|
||||
from ...mixins import Renderable
|
||||
|
||||
|
||||
|
@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
|
||||
correspondents = Correspondent.objects.all()
|
||||
predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None
|
||||
|
||||
matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
|
||||
return matched_correspondents
|
||||
return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
|
||||
|
||||
|
||||
def match_document_types(document_content, classifier):
|
||||
document_types = DocumentType.objects.all()
|
||||
predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None
|
||||
|
||||
matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
|
||||
return matched_document_types
|
||||
return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
|
||||
|
||||
|
||||
def match_tags(document_content, classifier):
|
||||
|
@ -22,11 +22,13 @@ from django.utils import timezone
|
||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||
from documents.signals import document_consumer_declaration
|
||||
|
||||
# TODO: isnt there a date parsing library for this?
|
||||
|
||||
DATE_REGEX = re.compile(
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||
)
|
||||
|
||||
@ -43,7 +45,7 @@ def get_parser_class(doc):
|
||||
for response in document_consumer_declaration.send(None):
|
||||
parsers.append(response[1])
|
||||
|
||||
#TODO: add a check that checks parser availability.
|
||||
# TODO: add a check that checks parser availability.
|
||||
|
||||
options = []
|
||||
for parser in parsers:
|
||||
@ -59,7 +61,7 @@ def get_parser_class(doc):
|
||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
|
||||
|
||||
def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
|
||||
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
|
||||
environment = os.environ.copy()
|
||||
if settings.CONVERT_MEMORY_LIMIT:
|
||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
||||
@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
|
||||
args += ['-trim'] if trim else []
|
||||
args += ['-type', str(type)] if type else []
|
||||
args += ['-depth', str(depth)] if depth else []
|
||||
args += [input, output]
|
||||
args += [input_file, output_file]
|
||||
|
||||
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
|
||||
|
||||
|
@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||
|
||||
class LogSerializer(serializers.ModelSerializer):
|
||||
|
||||
|
||||
class Meta:
|
||||
model = Log
|
||||
fields = (
|
||||
|
@ -1,7 +1,6 @@
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django_q.tasks import async_task, result
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
|
@ -2,9 +2,9 @@ import unittest
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from .factories import DocumentFactory
|
||||
from ..checks import changed_password_check
|
||||
from ..models import Document
|
||||
from .factories import DocumentFactory
|
||||
|
||||
|
||||
class ChecksTestCase(TestCase):
|
||||
|
@ -1,14 +1,13 @@
|
||||
import os
|
||||
import shutil
|
||||
from uuid import uuid4
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
|
||||
from ..models import Document, Correspondent
|
||||
from django.conf import settings
|
||||
|
||||
from ..signals.handlers import update_filename_and_move_files
|
||||
|
||||
|
||||
@ -68,24 +67,18 @@ class TestDate(TestCase):
|
||||
# test that creating dirs for the source_path creates the correct directory
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none"), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
||||
"/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@ -100,27 +93,22 @@ class TestDate(TestCase):
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR +
|
||||
"/none/none-{:07d}.pdf".format(document.pk))
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
# Make the folder read- and execute-only (no writing and no renaming)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
||||
"originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_database_error(self):
|
||||
|
||||
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
||||
@ -155,13 +143,10 @@ class TestDate(TestCase):
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
||||
"originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_document_delete(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@ -179,13 +164,10 @@ class TestDate(TestCase):
|
||||
# Ensure file deletion after delete
|
||||
pk = document.pk
|
||||
document.delete()
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
||||
"/none/none-{:07d}.pdf".format(pk)), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_document_delete_nofile(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@ -194,8 +176,7 @@ class TestDate(TestCase):
|
||||
|
||||
document.delete()
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_directory_not_empty(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@ -214,18 +195,14 @@ class TestDate(TestCase):
|
||||
Path(important_file).touch()
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
||||
"/documents/originals/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
|
||||
"/documents/originals/none"), True)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
|
||||
self.assertTrue(os.path.isfile(important_file))
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_underscore(self):
|
||||
document = Document()
|
||||
@ -304,9 +281,7 @@ class TestDate(TestCase):
|
||||
self.assertEqual(generate_filename(document),
|
||||
"none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}/{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
|
||||
def test_nested_directory_cleanup(self):
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
@ -315,25 +290,19 @@ class TestDate(TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none/none-{:07d}.pdf".format(document.pk))
|
||||
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none/none"), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)
|
||||
|
||||
pk = document.pk
|
||||
document.delete()
|
||||
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR +
|
||||
"/none/none/none-{:07d}.pdf".format(pk)),
|
||||
False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none/none"), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR +
|
||||
"/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
||||
@ -355,8 +324,7 @@ class TestDate(TestCase):
|
||||
Path(os.path.join(tmp, "notempty", "file")).touch()
|
||||
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
||||
|
||||
delete_empty_directories(
|
||||
os.path.join(tmp, "notempty", "empty"))
|
||||
delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
|
||||
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
||||
self.assertEqual(os.path.isfile(
|
||||
os.path.join(tmp, "notempty", "file")), True)
|
||||
|
@ -1,9 +1,8 @@
|
||||
from django.core.management.base import CommandError
|
||||
from django.test import TestCase
|
||||
|
||||
from ..management.commands.document_importer import Command
|
||||
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from ..management.commands.document_importer import Command
|
||||
|
||||
|
||||
class TestImporter(TestCase):
|
||||
|
@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase
|
||||
|
@ -1,10 +1,9 @@
|
||||
import base64
|
||||
import os
|
||||
import magic
|
||||
|
||||
from hashlib import md5
|
||||
from unittest import mock
|
||||
|
||||
import magic
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
from django.test import TestCase
|
||||
|
||||
from ..models import Document, Correspondent
|
||||
from .factories import DocumentFactory, CorrespondentFactory
|
||||
from ..models import Document, Correspondent
|
||||
|
||||
|
||||
class CorrespondentTestCase(TestCase):
|
||||
|
@ -4,11 +4,6 @@ from django.views.decorators.cache import cache_control
|
||||
from django.views.generic import TemplateView
|
||||
from django_filters.rest_framework import DjangoFilterBackend
|
||||
from rest_framework.decorators import action
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.views import APIView
|
||||
|
||||
from paperless.db import GnuPG
|
||||
from paperless.views import StandardPagination
|
||||
from rest_framework.filters import OrderingFilter, SearchFilter
|
||||
from rest_framework.mixins import (
|
||||
DestroyModelMixin,
|
||||
@ -17,12 +12,17 @@ from rest_framework.mixins import (
|
||||
UpdateModelMixin
|
||||
)
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.views import APIView
|
||||
from rest_framework.viewsets import (
|
||||
GenericViewSet,
|
||||
ModelViewSet,
|
||||
ReadOnlyModelViewSet
|
||||
)
|
||||
|
||||
import documents.index as index
|
||||
from paperless.db import GnuPG
|
||||
from paperless.views import StandardPagination
|
||||
from .filters import (
|
||||
CorrespondentFilterSet,
|
||||
DocumentFilterSet,
|
||||
@ -30,8 +30,6 @@ from .filters import (
|
||||
DocumentTypeFilterSet,
|
||||
LogFilterSet
|
||||
)
|
||||
|
||||
import documents.index as index
|
||||
from .forms import UploadForm
|
||||
from .models import Correspondent, Document, Log, Tag, DocumentType
|
||||
from .serialisers import (
|
||||
@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
|
||||
|
||||
def file_response(self, pk, disposition):
|
||||
#TODO: this should not be necessary here.
|
||||
# TODO: this should not be necessary here.
|
||||
content_types = {
|
||||
Document.TYPE_PDF: "application/pdf",
|
||||
Document.TYPE_PNG: "image/png",
|
||||
@ -114,7 +112,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
Document.TYPE_GIF: "image/gif",
|
||||
Document.TYPE_TIF: "image/tiff",
|
||||
Document.TYPE_CSV: "text/csv",
|
||||
Document.TYPE_MD: "text/markdown",
|
||||
Document.TYPE_MD: "text/markdown",
|
||||
Document.TYPE_TXT: "text/plain"
|
||||
}
|
||||
|
||||
@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
|
||||
@action(methods=['post'], detail=False)
|
||||
def post_document(self, request, pk=None):
|
||||
#TODO: is this a good implementation?
|
||||
# TODO: is this a good implementation?
|
||||
form = UploadForm(data=request.POST, files=request.FILES)
|
||||
if form.is_valid():
|
||||
form.save()
|
||||
|
@ -11,6 +11,8 @@ writeable_hint = (
|
||||
"Set the permissions of {} to be writeable by the user running the "
|
||||
"Paperless services"
|
||||
)
|
||||
|
||||
|
||||
def path_check(env_var):
|
||||
messages = []
|
||||
directory = os.getenv(env_var)
|
||||
@ -27,6 +29,7 @@ def path_check(env_var):
|
||||
))
|
||||
return messages
|
||||
|
||||
|
||||
@register()
|
||||
def paths_check(app_configs, **kwargs):
|
||||
"""
|
||||
@ -34,9 +37,9 @@ def paths_check(app_configs, **kwargs):
|
||||
"""
|
||||
|
||||
check_messages = path_check("PAPERLESS_DATA_DIR") + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
|
||||
return check_messages
|
||||
|
||||
|
@ -25,6 +25,7 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
|
||||
# Tesseract process to one thread.
|
||||
os.environ['OMP_THREAD_LIMIT'] = "1"
|
||||
|
||||
|
||||
def __get_boolean(key, default="NO"):
|
||||
"""
|
||||
Return a boolean value based on whatever the user has supplied in the
|
||||
@ -32,9 +33,11 @@ def __get_boolean(key, default="NO"):
|
||||
"""
|
||||
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
||||
|
||||
|
||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Directories #
|
||||
###############################################################################
|
||||
|
@ -6,7 +6,6 @@ from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from paperless.views import FaviconView
|
||||
from documents.views import (
|
||||
CorrespondentViewSet,
|
||||
DocumentViewSet,
|
||||
@ -18,6 +17,7 @@ from documents.views import (
|
||||
SearchAutoCompleteView,
|
||||
StatisticsView
|
||||
)
|
||||
from paperless.views import FaviconView
|
||||
|
||||
api_router = DefaultRouter()
|
||||
api_router.register(r"correspondents", CorrespondentViewSet)
|
||||
@ -30,7 +30,7 @@ api_router.register(r"tags", TagViewSet)
|
||||
urlpatterns = [
|
||||
|
||||
# API
|
||||
url(r"^api/auth/",include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
|
||||
url(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")),
|
||||
url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"),
|
||||
url(r"^api/search/", SearchView.as_view(), name="search"),
|
||||
url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"),
|
||||
|
@ -5,15 +5,14 @@ import subprocess
|
||||
from multiprocessing.pool import Pool
|
||||
|
||||
import langdetect
|
||||
import pdftotext
|
||||
import pyocr
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
from django.conf import settings
|
||||
from pyocr import PyocrException
|
||||
|
||||
import pdftotext
|
||||
from documents.parsers import DocumentParser, ParseError, run_unpaper, \
|
||||
run_convert
|
||||
|
||||
from .languages import ISO639
|
||||
|
||||
|
||||
@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=True,
|
||||
input="{}[0]".format(self.document_path),
|
||||
output=out_path,
|
||||
input_file="{}[0]".format(self.document_path),
|
||||
output_file=out_path,
|
||||
logging_group=self.logging_group)
|
||||
except ParseError:
|
||||
# if convert fails, fall back to extracting
|
||||
@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=True,
|
||||
input=gs_out_path,
|
||||
output=out_path,
|
||||
input_file=gs_out_path,
|
||||
output_file=out_path,
|
||||
logging_group=self.logging_group)
|
||||
|
||||
return out_path
|
||||
@ -99,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
try:
|
||||
|
||||
sample_page_index = int(len(images) / 2)
|
||||
self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index+1, len(images)))
|
||||
self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
|
||||
sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0]
|
||||
guessed_language = self._guess_language(sample_page_text)
|
||||
|
||||
@ -139,8 +138,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
run_convert(density=settings.CONVERT_DENSITY,
|
||||
depth="8",
|
||||
type="grayscale",
|
||||
input=self.document_path,
|
||||
output=pnm,
|
||||
input_file=self.document_path,
|
||||
output_file=pnm,
|
||||
logging_group=self.logging_group)
|
||||
|
||||
# Get a list of converted images
|
||||
@ -189,7 +188,6 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
return [sample_page]
|
||||
|
||||
|
||||
|
||||
def strip_excess_whitespace(text):
|
||||
collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
|
||||
no_leading_whitespace = re.sub(
|
||||
|
@ -5,10 +5,10 @@ from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
from dateutil import tz
|
||||
from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from ..parsers import RasterisedDocumentParser
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class TestDate(TestCase):
|
||||
|
@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser):
|
||||
|
||||
def read_text():
|
||||
with open(self.document_path, 'r') as src:
|
||||
lines = [l.strip() for l in src.readlines()]
|
||||
text = "\n".join([l for l in lines[:n_lines]])
|
||||
lines = [line.strip() for line in src.readlines()]
|
||||
text = "\n".join([line for line in lines[:n_lines]])
|
||||
return text.replace('"', "'")
|
||||
|
||||
def create_txlayer():
|
||||
|
@ -1,6 +1,6 @@
|
||||
[pycodestyle]
|
||||
exclude = migrations, paperless/settings.py, .tox
|
||||
|
||||
ignore = E501
|
||||
|
||||
[tool:pytest]
|
||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||
|
Loading…
x
Reference in New Issue
Block a user