diff --git a/docs/conf.py b/docs/conf.py index 7cf8c9fe1..eb6720dbb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os - __version__ = None exec(open("../src/paperless/version.py").read()) diff --git a/src/documents/admin.py b/src/documents/admin.py index b9d2b5543..74a152c68 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -75,7 +75,6 @@ class DocumentAdmin(admin.ModelAdmin): def tags_(self, obj): r = "" for tag in obj.tags.all(): - colour = tag.get_colour_display() r += self._html_tag( "span", tag.slug + ", " diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 41eefc948..639152725 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -1,4 +1,3 @@ -from django.db import transaction import datetime import hashlib import logging @@ -7,11 +6,12 @@ import re import uuid from django.conf import settings +from django.db import transaction from django.utils import timezone + from paperless.db import GnuPG from .classifier import DocumentClassifier - -from .models import Document, FileInfo, Tag +from .models import Document, FileInfo from .parsers import ParseError from .signals import ( document_consumer_declaration, diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py deleted file mode 100644 index d7d77a111..000000000 --- a/src/documents/management/commands/document_renamer.py +++ /dev/null @@ -1,24 +0,0 @@ -from django.core.management.base import BaseCommand - -from documents.models import Document, Tag - -from ...mixins import Renderable - - -class Command(Renderable, BaseCommand): - - help = """ - This will rename all documents to match the latest filename format. - """.replace(" ", "") - - def __init__(self, *args, **kwargs): - self.verbosity = 0 - BaseCommand.__init__(self, *args, **kwargs) - - def handle(self, *args, **options): - - self.verbosity = options["verbosity"] - - for document in Document.objects.all(): - # Saving the document again will generate a new filename and rename - document.save() diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index 007286935..9238bea71 100755 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -3,8 +3,7 @@ import logging from django.core.management.base import BaseCommand from documents.classifier import DocumentClassifier -from documents.models import Document, Tag - +from documents.models import Document from ...mixins import Renderable from ...signals.handlers import set_correspondent, set_document_type, set_tags diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 0a96d6b06..8c893e46c 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -8,7 +8,6 @@ from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.utils import timezone -from documents.classifier import DocumentClassifier from .. import index, matching from ..models import Document, Tag diff --git a/src/paperless/mixins.py b/src/paperless/mixins.py deleted file mode 100644 index f4f1fcdec..000000000 --- a/src/paperless/mixins.py +++ /dev/null @@ -1,46 +0,0 @@ -from django.contrib.auth.mixins import AccessMixin -from django.contrib.auth import authenticate, login -import base64 - - -class SessionOrBasicAuthMixin(AccessMixin): - """ - Session or Basic Authentication mixin for Django. - It determines if the requester is already logged in or if they have - provided proper http-authorization and returning the view if all goes - well, otherwise responding with a 401. - - Base for mixin found here: https://djangosnippets.org/snippets/3073/ - """ - - def dispatch(self, request, *args, **kwargs): - - # check if user is authenticated via the session - if request.user.is_authenticated: - - # Already logged in, just return the view. - return super(SessionOrBasicAuthMixin, self).dispatch( - request, *args, **kwargs - ) - - # apparently not authenticated via session, maybe via HTTP Basic? - if 'HTTP_AUTHORIZATION' in request.META: - auth = request.META['HTTP_AUTHORIZATION'].split() - if len(auth) == 2: - # NOTE: Support for only basic authentication - if auth[0].lower() == "basic": - authString = base64.b64decode(auth[1]).decode('utf-8') - uname, passwd = authString.split(':') - user = authenticate(username=uname, password=passwd) - if user is not None: - if user.is_active: - login(request, user) - request.user = user - return super( - SessionOrBasicAuthMixin, self - ).dispatch( - request, *args, **kwargs - ) - - # nope, really not authenticated - return self.handle_no_permission() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 5cf2b4b66..45a4cf31b 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -260,7 +260,7 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", 4)) # OCR all documents? -OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", False) +OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", "false") # GNUPG needs a home directory for some reason diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index afd64de65..951ad29ba 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -160,6 +160,7 @@ class RasterisedDocumentParser(DocumentParser): guess = langdetect.detect(text) return guess except Exception as e: + self.log('debug', "Language detection failed with: {}".format(e)) return None def _ocr(self, imgs, lang): diff --git a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py index 9e9d48b90..eb14f3b61 100644 --- a/src/paperless_tesseract/tests/test_date.py +++ b/src/paperless_tesseract/tests/test_date.py @@ -27,28 +27,28 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_1(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 130218 lorem ipsum" self.assertEqual(document.get_date(), None) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_2(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 2018 lorem ipsum" self.assertEqual(document.get_date(), None) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_3(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 20180213 lorem ipsum" self.assertEqual(document.get_date(), None) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_4(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 13.02.2018 lorem ipsum" date = document.get_date() self.assertEqual( @@ -62,7 +62,7 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_5(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " "ipsum" @@ -79,7 +79,7 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_6(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "Wohnort\n" @@ -96,7 +96,7 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_7(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "März 2019\n" @@ -114,7 +114,7 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_8(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "Wohnort\n" @@ -138,7 +138,7 @@ class TestDate(TestCase): @mock.patch(MOCK_SCRATCH, SCRATCH) def test_date_format_9(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "27. Nullmonth 2020\n" @@ -159,7 +159,7 @@ class TestDate(TestCase): ) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_crazy_date_past(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -169,7 +169,7 @@ class TestDate(TestCase): ) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_crazy_date_future(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -179,7 +179,7 @@ class TestDate(TestCase): ) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_crazy_date_with_spaces(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -195,6 +195,6 @@ class TestDate(TestCase): ) @mock.patch(MOCK_SCRATCH, SCRATCH) def test_filename_date_parse_invalid(self, *args): - document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf") + document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf", None) document.get_text() self.assertIsNone(document.get_date())