Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)

Commit: code style fixes
This commit is contained in:
		| @@ -1,5 +1,4 @@ | |||||||
| from django.contrib import admin | from django.contrib import admin | ||||||
| from django.contrib.auth.models import Group, User |  | ||||||
| from django.utils.html import format_html, format_html_join | from django.utils.html import format_html, format_html_join | ||||||
| from django.utils.safestring import mark_safe | from django.utils.safestring import mark_safe | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
| @@ -52,8 +51,16 @@ class DocumentAdmin(admin.ModelAdmin): | |||||||
|  |  | ||||||
|     search_fields = ("correspondent__name", "title", "content", "tags__name") |     search_fields = ("correspondent__name", "title", "content", "tags__name") | ||||||
|     readonly_fields = ("added", "file_type", "storage_type", "filename") |     readonly_fields = ("added", "file_type", "storage_type", "filename") | ||||||
|     list_display = ("title", "created", "added", "correspondent", |     list_display = ( | ||||||
|                     "tags_", "archive_serial_number", "document_type", "filename") |         "title", | ||||||
|  |         "created", | ||||||
|  |         "added", | ||||||
|  |         "correspondent", | ||||||
|  |         "tags_", | ||||||
|  |         "archive_serial_number", | ||||||
|  |         "document_type", | ||||||
|  |         "filename" | ||||||
|  |     ) | ||||||
|     list_filter = ( |     list_filter = ( | ||||||
|         "document_type", |         "document_type", | ||||||
|         "tags", |         "tags", | ||||||
|   | |||||||
| @@ -1,5 +1,4 @@ | |||||||
| from django.apps import AppConfig | from django.apps import AppConfig | ||||||
| from django.db.models.signals import post_delete |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class DocumentsConfig(AppConfig): | class DocumentsConfig(AppConfig): | ||||||
|   | |||||||
| @@ -3,7 +3,6 @@ import logging | |||||||
| import os | import os | ||||||
| import pickle | import pickle | ||||||
| import re | import re | ||||||
| import time |  | ||||||
|  |  | ||||||
| from sklearn.feature_extraction.text import CountVectorizer | from sklearn.feature_extraction.text import CountVectorizer | ||||||
| from sklearn.neural_network import MLPClassifier | from sklearn.neural_network import MLPClassifier | ||||||
| @@ -64,7 +63,7 @@ class DocumentClassifier(object): | |||||||
|  |  | ||||||
|     def save_classifier(self): |     def save_classifier(self): | ||||||
|         with open(settings.MODEL_FILE, "wb") as f: |         with open(settings.MODEL_FILE, "wb") as f: | ||||||
|             pickle.dump(self.FORMAT_VERSION, f) # Version |             pickle.dump(self.FORMAT_VERSION, f) | ||||||
|             pickle.dump(self.data_hash, f) |             pickle.dump(self.data_hash, f) | ||||||
|             pickle.dump(self.data_vectorizer, f) |             pickle.dump(self.data_vectorizer, f) | ||||||
|  |  | ||||||
| @@ -89,16 +88,14 @@ class DocumentClassifier(object): | |||||||
|             data.append(preprocessed_content) |             data.append(preprocessed_content) | ||||||
|  |  | ||||||
|             y = -1 |             y = -1 | ||||||
|             if doc.document_type: |             if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||||
|                 if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO: |                 y = doc.document_type.pk | ||||||
|                     y = doc.document_type.pk |  | ||||||
|             m.update(y.to_bytes(4, 'little', signed=True)) |             m.update(y.to_bytes(4, 'little', signed=True)) | ||||||
|             labels_document_type.append(y) |             labels_document_type.append(y) | ||||||
|  |  | ||||||
|             y = -1 |             y = -1 | ||||||
|             if doc.correspondent: |             if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO: | ||||||
|                 if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO: |                 y = doc.correspondent.pk | ||||||
|                     y = doc.correspondent.pk |  | ||||||
|             m.update(y.to_bytes(4, 'little', signed=True)) |             m.update(y.to_bytes(4, 'little', signed=True)) | ||||||
|             labels_correspondent.append(y) |             labels_correspondent.append(y) | ||||||
|  |  | ||||||
| @@ -137,7 +134,7 @@ class DocumentClassifier(object): | |||||||
|         logging.getLogger(__name__).debug("Vectorizing data...") |         logging.getLogger(__name__).debug("Vectorizing data...") | ||||||
|         self.data_vectorizer = CountVectorizer( |         self.data_vectorizer = CountVectorizer( | ||||||
|             analyzer="word", |             analyzer="word", | ||||||
|             ngram_range=(1,2), |             ngram_range=(1, 2), | ||||||
|             min_df=0.01 |             min_df=0.01 | ||||||
|         ) |         ) | ||||||
|         data_vectorized = self.data_vectorizer.fit_transform(data) |         data_vectorized = self.data_vectorizer.fit_transform(data) | ||||||
|   | |||||||
| @@ -155,7 +155,7 @@ class Consumer: | |||||||
|         self.log("debug", "Saving record to database") |         self.log("debug", "Saving record to database") | ||||||
|  |  | ||||||
|         created = file_info.created or date or timezone.make_aware( |         created = file_info.created or date or timezone.make_aware( | ||||||
|                     datetime.datetime.fromtimestamp(stats.st_mtime)) |             datetime.datetime.fromtimestamp(stats.st_mtime)) | ||||||
|  |  | ||||||
|         with open(doc, "rb") as f: |         with open(doc, "rb") as f: | ||||||
|             document = Document.objects.create( |             document = Document.objects.create( | ||||||
|   | |||||||
| @@ -1,5 +1,4 @@ | |||||||
| import os | import os | ||||||
|  |  | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from time import mktime | from time import mktime | ||||||
|  |  | ||||||
| @@ -22,7 +21,10 @@ class UploadForm(forms.Form): | |||||||
|     def get_filename(self, i=None): |     def get_filename(self, i=None): | ||||||
|         return os.path.join( |         return os.path.join( | ||||||
|             settings.CONSUMPTION_DIR, |             settings.CONSUMPTION_DIR, | ||||||
|             "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name |             "{}_{}".format( | ||||||
|  |                 str(i), | ||||||
|  |                 self.cleaned_data.get("document").name | ||||||
|  |             ) if i else self.cleaned_data.get("document").name | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|     def save(self): |     def save(self): | ||||||
|   | |||||||
| @@ -1,8 +1,6 @@ | |||||||
| import logging | import logging | ||||||
| from contextlib import contextmanager | from contextlib import contextmanager | ||||||
|  |  | ||||||
| from django.db import models |  | ||||||
| from django.dispatch import receiver |  | ||||||
| from whoosh import highlight | from whoosh import highlight | ||||||
| from whoosh.fields import Schema, TEXT, NUMERIC | from whoosh.fields import Schema, TEXT, NUMERIC | ||||||
| from whoosh.highlight import Formatter, get_text | from whoosh.highlight import Formatter, get_text | ||||||
| @@ -10,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir | |||||||
| from whoosh.qparser import MultifieldParser | from whoosh.qparser import MultifieldParser | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
|  |  | ||||||
| from documents.models import Document |  | ||||||
| from paperless import settings | from paperless import settings | ||||||
|  |  | ||||||
|  |  | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,12 +5,11 @@ import os | |||||||
| import re | import re | ||||||
| import time | import time | ||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from base64 import b64decode | from base64 import b64decode | ||||||
| from email import policy | from email import policy | ||||||
| from email.parser import BytesParser | from email.parser import BytesParser | ||||||
| from dateutil import parser |  | ||||||
|  |  | ||||||
|  | from dateutil import parser | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
|  |  | ||||||
| from .models import Correspondent | from .models import Correspondent | ||||||
|   | |||||||
| @@ -3,9 +3,8 @@ import os | |||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.core.management.base import BaseCommand | from django.core.management.base import BaseCommand | ||||||
|  |  | ||||||
| from watchdog.observers import Observer |  | ||||||
| from watchdog.events import FileSystemEventHandler | from watchdog.events import FileSystemEventHandler | ||||||
|  | from watchdog.observers import Observer | ||||||
|  |  | ||||||
| from documents.consumer import Consumer | from documents.consumer import Consumer | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| from django.core.management.base import BaseCommand | from django.core.management.base import BaseCommand | ||||||
|  |  | ||||||
| from ...mixins import Renderable | from ...mixins import Renderable | ||||||
| from ...tasks import train_classifier | from ...tasks import train_classifier | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,16 +1,15 @@ | |||||||
| import json | import json | ||||||
| import os | import os | ||||||
| import time |  | ||||||
| import shutil | import shutil | ||||||
|  | import time | ||||||
|  |  | ||||||
| from django.core.management.base import BaseCommand, CommandError |  | ||||||
| from django.core import serializers | from django.core import serializers | ||||||
|  | from django.core.management.base import BaseCommand, CommandError | ||||||
|  |  | ||||||
| from documents.models import Document, Correspondent, Tag, DocumentType | from documents.models import Document, Correspondent, Tag, DocumentType | ||||||
| from paperless.db import GnuPG |  | ||||||
|  |  | ||||||
| from ...mixins import Renderable |  | ||||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | ||||||
|  | from paperless.db import GnuPG | ||||||
|  | from ...mixins import Renderable | ||||||
|  |  | ||||||
|  |  | ||||||
| class Command(Renderable, BaseCommand): | class Command(Renderable, BaseCommand): | ||||||
|   | |||||||
| @@ -3,17 +3,15 @@ import os | |||||||
| import shutil | import shutil | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.core.management.base import BaseCommand, CommandError |  | ||||||
| from django.core.management import call_command | from django.core.management import call_command | ||||||
|  | from django.core.management.base import BaseCommand, CommandError | ||||||
|  |  | ||||||
| from documents.models import Document | from documents.models import Document | ||||||
|  | from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| from ...file_handling import generate_filename, create_source_path_directory | from ...file_handling import generate_filename, create_source_path_directory | ||||||
|  |  | ||||||
| from ...mixins import Renderable | from ...mixins import Renderable | ||||||
|  |  | ||||||
| from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class Command(Renderable, BaseCommand): | class Command(Renderable, BaseCommand): | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,5 +8,5 @@ class Command(BaseCommand): | |||||||
|     help = "A quick & dirty way to see what's in the logs" |     help = "A quick & dirty way to see what's in the logs" | ||||||
|  |  | ||||||
|     def handle(self, *args, **options): |     def handle(self, *args, **options): | ||||||
|         for l in Log.objects.order_by("pk"): |         for log in Log.objects.order_by("pk"): | ||||||
|             print(l) |             print(log) | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| from django.core.management.base import BaseCommand | from django.core.management.base import BaseCommand | ||||||
|  |  | ||||||
| from documents.models import Document, Tag | from documents.models import Document | ||||||
|  |  | ||||||
| from ...mixins import Renderable | from ...mixins import Renderable | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier): | |||||||
|     correspondents = Correspondent.objects.all() |     correspondents = Correspondent.objects.all() | ||||||
|     predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None |     predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None | ||||||
|  |  | ||||||
|     matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] |     return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] | ||||||
|     return matched_correspondents |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def match_document_types(document_content, classifier): | def match_document_types(document_content, classifier): | ||||||
|     document_types = DocumentType.objects.all() |     document_types = DocumentType.objects.all() | ||||||
|     predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None |     predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None | ||||||
|  |  | ||||||
|     matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] |     return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] | ||||||
|     return matched_document_types |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def match_tags(document_content, classifier): | def match_tags(document_content, classifier): | ||||||
|   | |||||||
| @@ -22,11 +22,13 @@ from django.utils import timezone | |||||||
| # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits | ||||||
| from documents.signals import document_consumer_declaration | from documents.signals import document_consumer_declaration | ||||||
|  |  | ||||||
|  | # TODO: isn't there a date parsing library for this? | ||||||
|  |  | ||||||
| DATE_REGEX = re.compile( | DATE_REGEX = re.compile( | ||||||
|     r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501 |     r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|'   # NOQA: E501 | ||||||
|     r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501 |     r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|'   # NOQA: E501 | ||||||
|     r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501 |     r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|'   # NOQA: E501 | ||||||
|     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' + |     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' | ||||||
|     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))' |     r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))' | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -43,7 +45,7 @@ def get_parser_class(doc): | |||||||
|     for response in document_consumer_declaration.send(None): |     for response in document_consumer_declaration.send(None): | ||||||
|         parsers.append(response[1]) |         parsers.append(response[1]) | ||||||
|  |  | ||||||
|     #TODO: add a check that checks parser availability. |     # TODO: add a check that checks parser availability. | ||||||
|  |  | ||||||
|     options = [] |     options = [] | ||||||
|     for parser in parsers: |     for parser in parsers: | ||||||
| @@ -59,7 +61,7 @@ def get_parser_class(doc): | |||||||
|         options, key=lambda _: _["weight"], reverse=True)[0]["parser"] |         options, key=lambda _: _["weight"], reverse=True)[0]["parser"] | ||||||
|  |  | ||||||
|  |  | ||||||
| def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): | def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): | ||||||
|     environment = os.environ.copy() |     environment = os.environ.copy() | ||||||
|     if settings.CONVERT_MEMORY_LIMIT: |     if settings.CONVERT_MEMORY_LIMIT: | ||||||
|         environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT |         environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT | ||||||
| @@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False | |||||||
|     args += ['-trim'] if trim else [] |     args += ['-trim'] if trim else [] | ||||||
|     args += ['-type', str(type)] if type else [] |     args += ['-type', str(type)] if type else [] | ||||||
|     args += ['-depth', str(depth)] if depth else [] |     args += ['-depth', str(depth)] if depth else [] | ||||||
|     args += [input, output] |     args += [input_file, output_file] | ||||||
|  |  | ||||||
|     logger.debug("Execute: " + " ".join(args), extra={'group': logging_group}) |     logger.debug("Execute: " + " ".join(args), extra={'group': logging_group}) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer): | |||||||
|  |  | ||||||
| class LogSerializer(serializers.ModelSerializer): | class LogSerializer(serializers.ModelSerializer): | ||||||
|  |  | ||||||
|  |  | ||||||
|     class Meta: |     class Meta: | ||||||
|         model = Log |         model = Log | ||||||
|         fields = ( |         fields = ( | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| import logging | import logging | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django_q.tasks import async_task, result |  | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
|  |  | ||||||
| from documents import index | from documents import index | ||||||
|   | |||||||
| @@ -2,9 +2,9 @@ import unittest | |||||||
|  |  | ||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|  |  | ||||||
|  | from .factories import DocumentFactory | ||||||
| from ..checks import changed_password_check | from ..checks import changed_password_check | ||||||
| from ..models import Document | from ..models import Document | ||||||
| from .factories import DocumentFactory |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ChecksTestCase(TestCase): | class ChecksTestCase(TestCase): | ||||||
|   | |||||||
| @@ -1,14 +1,13 @@ | |||||||
| import os | import os | ||||||
| import shutil | import shutil | ||||||
| from uuid import uuid4 |  | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
|  | from uuid import uuid4 | ||||||
|  |  | ||||||
|  | from django.conf import settings | ||||||
| from django.test import TestCase, override_settings | from django.test import TestCase, override_settings | ||||||
|  |  | ||||||
| from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories | from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories | ||||||
| from ..models import Document, Correspondent | from ..models import Document, Correspondent | ||||||
| from django.conf import settings |  | ||||||
|  |  | ||||||
| from ..signals.handlers import update_filename_and_move_files | from ..signals.handlers import update_filename_and_move_files | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -68,24 +67,18 @@ class TestDate(TestCase): | |||||||
|         # test that creating dirs for the source_path creates the correct directory |         # test that creating dirs for the source_path creates the correct directory | ||||||
|         create_source_path_directory(document.source_path) |         create_source_path_directory(document.source_path) | ||||||
|         Path(document.source_path).touch() |         Path(document.source_path).touch() | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True) | ||||||
|                          "/none"), True) |  | ||||||
|  |  | ||||||
|         # Set a correspondent and save the document |         # Set a correspondent and save the document | ||||||
|         document.correspondent = Correspondent.objects.get_or_create( |         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||||
|                 name="test")[0] |  | ||||||
|         document.save() |         document.save() | ||||||
|  |  | ||||||
|         # Check proper handling of files |         # Check proper handling of files | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) | ||||||
|                          "/test"), True) |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) | ||||||
|                          "/none"), False) |  | ||||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + |  | ||||||
|                          "/test/test-{:07d}.pdf.gpg".format(document.pk)), True) |  | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||||
|                        "{correspondent}") |  | ||||||
|     def test_file_renaming_missing_permissions(self): |     def test_file_renaming_missing_permissions(self): | ||||||
|         document = Document() |         document = Document() | ||||||
|         document.file_type = "pdf" |         document.file_type = "pdf" | ||||||
| @@ -100,27 +93,22 @@ class TestDate(TestCase): | |||||||
|         Path(document.source_path).touch() |         Path(document.source_path).touch() | ||||||
|  |  | ||||||
|         # Test source_path |         # Test source_path | ||||||
|         self.assertEqual(document.source_path, settings.ORIGINALS_DIR + |         self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) | ||||||
|                          "/none/none-{:07d}.pdf".format(document.pk)) |  | ||||||
|  |  | ||||||
|         # Make the folder read- and execute-only (no writing and no renaming) |         # Make the folder read- and execute-only (no writing and no renaming) | ||||||
|         os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) |         os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) | ||||||
|  |  | ||||||
|         # Set a correspondent and save the document |         # Set a correspondent and save the document | ||||||
|         document.correspondent = Correspondent.objects.get_or_create( |         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||||
|                 name="test")[0] |  | ||||||
|         document.save() |         document.save() | ||||||
|  |  | ||||||
|         # Check proper handling of files |         # Check proper handling of files | ||||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + |         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||||
|                          "originals/none/none-{:07d}.pdf".format(document.pk)), True) |         self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) | ||||||
|         self.assertEqual(document.filename, |  | ||||||
|                          "none/none-{:07d}.pdf".format(document.pk)) |  | ||||||
|  |  | ||||||
|         os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) |         os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||||
|                        "{correspondent}") |  | ||||||
|     def test_file_renaming_database_error(self): |     def test_file_renaming_database_error(self): | ||||||
|  |  | ||||||
|         document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") |         document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA") | ||||||
| @@ -155,13 +143,10 @@ class TestDate(TestCase): | |||||||
|  |  | ||||||
|         # Check proper handling of files |         # Check proper handling of files | ||||||
|         self.assertTrue(os.path.isfile(document.source_path)) |         self.assertTrue(os.path.isfile(document.source_path)) | ||||||
|         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" + |         self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True) | ||||||
|                                         "originals/none/none-{:07d}.pdf".format(document.pk)), True) |         self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) | ||||||
|         self.assertEqual(document.filename, |  | ||||||
|                          "none/none-{:07d}.pdf".format(document.pk)) |  | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||||
|                        "{correspondent}") |  | ||||||
|     def test_document_delete(self): |     def test_document_delete(self): | ||||||
|         document = Document() |         document = Document() | ||||||
|         document.file_type = "pdf" |         document.file_type = "pdf" | ||||||
| @@ -179,13 +164,10 @@ class TestDate(TestCase): | |||||||
|         # Ensure file deletion after delete |         # Ensure file deletion after delete | ||||||
|         pk = document.pk |         pk = document.pk | ||||||
|         document.delete() |         document.delete() | ||||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) | ||||||
|                          "/none/none-{:07d}.pdf".format(pk)), False) |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |  | ||||||
|                          "/none"), False) |  | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||||
|                        "{correspondent}") |  | ||||||
|     def test_document_delete_nofile(self): |     def test_document_delete_nofile(self): | ||||||
|         document = Document() |         document = Document() | ||||||
|         document.file_type = "pdf" |         document.file_type = "pdf" | ||||||
| @@ -194,8 +176,7 @@ class TestDate(TestCase): | |||||||
|  |  | ||||||
|         document.delete() |         document.delete() | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") | ||||||
|                        "{correspondent}") |  | ||||||
|     def test_directory_not_empty(self): |     def test_directory_not_empty(self): | ||||||
|         document = Document() |         document = Document() | ||||||
|         document.file_type = "pdf" |         document.file_type = "pdf" | ||||||
| @@ -214,18 +195,14 @@ class TestDate(TestCase): | |||||||
|         Path(important_file).touch() |         Path(important_file).touch() | ||||||
|  |  | ||||||
|         # Set a correspondent and save the document |         # Set a correspondent and save the document | ||||||
|         document.correspondent = Correspondent.objects.get_or_create( |         document.correspondent = Correspondent.objects.get_or_create(name="test")[0] | ||||||
|                 name="test")[0] |  | ||||||
|         document.save() |         document.save() | ||||||
|  |  | ||||||
|         # Check proper handling of files |         # Check proper handling of files | ||||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + |         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True) | ||||||
|                          "/documents/originals/test"), True) |         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True) | ||||||
|         self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + |  | ||||||
|                          "/documents/originals/none"), True) |  | ||||||
|         self.assertTrue(os.path.isfile(important_file)) |         self.assertTrue(os.path.isfile(important_file)) | ||||||
|  |  | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") |     @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") | ||||||
|     def test_tags_with_underscore(self): |     def test_tags_with_underscore(self): | ||||||
|         document = Document() |         document = Document() | ||||||
| @@ -304,9 +281,7 @@ class TestDate(TestCase): | |||||||
|         self.assertEqual(generate_filename(document), |         self.assertEqual(generate_filename(document), | ||||||
|                          "none-{:07d}.pdf".format(document.pk)) |                          "none-{:07d}.pdf".format(document.pk)) | ||||||
|  |  | ||||||
|  |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" + |  | ||||||
|                        "{correspondent}/{correspondent}") |  | ||||||
|     def test_nested_directory_cleanup(self): |     def test_nested_directory_cleanup(self): | ||||||
|         document = Document() |         document = Document() | ||||||
|         document.file_type = "pdf" |         document.file_type = "pdf" | ||||||
| @@ -315,25 +290,19 @@ class TestDate(TestCase): | |||||||
|  |  | ||||||
|         # Ensure that filename is properly generated |         # Ensure that filename is properly generated | ||||||
|         document.filename = generate_filename(document) |         document.filename = generate_filename(document) | ||||||
|         self.assertEqual(document.filename, |         self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) | ||||||
|                          "none/none/none-{:07d}.pdf".format(document.pk)) |  | ||||||
|         create_source_path_directory(document.source_path) |         create_source_path_directory(document.source_path) | ||||||
|         Path(document.source_path).touch() |         Path(document.source_path).touch() | ||||||
|  |  | ||||||
|         # Check proper handling of files |         # Check proper handling of files | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True) | ||||||
|                          "/none/none"), True) |  | ||||||
|  |  | ||||||
|         pk = document.pk |         pk = document.pk | ||||||
|         document.delete() |         document.delete() | ||||||
|  |  | ||||||
|         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + |         self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) | ||||||
|                          "/none/none/none-{:07d}.pdf".format(pk)), |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) | ||||||
|                          False) |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |  | ||||||
|                          "/none/none"), False) |  | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + |  | ||||||
|                          "/none"), False) |  | ||||||
|         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) |         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) | ||||||
|  |  | ||||||
|     @override_settings(PAPERLESS_FILENAME_FORMAT=None) |     @override_settings(PAPERLESS_FILENAME_FORMAT=None) | ||||||
| @@ -355,8 +324,7 @@ class TestDate(TestCase): | |||||||
|         Path(os.path.join(tmp, "notempty", "file")).touch() |         Path(os.path.join(tmp, "notempty", "file")).touch() | ||||||
|         os.makedirs(os.path.join(tmp, "notempty", "empty")) |         os.makedirs(os.path.join(tmp, "notempty", "empty")) | ||||||
|  |  | ||||||
|         delete_empty_directories( |         delete_empty_directories(os.path.join(tmp, "notempty", "empty")) | ||||||
|                 os.path.join(tmp, "notempty", "empty")) |  | ||||||
|         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) |         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True) | ||||||
|         self.assertEqual(os.path.isfile( |         self.assertEqual(os.path.isfile( | ||||||
|             os.path.join(tmp, "notempty", "file")), True) |             os.path.join(tmp, "notempty", "file")), True) | ||||||
|   | |||||||
| @@ -1,9 +1,8 @@ | |||||||
| from django.core.management.base import CommandError | from django.core.management.base import CommandError | ||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|  |  | ||||||
| from ..management.commands.document_importer import Command |  | ||||||
|  |  | ||||||
| from documents.settings import EXPORTER_FILE_NAME | from documents.settings import EXPORTER_FILE_NAME | ||||||
|  | from ..management.commands.document_importer import Command | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestImporter(TestCase): | class TestImporter(TestCase): | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| import logging | import logging | ||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from unittest import mock | from unittest import mock | ||||||
|  |  | ||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|   | |||||||
| @@ -1,10 +1,9 @@ | |||||||
| import base64 | import base64 | ||||||
| import os | import os | ||||||
| import magic |  | ||||||
|  |  | ||||||
| from hashlib import md5 | from hashlib import md5 | ||||||
| from unittest import mock | from unittest import mock | ||||||
|  |  | ||||||
|  | import magic | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| from django.test import TestCase | from django.test import TestCase | ||||||
|  |  | ||||||
| from ..models import Document, Correspondent |  | ||||||
| from .factories import DocumentFactory, CorrespondentFactory | from .factories import DocumentFactory, CorrespondentFactory | ||||||
|  | from ..models import Document, Correspondent | ||||||
|  |  | ||||||
|  |  | ||||||
| class CorrespondentTestCase(TestCase): | class CorrespondentTestCase(TestCase): | ||||||
|   | |||||||
| @@ -4,11 +4,6 @@ from django.views.decorators.cache import cache_control | |||||||
| from django.views.generic import TemplateView | from django.views.generic import TemplateView | ||||||
| from django_filters.rest_framework import DjangoFilterBackend | from django_filters.rest_framework import DjangoFilterBackend | ||||||
| from rest_framework.decorators import action | from rest_framework.decorators import action | ||||||
| from rest_framework.response import Response |  | ||||||
| from rest_framework.views import APIView |  | ||||||
|  |  | ||||||
| from paperless.db import GnuPG |  | ||||||
| from paperless.views import StandardPagination |  | ||||||
| from rest_framework.filters import OrderingFilter, SearchFilter | from rest_framework.filters import OrderingFilter, SearchFilter | ||||||
| from rest_framework.mixins import ( | from rest_framework.mixins import ( | ||||||
|     DestroyModelMixin, |     DestroyModelMixin, | ||||||
| @@ -17,12 +12,17 @@ from rest_framework.mixins import ( | |||||||
|     UpdateModelMixin |     UpdateModelMixin | ||||||
| ) | ) | ||||||
| from rest_framework.permissions import IsAuthenticated | from rest_framework.permissions import IsAuthenticated | ||||||
|  | from rest_framework.response import Response | ||||||
|  | from rest_framework.views import APIView | ||||||
| from rest_framework.viewsets import ( | from rest_framework.viewsets import ( | ||||||
|     GenericViewSet, |     GenericViewSet, | ||||||
|     ModelViewSet, |     ModelViewSet, | ||||||
|     ReadOnlyModelViewSet |     ReadOnlyModelViewSet | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | import documents.index as index | ||||||
|  | from paperless.db import GnuPG | ||||||
|  | from paperless.views import StandardPagination | ||||||
| from .filters import ( | from .filters import ( | ||||||
|     CorrespondentFilterSet, |     CorrespondentFilterSet, | ||||||
|     DocumentFilterSet, |     DocumentFilterSet, | ||||||
| @@ -30,8 +30,6 @@ from .filters import ( | |||||||
|     DocumentTypeFilterSet, |     DocumentTypeFilterSet, | ||||||
|     LogFilterSet |     LogFilterSet | ||||||
| ) | ) | ||||||
|  |  | ||||||
| import documents.index as index |  | ||||||
| from .forms import UploadForm | from .forms import UploadForm | ||||||
| from .models import Correspondent, Document, Log, Tag, DocumentType | from .models import Correspondent, Document, Log, Tag, DocumentType | ||||||
| from .serialisers import ( | from .serialisers import ( | ||||||
| @@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin, | |||||||
|         return super(DocumentViewSet, self).destroy(request, *args, **kwargs) |         return super(DocumentViewSet, self).destroy(request, *args, **kwargs) | ||||||
|  |  | ||||||
|     def file_response(self, pk, disposition): |     def file_response(self, pk, disposition): | ||||||
|         #TODO: this should not be necessary here. |         # TODO: this should not be necessary here. | ||||||
|         content_types = { |         content_types = { | ||||||
|             Document.TYPE_PDF: "application/pdf", |             Document.TYPE_PDF: "application/pdf", | ||||||
|             Document.TYPE_PNG: "image/png", |             Document.TYPE_PNG: "image/png", | ||||||
| @@ -114,7 +112,7 @@ class DocumentViewSet(RetrieveModelMixin, | |||||||
|             Document.TYPE_GIF: "image/gif", |             Document.TYPE_GIF: "image/gif", | ||||||
|             Document.TYPE_TIF: "image/tiff", |             Document.TYPE_TIF: "image/tiff", | ||||||
|             Document.TYPE_CSV: "text/csv", |             Document.TYPE_CSV: "text/csv", | ||||||
|             Document.TYPE_MD:  "text/markdown", |             Document.TYPE_MD: "text/markdown", | ||||||
|             Document.TYPE_TXT: "text/plain" |             Document.TYPE_TXT: "text/plain" | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin, | |||||||
|  |  | ||||||
|     @action(methods=['post'], detail=False) |     @action(methods=['post'], detail=False) | ||||||
|     def post_document(self, request, pk=None): |     def post_document(self, request, pk=None): | ||||||
|         #TODO: is this a good implementation? |         # TODO: is this a good implementation? | ||||||
|         form = UploadForm(data=request.POST, files=request.FILES) |         form = UploadForm(data=request.POST, files=request.FILES) | ||||||
|         if form.is_valid(): |         if form.is_valid(): | ||||||
|             form.save() |             form.save() | ||||||
|   | |||||||
| @@ -11,6 +11,8 @@ writeable_hint = ( | |||||||
|     "Set the permissions of {} to be writeable by the user running the " |     "Set the permissions of {} to be writeable by the user running the " | ||||||
|     "Paperless services" |     "Paperless services" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| def path_check(env_var): | def path_check(env_var): | ||||||
|     messages = [] |     messages = [] | ||||||
|     directory = os.getenv(env_var) |     directory = os.getenv(env_var) | ||||||
| @@ -27,6 +29,7 @@ def path_check(env_var): | |||||||
|             )) |             )) | ||||||
|     return messages |     return messages | ||||||
|  |  | ||||||
|  |  | ||||||
| @register() | @register() | ||||||
| def paths_check(app_configs, **kwargs): | def paths_check(app_configs, **kwargs): | ||||||
|     """ |     """ | ||||||
| @@ -34,9 +37,9 @@ def paths_check(app_configs, **kwargs): | |||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     check_messages = path_check("PAPERLESS_DATA_DIR") + \ |     check_messages = path_check("PAPERLESS_DATA_DIR") + \ | ||||||
|                      path_check("PAPERLESS_MEDIA_ROOT") + \ |         path_check("PAPERLESS_MEDIA_ROOT") + \ | ||||||
|                      path_check("PAPERLESS_CONSUMPTION_DIR") + \ |         path_check("PAPERLESS_CONSUMPTION_DIR") + \ | ||||||
|                      path_check("PAPERLESS_STATICDIR") |         path_check("PAPERLESS_STATICDIR") | ||||||
|  |  | ||||||
|     return check_messages |     return check_messages | ||||||
|  |  | ||||||
|   | |||||||
| @@ -25,6 +25,7 @@ elif os.path.exists("/usr/local/etc/paperless.conf"): | |||||||
| # Tesseract process to one thread. | # Tesseract process to one thread. | ||||||
| os.environ['OMP_THREAD_LIMIT'] = "1" | os.environ['OMP_THREAD_LIMIT'] = "1" | ||||||
|  |  | ||||||
|  |  | ||||||
| def __get_boolean(key, default="NO"): | def __get_boolean(key, default="NO"): | ||||||
|     """ |     """ | ||||||
|     Return a boolean value based on whatever the user has supplied in the |     Return a boolean value based on whatever the user has supplied in the | ||||||
| @@ -32,9 +33,11 @@ def __get_boolean(key, default="NO"): | |||||||
|     """ |     """ | ||||||
|     return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) |     return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) | ||||||
|  |  | ||||||
|  |  | ||||||
| # NEVER RUN WITH DEBUG IN PRODUCTION. | # NEVER RUN WITH DEBUG IN PRODUCTION. | ||||||
| DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") | DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") | ||||||
|  |  | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| # Directories                                                                 # | # Directories                                                                 # | ||||||
| ############################################################################### | ############################################################################### | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ from django.views.decorators.csrf import csrf_exempt | |||||||
| from django.views.generic import RedirectView | from django.views.generic import RedirectView | ||||||
| from rest_framework.routers import DefaultRouter | from rest_framework.routers import DefaultRouter | ||||||
|  |  | ||||||
| from paperless.views import FaviconView |  | ||||||
| from documents.views import ( | from documents.views import ( | ||||||
|     CorrespondentViewSet, |     CorrespondentViewSet, | ||||||
|     DocumentViewSet, |     DocumentViewSet, | ||||||
| @@ -18,6 +17,7 @@ from documents.views import ( | |||||||
|     SearchAutoCompleteView, |     SearchAutoCompleteView, | ||||||
|     StatisticsView |     StatisticsView | ||||||
| ) | ) | ||||||
|  | from paperless.views import FaviconView | ||||||
|  |  | ||||||
| api_router = DefaultRouter() | api_router = DefaultRouter() | ||||||
| api_router.register(r"correspondents", CorrespondentViewSet) | api_router.register(r"correspondents", CorrespondentViewSet) | ||||||
| @@ -30,7 +30,7 @@ api_router.register(r"tags", TagViewSet) | |||||||
| urlpatterns = [ | urlpatterns = [ | ||||||
|  |  | ||||||
|     # API |     # API | ||||||
|     url(r"^api/auth/",include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")), |     url(r"^api/auth/", include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")), | ||||||
|     url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"), |     url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"), | ||||||
|     url(r"^api/search/", SearchView.as_view(), name="search"), |     url(r"^api/search/", SearchView.as_view(), name="search"), | ||||||
|     url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"), |     url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"), | ||||||
|   | |||||||
| @@ -5,15 +5,14 @@ import subprocess | |||||||
| from multiprocessing.pool import Pool | from multiprocessing.pool import Pool | ||||||
|  |  | ||||||
| import langdetect | import langdetect | ||||||
|  | import pdftotext | ||||||
| import pyocr | import pyocr | ||||||
| from django.conf import settings |  | ||||||
| from PIL import Image | from PIL import Image | ||||||
|  | from django.conf import settings | ||||||
| from pyocr import PyocrException | from pyocr import PyocrException | ||||||
|  |  | ||||||
| import pdftotext |  | ||||||
| from documents.parsers import DocumentParser, ParseError, run_unpaper, \ | from documents.parsers import DocumentParser, ParseError, run_unpaper, \ | ||||||
|     run_convert |     run_convert | ||||||
|  |  | ||||||
| from .languages import ISO639 | from .languages import ISO639 | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -45,8 +44,8 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|                         alpha="remove", |                         alpha="remove", | ||||||
|                         strip=True, |                         strip=True, | ||||||
|                         trim=True, |                         trim=True, | ||||||
|                         input="{}[0]".format(self.document_path), |                         input_file="{}[0]".format(self.document_path), | ||||||
|                         output=out_path, |                         output_file=out_path, | ||||||
|                         logging_group=self.logging_group) |                         logging_group=self.logging_group) | ||||||
|         except ParseError: |         except ParseError: | ||||||
|             # if convert fails, fall back to extracting |             # if convert fails, fall back to extracting | ||||||
| @@ -66,8 +65,8 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|                         alpha="remove", |                         alpha="remove", | ||||||
|                         strip=True, |                         strip=True, | ||||||
|                         trim=True, |                         trim=True, | ||||||
|                         input=gs_out_path, |                         input_file=gs_out_path, | ||||||
|                         output=out_path, |                         output_file=out_path, | ||||||
|                         logging_group=self.logging_group) |                         logging_group=self.logging_group) | ||||||
|  |  | ||||||
|         return out_path |         return out_path | ||||||
| @@ -99,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         try: |         try: | ||||||
|  |  | ||||||
|             sample_page_index = int(len(images) / 2) |             sample_page_index = int(len(images) / 2) | ||||||
|             self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index+1, len(images))) |             self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) | ||||||
|             sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] |             sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] | ||||||
|             guessed_language = self._guess_language(sample_page_text) |             guessed_language = self._guess_language(sample_page_text) | ||||||
|  |  | ||||||
| @@ -139,8 +138,8 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         run_convert(density=settings.CONVERT_DENSITY, |         run_convert(density=settings.CONVERT_DENSITY, | ||||||
|                     depth="8", |                     depth="8", | ||||||
|                     type="grayscale", |                     type="grayscale", | ||||||
|                     input=self.document_path, |                     input_file=self.document_path, | ||||||
|                     output=pnm, |                     output_file=pnm, | ||||||
|                     logging_group=self.logging_group) |                     logging_group=self.logging_group) | ||||||
|  |  | ||||||
|         # Get a list of converted images |         # Get a list of converted images | ||||||
| @@ -189,7 +188,6 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             return [sample_page] |             return [sample_page] | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def strip_excess_whitespace(text): | def strip_excess_whitespace(text): | ||||||
|     collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) |     collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) | ||||||
|     no_leading_whitespace = re.sub( |     no_leading_whitespace = re.sub( | ||||||
|   | |||||||
| @@ -5,10 +5,10 @@ from unittest import mock | |||||||
| from uuid import uuid4 | from uuid import uuid4 | ||||||
|  |  | ||||||
| from dateutil import tz | from dateutil import tz | ||||||
|  | from django.conf import settings | ||||||
| from django.test import TestCase, override_settings | from django.test import TestCase, override_settings | ||||||
|  |  | ||||||
| from ..parsers import RasterisedDocumentParser | from ..parsers import RasterisedDocumentParser | ||||||
| from django.conf import settings |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestDate(TestCase): | class TestDate(TestCase): | ||||||
|   | |||||||
| @@ -47,8 +47,8 @@ class TextDocumentParser(DocumentParser): | |||||||
|  |  | ||||||
|         def read_text(): |         def read_text(): | ||||||
|             with open(self.document_path, 'r') as src: |             with open(self.document_path, 'r') as src: | ||||||
|                 lines = [l.strip() for l in src.readlines()] |                 lines = [line.strip() for line in src.readlines()] | ||||||
|                 text = "\n".join([l for l in lines[:n_lines]]) |                 text = "\n".join([line for line in lines[:n_lines]]) | ||||||
|                 return text.replace('"', "'") |                 return text.replace('"', "'") | ||||||
|  |  | ||||||
|         def create_txlayer(): |         def create_txlayer(): | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| [pycodestyle] | [pycodestyle] | ||||||
| exclude = migrations, paperless/settings.py, .tox | exclude = migrations, paperless/settings.py, .tox | ||||||
|  | ignore = E501 | ||||||
|  |  | ||||||
| [tool:pytest] | [tool:pytest] | ||||||
| DJANGO_SETTINGS_MODULE=paperless.settings | DJANGO_SETTINGS_MODULE=paperless.settings | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler