From 1b0233418b037c084895d3beb795b30f609d4689 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 11:32:48 +0100 Subject: [PATCH 01/12] bugfix --- src/documents/forms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/documents/forms.py b/src/documents/forms.py index 912fd9673..38a95a068 100644 --- a/src/documents/forms.py +++ b/src/documents/forms.py @@ -32,6 +32,9 @@ class UploadForm(forms.Form): t = int(mktime(datetime.now().timetuple())) + os.makedirs(settings.SCRATCH_DIR, exist_ok=True) + + # TODO: dont just append pdf. This is here for taht weird regex check at the start of the consumer. with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f: f.write(document) From 8908bc259e2e41b74285882c2d9c9a7dec577bb0 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 13:23:30 +0100 Subject: [PATCH 02/12] updated logging, logging for the mail consumer to see whats happening --- docs/advanced_usage.rst | 4 +- src/documents/consumer.py | 14 +- src/documents/loggers.py | 17 ++ src/documents/parsers.py | 12 +- src/paperless/settings.py | 8 + src/paperless_mail/admin.py | 11 -- src/paperless_mail/mail.py | 237 ++++++++++++++++---------- src/paperless_mail/tasks.py | 5 +- src/paperless_mail/tests/test_mail.py | 38 +++-- src/paperless_tesseract/parsers.py | 20 +-- 10 files changed, 214 insertions(+), 152 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 3b48ea582..218cfa8b7 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -175,8 +175,6 @@ then put the path to that script in ``paperless.conf`` with the variable name of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or ``PAPERLESS_POST_CONSUME_SCRIPT``. -.. TODO HYPEREF TO CONFIG - .. important:: These scripts are executed in a **blocking** process, which means that if @@ -319,6 +317,6 @@ for use in filenames. .. 
code:: PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title} - + However, keep in mind that inside docker, if files get stored outside of the predefined volumes, they will be lost after a restart of paperless. diff --git a/src/documents/consumer.py b/src/documents/consumer.py index f0cd4dd67..913f324c7 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -12,6 +12,7 @@ from django.utils import timezone from paperless.db import GnuPG from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .file_handling import generate_filename, create_source_path_directory +from .loggers import LoggingMixin from .models import Document, FileInfo, Correspondent, DocumentType, Tag from .parsers import ParseError, get_parser_class from .signals import ( @@ -24,12 +25,10 @@ class ConsumerError(Exception): pass -class Consumer: +class Consumer(LoggingMixin): def __init__(self): - - self.logger = logging.getLogger(__name__) - self.logging_group = None + super().__init__() self.path = None self.filename = None self.override_title = None @@ -74,11 +73,6 @@ class Consumer: os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True) os.makedirs(settings.ORIGINALS_DIR, exist_ok=True) - def log(self, level, message): - getattr(self.logger, level)(message, extra={ - "group": self.logging_group - }) - def try_consume_file(self, path, override_filename=None, @@ -100,7 +94,7 @@ class Consumer: # this is for grouping logging entries for this particular file # together. - self.logging_group = uuid.uuid4() + self.renew_logging_group() # Make sure that preconditions for consuming the file are met. 
diff --git a/src/documents/loggers.py b/src/documents/loggers.py index d9c90ab16..fd20e1288 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -1,4 +1,5 @@ import logging +import uuid class PaperlessHandler(logging.Handler): @@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler): kwargs["group"] = record.group Log.objects.create(**kwargs) + + +class LoggingMixin: + + logging_group = None + + def renew_logging_group(self): + self.logging_group = uuid.uuid4() + + def log(self, level, message): + target = ".".join([self.__class__.__module__, self.__class__.__name__]) + logger = logging.getLogger(target) + + getattr(logger, level)(message, extra={ + "group": self.logging_group + }) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 600e4fc93..2fab6bc44 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -20,6 +20,7 @@ from django.utils import timezone # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits +from documents.loggers import LoggingMixin from documents.signals import document_consumer_declaration # TODO: isnt there a date parsing library for this? @@ -101,17 +102,17 @@ class ParseError(Exception): pass -class DocumentParser: +class DocumentParser(LoggingMixin): """ Subclass this to make your own parser. Have a look at `paperless_tesseract.parsers` for inspiration. 
""" def __init__(self, path, logging_group): + super().__init__() + self.logging_group = logging_group self.document_path = path self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) - self.logger = logging.getLogger(__name__) - self.logging_group = logging_group def get_thumbnail(self): """ @@ -222,11 +223,6 @@ class DocumentParser: return date - def log(self, level, message): - getattr(self.logger, level)(message, extra={ - "group": self.logging_group - }) - def cleanup(self): self.log("debug", "Deleting directory {}".format(self.tempdir)) shutil.rmtree(self.tempdir) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 311913c3e..3661c3d02 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -257,6 +257,14 @@ LOGGING = { "handlers": ["dbhandler", "streamhandler"], "level": "DEBUG" }, + "paperless_mail": { + "handlers": ["dbhandler", "streamhandler"], + "level": "DEBUG" + }, + "paperless_tesseract": { + "handlers": ["dbhandler", "streamhandler"], + "level": "DEBUG" + }, }, } diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index b64a68637..130e34ad1 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -1,18 +1,7 @@ from django.contrib import admin -from django import forms - from paperless_mail.models import MailAccount, MailRule -class MailAccountForm(forms.ModelForm): - - password = forms.CharField(widget=forms.PasswordInput) - - class Meta: - fields = '__all__' - model = MailAccount - - class MailAccountAdmin(admin.ModelAdmin): list_display = ("name", "imap_server", "username") diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index ce8bf9459..dd1e68b35 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -8,6 +8,7 @@ from django_q.tasks import async_task from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \ MailboxFolderSelectError +from documents.loggers import LoggingMixin from 
documents.models import Correspondent from paperless_mail.models import MailAccount, MailRule @@ -83,72 +84,6 @@ def make_criterias(rule): return {**criterias, **get_rule_action(rule).get_criteria()} -def handle_mail_account(account): - - if account.imap_security == MailAccount.IMAP_SECURITY_NONE: - mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port) - elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS: - mailbox = MailBox(account.imap_server, account.imap_port, starttls=True) - elif account.imap_security == MailAccount.IMAP_SECURITY_SSL: - mailbox = MailBox(account.imap_server, account.imap_port) - else: - raise ValueError("Unknown IMAP security") - - total_processed_files = 0 - - with mailbox as M: - - try: - M.login(account.username, account.password) - except Exception: - raise MailError( - f"Error while authenticating account {account.name}") - - for rule in account.rules.all(): - - try: - M.folder.set(rule.folder) - except MailboxFolderSelectError: - raise MailError( - f"Rule {rule.name}: Folder {rule.folder} does not exist " - f"in account {account.name}") - - criterias = make_criterias(rule) - - try: - messages = M.fetch(criteria=AND(**criterias), mark_seen=False) - except Exception: - raise MailError( - f"Rule {rule.name}: Error while fetching folder " - f"{rule.folder} of account {account.name}") - - post_consume_messages = [] - - for message in messages: - try: - processed_files = handle_message(message, rule) - except Exception: - raise MailError( - f"Rule {rule.name}: Error while processing mail " - f"{message.uid} of account {account.name}") - if processed_files > 0: - post_consume_messages.append(message.uid) - - total_processed_files += processed_files - try: - get_rule_action(rule).post_consume( - M, - post_consume_messages, - rule.action_parameter) - - except Exception: - raise MailError( - f"Rule {rule.name}: Error while processing post-consume " - f"actions for account {account.name}") - - return 
total_processed_files - - def get_title(message, att, rule): if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT: title = message.subject @@ -189,39 +124,155 @@ def get_correspondent(message, rule): return correspondent -def handle_message(message, rule): - if not message.attachments: - return 0 +def get_mailbox(server, port, security): + if security == MailAccount.IMAP_SECURITY_NONE: + mailbox = MailBoxUnencrypted(server, port) + elif security == MailAccount.IMAP_SECURITY_STARTTLS: + mailbox = MailBox(server, port, starttls=True) + elif security == MailAccount.IMAP_SECURITY_SSL: + mailbox = MailBox(server, port) + else: + raise ValueError("Unknown IMAP security") + return mailbox - correspondent = get_correspondent(message, rule) - tag = rule.assign_tag - doc_type = rule.assign_document_type +class MailAccountHandler(LoggingMixin): - processed_attachments = 0 + def handle_mail_account(self, account): - for att in message.attachments: + self.renew_logging_group() - title = get_title(message, att, rule) + self.log('debug', f"Processing mail account {account}") - # TODO: check with parsers what files types are supported - if att.content_type == 'application/pdf': + total_processed_files = 0 - os.makedirs(settings.SCRATCH_DIR, exist_ok=True) - _, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR) - with open(temp_filename, 'wb') as f: - f.write(att.payload) + with get_mailbox(account.imap_server, + account.imap_port, + account.imap_security) as M: - async_task( - "documents.tasks.consume_file", - path=temp_filename, - override_filename=att.filename, - override_title=title, - override_correspondent_id=correspondent.id if correspondent else None, - override_document_type_id=doc_type.id if doc_type else None, - override_tag_ids=[tag.id] if tag else None, - task_name=f"Mail: {att.filename}" - ) + try: + M.login(account.username, account.password) + except Exception: + raise MailError( + f"Error while authenticating account 
{account.name}") - processed_attachments += 1 + self.log('debug', f"Account {account}: Processing " + f"{account.rules.count()} rule(s)") - return processed_attachments + for rule in account.rules.all(): + self.log( + 'debug', + f"Account {account}: Processing rule {rule.name}") + + self.log( + 'debug', + f"Rule {account}.{rule}: Selecting folder {rule.folder}") + + try: + M.folder.set(rule.folder) + except MailboxFolderSelectError: + raise MailError( + f"Rule {rule.name}: Folder {rule.folder} does not exist " + f"in account {account.name}") + + criterias = make_criterias(rule) + + self.log( + 'debug', + f"Rule {account}.{rule}: Searching folder with criteria " + f"{str(AND(**criterias))}") + + try: + messages = M.fetch(criteria=AND(**criterias), mark_seen=False) + except Exception: + raise MailError( + f"Rule {rule.name}: Error while fetching folder " + f"{rule.folder} of account {account.name}") + + post_consume_messages = [] + + mails_processed = 0 + + for message in messages: + try: + processed_files = self.handle_message(message, rule) + except Exception: + raise MailError( + f"Rule {rule.name}: Error while processing mail " + f"{message.uid} of account {account.name}") + if processed_files > 0: + post_consume_messages.append(message.uid) + + total_processed_files += processed_files + mails_processed += 1 + + self.log( + 'debug', + f"Rule {account}.{rule}: Processed {mails_processed} " + f"matching mail(s)") + + self.log( + 'debug', + f"Rule {account}.{rule}: Running mail actions on " + f"{len(post_consume_messages)} mails") + + try: + get_rule_action(rule).post_consume( + M, + post_consume_messages, + rule.action_parameter) + + except Exception: + raise MailError( + f"Rule {rule.name}: Error while processing post-consume " + f"actions for account {account.name}") + + return total_processed_files + + def handle_message(self, message, rule): + if not message.attachments: + return 0 + + self.log( + 'debug', + f"Rule {rule.account}.{rule}: " + f"Processing mail 
{message.subject} from {message.from_} with " + f"{len(message.attachments)} attachment(s)") + + correspondent = get_correspondent(message, rule) + tag = rule.assign_tag + doc_type = rule.assign_document_type + + processed_attachments = 0 + + for att in message.attachments: + + title = get_title(message, att, rule) + + # TODO: check with parsers what files types are supported + if att.content_type == 'application/pdf': + + os.makedirs(settings.SCRATCH_DIR, exist_ok=True) + _, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR) + with open(temp_filename, 'wb') as f: + f.write(att.payload) + + self.log( + 'info', + f"Rule {rule.account}.{rule}: " + f"Consuming attachment {att.filename} from mail " + f"{message.subject} from {message.from_}") + + async_task( + "documents.tasks.consume_file", + path=temp_filename, + override_filename=att.filename, + override_title=title, + override_correspondent_id=correspondent.id if correspondent else None, + override_document_type_id=doc_type.id if doc_type else None, + override_tag_ids=[tag.id] if tag else None, + task_name=f"Mail: {att.filename}" + ) + + processed_attachments += 1 + + return processed_attachments diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index d34941a8a..dbef91c94 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,13 +1,14 @@ import logging from paperless_mail import mail +from paperless_mail.mail import MailAccountHandler from paperless_mail.models import MailAccount def process_mail_accounts(): total_new_documents = 0 for account in MailAccount.objects.all(): - total_new_documents += mail.handle_mail_account(account) + total_new_documents += MailAccountHandler().handle_mail_account(account) if total_new_documents > 0: return f"Added {total_new_documents} document(s)." 
@@ -18,6 +19,6 @@ def process_mail_accounts(): def process_mail_account(name): account = MailAccount.objects.find(name=name) if account: - mail.handle_mail_account(account) + MailAccountHandler().handle_mail_account(account) else: logging.error("Unknown mail acccount: {}".format(name)) diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index 20cf17ec7..a9d57fcb8 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -7,7 +7,7 @@ from django.test import TestCase from imap_tools import MailMessageFlags, MailboxFolderSelectError from documents.models import Correspondent -from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError +from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title from paperless_mail.models import MailRule, MailAccount @@ -126,6 +126,8 @@ class TestMail(TestCase): self.reset_bogus_mailbox() + self.mail_account_handler = MailAccountHandler() + def reset_bogus_mailbox(self): self.bogus_mailbox.messages = [] self.bogus_mailbox.messages_spam = [] @@ -182,6 +184,7 @@ class TestMail(TestCase): def test_handle_message(self): message = namedtuple('MailMessage', []) message.subject = "the message title" + message.from_ = "Myself" att = namedtuple('Attachment', []) att.filename = "test1.pdf" @@ -200,9 +203,10 @@ class TestMail(TestCase): message.attachments = [att, att2, att3] - rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) + account = MailAccount() + rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account) - result = handle_message(message, rule) + result = self.mail_account_handler.handle_message(message, rule) self.assertEqual(result, 2) @@ -224,7 +228,7 @@ class TestMail(TestCase): message.attachments = [] rule = MailRule() - result = handle_message(message, rule) + result = self.mail_account_handler.handle_message(message, rule) 
self.assertFalse(m.called) self.assertEqual(result, 0) @@ -235,11 +239,13 @@ class TestMail(TestCase): rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ) + self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(self.async_task.call_count, 2) self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0) + self.assertEqual(len(self.bogus_mailbox.messages), 3) def test_handle_mail_account_delete(self): @@ -249,7 +255,7 @@ class TestMail(TestCase): self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(self.async_task.call_count, 2) self.assertEqual(len(self.bogus_mailbox.messages), 1) @@ -258,11 +264,13 @@ class TestMail(TestCase): rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice") + self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(self.async_task.call_count, 1) self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1) + self.assertEqual(len(self.bogus_mailbox.messages), 3) def test_handle_mail_account_move(self): account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret") @@ -272,7 +280,7 @@ class TestMail(TestCase): self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) self.assertEqual(len(self.bogus_mailbox.messages_spam), 0) - 
handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(self.async_task.call_count, 1) self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(len(self.bogus_mailbox.messages_spam), 1) @@ -281,7 +289,7 @@ class TestMail(TestCase): account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong") try: - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) except MailError as e: self.assertTrue(str(e).startswith("Error while authenticating account")) else: @@ -291,7 +299,7 @@ class TestMail(TestCase): rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh") try: - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) except MailError as e: self.assertTrue("uuuh does not exist" in str(e)) else: @@ -302,7 +310,7 @@ class TestMail(TestCase): rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") try: - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) except MailError as e: self.assertTrue("Error while processing post-consume actions" in str(e)) else: @@ -316,7 +324,7 @@ class TestMail(TestCase): self.assertEqual(self.async_task.call_count, 0) self.assertEqual(len(self.bogus_mailbox.messages), 3) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(self.async_task.call_count, 1) @@ -326,7 +334,7 @@ class TestMail(TestCase): rule.filter_body = "electronic" rule.save() self.assertEqual(len(self.bogus_mailbox.messages), 3) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(self.async_task.call_count, 2) @@ -336,7 +344,7 @@ class TestMail(TestCase): 
rule.filter_body = None rule.save() self.assertEqual(len(self.bogus_mailbox.messages), 3) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(len(self.bogus_mailbox.messages), 1) self.assertEqual(self.async_task.call_count, 4) @@ -347,6 +355,6 @@ class TestMail(TestCase): rule.filter_subject = "Invoice" rule.save() self.assertEqual(len(self.bogus_mailbox.messages), 3) - handle_mail_account(account) + self.mail_account_handler.handle_mail_account(account) self.assertEqual(len(self.bogus_mailbox.messages), 2) self.assertEqual(self.async_task.call_count, 5) diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index d07f9e4b3..73b2414d5 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -86,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser): return self._text if not settings.OCR_ALWAYS and self._is_ocred(): - self.log("info", "Skipping OCR, using Text from PDF") + self.log("debug", "Skipping OCR, using Text from PDF") self._text = get_text_from_pdf(self.document_path) return self._text @@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser): try: sample_page_index = int(len(images) / 2) - self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) + self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images))) sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] guessed_language = self._guess_language(sample_page_text) @@ -107,7 +107,7 @@ class RasterisedDocumentParser(DocumentParser): ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) elif ISO639[guessed_language] == settings.OCR_LANGUAGE: - self.log("info", "Detected language: {} (default language)".format(guessed_language)) + self.log("debug", "Detected language: {} (default 
language)".format(guessed_language)) ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages(): @@ -115,10 +115,10 @@ class RasterisedDocumentParser(DocumentParser): ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) else: - self.log("info", "Detected language: {}".format(guessed_language)) + self.log("debug", "Detected language: {}".format(guessed_language)) ocr_pages = self._ocr(images, ISO639[guessed_language]) - self.log("info", "OCR completed.") + self.log("debug", "OCR completed.") self._text = strip_excess_whitespace(" ".join(ocr_pages)) return self._text @@ -130,7 +130,7 @@ class RasterisedDocumentParser(DocumentParser): Greyscale images are easier for Tesseract to OCR """ - self.log("info", "Converting document {} into greyscale images...".format(self.document_path)) + self.log("debug", "Converting document {} into greyscale images...".format(self.document_path)) # Convert PDF to multiple PNMs pnm = os.path.join(self.tempdir, "convert-%04d.pnm") @@ -148,7 +148,7 @@ class RasterisedDocumentParser(DocumentParser): if f.endswith(".pnm"): pnms.append(os.path.join(self.tempdir, f)) - self.log("info", "Running unpaper on {} pages...".format(len(pnms))) + self.log("debug", "Running unpaper on {} pages...".format(len(pnms))) # Run unpaper in parallel on converted images with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: @@ -161,11 +161,11 @@ class RasterisedDocumentParser(DocumentParser): guess = langdetect.detect(text) return guess except Exception as e: - self.log('debug', "Language detection failed with: {}".format(e)) + self.log('warning', "Language detection failed with: {}".format(e)) return None def _ocr(self, imgs, lang): - self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang)) + self.log("debug", "Performing OCR on {} page(s) with language 
{}".format(len(imgs), lang)) with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool: r = pool.map(image_to_string, itertools.product(imgs, [lang])) return r @@ -180,7 +180,7 @@ class RasterisedDocumentParser(DocumentParser): images_copy = list(images) del images_copy[sample_page_index] if images_copy: - self.log('info', 'Continuing ocr with default language.') + self.log('debug', 'Continuing ocr with default language.') ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE) ocr_pages.insert(sample_page_index, sample_page) return ocr_pages From 6d7a6e7297688f8d20fe2484d3b0bc4d3877893f Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 18:39:09 +0100 Subject: [PATCH 03/12] small things on the front end --- .../app-frame/app-frame.component.html | 22 +++++++++++++++++++ .../components/manage/logs/logs.component.css | 4 ++++ 2 files changed, 26 insertions(+) diff --git a/src-ui/src/app/components/app-frame/app-frame.component.html b/src-ui/src/app/components/app-frame/app-frame.component.html index 25066e9b2..519b69bf0 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.html +++ b/src-ui/src/app/components/app-frame/app-frame.component.html @@ -132,6 +132,28 @@ + + + diff --git a/src-ui/src/app/components/manage/logs/logs.component.css b/src-ui/src/app/components/manage/logs/logs.component.css index 1f0112fbc..dee9b10dc 100644 --- a/src-ui/src/app/components/manage/logs/logs.component.css +++ b/src-ui/src/app/components/manage/logs/logs.component.css @@ -1,3 +1,7 @@ +.log-entry-10 { + color: lightslategray !important; +} + .log-entry-30 { color: yellow !important; } From e59696efd4eab6c33f5f764d27896541d4cbe60a Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 18:39:37 +0100 Subject: [PATCH 04/12] small fixes --- src/paperless/checks.py | 12 ++++++------ src/paperless_mail/models.py | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 
bc03cb6bc..b39822128 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -72,11 +72,11 @@ def binaries_check(app_configs, **kwargs): @register() def debug_mode_check(app_configs, **kwargs): if settings.DEBUG: - return [Warning("DEBUG mode is enabled. Disable Debug mode. " - "This is a serious security " - "issue, since it puts security overides in place which" - "are meant to be only used during development. This" - "also means that paperless will tell anyone various" - "debugging information when something goes wrong.")] + return [Warning( + "DEBUG mode is enabled. Disable Debug mode. This is a serious " + "security issue, since it puts security overides in place which " + "are meant to be only used during development. This " + "also means that paperless will tell anyone various " + "debugging information when something goes wrong.")] else: return [] diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index 506882e6b..95dbc1bc1 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -1,8 +1,5 @@ from django.db import models -# Create your models here. -from django.db import models - import documents.models as document_models From 87e83eb71bd18050e2ef7babd656ceeaf2fe62c1 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 20:41:42 +0100 Subject: [PATCH 05/12] a couple small adjustments here and there. 
--- .../page-header/page-header.component.html | 4 ++-- src/paperless_mail/admin.py | 2 ++ src/paperless_mail/mail.py | 1 + .../migrations/0003_auto_20201118_1940.py | 23 +++++++++++++++++++ src/paperless_mail/models.py | 8 +++++-- 5 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 src/paperless_mail/migrations/0003_auto_20201118_1940.py diff --git a/src-ui/src/app/components/common/page-header/page-header.component.html b/src-ui/src/app/components/common/page-header/page-header.component.html index 7c84046d8..54386422a 100644 --- a/src-ui/src/app/components/common/page-header/page-header.component.html +++ b/src-ui/src/app/components/common/page-header/page-header.component.html @@ -1,6 +1,6 @@ -
+
-

{{title}}

+

{{title}}

diff --git a/src/paperless_mail/admin.py b/src/paperless_mail/admin.py index 130e34ad1..8d05c2a42 100644 --- a/src/paperless_mail/admin.py +++ b/src/paperless_mail/admin.py @@ -9,6 +9,8 @@ class MailAccountAdmin(admin.ModelAdmin): class MailRuleAdmin(admin.ModelAdmin): + list_filter = ("account",) + list_display = ("name", "account", "folder", "action") diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index dd1e68b35..b942e420a 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -135,6 +135,7 @@ def get_mailbox(server, port, security): raise ValueError("Unknown IMAP security") return mailbox + class MailAccountHandler(LoggingMixin): def handle_mail_account(self, account): diff --git a/src/paperless_mail/migrations/0003_auto_20201118_1940.py b/src/paperless_mail/migrations/0003_auto_20201118_1940.py new file mode 100644 index 000000000..3339a6d7f --- /dev/null +++ b/src/paperless_mail/migrations/0003_auto_20201118_1940.py @@ -0,0 +1,23 @@ +# Generated by Django 3.1.3 on 2020-11-18 19:40 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('paperless_mail', '0002_auto_20201117_1334'), + ] + + operations = [ + migrations.AlterField( + model_name='mailaccount', + name='imap_port', + field=models.IntegerField(blank=True, help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.', null=True), + ), + migrations.AlterField( + model_name='mailrule', + name='name', + field=models.CharField(max_length=256, unique=True), + ), + ] diff --git a/src/paperless_mail/models.py b/src/paperless_mail/models.py index 95dbc1bc1..e37fbee16 100644 --- a/src/paperless_mail/models.py +++ b/src/paperless_mail/models.py @@ -19,7 +19,11 @@ class MailAccount(models.Model): imap_server = models.CharField(max_length=256) - imap_port = models.IntegerField(blank=True, null=True) + imap_port = models.IntegerField( + blank=True, + null=True, + 
help_text="This is usually 143 for unencrypted and STARTTLS " + "connections, and 993 for SSL connections.") imap_security = models.PositiveIntegerField( choices=IMAP_SECURITY_OPTIONS, @@ -68,7 +72,7 @@ class MailRule(models.Model): (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below") ) - name = models.CharField(max_length=256) + name = models.CharField(max_length=256, unique=True) account = models.ForeignKey( MailAccount, From 93b81ef6e9a094d6722f144566d0c9e2c1cbd133 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:40:07 +0100 Subject: [PATCH 06/12] updated docs --- docs/administration.rst | 40 +++++++++++++++++++++++++++++++++------- docs/faq.rst | 12 ------------ 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/docs/administration.rst b/docs/administration.rst index 275883a2f..42a91bcdb 100644 --- a/docs/administration.rst +++ b/docs/administration.rst @@ -8,16 +8,40 @@ Administration Making backups ############## -.. warning:: +Multiple options exist for making backups of your paperless instance, +depending on how you installed paperless. - This section is not updated to paperless-ng yet, the exporter is a valid tool - for backups though. +Before making backups, make sure that paperless is not running. -So you're bored of this whole project, or you want to make a remote backup of -your files for whatever reason. This is easy to do, simply use the -:ref:`exporter ` to dump your documents and database out -into an arbitrary directory. +Options available to any installation of paperless: +* Use the :ref:`document exporter `. + The document exporter exports all your documents, thumbnails and + metadata to a specific folder. You may import your documents into a + fresh instance of paperless again or store your documents in another + DMS with this export. + +Options available to docker installations: + +* Backup the docker volumes. 
These usually reside within + ``/var/lib/docker/volumes`` on the host and you need to be root in order + to access them. + + Paperless uses 3 volumes: + + * ``paperless_media``: This is where your documents are stored. + * ``paperless_data``: This is where auxiliary data is stored. This + folder also contains the SQLite database, if you use it. + * ``paperless_pgdata``: Exists only if you use PostgreSQL and contains + the database. + +Options available to bare-metal and non-docker installations: + +* Backup the entire paperless folder. This ensures that if your paperless instance + crashes at some point or your disk fails, you can simply copy the folder back + into place and it works. + + When using PostgreSQL, you'll also have to backup the database. .. _migrating-restoring: @@ -25,6 +49,8 @@ Restoring ========= + + .. _administration-updating: Updating paperless diff --git a/docs/faq.rst b/docs/faq.rst index b55f5d058..3f0de32b7 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -35,15 +35,3 @@ in your browser and paperless has to do much less work to serve the data. that automatically, I'm all ears. For now, you have to grab the latest release archive from the project page and build the image yourself. The release comes with the front end already compiled, so you don't have to do this on the Pi. - -You may encounter some issues during the build: - -.. code:: shell-session - - W: GPG error: http://ports.ubuntu.com/ubuntu-ports focal InRelease: At least one invalid signature was encountered. - E: The repository 'http://ports.ubuntu.com/ubuntu-ports focal InRelease' is not signed. - N: Updating from such a repository can't be done securely, and is therefore disabled by default. - N: See apt-secure(8) manpage for repository creation and user configuration details. - -If this happens, look at `this thread `:_. -You will need to update docker to the latest version to fix this issue.
From 727f86c36977e2c2353c126584c95cb70ba64dbb Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:41:14 +0100 Subject: [PATCH 07/12] codestyle --- src/documents/consumer.py | 1 - src/documents/file_handling.py | 2 +- src/documents/migrations/1000_update_paperless_all.py | 3 --- src/documents/tests/test_api.py | 3 +-- src/documents/tests/test_classifier.py | 6 +++--- src/documents/tests/test_consumer.py | 6 ++---- src/paperless_mail/management/commands/mail_fetcher.py | 2 +- src/paperless_mail/tasks.py | 1 - src/paperless_mail/views.py | 3 --- 9 files changed, 8 insertions(+), 19 deletions(-) delete mode 100644 src/paperless_mail/views.py diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 913f324c7..3cd57796e 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -3,7 +3,6 @@ import hashlib import logging import os import re -import uuid from django.conf import settings from django.db import transaction diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index ce51afe81..024003118 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -86,7 +86,7 @@ def generate_filename(document): added_day=document.added.day if document.added else "none", tags=tags, ) - except (ValueError, KeyError, IndexError) as e: + except (ValueError, KeyError, IndexError): logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT)) # Always append the primary key to guarantee uniqueness of filename diff --git a/src/documents/migrations/1000_update_paperless_all.py b/src/documents/migrations/1000_update_paperless_all.py index 8c1bd52af..5e5b475a3 100644 --- a/src/documents/migrations/1000_update_paperless_all.py +++ b/src/documents/migrations/1000_update_paperless_all.py @@ -1,7 +1,4 @@ # Generated by Django 3.1.3 on 2020-11-07 12:35 -import os - -from django.conf import settings from django.db 
import migrations, models import django.db.models.deletion diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 1cc142e81..a049fb825 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -2,11 +2,10 @@ import os import shutil import tempfile from unittest import mock -from unittest.mock import MagicMock from django.contrib.auth.models import User from django.test import override_settings -from rest_framework.test import APITestCase, APIClient +from rest_framework.test import APITestCase from documents.models import Document, Correspondent, DocumentType, Tag diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index 9c85b786a..4ae672ac2 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -80,6 +80,6 @@ class TestClassifier(TestCase): self.classifier.save_classifier() - newClassifier = DocumentClassifier() - newClassifier.reload() - self.assertFalse(newClassifier.train()) + new_classifier = DocumentClassifier() + new_classifier.reload() + self.assertFalse(new_classifier.train()) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 52cf7dcc6..f61fd5718 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -5,8 +5,6 @@ import tempfile from unittest import mock from unittest.mock import MagicMock -from django.conf import settings -from django.db import DatabaseError from django.test import TestCase, override_settings from ..consumer import Consumer, ConsumerError @@ -504,9 +502,9 @@ class TestConsumer(TestCase): def testOverrideFilename(self): filename = self.get_test_file() - overrideFilename = "My Bank - Statement for November.pdf" + override_filename = "My Bank - Statement for November.pdf" - document = self.consumer.try_consume_file(filename, override_filename=overrideFilename) + document = self.consumer.try_consume_file(filename, 
override_filename=override_filename) self.assertEqual(document.correspondent.name, "My Bank") self.assertEqual(document.title, "Statement for November") diff --git a/src/paperless_mail/management/commands/mail_fetcher.py b/src/paperless_mail/management/commands/mail_fetcher.py index 928d177ef..b11b5b70d 100644 --- a/src/paperless_mail/management/commands/mail_fetcher.py +++ b/src/paperless_mail/management/commands/mail_fetcher.py @@ -1,6 +1,6 @@ from django.core.management.base import BaseCommand -from paperless_mail import mail, tasks +from paperless_mail import tasks class Command(BaseCommand): diff --git a/src/paperless_mail/tasks.py b/src/paperless_mail/tasks.py index dbef91c94..22d512c1e 100644 --- a/src/paperless_mail/tasks.py +++ b/src/paperless_mail/tasks.py @@ -1,6 +1,5 @@ import logging -from paperless_mail import mail from paperless_mail.mail import MailAccountHandler from paperless_mail.models import MailAccount diff --git a/src/paperless_mail/views.py b/src/paperless_mail/views.py deleted file mode 100644 index 91ea44a21..000000000 --- a/src/paperless_mail/views.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.shortcuts import render - -# Create your views here. From c487e5f017a8825649aebb10bb60a87a09344e04 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:42:05 +0100 Subject: [PATCH 08/12] a new setting that allows you to skip thumbnail optimization. --- paperless.conf.example | 7 ++++++- src/documents/parsers.py | 15 +++++++++------ src/paperless/settings.py | 2 ++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index b9c9d4e6c..2bd2c08bc 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -158,7 +158,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # When the consumer detects a duplicate document, it will not touch the # original document. This default behavior can be changed here. 
-#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false" +#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false + +# Use optipng to optimize thumbnails. This usually reduces the size of +# thumbnails by about 20%, but uses considerable compute time during +# consumption. +#PAPERLESS_OPTIMIZE_THUMBNAILS=true # After a document is consumed, Paperless can trigger an arbitrary script if # you like. This script will be passed a number of arguments for you to work diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 2fab6bc44..496efa188 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -122,16 +122,19 @@ class DocumentParser(LoggingMixin): def optimise_thumbnail(self, in_path): - out_path = os.path.join(self.tempdir, "optipng.png") + if settings.OPTIMIZE_THUMBNAILS: + out_path = os.path.join(self.tempdir, "optipng.png") - args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) + args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) - self.log('debug', 'Execute: ' + " ".join(args)) + self.log('debug', 'Execute: ' + " ".join(args)) - if not subprocess.Popen(args).wait() == 0: - raise ParseError("Optipng failed at {}".format(args)) + if not subprocess.Popen(args).wait() == 0: + raise ParseError("Optipng failed at {}".format(args)) - return out_path + return out_path + else: + return in_path def get_optimised_thumbnail(self): return self.optimise_thumbnail(self.get_thumbnail()) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 3661c3d02..2713e2b5e 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -320,6 +320,8 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0)) CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") +OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true") + # The default language that tesseract will attempt to use when parsing # documents.
It should be a 3-letter language code consistent with ISO 639. OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") From d8e680465085a6ee7a9bdb340bd0e9aee8150f6b Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:42:56 +0100 Subject: [PATCH 09/12] removed all quotes from the config: they are not needed and are confusing for the docker-compose env files, where they are actually not allowed. --- paperless.conf.example | 44 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index 2bd2c08bc..afc178bcf 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -10,7 +10,7 @@ # This is required for processing scheduled tasks such as email fetching, index # optimization and for training the automatic document matcher. # Defaults to localhost:6379. -#PAPERLESS_REDIS="redis://localhost:6379" +#PAPERLESS_REDIS=redis://localhost:6379 ############################################################################### @@ -22,15 +22,15 @@ # configuration for this is already done inside the docker-compose.env file. #Set PAPERLESS_DBHOST and postgresql will be used instead of mysql. -#PAPERLESS_DBHOST="localhost" +#PAPERLESS_DBHOST=localhost #Adjust port if necessary #PAPERLESS_DBPORT= #name, user and pass all default to "paperless" -#PAPERLESS_DBNAME="paperless" -#PAPERLESS_DBUSER="paperless" -#PAPERLESS_DBPASS="paperless" +#PAPERLESS_DBNAME=paperless +#PAPERLESS_DBUSER=paperless +#PAPERLESS_DBPASS=paperless ############################################################################### @@ -40,23 +40,23 @@ # This where your documents should go to be consumed. Make sure that it exists # and that the user running the paperless service can read/write its contents # before you start Paperless. 
-PAPERLESS_CONSUMPTION_DIR="../consume" +PAPERLESS_CONSUMPTION_DIR=../consume # This is where paperless stores all its data (search index, sqlite database, # classification model, etc). -#PAPERLESS_DATA_DIR="../data" +#PAPERLESS_DATA_DIR=../data # This is where your documents and thumbnails are stored. -#PAPERLESS_MEDIA_ROOT="../media" +#PAPERLESS_MEDIA_ROOT=../media # Override the default STATIC_ROOT here. This is where all static files # created using "collectstatic" manager command are stored. -#PAPERLESS_STATICDIR="../static" +#PAPERLESS_STATICDIR=../static # Override the STATIC_URL here. Unless you're hosting Paperless off a # subdomain like /paperless/, you probably don't need to change this. -#PAPERLESS_STATIC_URL="/static/" +#PAPERLESS_STATIC_URL=/static/ # Specify a filename format for the document (directories are supported) @@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # * {tags[INDEX]} If your tags are strings, select the tag by index # Uniqueness of filenames is ensured, as an incrementing counter is attached # to each filename. -#PAPERLESS_FILENAME_FORMAT="" +#PAPERLESS_FILENAME_FORMAT= ############################################################################### #### Security #### @@ -77,10 +77,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # Controls whether django's debug mode is enabled. Disable this on production # systems. Debug mode is disabled by default. -#PAPERLESS_DEBUG="false" +#PAPERLESS_DEBUG=false # GnuPG encryption is deprecated and will be removed in future versions. # +# Don't use it. It does not provide any security at all. +# # Paperless can be instructed to attempt to encrypt your PDF files with GPG # using the PAPERLESS_PASSPHRASE specified below. If however you're not # concerned about encrypting these files (for example if you have disk @@ -93,13 +95,13 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # you've since changed it to a new one. # # The default is to not use encryption at all.
-#PAPERLESS_PASSPHRASE="secret" +#PAPERLESS_PASSPHRASE=secret # The secret key has a default that should be fine so long as you're hosting # Paperless on a closed network. However, if you're putting this anywhere # public, you should change the key to something unique and verbose. -#PAPERLESS_SECRET_KEY="change-me" +#PAPERLESS_SECRET_KEY=change-me # If you're planning on putting Paperless on the open internet, then you @@ -109,19 +111,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # # Just remember that this is a comma-separated list, so "example.com" is fine, # as is "example.com,www.example.com", but NOT " example.com" or "example.com," -#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com" +#PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com # If you decide to use the Paperless API in an ajax call, you need to add your # servers to the list of allowed hosts that can do CORS calls. By default # Paperless allows calls from localhost:8080, but you'd like to change that, # you can set this value to a comma-separated list. -#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000" +#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000 # To host paperless under a subpath url like example.com/paperless you set # this value to /paperless. No trailing slash! # # https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name -#PAPERLESS_FORCE_SCRIPT_NAME="" +#PAPERLESS_FORCE_SCRIPT_NAME= ############################################################################### #### Software Tweaks #### @@ -170,7 +172,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # with. The default is blank, which means nothing will be executed. 
For more # information, take a look at the docs: # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process -#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh" +#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh # By default, paperless will check the document text for document date information. # Uncomment the line below to enable checking the document filename for date @@ -178,7 +180,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # https://dateparser.readthedocs.io/en/latest/#settings. The filename will be # checked first, and if nothing is found, the document text will be checked # as normal. -#PAPERLESS_FILENAME_DATE_ORDER="YMD" +#PAPERLESS_FILENAME_DATE_ORDER=YMD # Sometimes devices won't create filenames which can be parsed properly # by the filename parser (see @@ -248,7 +250,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" # By default Paperless does not OCR a document if the text can be retrieved from # the document directly. Set to true to always OCR documents. -#PAPERLESS_OCR_ALWAYS="false" +#PAPERLESS_OCR_ALWAYS=false ############################################################################### @@ -276,7 +278,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume" #PAPERLESS_CONVERT_BINARY=/usr/bin/convert # Ghostscript -#PAPERLESS_GS_BINARY = /usr/bin/gs +#PAPERLESS_GS_BINARY=/usr/bin/gs # Unpaper #PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper From 0bc6d471faa7599186a6617fb94bf032482b7e60 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:43:36 +0100 Subject: [PATCH 10/12] updated the build process, it now works on RPi as well. 
--- Dockerfile | 82 ----------------------------------------- Pipfile | 5 +++ Pipfile.lock | 51 +++++++++++++++++++++---- docker/local/Dockerfile | 17 ++++----- scripts/make-release.sh | 11 +++--- scripts/push-release.sh | 23 ++++++++++++ 6 files changed, 83 insertions(+), 106 deletions(-) delete mode 100644 Dockerfile create mode 100755 scripts/push-release.sh diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 291cacdad..000000000 --- a/Dockerfile +++ /dev/null @@ -1,82 +0,0 @@ -############################################################################### -### Front end ### -############################################################################### - -FROM node:current AS frontend - -WORKDIR /usr/src/paperless/src-ui/ - -COPY src-ui/package* ./ -RUN npm install - -COPY src-ui . -RUN node_modules/.bin/ng build --prod --output-hashing none --sourceMap=false --output-path dist/paperless-ui - -############################################################################### -### Back end ### -############################################################################### - -FROM ubuntu:20.04 - -WORKDIR /usr/src/paperless/ - -COPY Pipfile* ./ - -#Dependencies -RUN apt-get update \ - && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ - build-essential \ - curl \ - ghostscript \ - gnupg \ - imagemagick \ - libmagic-dev \ - libpoppler-cpp-dev \ - libpq-dev \ - optipng \ - python3 \ - python3-dev \ - python3-pip \ - sudo \ - tesseract-ocr \ - tesseract-ocr-eng \ - tesseract-ocr-deu \ - tesseract-ocr-fra \ - tesseract-ocr-ita \ - tesseract-ocr-spa \ - tzdata \ - unpaper \ - && pip3 install --upgrade pipenv supervisor setuptools \ - && pipenv install --system --deploy \ - && pipenv --clear \ - && apt-get -y purge build-essential python3-pip python3-dev \ - && apt-get -y autoremove --purge \ - && rm -rf /var/lib/apt/lists/* \ - && mkdir /var/log/supervisord /var/run/supervisord - -# copy scripts -# this fixes issues with 
imagemagick and PDF -COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml -COPY docker/gunicorn.conf.py ./ -COPY docker/supervisord.conf /etc/supervisord.conf -COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh - -# copy app -COPY src/ ./src/ -COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/ - -# add users, setup scripts -RUN addgroup --gid 1000 paperless \ - && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ - && chown -R paperless:paperless . \ - && chmod 755 /sbin/docker-entrypoint.sh - -WORKDIR /usr/src/paperless/src/ - -RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input - -VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"] -ENTRYPOINT ["/sbin/docker-entrypoint.sh"] -CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"] - -LABEL maintainer="Jonas Winkler " diff --git a/Pipfile b/Pipfile index cf330f25d..66d60845b 100644 --- a/Pipfile +++ b/Pipfile @@ -3,6 +3,11 @@ url = "https://pypi.python.org/simple" verify_ssl = true name = "pypi" +[[source]] +url = "https://www.piwheels.org/simple" +verify_ssl = true +name = "piwheels" + [packages] django = "~=3.1" pillow = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 4aa573900..15a30e1c0 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "d6416e6844126b09200b9839a3abdcf3c24ef5cf70052b8f134d8bc804552c17" + "sha256": "abc7e5f5a8d075d4b013ceafd06ca07f57e597f053d670f73449ba210511b114" }, "pipfile-spec": 6, "requires": {}, @@ -10,6 +10,11 @@ "name": "pypi", "url": "https://pypi.python.org/simple", "verify_ssl": true + }, + { + "name": "piwheels", + "url": "https://www.piwheels.org/simple", + "verify_ssl": true } ] }, @@ -102,6 +107,7 @@ }, "filemagic": { "hashes": [ + "sha256:b2fd77411975510e28673220c4b8868ed81b5eb5906339b6f4c233b32122d7d3", 
"sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3" ], "index": "pypi", @@ -142,6 +148,7 @@ "langdetect": { "hashes": [ "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83", + "sha256:ae53a024643df713274c297c0795dbfb5a16b329902f8e543e7b2d7d45f699e4", "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740" ], "index": "pypi", @@ -162,6 +169,7 @@ "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb", "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc", "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac", + "sha256:5ddd1dfa2be066595c1993165b4cae84b9866b12339d0c903db7f21a094324a3", "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83", "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36", "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387", @@ -189,7 +197,8 @@ }, "pathtools": { "hashes": [ - "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" + "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0", + "sha256:d77d982475e87f32b82157a43b09f0a5ef3e66c1d8f3c7eb8d2580e783cd8202" ], "version": "==0.1.2" }, @@ -217,6 +226,7 @@ "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", + "sha256:5a3342d34289715928c914ee7f389351eb37fa4857caa9297fc7948f2ed3e53d", "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", @@ -274,8 +284,10 @@ "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", 
"sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:d9f3a909b59ac4a3ca9beb77716f4bce627276edb039a71d4e9ec4b7548536a0", "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", + "sha256:e7f5a465c6431c0ad8d4e69603ee3306e521a09d3c6af76a16bdb62946bdddf0", "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" @@ -285,7 +297,8 @@ }, "pyocr": { "hashes": [ - "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179" + "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179", + "sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301" ], "index": "pypi", "version": "==0.7.2" @@ -316,7 +329,10 @@ }, "python-levenshtein": { "hashes": [ - "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" + "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1", + "sha256:15e26882728c29ccdf74cfc6ac4b49fc22c08b44d152348cb0eb1ec4f3dbf9df", + "sha256:3df5e5eb144570ecf5ad38864a2393068798328c7f05e7b167a49391d36a2db1", + "sha256:7f049b3ddc4b525bd469febafb98bf5202f789b722e0e4ccbec2ffbe8c07d7b4" ], "index": "pypi", "version": "==0.12.0" @@ -331,6 +347,7 @@ "redis": { "hashes": [ "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2", + "sha256:3f1c7f166fa6c803613eec222224848a80f5e5b9c6af3aa82461506643034a7a", "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24" ], "index": "pypi", @@ -360,7 +377,9 @@ "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884", "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c", "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e", + 
"sha256:80ef188c0e47a6c964eed71c55a73c245f8daf9f0a4a9d804e91275afb468ca4", "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562", + "sha256:842fb985b2b99a82a2b145b6bbd588c5f5cfd83693402920fcb985d515794666", "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85", "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c", "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6", @@ -384,6 +403,7 @@ }, "scikit-learn": { "hashes": [ + "sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e", "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca", "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc", "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea", @@ -423,6 +443,7 @@ "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62", "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d", "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437", + "sha256:b5e9d3e4474644915809d6aa1416ff20430a3ed9ae723a5d295da5ddb24985e2", "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2", "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54", "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474", @@ -468,6 +489,7 @@ }, "watchdog": { "hashes": [ + "sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8", "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04" ], "index": "pypi", @@ -561,6 +583,7 @@ "sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f", "sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7", "sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c", + "sha256:3188a7dfd96f734a7498f37cde6598b1e9c084f1ca68bc1aa04e88db31168ab6", "sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5", 
"sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7", "sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729", @@ -586,7 +609,8 @@ "sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237", "sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7", "sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636", - "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8" + "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8", + "sha256:ef221855191457fffeb909d5787d1807800ab4d0111f089e6c93ee68f577634d" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==5.3" @@ -608,6 +632,7 @@ }, "docopt": { "hashes": [ + "sha256:15fde8252aa9f2804171014d50d069ffbf42c7a50b7d74bcbb82bfd5700fcfc2", "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" ], "version": "==0.6.2" @@ -638,11 +663,11 @@ }, "faker": { "hashes": [ - "sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca", - "sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026" + "sha256:4d038ba51ae5e0a956d79cadd684d856e5750bfd608b61dad1807f8f08b1da49", + "sha256:f260f0375a44cd1e1a735c9b8c9b914304f607b5eef431d20e098c7c2f5b50a6" ], "markers": "python_version >= '3.5'", - "version": "==4.14.2" + "version": "==4.16.0" }, "filelock": { "hashes": [ @@ -653,6 +678,7 @@ }, "idna": { "hashes": [ + "sha256:4a57a6379512ade94fa99e2fa46d3cd0f2f553040548d0e2958c6ed90ee48226", "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], @@ -670,12 +696,14 @@ "iniconfig": { "hashes": [ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", + "sha256:8647b85c03813b8680f4ae9c9db2fd7293f8591ea536a10d73d90f6eb4b10aac", 
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" ], "version": "==1.1.1" }, "jinja2": { "hashes": [ + "sha256:3f172970d5670703bd3812e8ca6459a9a7e069fa8e51b40195f83c81db191ec4", "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" ], @@ -689,8 +717,10 @@ "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", + "sha256:19536834abffb3fa155017053c607cb835b2ecc6a3a2554a88043d991dffb736", "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:3d61f15e39611aacd91b7e71d903787da86d9e80896e683c0103fced9add7834", "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", @@ -700,6 +730,7 @@ "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + "sha256:7952deddf24b85c88dab48f6ec366ac6e39d2761b5280f2f9594911e03fcd064", "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", @@ -795,6 +826,7 @@ }, "pytest-env": { "hashes": [ + "sha256:33b4030383a021924fe3f3ba5ca4311990d8b1d02ca77389c2be020c4500f96a", "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2" ], "index": "pypi", @@ -802,6 +834,7 @@ }, "pytest-forked": { "hashes": [ + 
"sha256:2d1bfc93ab65a28324eb0a63503bfb500c2da6916efede7a24b43a04970fe63c", "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca", "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815" ], @@ -810,6 +843,7 @@ }, "pytest-sugar": { "hashes": [ + "sha256:67a55a83c7b2717ad607704d3fe9004bb6543b54017ef82f9c6590acc38c1aec", "sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3" ], "index": "pypi", @@ -927,6 +961,7 @@ }, "termcolor": { "hashes": [ + "sha256:19b1225d03bfb56571484caaa8521d8ec6e2473ae1640c9f48a48dda49417706", "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" ], "version": "==1.1.0" diff --git a/docker/local/Dockerfile b/docker/local/Dockerfile index 64e488f5e..0411d95ee 100644 --- a/docker/local/Dockerfile +++ b/docker/local/Dockerfile @@ -2,27 +2,25 @@ ### Back end ### ############################################################################### -FROM ubuntu:20.04 +FROM python:3.7-slim WORKDIR /usr/src/paperless/ -COPY Pipfile* ./ +COPY requirements.txt ./ #Dependencies RUN apt-get update \ - && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \ + && apt-get -y --no-install-recommends install \ build-essential \ curl \ ghostscript \ gnupg \ imagemagick \ + libatlas-base-dev \ libmagic-dev \ libpoppler-cpp-dev \ libpq-dev \ optipng \ - python3 \ - python3-dev \ - python3-pip \ sudo \ tesseract-ocr \ tesseract-ocr-eng \ @@ -32,10 +30,9 @@ RUN apt-get update \ tesseract-ocr-spa \ tzdata \ unpaper \ - && pip3 install --upgrade pipenv supervisor setuptools \ - && pipenv install --system --deploy \ - && pipenv --clear \ - && apt-get -y purge build-essential python3-pip python3-dev \ + && pip3 install --upgrade supervisor setuptools \ + && pip install --no-cache-dir -r requirements.txt \ + && apt-get -y purge build-essential \ && apt-get -y autoremove --purge \ && rm -rf /var/lib/apt/lists/* \ && mkdir /var/log/supervisord /var/run/supervisord diff --git 
a/scripts/make-release.sh b/scripts/make-release.sh index 4b509b5bf..b361834db 100755 --- a/scripts/make-release.sh +++ b/scripts/make-release.sh @@ -24,12 +24,17 @@ then rm "$PAPERLESS_DIST" -r fi +mkdir "$PAPERLESS_DIST" +mkdir "$PAPERLESS_DIST_APP" +mkdir "$PAPERLESS_DIST_APP/docker" + # setup dependencies. cd "$PAPERLESS_ROOT" pipenv clean pipenv install --dev +pipenv lock --keep-outdated -r > "$PAPERLESS_DIST_APP/requirements.txt" # test if the application works. @@ -44,10 +49,6 @@ make clean html # copy stuff into place -mkdir "$PAPERLESS_DIST" -mkdir "$PAPERLESS_DIST_APP" -mkdir "$PAPERLESS_DIST_APP/docker" - # the application itself cp "$PAPERLESS_ROOT/.env" \ @@ -92,8 +93,6 @@ cd "$PAPERLESS_DIST_APP" docker build . -t "jonaswinkler/paperless-ng:$VERSION" -docker push "jonaswinkler/paperless-ng:$VERSION" - # works. package the app! cd "$PAPERLESS_DIST" diff --git a/scripts/push-release.sh b/scripts/push-release.sh new file mode 100755 index 000000000..cfa63f5cf --- /dev/null +++ b/scripts/push-release.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e + + +VERSION=$1 + +if [ -z "$VERSION" ] +then + echo "Need a version string." + exit 1 +fi + +# source root directory of paperless +PAPERLESS_ROOT=$(git rev-parse --show-toplevel) + +# output directory +PAPERLESS_DIST="$PAPERLESS_ROOT/dist" +PAPERLESS_DIST_APP="$PAPERLESS_DIST/paperless-ng" + +cd "$PAPERLESS_DIST_APP" + +docker push "jonaswinkler/paperless-ng:$VERSION" From b89e836e3daf7b7f21259a049ff9f5e6f45c3453 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:56:30 +0100 Subject: [PATCH 11/12] updated documentation --- docs/advanced_usage.rst | 2 ++ docs/changelog.rst | 2 ++ docs/faq.rst | 22 +++++++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 218cfa8b7..6183baae1 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -128,6 +128,8 @@ consumer. 
Once complete, you should see the newly-created document, automatically tagged with the appropriate data. +.. _advanced-automatic_matching: + Automatic matching ================== diff --git a/docs/changelog.rst b/docs/changelog.rst index 36dc9e5cc..9fcf10940 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -96,6 +96,8 @@ paperless-ng 0.9.0 sqlite. * ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and ``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details. + * ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail + optimization. This is useful on less powerful devices. * Many more small changes here and there. The usual stuff. diff --git a/docs/faq.rst b/docs/faq.rst index 3f0de32b7..747ffaf53 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -23,12 +23,32 @@ is **Q:** *Will paperless-ng run on Raspberry Pi?* -**A:** The short answer is yes. The long answer is that certain parts of +**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B. +The long answer is that certain parts of Paperless will run very slow, such as the tesseract OCR. On Rasperry Pi, try to OCR documents before feeding them into paperless so that paperless can reuse the text. The web interface should be alot snappier, since it runs in your browser and paperless has to do much less work to serve the data. +.. note:: + + Consider setting ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to false to speed up + the consumption process. This takes quite a bit of time on Raspberry Pi. + +.. note:: + + Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>` + takes quite a bit of time. However, the update mechanism checks if your + data has changed before doing the heavy lifting. If you experience the + algorithm taking too much cpu time, consider changing the schedule in the + admin interface to daily or weekly. You can also manually invoke the task + by changing the date and time of the next run to today/now.
+ + The actual matching of the algorithm is fast and works on Raspberry Pi as + well as on any other device. + + + **Q:** *How do I install paperless-ng on Raspberry Pi?* **A:** There is not docker image for ARM available. If you know how to build From 07e2ff0c2ee8865e321b85fb9a63d57b45ebc957 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Wed, 18 Nov 2020 22:59:27 +0100 Subject: [PATCH 12/12] fixed a test case --- src/paperless_mail/tests/test_mail.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/paperless_mail/tests/test_mail.py b/src/paperless_mail/tests/test_mail.py index a9d57fcb8..a3404b774 100644 --- a/src/paperless_mail/tests/test_mail.py +++ b/src/paperless_mail/tests/test_mail.py @@ -147,10 +147,10 @@ class TestMail(TestCase): me_localhost = Correspondent.objects.create(name=message2.from_) someone_else = Correspondent.objects.create(name="someone else") - rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) + rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING) self.assertIsNone(get_correspondent(message, rule)) - rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) + rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL) c = get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "someone@somewhere.com") @@ -159,7 +159,7 @@ class TestMail(TestCase): self.assertEqual(c.name, "me@localhost.com") self.assertEqual(c.id, me_localhost.id) - rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) + rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME) c = get_correspondent(message, rule) self.assertIsNotNone(c) self.assertEqual(c.name, "Someone!") @@ -167,7 +167,7 @@ class TestMail(TestCase): self.assertIsNotNone(c) self.assertEqual(c.id, me_localhost.id) - rule = 
MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) + rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else) c = get_correspondent(message, rule) self.assertEqual(c, someone_else) @@ -176,9 +176,9 @@ class TestMail(TestCase): message.subject = "the message title" att = namedtuple('Attachment', []) att.filename = "this_is_the_file.pdf" - rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME) + rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME) self.assertEqual(get_title(message, att, rule), "this_is_the_file") - rule = MailRule(assign_title_from=MailRule.TITLE_FROM_SUBJECT) + rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT) self.assertEqual(get_title(message, att, rule), "the message title") def test_handle_message(self): @@ -307,7 +307,7 @@ class TestMail(TestCase): account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") - rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") + rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim") try: self.mail_account_handler.handle_mail_account(account) @@ -319,7 +319,7 @@ class TestMail(TestCase): def test_filters(self): account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret") - rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim") + rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim") self.assertEqual(self.async_task.call_count, 0)