mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	s/Sender/Correspondent & reworked the (im|ex)porter
This commit is contained in:
		@@ -3,7 +3,7 @@ from django.contrib.auth.models import User, Group
 | 
			
		||||
from django.core.urlresolvers import reverse
 | 
			
		||||
from django.templatetags.static import static
 | 
			
		||||
 | 
			
		||||
from .models import Sender, Tag, Document, Log
 | 
			
		||||
from .models import Correspondent, Tag, Document, Log
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MonthListFilter(admin.SimpleListFilter):
 | 
			
		||||
@@ -107,7 +107,7 @@ class LogAdmin(admin.ModelAdmin):
 | 
			
		||||
    list_filter = ("level", "component",)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
admin.site.register(Sender)
 | 
			
		||||
admin.site.register(Correspondent)
 | 
			
		||||
admin.site.register(Tag, TagAdmin)
 | 
			
		||||
admin.site.register(Document, DocumentAdmin)
 | 
			
		||||
admin.site.register(Log, LogAdmin)
 | 
			
		||||
 
 | 
			
		||||
@@ -24,7 +24,7 @@ from pyocr.tesseract import TesseractError
 | 
			
		||||
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
 | 
			
		||||
from .models import Sender, Tag, Document, Log
 | 
			
		||||
from .models import Correspondent, Tag, Document, Log
 | 
			
		||||
from .languages import ISO639
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -246,7 +246,7 @@ class Consumer(object):
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        def get_sender(sender_name):
 | 
			
		||||
            return Sender.objects.get_or_create(
 | 
			
		||||
            return Correspondent.objects.get_or_create(
 | 
			
		||||
                name=sender_name, defaults={"slug": slugify(sender_name)})[0]
 | 
			
		||||
 | 
			
		||||
        def get_tags(tags):
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ from time import mktime
 | 
			
		||||
from django import forms
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
 | 
			
		||||
from .models import Document, Sender
 | 
			
		||||
from .models import Document, Correspondent
 | 
			
		||||
from .consumer import Consumer
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -24,7 +24,9 @@ class UploadForm(forms.Form):
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    sender = forms.CharField(
 | 
			
		||||
        max_length=Sender._meta.get_field("name").max_length, required=False)
 | 
			
		||||
        max_length=Correspondent._meta.get_field("name").max_length,
 | 
			
		||||
        required=False
 | 
			
		||||
    )
 | 
			
		||||
    title = forms.CharField(
 | 
			
		||||
        max_length=Document._meta.get_field("title").max_length,
 | 
			
		||||
        required=False
 | 
			
		||||
@@ -41,7 +43,7 @@ class UploadForm(forms.Form):
 | 
			
		||||
        sender = self.cleaned_data.get("sender")
 | 
			
		||||
        if not sender:
 | 
			
		||||
            return None
 | 
			
		||||
        if not Sender.SAFE_REGEX.match(sender) or " - " in sender:
 | 
			
		||||
        if not Correspondent.SAFE_REGEX.match(sender) or " - " in sender:
 | 
			
		||||
            raise forms.ValidationError("That sender name is suspicious.")
 | 
			
		||||
        return sender
 | 
			
		||||
 | 
			
		||||
@@ -49,7 +51,7 @@ class UploadForm(forms.Form):
 | 
			
		||||
        title = self.cleaned_data.get("title")
 | 
			
		||||
        if not title:
 | 
			
		||||
            return None
 | 
			
		||||
        if not Sender.SAFE_REGEX.match(title) or " - " in title:
 | 
			
		||||
        if not Correspondent.SAFE_REGEX.match(title) or " - " in title:
 | 
			
		||||
            raise forms.ValidationError("That title is suspicious.")
 | 
			
		||||
 | 
			
		||||
    def clean_document(self):
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ from dateutil import parser
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
 | 
			
		||||
from .consumer import Consumer
 | 
			
		||||
from .models import Sender, Log
 | 
			
		||||
from .models import Correspondent, Log
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MailFetcherError(Exception):
 | 
			
		||||
@@ -103,7 +103,7 @@ class Message(Loggable):
 | 
			
		||||
    def check_subject(self):
 | 
			
		||||
        if self.subject is None:
 | 
			
		||||
            raise InvalidMessageError("Message does not have a subject")
 | 
			
		||||
        if not Sender.SAFE_REGEX.match(self.subject):
 | 
			
		||||
        if not Correspondent.SAFE_REGEX.match(self.subject):
 | 
			
		||||
            raise InvalidMessageError("Message subject is unsafe: {}".format(
 | 
			
		||||
                self.subject))
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,12 @@
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.core.management.base import BaseCommand, CommandError
 | 
			
		||||
from django.core import serializers
 | 
			
		||||
 | 
			
		||||
from documents.models import Document
 | 
			
		||||
from documents.models import Document, Correspondent, Tag
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
 | 
			
		||||
from ...mixins import Renderable
 | 
			
		||||
@@ -14,21 +16,19 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
 | 
			
		||||
    help = """
 | 
			
		||||
        Decrypt and rename all files in our collection into a given target
 | 
			
		||||
        directory.  Note that we don't export any of the parsed data since
 | 
			
		||||
        that can always be re-collected via the consumer.
 | 
			
		||||
        directory.  And include a manifest file containing document data for
 | 
			
		||||
        easy import.
 | 
			
		||||
    """.replace("    ", "")
 | 
			
		||||
 | 
			
		||||
    def add_arguments(self, parser):
 | 
			
		||||
        parser.add_argument("target")
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        self.verbosity = 0
 | 
			
		||||
        self.target = None
 | 
			
		||||
        BaseCommand.__init__(self, *args, **kwargs)
 | 
			
		||||
        self.target = None
 | 
			
		||||
 | 
			
		||||
    def handle(self, *args, **options):
 | 
			
		||||
 | 
			
		||||
        self.verbosity = options["verbosity"]
 | 
			
		||||
        self.target = options["target"]
 | 
			
		||||
 | 
			
		||||
        if not os.path.exists(self.target):
 | 
			
		||||
@@ -40,9 +40,15 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
        if not settings.PASSPHRASE:
 | 
			
		||||
            settings.PASSPHRASE = input("Please enter the passphrase: ")
 | 
			
		||||
 | 
			
		||||
        for document in Document.objects.all():
 | 
			
		||||
        documents = Document.objects.all()
 | 
			
		||||
        document_map = {d.pk: d for d in documents}
 | 
			
		||||
        manifest = json.loads(serializers.serialize("json", documents))
 | 
			
		||||
        for document_dict in manifest:
 | 
			
		||||
 | 
			
		||||
            document = document_map[document_dict["pk"]]
 | 
			
		||||
 | 
			
		||||
            target = os.path.join(self.target, document.file_name)
 | 
			
		||||
            document_dict["__exported_file_name__"] = target
 | 
			
		||||
 | 
			
		||||
            print("Exporting: {}".format(target))
 | 
			
		||||
 | 
			
		||||
@@ -50,3 +56,12 @@ class Command(Renderable, BaseCommand):
 | 
			
		||||
                f.write(GnuPG.decrypted(document.source_file))
 | 
			
		||||
                t = int(time.mktime(document.created.timetuple()))
 | 
			
		||||
                os.utime(target, times=(t, t))
 | 
			
		||||
 | 
			
		||||
        manifest += json.loads(
 | 
			
		||||
            serializers.serialize("json", Correspondent.objects.all()))
 | 
			
		||||
 | 
			
		||||
        manifest += json.loads(serializers.serialize(
 | 
			
		||||
            "json", Tag.objects.all()))
 | 
			
		||||
 | 
			
		||||
        with open(os.path.join(self.target, "manifest.json"), "w") as f:
 | 
			
		||||
            json.dump(manifest, f, indent=2)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										110
									
								
								src/documents/management/commands/document_importer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/documents/management/commands/document_importer.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,110 @@
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.core.management.base import BaseCommand, CommandError
 | 
			
		||||
from django.core.management import call_command
 | 
			
		||||
 | 
			
		||||
from documents.models import Document
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
 | 
			
		||||
from ...mixins import Renderable
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Command(Renderable, BaseCommand):
    """Import documents described by an exporter-generated manifest.

    The command reads ``manifest.json`` from the ``source`` directory, loads
    its records into the database through Django's ``loaddata`` command and
    then encrypts every referenced document file into the location given by
    the matching ``Document.source_path``.
    """

    help = """
        Using a manifest.json file, load the data from there, and import the
        documents it refers to.
    """.replace("    ", "")

    # Key under which a manifest record stores the exported file's name.
    _FILE_NAME_KEY = "__exported_file_name__"

    def add_arguments(self, parser):
        """Register the positional ``source`` directory and the flags."""
        parser.add_argument("source")
        parser.add_argument(
            '--ignore-absent',
            action='store_true',
            default=False,
            help="If the manifest refers to a document that doesn't exist, "
                 "ignore it and attempt to import what it can"
        )

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)
        self.source = None
        self.manifest = None
        self.ignore_absent = False

    def handle(self, *args, **options):
        """Validate the source directory and manifest, then run the import.

        Raises:
            CommandError: if the source path is missing or unreadable, the
                manifest is missing, the manifest refers to absent files
                (unless ``--ignore-absent`` was given), or no passphrase is
                configured.
        """

        self.source = options["source"]
        # ``.get`` keeps programmatic callers that omit the flag working.
        self.ignore_absent = options.get("ignore_absent", False)

        if not os.path.exists(self.source):
            raise CommandError("That path doesn't exist")

        if not os.access(self.source, os.R_OK):
            raise CommandError("That path doesn't appear to be readable")

        manifest_path = os.path.join(self.source, "manifest.json")
        self._check_manifest_exists(manifest_path)

        with open(manifest_path) as f:
            self.manifest = json.load(f)

        self._check_manifest()

        if not settings.PASSPHRASE:
            raise CommandError(
                "You need to define a passphrase before continuing.  Please "
                "consult the documentation for setting up Paperless."
            )

        # Fill up the database with whatever is in the manifest
        call_command("loaddata", manifest_path)

        self._import_files_from_manifest()

    @staticmethod
    def _check_manifest_exists(path):
        """Raise ``CommandError`` when no manifest file exists at ``path``."""
        if not os.path.exists(path):
            raise CommandError(
                "That directory doesn't appear to contain a manifest.json "
                "file."
            )

    def _document_path(self, record):
        """Return the on-disk path of a record's exported file, or ``None``.

        The stored name is resolved against the source directory; an absolute
        name is returned unchanged because that is how ``os.path.join``
        treats an absolute second component.  ``None`` means the record
        carries no file name at all.
        """
        doc_file = record.get(self._FILE_NAME_KEY)
        if doc_file is None:
            return None
        return os.path.join(self.source, doc_file)

    def _check_manifest(self):
        """Ensure every document record points at an existing file.

        With ``--ignore-absent`` the check is skipped entirely; such records
        are passed over later, while the files are being imported.

        Raises:
            CommandError: if a document record has no file name or the file
                it names does not exist.
        """

        if self.ignore_absent:
            return

        for record in self.manifest:

            if not record["model"] == "documents.document":
                continue

            if self._FILE_NAME_KEY not in record:
                raise CommandError(
                    'The manifest file contains a record which does not '
                    'refer to an actual document file.  If you want to import '
                    'the rest anyway (skipping such references) call the '
                    'importer with --ignore-absent'
                )

            doc_file = record[self._FILE_NAME_KEY]
            if not os.path.exists(self._document_path(record)):
                raise CommandError(
                    'The manifest file refers to "{}" which does not '
                    'appear to be in the source directory.  If you want to '
                    'import the rest anyway (skipping such references) call '
                    'the importer with --ignore-absent'.format(doc_file)
                )

    def _import_files_from_manifest(self):
        """Encrypt each exported file into its document's storage path.

        Files are read from the same resolved path that ``_check_manifest``
        validated, so the check and the import can never disagree about
        which file is meant.
        """

        for record in self.manifest:

            if not record["model"] == "documents.document":
                continue

            doc_path = self._document_path(record)

            if self.ignore_absent and (
                    doc_path is None or not os.path.exists(doc_path)):
                # The database row was already created by loaddata; only the
                # file copy is skipped here.
                print("Skipping absent file for document {}".format(
                    record["pk"]))
                continue

            document = Document.objects.get(pk=record["pk"])

            print("Encrypting {} and saving it to {}".format(
                doc_path, document.source_path))

            # Encrypt before opening the destination so that a failure does
            # not leave an empty or truncated file behind.
            with open(doc_path, "rb") as unencrypted:
                payload = GnuPG.encrypted(unencrypted)

            with open(document.source_path, "wb") as encrypted:
                encrypted.write(payload)
 | 
			
		||||
							
								
								
									
										19
									
								
								src/documents/migrations/0011_auto_20160303_1929.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								src/documents/migrations/0011_auto_20160303_1929.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
# Generated by Django 1.9.2 on 2016-03-03 19:29
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from django.db import migrations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Migration(migrations.Migration):
    """Rename the ``Sender`` model to ``Correspondent``."""

    # Must run after the migration named ``0010_log`` in the same app.
    dependencies = [("documents", "0010_log")]

    operations = [
        migrations.RenameModel(old_name="Sender", new_name="Correspondent"),
    ]
 | 
			
		||||
@@ -28,7 +28,7 @@ class SluggedModel(models.Model):
 | 
			
		||||
        return self.name
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Sender(SluggedModel):
 | 
			
		||||
class Correspondent(SluggedModel):
 | 
			
		||||
 | 
			
		||||
    # This regex is probably more restrictive than it needs to be, but it's
 | 
			
		||||
    # better safe than sorry.
 | 
			
		||||
@@ -141,7 +141,7 @@ class Document(models.Model):
 | 
			
		||||
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
 | 
			
		||||
 | 
			
		||||
    sender = models.ForeignKey(
 | 
			
		||||
        Sender, blank=True, null=True, related_name="documents")
 | 
			
		||||
        Correspondent, blank=True, null=True, related_name="documents")
 | 
			
		||||
    title = models.CharField(max_length=128, blank=True, db_index=True)
 | 
			
		||||
    content = models.TextField(db_index=True)
 | 
			
		||||
    file_type = models.CharField(
 | 
			
		||||
@@ -158,9 +158,9 @@ class Document(models.Model):
 | 
			
		||||
        ordering = ("sender", "title")
 | 
			
		||||
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        created = self.created.strftime("%Y-%m-%d")
 | 
			
		||||
        created = self.created.strftime("%Y%m%d%H%M%S")
 | 
			
		||||
        if self.sender and self.title:
 | 
			
		||||
            return "{}: {}, {}".format(created, self.sender, self.title)
 | 
			
		||||
            return "{}: {} - {}".format(created, self.sender, self.title)
 | 
			
		||||
        if self.sender or self.title:
 | 
			
		||||
            return "{}: {}".format(created, self.sender or self.title)
 | 
			
		||||
        return str(created)
 | 
			
		||||
@@ -179,13 +179,7 @@ class Document(models.Model):
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def file_name(self):
 | 
			
		||||
        if self.sender and self.title:
 | 
			
		||||
            tags = ",".join([t.slug for t in self.tags.all()])
 | 
			
		||||
            if tags:
 | 
			
		||||
                return "{} - {} - {}.{}".format(
 | 
			
		||||
                    self.sender, self.title, tags, self.file_type)
 | 
			
		||||
            return "{} - {}.{}".format(self.sender, self.title, self.file_type)
 | 
			
		||||
        return os.path.basename(self.source_path)
 | 
			
		||||
        return slugify(str(self)) + "." + self.file_type
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def download_url(self):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,12 @@
 | 
			
		||||
from rest_framework import serializers
 | 
			
		||||
 | 
			
		||||
from .models import Sender, Tag, Document, Log
 | 
			
		||||
from .models import Correspondent, Tag, Document, Log
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SenderSerializer(serializers.HyperlinkedModelSerializer):
 | 
			
		||||
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
 | 
			
		||||
 | 
			
		||||
    class Meta(object):
 | 
			
		||||
        model = Sender
 | 
			
		||||
        model = Correspondent
 | 
			
		||||
        fields = ("id", "slug", "name")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
from django.contrib.auth.mixins import LoginRequiredMixin
 | 
			
		||||
from django.http import HttpResponse
 | 
			
		||||
from django.template.defaultfilters import slugify
 | 
			
		||||
from django.views.decorators.csrf import csrf_exempt
 | 
			
		||||
from django.views.generic import FormView, DetailView, TemplateView
 | 
			
		||||
 | 
			
		||||
@@ -14,9 +13,9 @@ from rest_framework.viewsets import (
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
 | 
			
		||||
from .forms import UploadForm
 | 
			
		||||
from .models import Sender, Tag, Document, Log
 | 
			
		||||
from .models import Correspondent, Tag, Document, Log
 | 
			
		||||
from .serialisers import (
 | 
			
		||||
    SenderSerializer, TagSerializer, DocumentSerializer, LogSerializer)
 | 
			
		||||
    CorrespondentSerializer, TagSerializer, DocumentSerializer, LogSerializer)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class IndexView(TemplateView):
 | 
			
		||||
@@ -52,7 +51,7 @@ class FetchView(LoginRequiredMixin, DetailView):
 | 
			
		||||
            content_type=content_types[self.object.file_type]
 | 
			
		||||
        )
 | 
			
		||||
        response["Content-Disposition"] = 'attachment; filename="{}"'.format(
 | 
			
		||||
            slugify(str(self.object)) + "." + self.object.file_type)
 | 
			
		||||
            self.object.file_name)
 | 
			
		||||
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
@@ -81,10 +80,10 @@ class StandardPagination(PageNumberPagination):
 | 
			
		||||
    max_page_size = 100000
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SenderViewSet(ModelViewSet):
 | 
			
		||||
    model = Sender
 | 
			
		||||
    queryset = Sender.objects.all()
 | 
			
		||||
    serializer_class = SenderSerializer
 | 
			
		||||
class CorrespondentViewSet(ModelViewSet):
 | 
			
		||||
    model = Correspondent
 | 
			
		||||
    queryset = Correspondent.objects.all()
 | 
			
		||||
    serializer_class = CorrespondentSerializer
 | 
			
		||||
    pagination_class = StandardPagination
 | 
			
		||||
    permission_classes = (IsAuthenticated,)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user