mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
added: document index; API access for thumbnails/downloads; more API filters.
updated: Pipfile.
removed: filename handling; legacy thumb/download access; obsolete admin GUI settings (per-page items, financial-year filter, inline view).
This commit is contained in:
		| @@ -1,85 +1,12 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.contrib import admin | ||||
| from django.contrib.auth.models import Group, User | ||||
| from django.db import models | ||||
| from django.utils.html import format_html, format_html_join | ||||
| from django.utils.safestring import mark_safe | ||||
|  | ||||
| from .models import Correspondent, Document, DocumentType, Log, Tag | ||||
|  | ||||
|  | ||||
class FinancialYearFilter(admin.SimpleListFilter):
    # Admin sidebar filter that groups documents by financial year, as
    # configured via settings.FY_START / settings.FY_END ("MM-DD" strings).

    title = "Financial Year"
    parameter_name = "fy"
    # Cached result of _fy_does_wrap(); None until first computed.
    _fy_wraps = None

    def _fy_start(self, year):
        """Return date of the start of financial year for the given year."""
        fy_start = "{}-{}".format(str(year), settings.FY_START)
        return datetime.strptime(fy_start, "%Y-%m-%d").date()

    def _fy_end(self, year):
        """Return date of the end of financial year for the given year."""
        fy_end = "{}-{}".format(str(year), settings.FY_END)
        return datetime.strptime(fy_end, "%Y-%m-%d").date()

    def _fy_does_wrap(self):
        """Return whether the financial year spans across two years."""
        if self._fy_wraps is None:
            # Compare month/day only: if the end falls before the start
            # within one calendar year, the FY wraps into the next year.
            start = "{}".format(settings.FY_START)
            start = datetime.strptime(start, "%m-%d").date()
            end = "{}".format(settings.FY_END)
            end = datetime.strptime(end, "%m-%d").date()
            self._fy_wraps = end < start

        return self._fy_wraps

    def _determine_fy(self, date):
        """Return a (query, display) financial year tuple of the given date."""
        if self._fy_does_wrap():
            fy_start = self._fy_start(date.year)

            if date.date() >= fy_start:
                query = "{}-{}".format(date.year, date.year + 1)
            else:
                query = "{}-{}".format(date.year - 1, date.year)

            # To keep it simple we use the same string for both
            # query parameter and the display.
            return query, query

        else:
            # Non-wrapping FY: the query still uses the "YYYY-YYYY" form
            # so queryset() can unconditionally split on "-".
            query = "{0}-{0}".format(date.year)
            display = "{}".format(date.year)
            return query, display

    def lookups(self, request, model_admin):
        # The filter is disabled unless both FY bounds are configured.
        if not settings.FY_START or not settings.FY_END:
            return None

        r = []
        for document in Document.objects.all():
            r.append(self._determine_fy(document.created))

        # Deduplicate and list the most recent financial years first.
        return sorted(set(r), key=lambda x: x[0], reverse=True)

    def queryset(self, request, queryset):
        if not self.value() or not settings.FY_START or not settings.FY_END:
            return None

        # Values look like "2017-2018"; bound by the FY start/end dates.
        start, end = self.value().split("-")
        return queryset.filter(created__gte=self._fy_start(start),
                               created__lte=self._fy_end(end))
|  | ||||
|  | ||||
class CommonAdmin(admin.ModelAdmin):
    # Shared base for the paperless admin classes: applies the
    # user-configurable pagination size to every changelist.
    list_per_page = settings.PAPERLESS_LIST_PER_PAGE
|  | ||||
|  | ||||
| class CorrespondentAdmin(CommonAdmin): | ||||
| class CorrespondentAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     list_display = ( | ||||
|         "name", | ||||
| @@ -90,7 +17,7 @@ class CorrespondentAdmin(CommonAdmin): | ||||
|     readonly_fields = ("slug",) | ||||
|  | ||||
|  | ||||
| class TagAdmin(CommonAdmin): | ||||
| class TagAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     list_display = ( | ||||
|         "name", | ||||
| @@ -104,7 +31,7 @@ class TagAdmin(CommonAdmin): | ||||
|     readonly_fields = ("slug",) | ||||
|  | ||||
|  | ||||
| class DocumentTypeAdmin(CommonAdmin): | ||||
| class DocumentTypeAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     list_display = ( | ||||
|         "name", | ||||
| @@ -116,7 +43,7 @@ class DocumentTypeAdmin(CommonAdmin): | ||||
|     readonly_fields = ("slug",) | ||||
|  | ||||
|  | ||||
| class DocumentAdmin(CommonAdmin): | ||||
| class DocumentAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     search_fields = ("correspondent__name", "title", "content", "tags__name") | ||||
|     readonly_fields = ("added", "file_type", "storage_type",) | ||||
| @@ -125,8 +52,7 @@ class DocumentAdmin(CommonAdmin): | ||||
|     list_filter = ( | ||||
|         "document_type", | ||||
|         "tags", | ||||
|         "correspondent", | ||||
|         FinancialYearFilter | ||||
|         "correspondent" | ||||
|     ) | ||||
|  | ||||
|     filter_horizontal = ("tags",) | ||||
| @@ -164,7 +90,7 @@ class DocumentAdmin(CommonAdmin): | ||||
|         return format_html("<{} {}/>", kind, attributes) | ||||
|  | ||||
|  | ||||
| class LogAdmin(CommonAdmin): | ||||
| class LogAdmin(admin.ModelAdmin): | ||||
|  | ||||
|     list_display = ("created", "message", "level",) | ||||
|     list_filter = ("level", "created",) | ||||
|   | ||||
| @@ -16,12 +16,14 @@ class DocumentsConfig(AppConfig): | ||||
|             run_pre_consume_script, | ||||
|             run_post_consume_script, | ||||
|             cleanup_document_deletion, | ||||
|             set_log_entry | ||||
|             set_log_entry, | ||||
|             index_document | ||||
|         ) | ||||
|  | ||||
|         document_consumption_started.connect(run_pre_consume_script) | ||||
|  | ||||
|         document_consumption_finished.connect(classify_document) | ||||
|         document_consumption_finished.connect(index_document) | ||||
|         document_consumption_finished.connect(add_inbox_tags) | ||||
|         document_consumption_finished.connect(set_log_entry) | ||||
|         document_consumption_finished.connect(run_post_consume_script) | ||||
|   | ||||
| @@ -239,7 +239,6 @@ class Consumer: | ||||
|         self._write(document, doc, document.source_path) | ||||
|         self._write(document, thumbnail, document.thumbnail_path) | ||||
|  | ||||
|         document.set_filename(document.source_filename) | ||||
|         document.save() | ||||
|  | ||||
|         self.log("info", "Completed") | ||||
|   | ||||
| @@ -5,6 +5,8 @@ from .models import Correspondent, Document, Tag, DocumentType | ||||
|  | ||||
| CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"] | ||||
| ID_KWARGS = ["in", "exact"] | ||||
| INT_KWARGS = ["exact"] | ||||
| DATE_KWARGS = ["year", "month", "day", "date__gt", "gt", "date__lt", "lt"] | ||||
|  | ||||
|  | ||||
| class CorrespondentFilterSet(FilterSet): | ||||
| @@ -36,7 +38,7 @@ class DocumentTypeFilterSet(FilterSet): | ||||
|  | ||||
| class DocumentFilterSet(FilterSet): | ||||
|  | ||||
|     tags_empty = BooleanFilter( | ||||
|     is_tagged = BooleanFilter( | ||||
|         label="Is tagged", | ||||
|         field_name="tags", | ||||
|         lookup_expr="isnull", | ||||
| @@ -50,6 +52,12 @@ class DocumentFilterSet(FilterSet): | ||||
|             "title": CHAR_KWARGS, | ||||
|             "content": CHAR_KWARGS, | ||||
|  | ||||
|             "archive_serial_number": INT_KWARGS, | ||||
|  | ||||
|             "created": DATE_KWARGS, | ||||
|             "added": DATE_KWARGS, | ||||
|             "modified": DATE_KWARGS, | ||||
|  | ||||
|             "correspondent__id": ID_KWARGS, | ||||
|             "correspondent__name": CHAR_KWARGS, | ||||
|  | ||||
| @@ -57,6 +65,6 @@ class DocumentFilterSet(FilterSet): | ||||
|             "tags__name": CHAR_KWARGS, | ||||
|  | ||||
|             "document_type__id": ID_KWARGS, | ||||
|             "document_type__name": CHAR_KWARGS | ||||
|             "document_type__name": CHAR_KWARGS, | ||||
|  | ||||
|         } | ||||
|   | ||||
							
								
								
									
										104
									
								
								src/documents/index.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								src/documents/index.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | ||||
| from collections import Iterable | ||||
|  | ||||
| from django.db import models | ||||
| from django.dispatch import receiver | ||||
| from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD | ||||
| from whoosh.highlight import Formatter, get_text | ||||
| from whoosh.index import create_in, exists_in, open_dir | ||||
| from whoosh.qparser import QueryParser | ||||
| from whoosh.query import terms | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents.models import Document | ||||
| from paperless import settings | ||||
|  | ||||
|  | ||||
class JsonFormatter(Formatter):
    """Whoosh highlight formatter that emits JSON-friendly fragments.

    Instead of wrapping matched terms in markup, each fragment becomes a
    list of {'text': ...} parts; matched tokens additionally carry a
    'term' number that is stable per distinct matched text.
    """

    def __init__(self):
        # Maps matched token text -> term number, in first-seen order.
        self.seen = {}

    def format_token(self, text, token, replace=False):
        seen = self.seen
        ttext = self._text(get_text(text, token, replace))
        if ttext in seen:
            termnum = seen[ttext]
        else:
            # First occurrence of this text: assign the next term number.
            termnum = len(seen)
            seen[ttext] = termnum

        return {'text': ttext, 'term': termnum}

    def format_fragment(self, fragment, replace=False):
        # Walk the fragment left to right, interleaving plain-text spans
        # with formatted match tokens; `index` tracks how far we've emitted.
        output = []
        index = fragment.startchar
        text = fragment.text

        for t in fragment.matches:
            if t.startchar is None:
                continue
            if t.startchar < index:
                # Token overlaps text already emitted; skip it.
                continue
            if t.startchar > index:
                # Plain text between the previous token and this one.
                output.append({'text': text[index:t.startchar]})
            output.append(self.format_token(text, t, replace))
            index = t.endchar
        if index < fragment.endchar:
            # Trailing plain text after the last match.
            output.append({'text': text[index:fragment.endchar]})
        return output

    def format(self, fragments, replace=False):
        # One list of parts per fragment.
        output = []
        for fragment in fragments:
            output.append(self.format_fragment(fragment, replace=replace))
        return output
|  | ||||
|  | ||||
def get_schema():
    """Return the whoosh schema used for the document search index."""
    fields = {
        "id": NUMERIC(stored=True, unique=True, numtype=int),
        "title": TEXT(stored=True),
        "content": TEXT(stored=True),
    }
    return Schema(**fields)
|  | ||||
|  | ||||
def open_index(recreate=False):
    """Open the search index at settings.INDEX_DIR, building it if needed.

    A fresh (empty) index is created when *recreate* is True or when no
    index exists yet.  The index directory itself is created on demand,
    because whoosh's create_in() requires the target directory to already
    exist — without this, the very first consume on a clean install fails.
    """
    import os  # local import: keeps this fix self-contained

    if exists_in(settings.INDEX_DIR) and not recreate:
        return open_dir(settings.INDEX_DIR)

    os.makedirs(settings.INDEX_DIR, exist_ok=True)
    return create_in(settings.INDEX_DIR, get_schema())
|  | ||||
|  | ||||
def update_document(writer, doc):
    """Write (or overwrite) the index entry for a single document."""
    writer.update_document(id=doc.id, title=doc.title, content=doc.content)
|  | ||||
@receiver(models.signals.post_save, sender=Document)
def add_document_to_index(sender, instance, **kwargs):
    """Keep the search index in sync whenever a document is saved."""
    search_index = open_index()
    with AsyncWriter(search_index) as writer:
        update_document(writer, instance)
|  | ||||
|  | ||||
@receiver(models.signals.post_delete, sender=Document)
def remove_document_from_index(sender, instance, **kwargs):
    """Drop a document's index entry when the document is deleted."""
    search_index = open_index()
    with AsyncWriter(search_index) as writer:
        writer.delete_by_term('id', instance.id)
|  | ||||
|  | ||||
def query_index(ix, querystr):
    """Run a fuzzy full-text query against *ix* and return result dicts.

    Each hit carries its id, title, relevance score and JSON-formatted
    content highlights (see JsonFormatter).
    """
    parser = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm)
    query = parser.parse(querystr)

    with ix.searcher() as searcher:
        results = searcher.search(query)
        results.formatter = JsonFormatter()
        results.fragmenter.surround = 50

        hits = []
        for hit in results:
            hits.append({
                'id': hit['id'],
                'highlights': hit.highlights("content"),
                'score': hit.score,
                'title': hit['title'],
            })
        return hits
							
								
								
									
										27
									
								
								src/documents/management/commands/document_index.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								src/documents/management/commands/document_index.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| from django.core.management import BaseCommand | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| import documents.index as index | ||||
| from documents.mixins import Renderable | ||||
| from documents.models import Document | ||||
|  | ||||
|  | ||||
class Command(Renderable, BaseCommand):
    """Management command that rebuilds the whoosh document index."""

    help = "Recreates the document index"

    def __init__(self, *args, **kwargs):
        self.verbosity = 0
        BaseCommand.__init__(self, *args, **kwargs)

    def handle(self, *args, **options):
        self.verbosity = options["verbosity"]

        # Recreate the index from scratch and write every document into it.
        ix = index.open_index(recreate=True)
        with AsyncWriter(ix) as writer:
            for doc in Document.objects.all():
                index.update_document(writer, doc)
| @@ -1,37 +0,0 @@ | ||||
| # Generated by Django 2.0.10 on 2019-04-26 18:57 | ||||
|  | ||||
| from django.db import migrations, models | ||||
|  | ||||
|  | ||||
def set_filename(apps, schema_editor):
    """Backfill Document.filename with the legacy storage name.

    The legacy name is the zero-padded primary key plus the file type
    extension, with an extra ".gpg" suffix for encrypted documents.
    """
    Document = apps.get_model("documents", "Document")
    for doc in Document.objects.all():
        name = "{:07}.{}".format(doc.pk, doc.file_type)

        # Encrypted payloads carry an extra .gpg suffix on disk.
        if doc.storage_type == "gpg":
            name = name + ".gpg"

        doc.filename = name
        doc.save()
|  | ||||
|  | ||||
class Migration(migrations.Migration):
    # Adds the Document.filename column and backfills it for existing
    # rows with the legacy "<pk>.<ext>[.gpg]" name (see set_filename).

    dependencies = [
        ('documents', '0022_auto_20181007_1420'),
    ]

    operations = [
        migrations.AddField(
            model_name='document',
            name='filename',
            # Nullable: documents consumed before this migration have no
            # recorded filename until set_filename runs.
            field=models.FilePathField(default=None,
                                       null=True,
                                       editable=False,
                                       help_text='Current filename in storage',
                                       max_length=256),
        ),
        migrations.RunPython(set_filename)
    ]
| @@ -168,14 +168,6 @@ class Document(models.Model): | ||||
|     added = models.DateTimeField( | ||||
|         default=timezone.now, editable=False, db_index=True) | ||||
|  | ||||
|     filename = models.FilePathField( | ||||
|         max_length=256, | ||||
|         editable=False, | ||||
|         default=None, | ||||
|         null=True, | ||||
|         help_text="Current filename in storage" | ||||
|     ) | ||||
|  | ||||
|     archive_serial_number = models.IntegerField( | ||||
|         blank=True, | ||||
|         null=True, | ||||
| @@ -197,125 +189,17 @@ class Document(models.Model): | ||||
|             return "{}: {}".format(created, self.correspondent or self.title) | ||||
|         return str(created) | ||||
|  | ||||
def find_renamed_document(self, subdirectory=""):
    """Search the originals tree for a file matching this document.

    A match is any file whose name ends with the canonical
    "<zero-padded pk>.<ext>[.gpg]" suffix.  Returns the media-relative
    path of the match, or None when no such file exists.  Used to
    recover a document whose file was renamed on disk outside of
    paperless (see source_filename).
    """
    suffix = "%07i.%s" % (self.pk, self.file_type)

    # Append .gpg for encrypted files
    if self.storage_type == self.STORAGE_TYPE_GPG:
        suffix += ".gpg"

    root = os.path.normpath(Document.filename_to_path(subdirectory))

    for filename in os.listdir(root):
        if filename.endswith(suffix):
            return os.path.join(subdirectory, filename)

        fullname = os.path.join(subdirectory, filename)
        if os.path.isdir(Document.filename_to_path(fullname)):
            # BUGFIX: continue scanning the remaining entries when a
            # subdirectory does not contain the file.  The original code
            # returned the recursive result unconditionally, which
            # aborted the search (returning None) after the first
            # subdirectory encountered, even if a later sibling held
            # the renamed file.
            found = self.find_renamed_document(fullname)
            if found is not None:
                return found

    return None
|  | ||||
@property
def source_filename(self):
    # Lazily determine (and cache on self.filename) the name under which
    # this document's file is stored, recovering from on-disk renames
    # where possible.

    # Initial filename generation (for new documents)
    if self.filename is None:
        self.filename = self.generate_source_filename()

    # Check if document is still available under filename
    elif not os.path.isfile(Document.filename_to_path(self.filename)):
        recovered_filename = self.find_renamed_document()

        # If we have found the file so update the filename
        if recovered_filename is not None:
            logger = logging.getLogger(__name__)
            logger.warning("Filename of document " + str(self.id) +
                           " has changed and was successfully updated")
            self.filename = recovered_filename

            # Remove all empty subdirectories from MEDIA_ROOT
            Document.delete_all_empty_subdirectories(
                    Document.filename_to_path(""))
        else:
            # File is gone for good; keep the stale filename but log it.
            logger = logging.getLogger(__name__)
            logger.error("File of document " + str(self.id) + " has " +
                         "gone and could not be recovered")

    return self.filename
|  | ||||
@staticmethod
def many_to_dictionary(field):
    """Convert a ManyToMany field to a dict for filename formatting.

    Tags are indexed by position, and any tag whose name contains an
    '_' or '-' delimiter additionally contributes a slugified
    key/value pair (text before / after the first delimiter).
    """
    result = dict()

    for position, tag in enumerate(field.all()):
        # Every tag is reachable by its position.
        result[position] = slugify(tag.name)

        # Prefer '_' as the delimiter, falling back to '-'.
        split_at = tag.name.find('_')
        if split_at == -1:
            split_at = tag.name.find('-')
        if split_at == -1:
            continue

        result[slugify(tag.name[:split_at])] = \
            slugify(tag.name[split_at + 1:])

    return result
|  | ||||
def generate_source_filename(self):
    """Build the storage filename from PAPERLESS_FILENAME_FORMAT (if
    configured), always suffixed with the zero-padded pk so the name
    is guaranteed unique."""
    # Create filename based on configured format
    if settings.PAPERLESS_FILENAME_FORMAT is not None:
        # Unknown tag placeholders resolve to slugify(None) through
        # the defaultdict factory instead of raising KeyError.
        tags = defaultdict(lambda: slugify(None),
                           self.many_to_dictionary(self.tags))
        path = settings.PAPERLESS_FILENAME_FORMAT.format(
               correspondent=slugify(self.correspondent),
               title=slugify(self.title),
               created=slugify(self.created),
               added=slugify(self.added),
               tags=tags)
    else:
        path = ""

    # Always append the primary key to guarantee uniqueness of filename
    if len(path) > 0:
        filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
    else:
        filename = "%07i.%s" % (self.pk, self.file_type)

    # Append .gpg for encrypted files
    if self.storage_type == self.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
|  | ||||
def create_source_directory(self):
    """Ensure the directory for this document's generated filename exists."""
    target = os.path.dirname(self.generate_source_filename())
    os.makedirs(Document.filename_to_path(target), exist_ok=True)
|  | ||||
|     @property | ||||
|     def source_path(self): | ||||
|         return Document.filename_to_path(self.source_filename) | ||||
|         file_name = "{:07}.{}".format(self.pk, self.file_type) | ||||
|         if self.storage_type == self.STORAGE_TYPE_GPG: | ||||
|             file_name += ".gpg" | ||||
|  | ||||
|     @staticmethod | ||||
|     def filename_to_path(filename): | ||||
|         return os.path.join( | ||||
|             settings.MEDIA_ROOT, | ||||
|             "documents", | ||||
|             "originals", | ||||
|             filename | ||||
|             file_name | ||||
|         ) | ||||
|  | ||||
|     @property | ||||
| @@ -352,125 +236,6 @@ class Document(models.Model): | ||||
|     def thumbnail_url(self): | ||||
|         return reverse("fetch", kwargs={"kind": "thumb", "pk": self.pk}) | ||||
|  | ||||
def set_filename(self, filename):
    # Adopt the new filename only when a file actually exists at that
    # location; otherwise keep the current filename untouched.
    if os.path.isfile(Document.filename_to_path(filename)):
        self.filename = filename
|  | ||||
@staticmethod
def try_delete_empty_directories(directory):
    """Remove *directory* and each empty ancestor, stopping at the
    originals root or at the first non-empty directory."""
    # Go up in the directory hierarchy and try to delete all directories
    directory = os.path.normpath(directory)
    root = os.path.normpath(Document.filename_to_path(""))

    while directory != root:
        # Try to delete the current directory
        try:
            os.rmdir(directory)
        except os.error:
            # Directory not empty, no need to go further up
            return

        # Cut off actual directory and go one level up
        directory, _ = os.path.split(directory)
        directory = os.path.normpath(directory)
|  | ||||
@staticmethod
def delete_all_empty_subdirectories(directory):
    """Depth-first sweep that removes every empty subdirectory below
    *directory* (a media-relative path); non-empty ones are left alone."""
    # Go through all folders and try to delete all directories
    root = os.path.normpath(Document.filename_to_path(directory))

    for filename in os.listdir(root):
        fullname = os.path.join(directory, filename)

        if not os.path.isdir(Document.filename_to_path(fullname)):
            continue

        # Go into subdirectory to see, if there is more to delete
        Document.delete_all_empty_subdirectories(
                os.path.join(directory, filename))

        # Try to delete the directory
        try:
            os.rmdir(Document.filename_to_path(fullname))
            continue
        except os.error:
            # Directory not empty, no need to go further up
            continue
|  | ||||
|  | ||||
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
    """Signal handler: move the stored file when the generated filename
    (derived from correspondent/title/tags/dates) no longer matches the
    recorded one, then persist the new name on the instance."""
    # Skip if document has not been saved yet
    if instance.filename is None:
        return

    # Check is file exists and update filename otherwise
    if not os.path.isfile(Document.filename_to_path(instance.filename)):
        instance.filename = instance.source_filename

    # Build the new filename
    new_filename = instance.generate_source_filename()

    # If the filename is the same, then nothing needs to be done
    if instance.filename == new_filename:
        return

    # Determine the full "target" path
    path_new = instance.filename_to_path(new_filename)
    dir_new = instance.filename_to_path(os.path.dirname(new_filename))

    # Create new path
    instance.create_source_directory()

    # Determine the full "current" path
    path_current = instance.filename_to_path(instance.source_filename)

    # Move file
    try:
        os.rename(path_current, path_new)
    except PermissionError:
        # Do not update filename in object
        return
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.error("Renaming of document " + str(instance.id) + " failed " +
                     "as file " + instance.filename + " was no longer present")
        return

    # Delete empty directory
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)

    instance.filename = new_filename

    # Save instance
    # This will not cause a cascade of post_save signals, as next time
    # nothing needs to be renamed
    instance.save()
|  | ||||
|  | ||||
@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
    """Signal handler: remove the document's file from disk after the
    database row is deleted, then prune any now-empty directories."""
    # Nothing was ever written for this document.
    if instance.filename is None:
        return

    # Remove the document
    old_file = instance.filename_to_path(instance.filename)

    try:
        os.remove(old_file)
    except FileNotFoundError:
        # Best-effort: the row is gone either way, so just log it.
        logger = logging.getLogger(__name__)
        logger.warning("Deleted document " + str(instance.id) + " but file " +
                       old_file + " was no longer present")

    # And remove the directory (if applicable)
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)
|  | ||||
|  | ||||
| class Log(models.Model): | ||||
|  | ||||
|   | ||||
| @@ -93,8 +93,6 @@ class DocumentSerializer(serializers.ModelSerializer): | ||||
|             "modified", | ||||
|             "added", | ||||
|             "file_name", | ||||
|             "download_url", | ||||
|             "thumbnail_url", | ||||
|             "archive_serial_number" | ||||
|         ) | ||||
|  | ||||
|   | ||||
| @@ -9,6 +9,7 @@ from django.contrib.contenttypes.models import ContentType | ||||
| from django.utils import timezone | ||||
|  | ||||
| from documents.classifier import DocumentClassifier | ||||
| from .. import index | ||||
| from ..models import Document, Tag | ||||
|  | ||||
|  | ||||
| @@ -16,9 +17,14 @@ def logger(message, group): | ||||
|     logging.getLogger(__name__).debug(message, extra={"group": group}) | ||||
|  | ||||
|  | ||||
| #TODO: global? really? | ||||
| classifier = DocumentClassifier() | ||||
|  | ||||
|  | ||||
def index_document(sender, document=None, logging_group=None, **kwargs):
    # Consumption-finished hook: push the freshly consumed document into
    # the search index.  The signature mirrors the other consumption
    # handlers so it can be connected to the same signal.
    index.add_document_to_index(sender, instance=document)
|  | ||||
|  | ||||
| def classify_document(sender, document=None, logging_group=None, **kwargs): | ||||
|     global classifier | ||||
|     try: | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| from django.db.models import Count, Max | ||||
| from django.http import HttpResponse, HttpResponseBadRequest | ||||
| from django.views.generic import DetailView, FormView, TemplateView | ||||
| from django.http import HttpResponse | ||||
| from django.views.decorators.cache import cache_control | ||||
| from django.views.generic import TemplateView | ||||
| from django_filters.rest_framework import DjangoFilterBackend | ||||
| from django.conf import settings | ||||
| from django.utils import cache | ||||
| from rest_framework.decorators import action | ||||
| from rest_framework.response import Response | ||||
| from rest_framework.views import APIView | ||||
|  | ||||
| from paperless.db import GnuPG | ||||
| from paperless.mixins import SessionOrBasicAuthMixin | ||||
| from paperless.views import StandardPagination | ||||
| from rest_framework.filters import OrderingFilter, SearchFilter | ||||
| from rest_framework.mixins import ( | ||||
| @@ -29,7 +30,7 @@ from .filters import ( | ||||
|     DocumentTypeFilterSet | ||||
| ) | ||||
|  | ||||
| from .forms import UploadForm | ||||
| import documents.index as index | ||||
| from .models import Correspondent, Document, Log, Tag, DocumentType | ||||
| from .serialisers import ( | ||||
|     CorrespondentSerializer, | ||||
| @@ -41,71 +42,7 @@ from .serialisers import ( | ||||
|  | ||||
|  | ||||
| class IndexView(TemplateView): | ||||
|     template_name = "documents/index.html" | ||||
|  | ||||
|  | ||||
class FetchView(SessionOrBasicAuthMixin, DetailView):
    # Legacy download endpoint: serves a document's thumbnail, preview or
    # original file as raw bytes, decrypting GPG-encrypted storage on the
    # fly.  The URL kwarg "kind" selects thumb / preview / download.

    model = Document

    def render_to_response(self, context, **response_kwargs):
        """
        Override the default to return the unencrypted image/PDF as raw data.
        """

        # Maps stored file types onto HTTP content types.
        content_types = {
            Document.TYPE_PDF: "application/pdf",
            Document.TYPE_PNG: "image/png",
            Document.TYPE_JPG: "image/jpeg",
            Document.TYPE_GIF: "image/gif",
            Document.TYPE_TIF: "image/tiff",
            Document.TYPE_CSV: "text/csv",
            Document.TYPE_MD:  "text/markdown",
            Document.TYPE_TXT: "text/plain"
        }

        if self.kwargs["kind"] == "thumb":
            response = HttpResponse(
                self._get_raw_data(self.object.thumbnail_file),
                content_type=content_types[Document.TYPE_PNG]
            )
            # Thumbnails never change: cache privately for a year.
            cache.patch_cache_control(response, max_age=31536000, private=True)
            return response

        response = HttpResponse(
            self._get_raw_data(self.object.source_file),
            content_type=content_types[self.object.file_type]
        )

        # Previews render inline; downloads attach unless INLINE_DOC is set.
        DISPOSITION = (
            'inline' if settings.INLINE_DOC or self.kwargs["kind"] == 'preview'
            else 'attachment'
        )

        response["Content-Disposition"] = '{}; filename="{}"'.format(
            DISPOSITION, self.object.file_name)

        return response

    def _get_raw_data(self, file_handle):
        # Decrypt on the fly for GPG-encrypted storage; pass through otherwise.
        if self.object.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
            return file_handle
        return GnuPG.decrypted(file_handle)
|  | ||||
|  | ||||
class PushView(SessionOrBasicAuthMixin, FormView):
    """
    A crude REST-ish API for creating documents.
    """

    form_class = UploadForm

    def form_valid(self, form):
        # 202 Accepted: the upload is saved but processed asynchronously
        # by the consumer, so the document is not immediately available.
        form.save()
        return HttpResponse("1", status=202)

    def form_invalid(self, form):
        # Echo the form validation errors back as a 400 response body.
        return HttpResponseBadRequest(str(form.errors))
|     template_name = "index.html" | ||||
|  | ||||
|  | ||||
| class CorrespondentViewSet(ModelViewSet): | ||||
| @@ -155,7 +92,52 @@ class DocumentViewSet(RetrieveModelMixin, | ||||
|     filter_class = DocumentFilterSet | ||||
|     search_fields = ("title", "correspondent__name", "content") | ||||
|     ordering_fields = ( | ||||
|         "id", "title", "correspondent__name", "created", "modified", "added") | ||||
|         "id", "title", "correspondent__name", "created", "modified", "added", "archive_serial_number") | ||||
|  | ||||
|  | ||||
|     def file_response(self, pk, disposition): | ||||
|         #TODO: this should not be necessary here. | ||||
|         content_types = { | ||||
|             Document.TYPE_PDF: "application/pdf", | ||||
|             Document.TYPE_PNG: "image/png", | ||||
|             Document.TYPE_JPG: "image/jpeg", | ||||
|             Document.TYPE_GIF: "image/gif", | ||||
|             Document.TYPE_TIF: "image/tiff", | ||||
|             Document.TYPE_CSV: "text/csv", | ||||
|             Document.TYPE_MD:  "text/markdown", | ||||
|             Document.TYPE_TXT: "text/plain" | ||||
|         } | ||||
|  | ||||
|         doc = Document.objects.get(id=pk) | ||||
|  | ||||
|         if doc.storage_type == Document.STORAGE_TYPE_UNENCRYPTED: | ||||
|             file_handle = doc.source_file | ||||
|         else: | ||||
|             file_handle = GnuPG.decrypted(doc.source_file) | ||||
|  | ||||
|         response = HttpResponse(file_handle, content_type=content_types[doc.file_type]) | ||||
|         response["Content-Disposition"] = '{}; filename="{}"'.format( | ||||
|             disposition, doc.file_name) | ||||
|         return response | ||||
|  | ||||
|     @action(methods=['post'], detail=False) | ||||
|     def post_document(self, request, pk=None): | ||||
|         #TODO: implement document upload | ||||
|         return Response("not implemented yet", status=500) | ||||
|  | ||||
|     @action(methods=['get'], detail=True) | ||||
|     def preview(self, request, pk=None): | ||||
|         response = self.file_response(pk, "inline") | ||||
|         return response | ||||
|  | ||||
|     @action(methods=['get'], detail=True) | ||||
|     @cache_control(public=False, max_age=315360000) | ||||
|     def thumb(self, request, pk=None): | ||||
|         return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png') | ||||
|  | ||||
|     @action(methods=['get'], detail=True) | ||||
|     def download(self, request, pk=None): | ||||
|         return self.file_response(pk, "attachment") | ||||
|  | ||||
|  | ||||
| class LogViewSet(ReadOnlyModelViewSet): | ||||
| @@ -166,3 +148,17 @@ class LogViewSet(ReadOnlyModelViewSet): | ||||
|     permission_classes = (IsAuthenticated,) | ||||
|     filter_backends = (DjangoFilterBackend, OrderingFilter) | ||||
|     ordering_fields = ("time",) | ||||
|  | ||||
|  | ||||
class SearchView(APIView):
    """Full-text search endpoint over the document index."""

    # NOTE(review): the index is opened once at class-definition (import)
    # time and shared across all requests — confirm this is intended.
    ix = index.open_index()

    def get(self, request, format=None):
        # No "query" parameter at all -> empty result set.
        if 'query' not in request.query_params:
            return Response([])

        search_term = request.query_params['query']
        hits = index.query_index(self.ix, search_term)
        # Enrich each index hit with the serialized document it refers to.
        for hit in hits:
            doc = Document.objects.get(id=hit['id'])
            hit['document'] = DocumentSerializer(doc).data
        return Response(hits)
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler