Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-04-02 13:45:10 -05:00)

Merge pull request #278 from stumpylog/pre-commit-python-changes

Python Cleanup from pre-commit

commit 168ce2111d
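This pull request applies mechanical cleanups produced by pre-commit across the documents app. The config hunk below only shows the flake8 arguments and the black hook, so the exact hook list is an inference, but the changes are consistent with hooks in the style of reorder-python-imports and add-trailing-comma: combined imports are split one per line, every multi-line call gains a trailing comma on its last argument, and f-string prefixes are dropped from strings that interpolate nothing. A few hunks also remove unused `except ... as e` bindings and `# NOQA: E501` comments that the new flake8 arguments make unnecessary. A minimal runnable sketch of the recurring rewrites (the logger name is hypothetical; the strings are copied from the hunks below):

    import logging

    logger = logging.getLogger("paperless.example")

    # Before the cleanup, this code would have read:
    #   from documents.models import Document, Correspondent, DocumentType
    #   logger.debug(
    #       f"Document classification model does not exist (yet), not "
    #       f"performing automatic matching."
    #   )
    # After: one import per line, a trailing comma on the last argument,
    # and a plain string because nothing is interpolated:
    #   from documents.models import Correspondent
    #   from documents.models import Document
    #   from documents.models import DocumentType
    logger.debug(
        "Document classification model does not exist (yet), not "
        "performing automatic matching.",
    )

One import per line means a later diff touches exactly one line per added or removed name, and the enforced trailing comma keeps future argument additions to one-line diffs as well.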
@@ -62,6 +62,7 @@ repos:
+        exclude: "(migrations)|(paperless/settings.py)|(.*\\.tox)|(.*/tests/.*)"
         args:
           - "--max-line-length=88"
           - "--ignore=E203,W503"
   - repo: https://github.com/psf/black
     rev: 22.1.0
     hooks:
@@ -1,2 +1,5 @@
 # this is here so that django finds the checks.
-from .checks import *
+from .checks import changed_password_check
+from .checks import parser_check
+
+__all__ = ["changed_password_check", "parser_check"]
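The hunk above is slightly more than formatting. The wildcard import existed only so that Django would discover the system checks on app load; replacing it with explicit imports plus `__all__` preserves that side effect while letting flake8 verify each imported name (wildcard imports otherwise trigger F401/F403 noise). A sketch of the pattern with a hypothetical package name:

    # mypkg/__init__.py (hypothetical): explicit re-exports replace
    # `from .checks import *`; importing mypkg still registers the checks,
    # and linters can now see exactly which names are exported.
    from .checks import changed_password_check
    from .checks import parser_check

    __all__ = ["changed_password_check", "parser_check"]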
@@ -1,13 +1,11 @@
 from django.contrib import admin
 
-from .models import (
-    Correspondent,
-    Document,
-    DocumentType,
-    Tag,
-    SavedView,
-    SavedViewFilterRule,
-)
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import SavedView
+from .models import SavedViewFilterRule
+from .models import Tag
 
 
 class CorrespondentAdmin(admin.ModelAdmin):
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from django.utils.translation import gettext_lazy as _
 
 
@@ -8,7 +8,10 @@ class BulkArchiveStrategy:
         self.zipf = zipf
 
     def make_unique_filename(
-        self, doc: Document, archive: bool = False, folder: str = ""
+        self,
+        doc: Document,
+        archive: bool = False,
+        folder: str = "",
     ):
         counter = 0
         while True:

@@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
         if doc.has_archive_version:
             self.zipf.write(
-                doc.archive_path, self.make_unique_filename(doc, archive=True)
+                doc.archive_path,
+                self.make_unique_filename(doc, archive=True),
             )
         else:
             self.zipf.write(doc.source_path, self.make_unique_filename(doc))

@@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy):
             )
 
         self.zipf.write(
-            doc.source_path, self.make_unique_filename(doc, folder="originals/")
+            doc.source_path,
+            self.make_unique_filename(doc, folder="originals/"),
         )
@@ -2,8 +2,9 @@ import itertools
 
 from django.db.models import Q
 from django_q.tasks import async_task
-
-from documents.models import Document, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
 
 
 def set_correspondent(doc_ids, correspondent):

@@ -40,7 +41,7 @@ def add_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through
 
     DocumentTagRelationship.objects.bulk_create(
-        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
+        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
     )
 
     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)

@@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through
 
     DocumentTagRelationship.objects.filter(
-        Q(document_id__in=affected_docs) & Q(tag_id=tag)
+        Q(document_id__in=affected_docs) & Q(tag_id=tag),
     ).delete()
 
     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
@@ -1,10 +1,11 @@
 import textwrap
 
 from django.conf import settings
-from django.core.checks import Error, register
+from django.core.checks import Error
+from django.core.checks import register
 from django.core.exceptions import FieldError
-from django.db.utils import OperationalError, ProgrammingError
-
+from django.db.utils import OperationalError
+from django.db.utils import ProgrammingError
 from documents.signals import document_consumer_declaration
 
 

@@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs):
 
     try:
         encrypted_doc = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
         ).first()
     except (OperationalError, ProgrammingError, FieldError):
         return []  # No documents table yet

@@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs):
         return [
             Error(
                 "The database contains encrypted documents but no password "
-                "is set."
-            )
+                "is set.",
+            ),
         ]
 
     if not GnuPG.decrypted(encrypted_doc.source_file):

@@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs):
                 If you intend to change your password, you must first export
                 all of the old documents, start fresh with the new password
                 and then re-import them."
-                """
-            )
-        )
+                """,
+            ),
+        ),
     ]
 
     return []

@@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs):
         return [
             Error(
                 "No parsers found. This is a bug. The consumer won't be "
-                "able to consume any documents without parsers."
-            )
+                "able to consume any documents without parsers.",
+            ),
         ]
     else:
         return []
@@ -6,8 +6,8 @@ import re
 import shutil
 
 from django.conf import settings
-
-from documents.models import Document, MatchingModel
+from documents.models import Document
+from documents.models import MatchingModel
 
 
 class IncompatibleClassifierVersionError(Exception):

@@ -30,8 +30,8 @@ def preprocess_content(content):
 def load_classifier():
     if not os.path.isfile(settings.MODEL_FILE):
         logger.debug(
-            f"Document classification model does not exist (yet), not "
-            f"performing automatic matching."
+            "Document classification model does not exist (yet), not "
+            "performing automatic matching.",
         )
         return None
 

@@ -42,16 +42,16 @@ def load_classifier():
     except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
         # there's something wrong with the model file.
         logger.exception(
-            f"Unrecoverable error while loading document "
-            f"classification model, deleting model file."
+            "Unrecoverable error while loading document "
+            "classification model, deleting model file.",
         )
         os.unlink(settings.MODEL_FILE)
         classifier = None
     except OSError:
-        logger.exception(f"IO error while loading document classification model")
+        logger.exception("IO error while loading document classification model")
         classifier = None
     except Exception:
-        logger.exception(f"Unknown error while loading document classification model")
+        logger.exception("Unknown error while loading document classification model")
         classifier = None
 
     return classifier

@@ -78,7 +78,7 @@ class DocumentClassifier(object):
 
         if schema_version != self.FORMAT_VERSION:
             raise IncompatibleClassifierVersionError(
-                "Cannor load classifier, incompatible versions."
+                "Cannor load classifier, incompatible versions.",
             )
         else:
             try:

@@ -122,8 +122,8 @@ class DocumentClassifier(object):
         logger.debug("Gathering data from database...")
         m = hashlib.sha1()
         for doc in Document.objects.order_by("pk").exclude(
-            tags__is_inbox_tag=True
-        ):  # NOQA: E501
+            tags__is_inbox_tag=True,
+        ):
             preprocessed_content = preprocess_content(doc.content)
             m.update(preprocessed_content.encode("utf-8"))
             data.append(preprocessed_content)

@@ -146,9 +146,9 @@ class DocumentClassifier(object):
                 [
                     tag.pk
                     for tag in doc.tags.filter(
-                        matching_algorithm=MatchingModel.MATCH_AUTO
+                        matching_algorithm=MatchingModel.MATCH_AUTO,
                     )
-                ]
+                ],
             )
             for tag in tags:
                 m.update(tag.to_bytes(4, "little", signed=True))

@@ -177,8 +177,11 @@ class DocumentClassifier(object):
         logger.debug(
             "{} documents, {} tag(s), {} correspondent(s), "
             "{} document type(s).".format(
-                len(data), num_tags, num_correspondents, num_document_types
-            )
+                len(data),
+                num_tags,
+                num_correspondents,
+                num_document_types,
+            ),
         )
 
         from sklearn.feature_extraction.text import CountVectorizer

@@ -188,7 +191,9 @@ class DocumentClassifier(object):
         # Step 2: vectorize data
         logger.debug("Vectorizing data...")
         self.data_vectorizer = CountVectorizer(
-            analyzer="word", ngram_range=(1, 2), min_df=0.01
+            analyzer="word",
+            ngram_range=(1, 2),
+            min_df=0.01,
         )
         data_vectorized = self.data_vectorizer.fit_transform(data)
 

@@ -204,7 +209,7 @@ class DocumentClassifier(object):
             ]
             self.tags_binarizer = LabelBinarizer()
             labels_tags_vectorized = self.tags_binarizer.fit_transform(
-                labels_tags
+                labels_tags,
             ).ravel()
         else:
             self.tags_binarizer = MultiLabelBinarizer()

@@ -223,7 +228,8 @@ class DocumentClassifier(object):
         else:
             self.correspondent_classifier = None
             logger.debug(
-                "There are no correspondents. Not training correspondent " "classifier."
+                "There are no correspondents. Not training correspondent "
+                "classifier.",
             )
 
         if num_document_types > 0:

@@ -233,7 +239,8 @@ class DocumentClassifier(object):
         else:
             self.document_type_classifier = None
             logger.debug(
-                "There are no document types. Not training document type " "classifier."
+                "There are no document types. Not training document type "
+                "classifier.",
             )
 
         self.data_hash = new_data_hash
@@ -15,11 +15,19 @@ from filelock import FileLock
 from rest_framework.reverse import reverse
 
 from .classifier import load_classifier
-from .file_handling import create_source_path_directory, generate_unique_filename
+from .file_handling import create_source_path_directory
+from .file_handling import generate_unique_filename
 from .loggers import LoggingMixin
-from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
-from .signals import document_consumption_finished, document_consumption_started
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import FileInfo
+from .models import Tag
+from .parsers import get_parser_class_for_mime_type
+from .parsers import parse_date
+from .parsers import ParseError
+from .signals import document_consumption_finished
+from .signals import document_consumption_started
 
 
 class ConsumerError(Exception):

@@ -46,12 +54,15 @@ class Consumer(LoggingMixin):
     logging_name = "paperless.consumer"
 
     def _send_progress(
-        self, current_progress, max_progress, status, message=None, document_id=None
+        self,
+        current_progress,
+        max_progress,
+        status,
+        message=None,
+        document_id=None,
     ):
         payload = {
-            "filename": os.path.basename(self.filename)
-            if self.filename
-            else None,  # NOQA: E501
+            "filename": os.path.basename(self.filename) if self.filename else None,
             "task_id": self.task_id,
             "current_progress": current_progress,
             "max_progress": max_progress,

@@ -60,7 +71,8 @@ class Consumer(LoggingMixin):
             "document_id": document_id,
         }
         async_to_sync(self.channel_layer.group_send)(
-            "status_updates", {"type": "status_update", "data": payload}
+            "status_updates",
+            {"type": "status_update", "data": payload},
         )
 
     def _fail(self, message, log_message=None, exc_info=None):

@@ -83,15 +95,16 @@ class Consumer(LoggingMixin):
     def pre_check_file_exists(self):
         if not os.path.isfile(self.path):
             self._fail(
-                MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
+                MESSAGE_FILE_NOT_FOUND,
+                f"Cannot consume {self.path}: File not found.",
             )
 
     def pre_check_duplicate(self):
         with open(self.path, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
         if Document.objects.filter(
-            Q(checksum=checksum) | Q(archive_checksum=checksum)
-        ).exists():  # NOQA: E501
+            Q(checksum=checksum) | Q(archive_checksum=checksum),
+        ).exists():
             if settings.CONSUMER_DELETE_DUPLICATES:
                 os.unlink(self.path)
             self._fail(

@@ -139,7 +152,8 @@ class Consumer(LoggingMixin):
             )
 
         self.log(
-            "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
+            "info",
+            f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
        )
 
        try:

@@ -154,7 +168,7 @@ class Consumer(LoggingMixin):
                     reverse("document-thumb", kwargs={"pk": document.pk}),
                     str(document.correspondent),
                     str(",".join(document.tags.all().values_list("name", flat=True))),
-                )
+                ),
             ).wait()
         except Exception as e:
             self._fail(

@@ -213,7 +227,9 @@ class Consumer(LoggingMixin):
         # Notify all listeners that we're going to do some work.
 
         document_consumption_started.send(
-            sender=self.__class__, filename=self.path, logging_group=self.logging_group
+            sender=self.__class__,
+            filename=self.path,
+            logging_group=self.logging_group,
         )
 
         self.run_pre_consume_script()

@@ -247,7 +263,9 @@ class Consumer(LoggingMixin):
             self.log("debug", f"Generating thumbnail for {self.filename}...")
             self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
             thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type, self.filename
+                self.path,
+                mime_type,
+                self.filename,
             )
 
             text = document_parser.get_text()

@@ -301,21 +319,26 @@ class Consumer(LoggingMixin):
                     self._write(document.storage_type, self.path, document.source_path)
 
                     self._write(
-                        document.storage_type, thumbnail, document.thumbnail_path
+                        document.storage_type,
+                        thumbnail,
+                        document.thumbnail_path,
                     )
 
                     if archive_path and os.path.isfile(archive_path):
                         document.archive_filename = generate_unique_filename(
-                            document, archive_filename=True
+                            document,
+                            archive_filename=True,
                         )
                         create_source_path_directory(document.archive_path)
                         self._write(
-                            document.storage_type, archive_path, document.archive_path
+                            document.storage_type,
+                            archive_path,
+                            document.archive_path,
                         )
 
                         with open(archive_path, "rb") as f:
                             document.archive_checksum = hashlib.md5(
-                                f.read()
+                                f.read(),
                             ).hexdigest()
 
                 # Don't save with the lock active. Saving will cause the file

@@ -328,7 +351,8 @@ class Consumer(LoggingMixin):
 
         # https://github.com/jonaswinkler/paperless-ng/discussions/1037
         shadow_file = os.path.join(
-            os.path.dirname(self.path), "._" + os.path.basename(self.path)
+            os.path.dirname(self.path),
+            "._" + os.path.basename(self.path),
         )
 
         if os.path.isfile(shadow_file):

@@ -390,12 +414,12 @@ class Consumer(LoggingMixin):
     def apply_overrides(self, document):
         if self.override_correspondent_id:
             document.correspondent = Correspondent.objects.get(
-                pk=self.override_correspondent_id
+                pk=self.override_correspondent_id,
             )
 
         if self.override_document_type_id:
             document.document_type = DocumentType.objects.get(
-                pk=self.override_document_type_id
+                pk=self.override_document_type_id,
             )
 
         if self.override_tag_ids:
@@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False):
     if archive_filename and doc.filename:
         new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
         if new_filename == old_filename or not os.path.exists(
-            os.path.join(root, new_filename)
-        ):  # NOQA: E501
+            os.path.join(root, new_filename),
+        ):
             return new_filename
 
     counter = 0
 
     while True:
         new_filename = generate_filename(
-            doc, counter, archive_filename=archive_filename
+            doc,
+            counter,
+            archive_filename=archive_filename,
         )
         if new_filename == old_filename:
             # still the same as before.

@@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
 
     if doc.correspondent:
         correspondent = pathvalidate.sanitize_filename(
-            doc.correspondent.name, replacement_text="-"
+            doc.correspondent.name,
+            replacement_text="-",
         )
     else:
         correspondent = "none"
 
     if doc.document_type:
         document_type = pathvalidate.sanitize_filename(
-            doc.document_type.name, replacement_text="-"
+            doc.document_type.name,
+            replacement_text="-",
         )
     else:
         document_type = "none"

@@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
             document_type=document_type,
             created=datetime.date.isoformat(doc.created),
             created_year=doc.created.year if doc.created else "none",
-            created_month=f"{doc.created.month:02}"
-            if doc.created
-            else "none",  # NOQA: E501
+            created_month=f"{doc.created.month:02}" if doc.created else "none",
             created_day=f"{doc.created.day:02}" if doc.created else "none",
             added=datetime.date.isoformat(doc.added),
             added_year=doc.added.year if doc.added else "none",

@@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     except (ValueError, KeyError, IndexError):
         logger.warning(
             f"Invalid PAPERLESS_FILENAME_FORMAT: "
-            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
         )
 
     counter_str = f"_{counter:02}" if counter else ""
@@ -1,7 +1,13 @@
 from django.db.models import Q
-from django_filters.rest_framework import BooleanFilter, FilterSet, Filter
+from django_filters.rest_framework import BooleanFilter
+from django_filters.rest_framework import Filter
+from django_filters.rest_framework import FilterSet
 
-from .models import Correspondent, Document, Tag, DocumentType, Log
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import Log
+from .models import Tag
 
 CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
 ID_KWARGS = ["in", "exact"]

@@ -75,7 +81,10 @@ class TitleContentFilter(Filter):
 class DocumentFilterSet(FilterSet):
 
     is_tagged = BooleanFilter(
-        label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True,
     )
 
     tags__id__all = TagsFilter()
@@ -1,21 +1,30 @@
 import logging
+import math
 import os
 from contextlib import contextmanager
 
-import math
 from dateutil.parser import isoparse
 from django.conf import settings
-from whoosh import highlight, classify, query
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
+from documents.models import Document
+from whoosh import classify
+from whoosh import highlight
+from whoosh import query
+from whoosh.fields import BOOLEAN
+from whoosh.fields import DATETIME
+from whoosh.fields import KEYWORD
+from whoosh.fields import NUMERIC
+from whoosh.fields import Schema
+from whoosh.fields import TEXT
 from whoosh.highlight import HtmlFormatter
-from whoosh.index import create_in, exists_in, open_dir
+from whoosh.index import create_in
+from whoosh.index import exists_in
+from whoosh.index import open_dir
 from whoosh.qparser import MultifieldParser
 from whoosh.qparser.dateparse import DateParserPlugin
-from whoosh.searching import ResultsPage, Searcher
+from whoosh.searching import ResultsPage
+from whoosh.searching import Searcher
 from whoosh.writing import AsyncWriter
 
-from documents.models import Document
-
 logger = logging.getLogger("paperless.index")
 
 

@@ -45,7 +54,7 @@ def open_index(recreate=False):
         if exists_in(settings.INDEX_DIR) and not recreate:
             return open_dir(settings.INDEX_DIR, schema=get_schema())
     except Exception:
-        logger.exception(f"Error while opening the index, recreating.")
+        logger.exception("Error while opening the index, recreating.")
 
     if not os.path.isdir(settings.INDEX_DIR):
         os.makedirs(settings.INDEX_DIR, exist_ok=True)

@@ -138,11 +147,11 @@ class DelayedQuery:
             criterias.append(query.Term("has_type", v == "false"))
         elif k == "created__date__lt":
             criterias.append(
-                query.DateRange("created", start=None, end=isoparse(v))
+                query.DateRange("created", start=None, end=isoparse(v)),
             )
         elif k == "created__date__gt":
             criterias.append(
-                query.DateRange("created", start=isoparse(v), end=None)
+                query.DateRange("created", start=isoparse(v), end=None),
             )
         elif k == "added__date__gt":
             criterias.append(query.DateRange("added", start=isoparse(v), end=None))

@@ -220,7 +229,7 @@ class DelayedQuery:
                         hit[1],
                     ),
                     page.results.top_n,
-                )
+                ),
             )
 
         self.saved_results[item.start] = page

@@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery):
 
         corrected = self.searcher.correct_query(q, q_str)
         if corrected.query != q:
-            corrected_query = corrected.string
+            corrected.query = corrected.string
 
         return q, None

@@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery):
 
         docnum = self.searcher.document_number(id=more_like_doc_id)
         kts = self.searcher.key_terms_from_text(
-            "content", content, numterms=20, model=classify.Bo1Model, normalize=False
+            "content",
+            content,
+            numterms=20,
+            model=classify.Bo1Model,
+            normalize=False,
         )
         q = query.Or(
-            [query.Term("content", word, boost=weight) for word, weight in kts]
+            [query.Term("content", word, boost=weight) for word, weight in kts],
        )
        mask = {docnum}
 

@@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10):
     with ix.reader() as reader:
         terms = []
         for (score, t) in reader.most_distinctive_terms(
-            "content", number=limit, prefix=term.lower()
+            "content",
+            number=limit,
+            prefix=term.lower(),
         ):
             terms.append(t)
         return terms
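Most hunks in this file are formatting, but the DelayedFullTextQuery change above is a genuine fix: the spelling-corrected query string used to be stored in a local variable `corrected_query` that was never read again (the dead-store pattern flake8 reports as F841), and it is now assigned to `corrected.query`. Whether this changes search results depends on how the caller uses `corrected` afterwards; the visible effect is that the correction is no longer discarded into a throwaway local. A contrived sketch with hypothetical names:

    class Corrected:
        def __init__(self, query, string):
            self.query = query
            self.string = string

    def apply_correction_badly(corrected, new_q):
        if corrected.query != new_q:
            corrected_query = corrected.string  # dead store: flake8 F841
        return corrected

    def apply_correction(corrected, new_q):
        if corrected.query != new_q:
            corrected.query = corrected.string  # mutation survives the call
        return corrected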
@@ -1,8 +1,6 @@
 import logging
 import uuid
-
 from django.conf import settings
 
 
 class LoggingMixin:
-
@@ -1,8 +1,8 @@
 import os
 
 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
-
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from documents.models import Document
 from paperless.db import GnuPG
 

@@ -31,9 +31,9 @@ class Command(BaseCommand):
                 "this unless you've got a recent backup\nWARNING: handy. It "
                 "*should* work without a hitch, but be safe and backup your\n"
                 "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
-                "continue.\n\n"
+                "continue.\n\n",
             )
-            __ = input()
+            _ = input()
         except KeyboardInterrupt:
             return
 

@@ -41,7 +41,7 @@ class Command(BaseCommand):
         if not passphrase:
             raise CommandError(
                 "Passphrase not defined. Please set it with --passphrase or "
-                "by declaring it in your environment or your config."
+                "by declaring it in your environment or your config.",
             )
 
         self.__gpg_to_unencrypted(passphrase)

@@ -50,7 +50,7 @@ class Command(BaseCommand):
     def __gpg_to_unencrypted(passphrase):
 
        encrypted_files = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
        )
 
        for document in encrypted_files:

@@ -71,7 +71,7 @@ class Command(BaseCommand):
             if not ext == ".gpg":
                 raise CommandError(
                     f"Abort: encrypted file {document.source_path} does not "
-                    f"end with .gpg"
+                    f"end with .gpg",
                 )
 
             document.filename = os.path.splitext(document.filename)[0]

@@ -83,7 +83,8 @@ class Command(BaseCommand):
                 f.write(raw_thumb)
 
             Document.objects.filter(id=document.id).update(
-                storage_type=document.storage_type, filename=document.filename
+                storage_type=document.storage_type,
+                filename=document.filename,
             )
 
             for path in old_paths:
@@ -1,7 +1,6 @@
 import hashlib
-import multiprocessing
-
 import logging
+import multiprocessing
 import os
 import shutil
 import uuid

@@ -11,12 +10,12 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.db import transaction
+from filelock import FileLock
 from whoosh.writing import AsyncWriter
 
 from documents.models import Document
-from filelock import FileLock
 
 from ... import index
-from ...file_handling import create_source_path_directory, generate_unique_filename
+from ...file_handling import create_source_path_directory
+from ...file_handling import generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type
-

@@ -33,7 +32,7 @@ def handle_document(document_id):
     if not parser_class:
         logger.error(
             f"No parser found for mime type {mime_type}, cannot "
-            f"archive document {document} (ID: {document_id})"
+            f"archive document {document} (ID: {document_id})",
         )
         return
 

@@ -43,7 +42,9 @@ def handle_document(document_id):
         parser.parse(document.source_path, mime_type, document.get_public_filename())
 
         thumbnail = parser.get_optimised_thumbnail(
-            document.source_path, mime_type, document.get_public_filename()
+            document.source_path,
+            mime_type,
+            document.get_public_filename(),
         )
 
         if parser.get_archive_path():

@@ -55,7 +56,8 @@ def handle_document(document_id):
                 # We also don't use save() since that triggers the filehandling
                 # logic, and we don't want that yet (file not yet in place)
                 document.archive_filename = generate_unique_filename(
-                    document, archive_filename=True
+                    document,
+                    archive_filename=True,
                 )
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,

@@ -70,9 +72,9 @@ def handle_document(document_id):
         with index.open_index_writer() as writer:
             index.update_document(writer, document)
 
-    except Exception as e:
+    except Exception:
         logger.exception(
-            f"Error while parsing document {document} " f"(ID: {document_id})"
+            f"Error while parsing document {document} " f"(ID: {document_id})",
         )
     finally:
         parser.cleanup()

@@ -86,7 +88,8 @@ class Command(BaseCommand):
         back-tag all previously indexed documents with metadata created (or
         modified) after their initial import.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -129,7 +132,7 @@ class Command(BaseCommand):
                 map(
                     lambda doc: doc.id,
                     filter(lambda d: overwrite or not d.has_archive_version, documents),
-                )
+                ),
             )
 
         # Note to future self: this prevents django from reusing database

@@ -146,7 +149,7 @@ class Command(BaseCommand):
                         pool.imap_unordered(handle_document, document_ids),
                         total=len(document_ids),
                         disable=options["no_progress_bar"],
-                    )
+                    ),
                 )
         except KeyboardInterrupt:
             print("Aborting...")
@@ -1,17 +1,18 @@
 import logging
 import os
-from pathlib import Path, PurePath
+from pathlib import Path
+from pathlib import PurePath
 from threading import Thread
 from time import sleep
 
 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django_q.tasks import async_task
-from watchdog.events import FileSystemEventHandler
-from watchdog.observers.polling import PollingObserver
-
 from documents.models import Tag
 from documents.parsers import is_file_ext_supported
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers.polling import PollingObserver
 
 try:
     from inotifyrecursive import INotify, flags

@@ -29,7 +30,7 @@ def _tags_from_path(filepath):
     path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
     for part in path_parts:
         tag_ids.add(
-            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
+            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
         )
 
     return tag_ids

@@ -56,7 +57,7 @@ def _consume(filepath):
     try:
         if settings.CONSUMER_SUBDIRS_AS_TAGS:
             tag_ids = _tags_from_path(filepath)
-    except Exception as e:
+    except Exception:
         logger.exception("Error creating tags from path")
 
     try:

@@ -67,7 +68,7 @@ def _consume(filepath):
             override_tag_ids=tag_ids if tag_ids else None,
             task_name=os.path.basename(filepath)[:100],
         )
-    except Exception as e:
+    except Exception:
         # Catch all so that the consumer won't crash.
         # This is also what the test case is listening for to check for
         # errors.

@@ -86,7 +87,7 @@ def _consume_wait_unmodified(file):
             new_mtime = os.stat(file).st_mtime
         except FileNotFoundError:
             logger.debug(
-                f"File {file} moved while waiting for it to remain " f"unmodified."
+                f"File {file} moved while waiting for it to remain " f"unmodified.",
             )
             return
         if new_mtime == mtime:
@@ -9,7 +9,8 @@ class Command(BaseCommand):
         Trains the classifier on your data and saves the resulting models to a
         file. The document consumer will then automatically use this new model.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def __init__(self, *args, **kwargs):
@@ -6,28 +6,28 @@ import time
 
 import tqdm
 from django.conf import settings
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group
+from django.contrib.auth.models import User
 from django.core import serializers
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django.db import transaction
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import SavedView
+from documents.models import SavedViewFilterRule
+from documents.models import Tag
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock
-
-from documents.models import (
-    Document,
-    Correspondent,
-    Tag,
-    DocumentType,
-    SavedView,
-    SavedViewFilterRule,
-)
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from paperless.db import GnuPG
-from paperless_mail.models import MailAccount, MailRule
-from ...file_handling import generate_filename, delete_empty_directories
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule
+
+from ...file_handling import delete_empty_directories
+from ...file_handling import generate_filename
 
 
 class Command(BaseCommand):

@@ -37,7 +37,8 @@ class Command(BaseCommand):
         directory. And include a manifest file containing document data for
         easy import.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -107,20 +108,20 @@ class Command(BaseCommand):
         # 1. Take a snapshot of what files exist in the current export folder
         for root, dirs, files in os.walk(self.target):
             self.files_in_export_dir.extend(
-                map(lambda f: os.path.abspath(os.path.join(root, f)), files)
+                map(lambda f: os.path.abspath(os.path.join(root, f)), files),
             )
 
         # 2. Create manifest, containing all correspondents, types, tags and
         # documents
         with transaction.atomic():
             manifest = json.loads(
-                serializers.serialize("json", Correspondent.objects.all())
+                serializers.serialize("json", Correspondent.objects.all()),
             )
 
             manifest += json.loads(serializers.serialize("json", Tag.objects.all()))
 
             manifest += json.loads(
-                serializers.serialize("json", DocumentType.objects.all())
+                serializers.serialize("json", DocumentType.objects.all()),
             )
 
             documents = Document.objects.order_by("id")

@@ -129,19 +130,19 @@ class Command(BaseCommand):
             manifest += document_manifest
 
             manifest += json.loads(
-                serializers.serialize("json", MailAccount.objects.all())
+                serializers.serialize("json", MailAccount.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", MailRule.objects.all())
+                serializers.serialize("json", MailRule.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", SavedView.objects.all())
+                serializers.serialize("json", SavedView.objects.all()),
             )
 
             manifest += json.loads(
-                serializers.serialize("json", SavedViewFilterRule.objects.all())
+                serializers.serialize("json", SavedViewFilterRule.objects.all()),
             )
 
             manifest += json.loads(serializers.serialize("json", Group.objects.all()))

@@ -155,9 +156,7 @@ class Command(BaseCommand):
             disable=progress_bar_disable,
         ):
             # 3.1. store files unencrypted
-            document_dict["fields"][
-                "storage_type"
-            ] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
 
             document = document_map[document_dict["pk"]]
 

@@ -166,7 +165,9 @@ class Command(BaseCommand):
             while True:
                 if self.use_filename_format:
                     base_name = generate_filename(
-                        document, counter=filename_counter, append_gpg=False
+                        document,
+                        counter=filename_counter,
+                        append_gpg=False,
                     )
                 else:
                     base_name = document.get_public_filename(counter=filename_counter)

@@ -217,14 +218,18 @@ class Command(BaseCommand):
                 os.utime(archive_target, times=(t, t))
             else:
                 self.check_and_copy(
-                    document.source_path, document.checksum, original_target
+                    document.source_path,
+                    document.checksum,
+                    original_target,
                 )
 
             self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
 
             if archive_target:
                 self.check_and_copy(
-                    document.archive_path, document.archive_checksum, archive_target
+                    document.archive_path,
+                    document.archive_checksum,
+                    archive_target,
                 )
 
         # 4. write manifest to target forlder

@@ -243,7 +248,8 @@ class Command(BaseCommand):
                 os.remove(f)
 
                 delete_empty_directories(
-                    os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
+                    os.path.abspath(os.path.dirname(f)),
+                    os.path.abspath(self.target),
                 )
 
     def check_and_copy(self, source, source_checksum, target):
@@ -7,16 +7,16 @@ from contextlib import contextmanager
 import tqdm
 from django.conf import settings
 from django.core.management import call_command
-from django.core.management.base import BaseCommand, CommandError
-from django.db.models.signals import post_save, m2m_changed
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+from django.db.models.signals import m2m_changed
+from django.db.models.signals import post_save
+from documents.models import Document
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock
 
-from documents.models import Document
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from ...file_handling import create_source_path_directory
 from ...signals.handlers import update_filename_and_move_files
 

@@ -36,7 +36,8 @@ class Command(BaseCommand):
         Using a manifest.json file, load the data from there, and import the
         documents it refers to.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -73,7 +74,9 @@ class Command(BaseCommand):
 
         self._check_manifest()
         with disable_signal(
-            post_save, receiver=update_filename_and_move_files, sender=Document
+            post_save,
+            receiver=update_filename_and_move_files,
+            sender=Document,
         ):
             with disable_signal(
                 m2m_changed,

@@ -92,7 +95,7 @@ class Command(BaseCommand):
     def _check_manifest_exists(path):
         if not os.path.exists(path):
             raise CommandError(
-                "That directory doesn't appear to contain a manifest.json " "file."
+                "That directory doesn't appear to contain a manifest.json " "file.",
             )
 
     def _check_manifest(self):

@@ -105,14 +108,14 @@ class Command(BaseCommand):
             if EXPORTER_FILE_NAME not in record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
-                    "refer to an actual document file."
+                    "refer to an actual document file.",
                 )
 
             doc_file = record[EXPORTER_FILE_NAME]
             if not os.path.exists(os.path.join(self.source, doc_file)):
                 raise CommandError(
                     'The manifest file refers to "{}" which does not '
-                    "appear to be in the source directory.".format(doc_file)
+                    "appear to be in the source directory.".format(doc_file),
                 )
 
             if EXPORTER_ARCHIVE_NAME in record:

@@ -120,7 +123,7 @@ class Command(BaseCommand):
                 if not os.path.exists(os.path.join(self.source, archive_file)):
                     raise CommandError(
                         f"The manifest file refers to {archive_file} which "
-                        f"does not appear to be in the source directory."
+                        f"does not appear to be in the source directory.",
                     )
 
     def _import_files_from_manifest(self, progress_bar_disable):

@@ -132,7 +135,7 @@ class Command(BaseCommand):
         print("Copy files into paperless...")
 
         manifest_documents = list(
-            filter(lambda r: r["model"] == "documents.document", self.manifest)
+            filter(lambda r: r["model"] == "documents.document", self.manifest),
         )
 
         for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):
@@ -1,7 +1,7 @@
 from django.core.management import BaseCommand
 from django.db import transaction
-
-from documents.tasks import index_reindex, index_optimize
+from documents.tasks import index_optimize
+from documents.tasks import index_reindex
 
 
 class Command(BaseCommand):
@@ -3,7 +3,6 @@ import logging
 import tqdm
 from django.core.management.base import BaseCommand
 from django.db.models.signals import post_save
-
 from documents.models import Document
 
 

@@ -12,7 +11,8 @@ class Command(BaseCommand):
     help = """
         This will rename all documents to match the latest filename format.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -28,6 +28,7 @@ class Command(BaseCommand):
         logging.getLogger().handlers[0].level = logging.ERROR
 
         for document in tqdm.tqdm(
-            Document.objects.all(), disable=options["no_progress_bar"]
+            Document.objects.all(),
+            disable=options["no_progress_bar"],
         ):
             post_save.send(Document, instance=document)
@@ -2,10 +2,12 @@ import logging
 
 import tqdm
 from django.core.management.base import BaseCommand
 
 from documents.classifier import load_classifier
 from documents.models import Document
-from ...signals.handlers import set_correspondent, set_document_type, set_tags
+from ...signals.handlers import set_correspondent
+from ...signals.handlers import set_document_type
+from ...signals.handlers import set_tags
 
 
 logger = logging.getLogger("paperless.management.retagger")

@@ -19,7 +21,8 @@ class Command(BaseCommand):
         back-tag all previously indexed documents with metadata created (or
         modified) after their initial import.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -57,7 +60,8 @@ class Command(BaseCommand):
             help="Return the suggestion, don't change anything.",
         )
         parser.add_argument(
-            "--base-url", help="The base URL to use to build the link to the documents."
+            "--base-url",
+            help="The base URL to use to build the link to the documents.",
         )
 
     def handle(self, *args, **options):
@@ -7,7 +7,8 @@ class Command(BaseCommand):
     help = """
         This command checks your document archive for issues.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):
@@ -5,8 +5,8 @@ import shutil
 import tqdm
 from django import db
 from django.core.management.base import BaseCommand
-
 from documents.models import Document
 
 from ...parsers import get_parser_class_for_mime_type
 
+

@@ -22,7 +22,9 @@ def _process_document(doc_in):
 
     try:
         thumb = parser.get_optimised_thumbnail(
-            document.source_path, document.mime_type, document.get_public_filename()
+            document.source_path,
+            document.mime_type,
+            document.get_public_filename(),
         )
 
         shutil.move(thumb, document.thumbnail_path)

@@ -35,7 +37,8 @@ class Command(BaseCommand):
     help = """
         This will regenerate the thumbnails for all documents.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def add_arguments(self, parser):

@@ -76,5 +79,5 @@ class Command(BaseCommand):
                         pool.imap_unordered(_process_document, ids),
                         total=len(ids),
                         disable=options["no_progress_bar"],
-                    )
+                    ),
                 )
@@ -2,7 +2,7 @@ import logging
 import os
 
 from django.contrib.auth.models import User
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
 
 
 logger = logging.getLogger("paperless.management.superuser")

@@ -13,7 +13,8 @@ class Command(BaseCommand):
     help = """
         Creates a Django superuser based on env variables.
     """.replace(
-        " ", ""
+        " ",
+        "",
     )
 
     def handle(self, *args, **options):

@@ -39,5 +40,5 @@ class Command(BaseCommand):
             self.stdout.write(f'Did not create superuser "{username}".')
             self.stdout.write(
                 'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
-                '"docker-compose.env" file.'
+                '"docker-compose.env" file.',
             )
@@ -1,8 +1,10 @@
 import logging
 import re
 
-
-from documents.models import MatchingModel, Correspondent, DocumentType, Tag
+from documents.models import Correspondent
+from documents.models import DocumentType
+from documents.models import MatchingModel
+from documents.models import Tag
 
 
 logger = logging.getLogger("paperless.matching")

@@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason):
     class_name = type(matching_model).__name__
     logger.debug(
         f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}"
+        f"{document} because {reason}",
     )
 
 

@@ -25,7 +27,7 @@ def match_correspondents(document, classifier):
     correspondents = Correspondent.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents),
     )
 
 

@@ -38,7 +40,7 @@ def match_document_types(document, classifier):
     document_types = DocumentType.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types),
     )
 
 

@@ -51,7 +53,7 @@ def match_tags(document, classifier):
     tags = Tag.objects.all()
 
     return list(
-        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags),
    )
 
 

@@ -92,7 +94,7 @@ def matches(matching_model, document):
                 rf"\b{re.escape(matching_model.match)}\b",
                 document_content,
                 **search_kwargs,
-            )
+            ),
         )
         if result:
             log_reason(

@@ -105,11 +107,12 @@ def matches(matching_model, document):
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
         try:
             match = re.search(
-                re.compile(matching_model.match, **search_kwargs), document_content
+                re.compile(matching_model.match, **search_kwargs),
+                document_content,
             )
         except re.error:
             logger.error(
-                f"Error while processing regular expression " f"{matching_model.match}"
+                f"Error while processing regular expression " f"{matching_model.match}",
            )
            return False
        if match:
@@ -5,17 +5,14 @@ import os
 import re
 from collections import OrderedDict
 
-import pathvalidate
-
 import dateutil.parser
+import pathvalidate
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
 from django.utils import timezone
 from django.utils.timezone import is_aware
-
 from django.utils.translation import gettext_lazy as _
-
 from documents.parsers import get_default_file_extension
 
 

@@ -42,7 +39,9 @@ class MatchingModel(models.Model):
     match = models.CharField(_("match"), max_length=256, blank=True)
 
     matching_algorithm = models.PositiveIntegerField(
-        _("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
+        _("matching algorithm"),
+        choices=MATCHING_ALGORITHMS,
+        default=MATCH_ANY,
     )
 
     is_insensitive = models.BooleanField(_("is insensitive"), default=True)

@@ -71,7 +70,7 @@ class Tag(MatchingModel):
         default=False,
         help_text=_(
             "Marks this tag as an inbox tag: All newly consumed "
-            "documents will be tagged with inbox tags."
+            "documents will be tagged with inbox tags.",
         ),
     )
 

@@ -120,14 +119,17 @@ class Document(models.Model):
         blank=True,
         help_text=_(
             "The raw, text-only data of the document. This field is "
-            "primarily used for searching."
+            "primarily used for searching.",
         ),
     )
 
     mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
 
     tags = models.ManyToManyField(
-        Tag, related_name="documents", blank=True, verbose_name=_("tags")
+        Tag,
+        related_name="documents",
+        blank=True,
+        verbose_name=_("tags"),
     )
 
     checksum = models.CharField(

@@ -150,7 +152,10 @@ class Document(models.Model):
     created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
 
     modified = models.DateTimeField(
-        _("modified"), auto_now=True, editable=False, db_index=True
+        _("modified"),
+        auto_now=True,
+        editable=False,
+        db_index=True,
     )
 
     storage_type = models.CharField(

@@ -162,7 +167,10 @@ class Document(models.Model):
     )
 
     added = models.DateTimeField(
-        _("added"), default=timezone.now, editable=False, db_index=True
+        _("added"),
+        default=timezone.now,
+        editable=False,
+        db_index=True,
     )
 
     filename = models.FilePathField(

@@ -192,7 +200,7 @@ class Document(models.Model):
         unique=True,
         db_index=True,
         help_text=_(
-            "The position of this document in your physical document " "archive."
+            "The position of this document in your physical document " "archive.",
         ),
     )
 

@@ -289,7 +297,9 @@ class Log(models.Model):
     message = models.TextField(_("message"))
 
     level = models.PositiveIntegerField(
-        _("level"), choices=LEVELS, default=logging.INFO
+        _("level"),
+        choices=LEVELS,
+        default=logging.INFO,
     )
 
     created = models.DateTimeField(_("created"), auto_now_add=True)

@@ -321,7 +331,10 @@ class SavedView(models.Model):
     )
 
     sort_field = models.CharField(
-        _("sort field"), max_length=128, null=True, blank=True
+        _("sort field"),
+        max_length=128,
+        null=True,
+        blank=True,
     )
     sort_reverse = models.BooleanField(_("sort reverse"), default=False)
 

@@ -383,11 +396,16 @@ class FileInfo:
             ),
         ),
         ("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
-    ]
+    ],
 )
 
     def __init__(
-        self, created=None, correspondent=None, title=None, tags=(), extension=None
+        self,
+        created=None,
+        correspondent=None,
+        title=None,
+        tags=(),
+        extension=None,
     ):
 
         self.created = created
@@ -9,6 +9,8 @@ import tempfile
 import magic
 from django.conf import settings
 from django.utils import timezone
+from documents.loggers import LoggingMixin
+from documents.signals import document_consumer_declaration
 
 # This regular expression will try to find dates in the document at
 # hand and will match the following formats:

@@ -21,17 +23,15 @@ from django.utils import timezone
 # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 # - MONTH ZZZZ, with ZZZZ being 4 digits
 # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
-from documents.loggers import LoggingMixin
-from documents.signals import document_consumer_declaration
 
 # TODO: isnt there a date parsing library for this?
 
 DATE_REGEX = re.compile(
-    r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # NOQA: E501
-    r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # NOQA: E501
-    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # NOQA: E501
+    r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # noqa: E501
+    r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # noqa: E501
+    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # noqa: E501
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))"
+    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))",
 )
 
 
@@ -3,9 +3,8 @@ import logging
 import os
 
 from django.conf import settings
-from tqdm import tqdm
-
 from documents.models import Document
+from tqdm import tqdm
 
 
 class SanityCheckMessages:

@@ -88,19 +87,19 @@ def check_sanity(progress=False):
         if not checksum == doc.checksum:
             messages.error(
                 f"Checksum mismatch of document {doc.pk}. "
-                f"Stored: {doc.checksum}, actual: {checksum}."
+                f"Stored: {doc.checksum}, actual: {checksum}.",
             )
 
         # Check sanity of the archive file.
         if doc.archive_checksum and not doc.archive_filename:
             messages.error(
                 f"Document {doc.pk} has an archive file checksum, but no "
-                f"archive filename."
+                f"archive filename.",
             )
         elif not doc.archive_checksum and doc.archive_filename:
             messages.error(
                 f"Document {doc.pk} has an archive file, but its checksum is "
-                f"missing."
+                f"missing.",
             )
         elif doc.has_archive_version:
             if not os.path.isfile(doc.archive_path):

@@ -113,7 +112,7 @@ def check_sanity(progress=False):
                 checksum = hashlib.md5(f.read()).hexdigest()
             except OSError as e:
                 messages.error(
-                    f"Cannot read archive file of document {doc.pk}: {e}"
+                    f"Cannot read archive file of document {doc.pk}: {e}",
                 )
             else:
                 if not checksum == doc.archive_checksum:

@@ -121,7 +120,7 @@ def check_sanity(progress=False):
                         f"Checksum mismatch of archived document "
                         f"{doc.pk}. "
                         f"Stored: {doc.archive_checksum}, "
-                        f"actual: {checksum}."
+                        f"actual: {checksum}.",
                     )
 
         # other document checks
@ -1,25 +1,22 @@
import math
import re

import magic
import math
from django.utils.text import slugify
from django.utils.translation import gettext as _
from rest_framework import serializers
from rest_framework.fields import SerializerMethodField

from . import bulk_edit
from .models import (
Correspondent,
Tag,
Document,
DocumentType,
SavedView,
SavedViewFilterRule,
MatchingModel,
)
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import MatchingModel
from .models import SavedView
from .models import SavedViewFilterRule
from .models import Tag
from .parsers import is_mime_type_supported

from django.utils.translation import gettext as _


# https://www.django-rest-framework.org/api-guide/serializers/#example
class DynamicFieldsModelSerializer(serializers.ModelSerializer):
@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer):
if (
"matching_algorithm" in self.initial_data
and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
): # NOQA: E501
):
try:
re.compile(match)
except Exception as e:
except re.error as e:
raise serializers.ValidationError(
_("Invalid regular expression: %(error)s") % {"error": str(e)}
_("Invalid regular expression: %(error)s") % {"error": str(e.msg)},
)
return match

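Narrowing the handler from a bare Exception to re.error is safe because re.compile() reports an invalid pattern only through re.error, whose .msg attribute carries the human-readable reason. A standalone sketch of the same check, with ValueError standing in for the DRF ValidationError:

import re

def validate_match_regex(pattern: str) -> str:
    # re.error is the only exception re.compile() raises for a bad
    # pattern, so catching it avoids masking unrelated bugs.
    try:
        re.compile(pattern)
    except re.error as e:
        raise ValueError(f"Invalid regular expression: {e.msg}")
    return pattern

print(validate_match_regex("[0-9]"))  # valid, returned unchanged
# validate_match_regex("[0-9") would raise (unterminated character set)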
@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer):
luminance = math.sqrt(
0.299 * math.pow(rgb[0], 2)
+ 0.587 * math.pow(rgb[1], 2)
+ 0.114 * math.pow(rgb[2], 2)
+ 0.114 * math.pow(rgb[2], 2),
)
return "#ffffff" if luminance < 0.53 else "#000000"
except ValueError:

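The expression being reformatted here is a perceived-luminance heuristic: weight the squared RGB channels by how bright the eye perceives each primary, then pick white or black text for contrast. A runnable sketch with the channels normalised to [0, 1] (the sample colours are made up):

import math

def text_color(rgb):
    # Perceived luminance; the 0.299/0.587/0.114 weights mirror the
    # human eye's sensitivity to red, green and blue.
    luminance = math.sqrt(
        0.299 * math.pow(rgb[0], 2)
        + 0.587 * math.pow(rgb[1], 2)
        + 0.114 * math.pow(rgb[2], 2),
    )
    return "#ffffff" if luminance < 0.53 else "#000000"

print(text_color((0.10, 0.10, 0.30)))  # dark tag colour -> white text
print(text_color((0.90, 0.90, 0.40)))  # light tag colour -> black text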
@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer):
count = Document.objects.filter(id__in=documents).count()
if not count == len(documents):
raise serializers.ValidationError(
f"Some documents in {name} don't exist or were " f"specified twice."
f"Some documents in {name} don't exist or were " f"specified twice.",
)

def validate_documents(self, documents):
@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer):
count = Tag.objects.filter(id__in=tags).count()
if not count == len(tags):
raise serializers.ValidationError(
f"Some tags in {name} don't exist or were specified twice."
f"Some tags in {name} don't exist or were specified twice.",
)

def validate_method(self, method):
@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer):

if not is_mime_type_supported(mime_type):
raise serializers.ValidationError(
_("File type %(type)s not supported") % {"type": mime_type}
_("File type %(type)s not supported") % {"type": mime_type},
)

return document.name, document_data
@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer):
class BulkDownloadSerializer(DocumentListSerializer):

content = serializers.ChoiceField(
choices=["archive", "originals", "both"], default="archive"
choices=["archive", "originals", "both"],
default="archive",
)

compression = serializers.ChoiceField(
choices=["none", "deflated", "bzip2", "lzma"], default="none"
choices=["none", "deflated", "bzip2", "lzma"],
default="none",
)

def validate_compression(self, compression):

@ -1,24 +1,26 @@
import logging
import os

from django.utils import termcolors
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db import DatabaseError
from django.db import models
from django.db.models import Q
from django.dispatch import receiver
from django.utils import termcolors, timezone
from django.utils import termcolors
from django.utils import timezone
from filelock import FileLock

from .. import matching
from ..file_handling import (
delete_empty_directories,
create_source_path_directory,
generate_unique_filename,
)
from ..models import Document, Tag, MatchingModel
from ..file_handling import create_source_path_directory
from ..file_handling import delete_empty_directories
from ..file_handling import generate_unique_filename
from ..models import Document
from ..models import MatchingModel
from ..models import Tag


logger = logging.getLogger("paperless.handlers")
@ -72,7 +74,7 @@ def set_correspondent(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -82,7 +84,7 @@ def set_correspondent(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
print(f"Suggest correspondent {selected}")
else:
@ -139,7 +141,7 @@ def set_document_type(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -149,7 +151,7 @@ def set_document_type(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
print(f"Suggest document type {selected}")
else:
@ -176,9 +178,9 @@ def set_tags(

if replace:
Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True)
Q(tag__is_inbox_tag=True),
).exclude(
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO),
).delete()

current_tags = set(document.tags.all())
@ -198,7 +200,7 @@ def set_tags(
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
@ -208,7 +210,7 @@ def set_tags(
if color
else str(document)
)
+ f" [{document.pk}]"
+ f" [{document.pk}]",
)
if relevant_tags:
print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e:
logger.error(
f"Failed to move {instance.source_path} to trash at "
f"{new_file_path}: {e}. Skipping cleanup!"
f"{new_file_path}: {e}. Skipping cleanup!",
)
return

@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
except OSError as e:
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{filename} could not be deleted: {e}"
f"{filename} could not be deleted: {e}",
)

delete_empty_directories(
os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR,
)

if instance.has_archive_version:
delete_empty_directories(
os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR,
)


@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path):
# Can't do anything if the new file already exists. Skip updating file.
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists."
f"since target path {new_path} already exists.",
)
raise CannotMoveFilesException()

@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if instance.has_archive_version:

instance.archive_filename = generate_unique_filename(
instance, archive_filename=True
instance,
archive_filename=True,
)

move_archive = (
old_archive_filename != instance.archive_filename
) # NOQA: E501
move_archive = old_archive_filename != instance.archive_filename
else:
move_archive = False

@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path)

except Exception as e:
except Exception:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious
@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# something has failed above.
if not os.path.isfile(old_source_path):
delete_empty_directories(
os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR
os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR,
)

if instance.has_archive_version and not os.path.isfile(
old_archive_path
): # NOQA: E501
old_archive_path,
):
delete_empty_directories(
os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR
os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR,
)

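The try/except change above sits inside a rollback: the handler renames the source and archive files, and if the second rename fails it undoes the first, so the database and the filesystem cannot drift apart. A standalone sketch of that pattern (paths and the function name are illustrative):

import os

def move_pair(src_a, dst_a, src_b, dst_b):
    # Move two files as a unit: if the second rename fails, undo the
    # first one and re-raise, leaving everything as it was.
    os.rename(src_a, dst_a)
    try:
        os.rename(src_b, dst_b)
    except OSError:
        os.rename(dst_a, src_a)  # roll back the first move
        raise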
@ -3,13 +3,18 @@ import logging
import tqdm
from django.conf import settings
from django.db.models.signals import post_save
from whoosh.writing import AsyncWriter

from documents import index, sanity_checker
from documents.classifier import DocumentClassifier, load_classifier
from documents.consumer import Consumer, ConsumerError
from documents.models import Document, Tag, DocumentType, Correspondent
from documents import index
from documents import sanity_checker
from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
from documents.consumer import ConsumerError
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from whoosh.writing import AsyncWriter

logger = logging.getLogger("paperless.tasks")

@ -47,7 +52,7 @@ def train_classifier():
try:
if classifier.train():
logger.info(
"Saving updated classifier model to {}...".format(settings.MODEL_FILE)
"Saving updated classifier model to {}...".format(settings.MODEL_FILE),
)
classifier.save()
else:
@ -82,7 +87,7 @@ def consume_file(
else:
raise ConsumerError(
"Unknown error: Returned document was null, but "
"no error message was given."
"no error message was given.",
)

@ -1,7 +1,8 @@
from factory import Faker
from factory.django import DjangoModelFactory

from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document


class CorrespondentFactory(DjangoModelFactory):

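For context, a hedged sketch of what a factory like the one above typically looks like in factory_boy; the field and model reference are assumptions, and running it requires a configured Django project with the documents app installed:

from factory import Faker
from factory.django import DjangoModelFactory

class CorrespondentFactorySketch(DjangoModelFactory):
    class Meta:
        # A string reference avoids importing the model at module load time.
        model = "documents.Correspondent"

    name = Faker("name")  # illustrative field, not taken from the diff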
@ -3,7 +3,6 @@ from unittest import mock
from django.contrib.admin.sites import AdminSite
from django.test import TestCase
from django.utils import timezone

from documents import index
from documents.admin import DocumentAdmin
from documents.models import Document
@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
docs = []
for i in range(42):
doc = Document.objects.create(
title="Many documents with the same title", checksum=f"{i:02}"
title="Many documents with the same title",
checksum=f"{i:02}",
)
docs.append(doc)
index.add_or_update_document(doc)
@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):

def test_created(self):
doc = Document.objects.create(
title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12))
title="test",
created=timezone.make_aware(timezone.datetime(2020, 4, 12)),
)
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")

@ -10,22 +10,20 @@ from unittest import mock
import pytest
from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from django.test import override_settings
from django.utils import timezone
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import SavedView
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from rest_framework.test import APITestCase
from whoosh.writing import AsyncWriter

from documents import index, bulk_edit
from documents.models import (
Document,
Correspondent,
DocumentType,
Tag,
SavedView,
MatchingModel,
)
from documents.tests.utils import DirectoriesMixin


class TestDocumentApi(DirectoriesMixin, APITestCase):
def setUp(self):
@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
returned_doc["title"] = "the new title"

response = self.client.put(
"/api/documents/{}/".format(doc.pk), returned_doc, format="json"
"/api/documents/{}/".format(doc.pk),
returned_doc,
format="json",
)

self.assertEqual(response.status_code, 200)
@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(results[0]), 2)

response = self.client.get(
"/api/documents/?fields=id,conteasdnt", format="json"
"/api/documents/?fields=id,conteasdnt",
format="json",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)

with open(
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb"
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)),
"wb",
) as f:
f.write(content_thumbnail)

@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive)

response = self.client.get(
"/api/documents/{}/download/?original=true".format(doc.pk)
"/api/documents/{}/download/?original=true".format(doc.pk),
)

self.assertEqual(response.status_code, 200)
@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, content_archive)

response = self.client.get(
"/api/documents/{}/preview/?original=true".format(doc.pk)
"/api/documents/{}/preview/?original=true".format(doc.pk),
)

self.assertEqual(response.status_code, 200)
@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_actions_not_existing_file(self):

doc = Document.objects.create(
title="none", filename=os.path.basename("asd"), mime_type="application/pdf"
title="none",
filename=os.path.basename("asd"),
mime_type="application/pdf",
)

response = self.client.get("/api/documents/{}/download/".format(doc.pk))
@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_document_filters(self):

doc1 = Document.objects.create(
title="none1", checksum="A", mime_type="application/pdf"
title="none1",
checksum="A",
mime_type="application/pdf",
)
doc2 = Document.objects.create(
title="none2", checksum="B", mime_type="application/pdf"
title="none2",
checksum="B",
mime_type="application/pdf",
)
doc3 = Document.objects.create(
title="none3", checksum="C", mime_type="application/pdf"
title="none3",
checksum="C",
mime_type="application/pdf",
)

tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])

response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id])

response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id)
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])

response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id)
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc3.id)

response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertEqual(len(results), 0)

response = self.client.get(
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id)
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id])

response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id)
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc1.id)

response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id)
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id),
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

for i in range(1, 6):
response = self.client.get(
f"/api/documents/?query=content&page={i}&page_size=10"
f"/api/documents/?query=content&page={i}&page_size=10",
)
results = response.data["results"]
self.assertEqual(response.data["count"], 55)

@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
self.assertCountEqual(
search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id]
search_query("&correspondent__isnull"),
[d2.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id]
search_query("&document_type__isnull"),
[d1.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id]
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
[d3.id],
)
self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id])
self.assertCountEqual(
search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id]
search_query("&tags__id__all=" + str(t2.id)),
[d3.id, d4.id],
)

self.assertIn(
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
self.assertNotIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)

@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
self.assertIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)

self.assertIn(
d5.id,
search_query(
"&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
"&added__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)
self.assertNotIn(
d5.id,
search_query(
"&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
"&added__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
),
)

self.assertNotIn(
d5.id,
search_query(
"&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
"&added__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)
self.assertIn(
d5.id,
search_query(
"&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
"&added__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
),
)

@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
return [hit["id"] for hit in r.data["results"]]

self.assertListEqual(
search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id]
search_query("&ordering=archive_serial_number"),
[d3.id, d1.id, d2.id],
)
self.assertListEqual(
search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id]
search_query("&ordering=-archive_serial_number"),
[d2.id, d1.id, d3.id],
)
self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id])
self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id])
self.assertListEqual(
search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id]
search_query("&ordering=correspondent__name"),
[d1.id, d3.id, d2.id],
)
self.assertListEqual(
search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id]
search_query("&ordering=-correspondent__name"),
[d2.id, d3.id, d1.id],
)

def test_statistics(self):
@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload(self, m):

with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f}
"/api/documents/post_document/",
{"document": f},
)

self.assertEqual(response.status_code, 200)
@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_empty_metadata(self, m):

with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_form(self, m):

with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"documenst": f}
"/api/documents/post_document/",
{"documenst": f},
)
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_invalid_file(self, m):

with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f}
"/api/documents/post_document/",
{"document": f},
)
self.assertEqual(response.status_code, 400)
m.assert_not_called()
@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_title(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_correspondent(self, async_task):
c = Correspondent.objects.create(name="test-corres")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": c.id}
"/api/documents/post_document/",
{"document": f, "correspondent": c.id},
)
self.assertEqual(response.status_code, 200)

@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_invalid_correspondent(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "correspondent": 3456}
"/api/documents/post_document/",
{"document": f, "correspondent": 3456},
)
self.assertEqual(response.status_code, 400)

@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_upload_with_document_type(self, async_task):
dt = DocumentType.objects.create(name="invoice")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": dt.id}
"/api/documents/post_document/",
{"document": f, "document_type": dt.id},
)
self.assertEqual(response.status_code, 200)

@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.async_task")
def test_upload_with_invalid_document_type(self, async_task):
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "document_type": 34578}
"/api/documents/post_document/",
{"document": f, "document_type": 34578},
)
self.assertEqual(response.status_code, 400)

@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]}
"/api/documents/post_document/",
{"document": f, "tags": [t2.id, t1.id]},
)
self.assertEqual(response.status_code, 200)

@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",

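The upload tests above all drive the same consumption endpoint. A hedged sketch of the equivalent external call with the requests library; the URL, credentials and file name are placeholders, and it requires a running, authenticated paperless instance:

import requests

# POST a file to the consumption endpoint, optionally with metadata such
# as a title (the tests above also pass correspondent, document_type, tags).
with open("simple.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/documents/post_document/",
        files={"document": f},
        data={"title": "my title"},
        auth=("user", "password"),
    )
print(resp.status_code)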
@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

def test_get_metadata_no_archive(self):
doc = Document.objects.create(
title="test", filename="file.pdf", mime_type="application/pdf"
title="test",
filename="file.pdf",
mime_type="application/pdf",
)

shutil.copy(
@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

self.assertEqual(response.status_code, 200)
self.assertEqual(
response.data, {"correspondents": [], "tags": [], "document_types": []}
response.data,
{"correspondents": [], "tags": [], "document_types": []},
)

def test_get_suggestions_invalid_doc(self):
@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_document_types")
def test_get_suggestions(
self, match_document_types, match_tags, match_correspondents
self,
match_document_types,
match_tags,
match_correspondents,
):
doc = Document.objects.create(
title="test", mime_type="application/pdf", content="this is an invoice!"
title="test",
mime_type="application/pdf",
content="this is an invoice!",
)
match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)]
@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.user, self.user)

response = self.client.patch(
f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json"
f"/api/saved_views/{v1.id}/",
{"show_in_sidebar": False},
format="json",
)

v1 = SavedView.objects.get(id=v1.id)

@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
def test_regex_no_algorithm(self):
for endpoint in ["correspondents", "tags", "document_types"]:
response = self.client.post(
f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json"
f"/api/{endpoint}/",
{"name": "test", "match": "[0-9]"},
format="json",
)
self.assertEqual(response.status_code, 201, endpoint)

@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

def test_tag_color(self):
response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 3}, format="json"
"/api/tags/",
{"name": "tag", "colour": 3},
format="json",
)
self.assertEqual(response.status_code, 201)
self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a")
@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

def test_tag_color_invalid(self):
response = self.client.post(
"/api/tags/", {"name": "tag", "colour": 34}, format="json"
"/api/tags/",
{"name": "tag", "colour": 34},
format="json",
)
self.assertEqual(response.status_code, 400)

def test_tag_color_custom(self):
tag = Tag.objects.create(name="test", color="#abcdef")
self.assertEqual(
self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1
self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"],
1,
)

@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def test_tag_validate_color(self):
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json"
"/api/tags/",
{"name": "test", "color": "#12fFaA"},
format="json",
).status_code,
201,
)

self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test1", "color": "abcdef"}, format="json"
"/api/tags/",
{"name": "test1", "color": "abcdef"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json"
"/api/tags/",
{"name": "test2", "color": "#abcdfg"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test3", "color": "#asd"}, format="json"
"/api/tags/",
{"name": "test3", "color": "#asd"},
format="json",
).status_code,
400,
)
self.assertEqual(
self.client.post(
"/api/tags/", {"name": "test4", "color": "#12121212"}, format="json"
"/api/tags/",
{"name": "test4", "color": "#12121212"},
format="json",
).status_code,
400,
)

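The accepted and rejected colour strings above are consistent with a "#"-prefixed, exactly-six-hex-digit format. A small sketch of that rule; the regex is inferred from the test cases, not taken from the serializer:

import re

HEX_COLOR = re.compile(r"^#[0-9a-fA-F]{6}$")

for c in ["#12fFaA", "abcdef", "#abcdfg", "#asd", "#12121212"]:
    print(c, "accepted" if HEX_COLOR.match(c) else "rejected")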
@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.t2 = Tag.objects.create(name="t2")
self.doc1 = Document.objects.create(checksum="A", title="A")
self.doc2 = Document.objects.create(
checksum="B", title="B", correspondent=self.c1, document_type=self.dt1
checksum="B",
title="B",
correspondent=self.c1,
document_type=self.dt1,
)
self.doc3 = Document.objects.create(
checksum="C", title="C", correspondent=self.c2, document_type=self.dt2
checksum="C",
title="C",
correspondent=self.c2,
document_type=self.dt2,
)
self.doc4 = Document.objects.create(checksum="D", title="D")
self.doc5 = Document.objects.create(checksum="E", title="E")
@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_correspondent(self):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
bulk_edit.set_correspondent(
[self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id
[self.doc1.id, self.doc2.id, self.doc3.id],
self.c2.id,
)
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3)
self.async_task.assert_called_once()
@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_set_document_type(self):
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
bulk_edit.set_document_type(
[self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id
[self.doc1.id, self.doc2.id, self.doc3.id],
self.dt2.id,
)
self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3)
self.async_task.assert_called_once()
@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
def test_add_tag(self):
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
bulk_edit.add_tag(
[self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id
[self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id],
self.t1.id,
)
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4)
self.async_task.assert_called_once()

@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_correspondent",
"parameters": {"correspondent": self.c1.id},
}
},
),
content_type="application/json",
)
@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_correspondent",
"parameters": {"correspondent": None},
}
},
),
content_type="application/json",
)
@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_document_type",
"parameters": {"document_type": self.dt1.id},
}
},
),
content_type="application/json",
)
@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "set_document_type",
"parameters": {"document_type": None},
}
},
),
content_type="application/json",
)
@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "add_tag",
"parameters": {"tag": self.t1.id},
}
},
),
content_type="application/json",
)
@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc1.id],
"method": "remove_tag",
"parameters": {"tag": self.t1.id},
}
},
),
content_type="application/json",
)
@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t1.id],
"remove_tags": [self.t2.id],
},
}
},
),
content_type="application/json",
)
@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc1.id], "method": "delete", "parameters": {}}
{"documents": [self.doc1.id], "method": "delete", "parameters": {}},
),
content_type="application/json",
)
@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "exterminate", "parameters": {}}
{
"documents": [self.doc2.id],
"method": "exterminate",
"parameters": {},
},
),
content_type="application/json",
)
@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_correspondent",
"parameters": {"correspondent": 345657},
}
},
),
content_type="application/json",
)
@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_correspondent",
"parameters": {},
}
},
),
content_type="application/json",
)
@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_document_type",
"parameters": {"document_type": 345657},
}
},
),
content_type="application/json",
)
@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "set_document_type",
"parameters": {},
}
},
),
content_type="application/json",
)
@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "add_tag",
"parameters": {"tag": 345657},
}
},
),
content_type="application/json",
)
@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "add_tag", "parameters": {}}
{"documents": [self.doc2.id], "method": "add_tag", "parameters": {}},
),
content_type="application/json",
)
@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "remove_tag",
"parameters": {"tag": 345657},
}
},
),
content_type="application/json",
)
@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}}
{"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}},
),
content_type="application/json",
)
@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"add_tags": [self.t2.id, 1657],
"remove_tags": [1123123],
},
}
},
),
content_type="application/json",
)
@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {"remove_tags": [1123123]},
}
},
),
content_type="application/json",
)
@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {"add_tags": [self.t2.id, 1657]},
}
},
),
content_type="application/json",
)
@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/selection_data/",
json.dumps(
{"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]}
{"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]},
),
content_type="application/json",
)

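Every bulk_edit request above shares one JSON shape: a list of document ids, a method name, and a parameters object. A hedged sketch of an equivalent external call with the requests library; the base URL, ids and credentials are placeholders:

import requests

payload = {
    "documents": [1, 2, 3],
    "method": "modify_tags",
    "parameters": {"add_tags": [4], "remove_tags": [5]},
}
resp = requests.post(
    "http://localhost:8000/api/documents/bulk_edit/",
    json=payload,  # serialised like the json.dumps(...) calls in the tests
    auth=("user", "password"),
)
print(resp.status_code)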
@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_download/",
json.dumps(
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"}
{"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
),
content_type="application/json",
)
@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):

with self.doc2.source_file as f:
self.assertEqual(
f.read(), zipf.read("originals/2021-01-01 document A.pdf")
f.read(),
zipf.read("originals/2021-01-01 document A.pdf"),
)

with self.doc3.archive_file as f:
self.assertEqual(
f.read(), zipf.read("archive/2020-03-21 document B.pdf")
f.read(),
zipf.read("archive/2020-03-21 document B.pdf"),
)

with self.doc3.source_file as f:
self.assertEqual(
f.read(), zipf.read("originals/2020-03-21 document B.jpg")
f.read(),
zipf.read("originals/2020-03-21 document B.jpg"),
)

def test_filename_clashes(self):
@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
response = self.client.post(
"/api/documents/bulk_download/",
json.dumps(
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}
{"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
),
content_type="application/json",
)
@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase):

self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/download/").status_code, 401
self.client.get(f"/api/documents/{d.id}/download/").status_code,
401,
)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401
self.client.get(f"/api/documents/{d.id}/preview/").status_code,
401,
)
self.assertEqual(
self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401
self.client.get(f"/api/documents/{d.id}/thumb/").status_code,
401,
)

self.assertEqual(self.client.get("/api/tags/").status_code, 401)
@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase):
self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401)
self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401)
self.assertEqual(
self.client.get("/api/documents/bulk_download/").status_code, 401
self.client.get("/api/documents/bulk_download/").status_code,
401,
)
self.assertEqual(
self.client.get("/api/documents/selection_data/").status_code, 401
self.client.get("/api/documents/selection_data/").status_code,
401,
)

def test_api_version_no_auth(self):

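The TestBulkDownload assertions earlier in this file read the HTTP response body back as a zip archive and compare each entry to the corresponding file on disk. A self-contained sketch of that verification pattern (the entry name and bytes are made up):

import io
import zipfile

# Build a zip in memory, then read it back entry by entry, as the
# bulk-download tests do with the response body.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zipf:
    zipf.writestr("originals/2021-01-01 document A.pdf", b"fake pdf bytes")

with zipfile.ZipFile(buf) as zipf:
    print(zipf.namelist())
    print(zipf.read("originals/2021-01-01 document A.pdf"))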
@ -4,10 +4,11 @@ from unittest import mock
from django.core.checks import Error
from django.test import TestCase

from .factories import DocumentFactory
from .. import document_consumer_declaration
from ..checks import changed_password_check, parser_check
from ..checks import changed_password_check
from ..checks import parser_check
from ..models import Document
from ..signals import document_consumer_declaration
from .factories import DocumentFactory


class ChecksTestCase(TestCase):
@ -30,7 +31,7 @@ class ChecksTestCase(TestCase):
[
Error(
"No parsers found. This is a bug. The consumer won't be "
"able to consume any documents without parsers."
)
"able to consume any documents without parsers.",
),
],
)

@ -5,14 +5,15 @@ from unittest import mock

import pytest
from django.conf import settings
from django.test import TestCase, override_settings

from documents.classifier import (
DocumentClassifier,
IncompatibleClassifierVersionError,
load_classifier,
)
from documents.models import Correspondent, Document, Tag, DocumentType
from django.test import override_settings
from django.test import TestCase
from documents.classifier import DocumentClassifier
from documents.classifier import IncompatibleClassifierVersionError
from documents.classifier import load_classifier
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin

@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase):

def generate_test_data(self):
self.c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
self.c2 = Correspondent.objects.create(name="c2")
self.c3 = Correspondent.objects.create(
name="c3", matching_algorithm=Correspondent.MATCH_AUTO
name="c3",
matching_algorithm=Correspondent.MATCH_AUTO,
)
self.t1 = Tag.objects.create(
name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
name="t1",
matching_algorithm=Tag.MATCH_AUTO,
pk=12,
)
self.t2 = Tag.objects.create(
name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
name="t2",
matching_algorithm=Tag.MATCH_ANY,
pk=34,
is_inbox_tag=True,
)
self.t3 = Tag.objects.create(
name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
name="t3",
matching_algorithm=Tag.MATCH_AUTO,
pk=45,
)
self.dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.dt2 = DocumentType.objects.create(
name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
name="dt2",
matching_algorithm=DocumentType.MATCH_AUTO,
)

self.doc1 = Document.objects.create(
@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="B",
)
self.doc_inbox = Document.objects.create(
title="doc235", content="aa", checksum="C"
title="doc235",
content="aa",
checksum="C",
)

self.doc1.tags.add(self.t1)
@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.generate_test_data()
self.classifier.train()
self.assertListEqual(
list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
list(self.classifier.correspondent_classifier.classes_),
[-1, self.c1.pk],
)
self.assertListEqual(
list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
list(self.classifier.tags_binarizer.classes_),
[self.t1.pk, self.t3.pk],
)

def testPredict(self):
self.generate_test_data()
self.classifier.train()
self.assertEqual(
self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
self.classifier.predict_correspondent(self.doc1.content),
self.c1.pk,
)
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
self.assertListEqual(
self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
self.classifier.predict_tags(self.doc1.content),
[self.t1.pk],
)
self.assertListEqual(
self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
self.classifier.predict_tags(self.doc2.content),
[self.t1.pk, self.t3.pk],
)
self.assertEqual(
self.classifier.predict_document_type(self.doc1.content), self.dt.pk
self.classifier.predict_document_type(self.doc1.content),
self.dt.pk,
)
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)

@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase):

current_ver = DocumentClassifier.FORMAT_VERSION
with mock.patch(
"documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
"documents.classifier.DocumentClassifier.FORMAT_VERSION",
current_ver + 1,
):
# assure that we won't load old classifiers.
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertFalse(new_classifier.train())

@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
def test_load_and_classify(self):
self.generate_test_data()
@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase):

def test_one_correspondent_predict(self):
c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase):

def test_one_correspondent_predict_manydocs(self):
c1 = Correspondent.objects.create(
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
name="c1",
matching_algorithm=Correspondent.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from noone", checksum="B"
title="doc2",
content="this is a document from noone",
checksum="B",
)

self.classifier.train()
@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase):

def test_one_type_predict(self):
dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)

doc1 = Document.objects.create(
@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase):

def test_one_type_predict_manydocs(self):
dt = DocumentType.objects.create(
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
name="dt",
matching_algorithm=DocumentType.MATCH_AUTO,
)

doc1 = Document.objects.create(
@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
)

doc2 = Document.objects.create(
title="doc1", content="this is a document from c2", checksum="B"
title="doc1",
content="this is a document from c2",
checksum="B",
)

self.classifier.train()
@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)

doc1.tags.add(t1)
@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)

self.classifier.train()
@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)

doc4 = Document.objects.create(
title="doc1", content="this is a document from c4", checksum="D"
title="doc1",
content="this is a document from c4",
checksum="D",
)

doc4.tags.add(t1)
@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)

doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc1", content="this is a document from c2", checksum="B"
title="doc1",
content="this is a document from c2",
checksum="B",
)
doc3 = Document.objects.create(
title="doc1", content="this is a document from c3", checksum="C"
title="doc1",
content="this is a document from c3",
checksum="C",
)
doc4 = Document.objects.create(
title="doc1", content="this is a document from c4", checksum="D"
title="doc1",
content="this is a document from c4",
checksum="D",
)

doc1.tags.add(t1)
@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from c2", checksum="B"
title="doc2",
content="this is a document from c2",
checksum="B",
)

doc1.tags.add(t1)
@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

doc1 = Document.objects.create(
title="doc1", content="this is a document from c1", checksum="A"
title="doc1",
content="this is a document from c1",
checksum="A",
)
doc2 = Document.objects.create(
title="doc2", content="this is a document from c2", checksum="B"
title="doc2",
content="this is a document from c2",
checksum="B",
)

doc1.tags.add(t1)
@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
load.assert_called_once()

@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
CACHES={
"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
},
)
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
@pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate."
reason="Disabled caching due to high memory usage - need to investigate.",
)
def test_load_classifier_cached(self):
classifier = load_classifier()

@ -6,13 +6,20 @@ from unittest import mock
from unittest.mock import MagicMock

from django.conf import settings
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase

from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
from ..consumer import Consumer
from ..consumer import ConsumerError
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import FileInfo
from ..models import Tag
from ..parsers import DocumentParser
from ..parsers import ParseError
from ..tasks import sanity_check
from .utils import DirectoriesMixin


class TestAttributes(TestCase):
@ -33,12 +40,18 @@ class TestAttributes(TestCase):

def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
"- weird but should not break.pdf", None, "- weird but should not break", ()
"- weird but should not break.pdf",
None,
"- weird but should not break",
(),
)

def test_guess_attributes_from_name_when_title_ends_with_dash(self):
self._test_guess_attributes_from_name(
"weird but should not break -.pdf", None, "weird but should not break -", ()
"weird but should not break -.pdf",
None,
"weird but should not break -",
(),
)


@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase):
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]

def _test_guessed_attributes(
self, filename, created=None, correspondent=None, title=None, tags=None
self,
filename,
created=None,
correspondent=None,
title=None,
tags=None,
):

info = FileInfo.from_filename(filename)
@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase):
FILENAME_PARSE_TRANSFORMS=[
(all_patt, "all.gif"),
(all_patt, "anotherall.gif"),
]
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase):
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(all_patt, "anotherall.gif"),
]
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "anotherall")
@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase):

def make_dummy_parser(self, logging_group, progress_callback=None):
return DummyParser(
logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
logging_group,
self.dirs.scratch_dir,
self.get_test_archive_file(),
)

def make_faulty_parser(self, logging_group, progress_callback=None):
@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
),
]
self.addCleanup(patcher.stop)

@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase):

def get_test_archive_file(self):
src = os.path.join(
os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
os.path.dirname(__file__),
"samples",
"documents",
"archive",
"0000001.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
shutil.copy(src, dst)
@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase):

self.assertEqual(document.content, "The Text")
self.assertEqual(
document.title, os.path.splitext(os.path.basename(filename))[0]
document.title,
os.path.splitext(os.path.basename(filename))[0],
)
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
override_filename = "Statement for November.pdf"

document = self.consumer.try_consume_file(
filename, override_filename=override_filename
filename,
override_filename=override_filename,
)

self.assertEqual(document.title, "Statement for November")
@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase):

def testOverrideTitle(self):
document = self.consumer.try_consume_file(
self.get_test_file(), override_title="Override Title"
self.get_test_file(),
override_title="Override Title",
)
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()
@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
c = Correspondent.objects.create(name="test")

document = self.consumer.try_consume_file(
self.get_test_file(), override_correspondent_id=c.pk
self.get_test_file(),
override_correspondent_id=c.pk,
)
self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress()
@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
dt = DocumentType.objects.create(name="test")

document = self.consumer.try_consume_file(
self.get_test_file(), override_document_type_id=dt.pk
self.get_test_file(),
override_document_type_id=dt.pk,
)
self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress()
@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
t2 = Tag.objects.create(name="t2")
t3 = Tag.objects.create(name="t3")
document = self.consumer.try_consume_file(
self.get_test_file(), override_tag_ids=[t1.id, t3.id]
self.get_test_file(),
override_tag_ids=[t1.id, t3.id],
)

self.assertIn(t1, document.tags.all())
@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
),
]

self.assertRaisesMessage(
@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
"weight": 0,
},
)
),
]
doc1 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png")
os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
)
doc2 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
)
doc3 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
)

self.assertEqual(doc1.filename, "simple.png")
@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase):
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(
title="Test", mime_type="application/pdf", correspondent=c
title="Test",
mime_type="application/pdf",
correspondent=c,
)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")

@ -5,15 +5,16 @@ from uuid import uuid4

from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings

from django.test import override_settings
from django.test import TestCase
from documents.parsers import parse_date


class TestDate(TestCase):

SAMPLE_FILES = os.path.join(
os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
os.path.dirname(__file__),
"../../paperless_tesseract/tests/samples",
)
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])

@ -111,11 +112,11 @@ class TestDate(TestCase):
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
self.assertIsNone(
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
)

@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
)
def test_ignored_dates(self, *args):
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"

@ -3,10 +3,12 @@ import tempfile
from pathlib import Path
from unittest import mock

from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone

from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document


class TestDocument(TestCase):

@ -9,17 +9,19 @@ from unittest import mock

from django.conf import settings
from django.db import DatabaseError
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone

from ..file_handling import create_source_path_directory
from ..file_handling import delete_empty_directories
from ..file_handling import generate_filename
from ..file_handling import generate_unique_filename
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from .utils import DirectoriesMixin
from ..file_handling import (
generate_filename,
create_source_path_directory,
delete_empty_directories,
generate_unique_filename,
)
from ..models import Document, Correspondent, Tag, DocumentType


class TestFileHandling(DirectoriesMixin, TestCase):
@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(
generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
generate_filename(document),
"{:07d}.pdf.gpg".format(document.pk),
)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"),
True,
)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

# Test source_path
self.assertEqual(
document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
document.source_path,
settings.ORIGINALS_DIR + "/none/none.pdf",
)

# Make the folder read- and execute-only (no writing and no renaming)
@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

# Check proper handling of files
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
True,
)
self.assertEqual(document.filename, "none/none.pdf")

@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
True,
)
self.assertEqual(document.filename, "none/none.pdf")

@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk
document.delete()
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)

@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
document.delete()
self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")

doc = Document.objects.create(
title="doc2", checksum="B", mime_type="application/pdf"
title="doc2",
checksum="B",
mime_type="application/pdf",
)

self.assertEqual(generate_filename(doc), "doc2.pdf")
@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)

@override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
)
def test_created_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
doc1 = Document.objects.create(
title="doc1", mime_type="application/pdf", created=d1
title="doc1",
mime_type="application/pdf",
created=d1,
)

self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

@override_settings(
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
)
def test_added_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
doc1 = Document.objects.create(
title="doc1", mime_type="application/pdf", added=d1
title="doc1",
mime_type="application/pdf",
added=d1,
)

self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
)
def test_nested_directory_cleanup(self):
document = Document()
@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()

self.assertEqual(
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"),
False,
)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
os.makedirs(os.path.join(tmp, "notempty", "empty"))

delete_empty_directories(
os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
os.path.join(tmp, "notempty", "empty"),
root=settings.ORIGINALS_DIR,
)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(
mime_type="application/pdf", title="qwe", checksum="A", pk=1
mime_type="application/pdf",
title="qwe",
checksum="A",
pk=1,
)
document2 = Document.objects.create(
mime_type="application/pdf", title="qwe", checksum="B", pk=2
mime_type="application/pdf",
title="qwe",
checksum="B",
pk=2,
)
Path(document.source_path).touch()
Path(document2.source_path).touch()
@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(
doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
doc.source_path,
os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
)
self.assertEqual(
doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
doc.archive_path,
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase):
def test_invalid_characters(self):

doc = Document.objects.create(
title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
title="This. is the title.",
mime_type="application/pdf",
pk=1,
checksum="1",
)
self.assertEqual(generate_filename(doc), "This. is the title.pdf")

@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase):

def run():
doc = Document.objects.create(
checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
checksum=str(uuid.uuid4()),
title=str(uuid.uuid4()),
content="wow",
)
doc.filename = generate_unique_filename(doc)
Path(doc.thumbnail_path).touch()

@ -1,7 +1,7 @@
from django.core.management.base import CommandError
from django.test import TestCase

from documents.settings import EXPORTER_FILE_NAME

from ..management.commands.document_importer import Command


@ -12,7 +12,9 @@ class TestImporter(TestCase):
def test_check_manifest_exists(self):
cmd = Command()
self.assertRaises(
CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
CommandError,
cmd._check_manifest_exists,
"/tmp/manifest.json",
)

def test_check_manifest(self):
@ -26,11 +28,11 @@ class TestImporter(TestCase):
self.assertTrue("The manifest file contains a record" in str(cm.exception))

cmd.manifest = [
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
]
# self.assertRaises(CommandError, cmd._check_manifest)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest()
self.assertTrue(
'The manifest file refers to "noexist.pdf"' in str(cm.exception)
'The manifest file refers to "noexist.pdf"' in str(cm.exception),
)

@ -1,5 +1,4 @@
from django.test import TestCase

from documents import index
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
def test_auto_complete(self):

doc1 = Document.objects.create(
title="doc1", checksum="A", content="test test2 test3"
title="doc1",
checksum="A",
content="test test2 test3",
)
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
ix = index.open_index()

self.assertListEqual(
index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
index.autocomplete(ix, "tes"),
[b"test3", b"test", b"test2"],
)
self.assertListEqual(
index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
index.autocomplete(ix, "tes", limit=3),
[b"test3", b"test", b"test2"],
)
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

@ -1,16 +1,14 @@
import hashlib
import tempfile
import filecmp
import hashlib
import os
import shutil
import tempfile
from pathlib import Path
from unittest import mock

from django.test import TestCase, override_settings


from django.core.management import call_command

from django.test import override_settings
from django.test import TestCase
from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase):

doc = self.make_models()
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
)

call_command("document_archiver")
@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase):

doc = self.make_models()
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
)

handle_document(doc.pk)
@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
)
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
shutil.copy(
sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
sample_file,
os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
)

handle_document(doc2.pk)
@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase):
os.makedirs(thumb_dir, exist_ok=True)

override_settings(
ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
ORIGINALS_DIR=originals_dir,
THUMBNAIL_DIR=thumb_dir,
PASSPHRASE="test",
).enable()

doc = Document.objects.create(
@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase):

class TestCreateClassifier(TestCase):
@mock.patch(
"documents.management.commands.document_create_classifier.train_classifier"
"documents.management.commands.document_create_classifier.train_classifier",
)
def test_create_classifier(self, m):
call_command("document_create_classifier")
@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase):

def test_errors(self):
doc = Document.objects.create(
title="test", content="test", filename="test.pdf", checksum="abc"
title="test",
content="test",
filename="test.pdf",
checksum="abc",
)
Path(doc.source_path).touch()
Path(doc.thumbnail_path).touch()

@ -6,12 +6,13 @@ from time import sleep
from unittest import mock

from django.conf import settings
from django.core.management import call_command, CommandError
from django.test import override_settings, TransactionTestCase

from documents.models import Tag
from django.core.management import call_command
from django.core.management import CommandError
from django.test import override_settings
from django.test import TransactionTestCase
from documents.consumer import ConsumerError
from documents.management.commands import document_consumer
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin


@ -41,7 +42,7 @@ class ConsumerMixin:
super(ConsumerMixin, self).setUp()
self.t = None
patcher = mock.patch(
"documents.management.commands.document_consumer.async_task"
"documents.management.commands.document_consumer.async_task",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.t_start()

shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
self.sample_file,
os.path.join(self.dirs.consumption_dir, ".DS_STORE"),
)
shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
self.sample_file,
os.path.join(self.dirs.consumption_dir, "my_file.pdf"),
)
shutil.copy(
self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
self.sample_file,
os.path.join(self.dirs.consumption_dir, "._my_file.pdf"),
)
shutil.copy(
self.sample_file,
@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):


@override_settings(
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
CONSUMER_POLLING=1,
CONSUMER_POLLING_DELAY=3,
CONSUMER_POLLING_RETRY_COUNT=20,
)
class TestConsumerPolling(TestConsumer):
# just do all the tests with polling
@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)

@override_settings(
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
CONSUMER_POLLING=1,
CONSUMER_POLLING_DELAY=1,
CONSUMER_POLLING_RETRY_COUNT=20,
)
def test_consume_file_with_path_tags_polling(self):
self.test_consume_file_with_path_tags()

@ -7,13 +7,17 @@ from pathlib import Path
from unittest import mock

from django.core.management import call_command
from django.test import TestCase, override_settings

from django.test import override_settings
from django.test import TestCase
from documents.management.commands import document_exporter
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin, paperless_environment
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import paperless_environment


class TestExportImport(DirectoriesMixin, TestCase):
@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
def _get_document_from_manifest(self, manifest, id):
f = list(
filter(
lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
)
lambda d: d["model"] == "documents.document" and d["pk"] == id,
manifest,
),
)
if len(f) == 1:
return f[0]
@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase):

@override_settings(PASSPHRASE="test")
def _do_export(
self, use_filename_format=False, compare_checksums=False, delete=False
self,
use_filename_format=False,
compare_checksums=False,
delete=False,
):
args = ["document_exporter", self.target]
if use_filename_format:
@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase):

self.assertEqual(len(manifest), 8)
self.assertEqual(
len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
4,
)

self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
for element in manifest:
if element["model"] == "documents.document":
fname = os.path.join(
self.target, element[document_exporter.EXPORTER_FILE_NAME]
self.target,
element[document_exporter.EXPORTER_FILE_NAME],
)
self.assertTrue(os.path.exists(fname))
self.assertTrue(
@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
os.path.join(
self.target,
element[document_exporter.EXPORTER_THUMBNAIL_NAME],
)
)
),
),
)

with open(fname, "rb") as f:
@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(checksum, element["fields"]["checksum"])

self.assertEqual(
element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
element["fields"]["storage_type"],
Document.STORAGE_TYPE_UNENCRYPTED,
)

if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = os.path.join(
self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
self.target,
element[document_exporter.EXPORTER_ARCHIVE_NAME],
)
self.assertTrue(os.path.exists(fname))

@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
)

with override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
):
self.test_exporter(use_filename_format=True)

@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime

with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
m.assert_not_called()
@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
Path(self.d1.source_path).touch()

with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
self.assertEqual(m.call_count, 1)
@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export()
m.assert_not_called()
@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.d2.save()

with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2"
"documents.management.commands.document_exporter.shutil.copy2",
) as m:
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase):
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertTrue(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)
self.d3.delete()

manifest = self._do_export()
self.assertRaises(
ValueError, self._get_document_from_manifest, manifest, self.d3.id
ValueError,
self._get_document_from_manifest,
manifest,
self.d3.id,
)
self.assertTrue(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)

manifest = self._do_export(delete=True)
self.assertFalse(
os.path.isfile(
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
)
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
),
)

self.assertTrue(len(manifest), 6)
@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
self.assertTrue(
os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")),
)

def test_export_missing_files(self):

@ -1,35 +1,50 @@
from django.core.management import call_command
from django.test import TestCase

from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin


class TestRetagger(DirectoriesMixin, TestCase):
def make_models(self):
self.d1 = Document.objects.create(
checksum="A", title="A", content="first document"
checksum="A",
title="A",
content="first document",
)
self.d2 = Document.objects.create(
checksum="B", title="B", content="second document"
checksum="B",
title="B",
content="second document",
)
self.d3 = Document.objects.create(
checksum="C", title="C", content="unrelated document"
checksum="C",
title="C",
content="unrelated document",
)
self.d4 = Document.objects.create(
checksum="D", title="D", content="auto document"
checksum="D",
title="D",
content="auto document",
)

self.tag_first = Tag.objects.create(
name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
name="tag1",
match="first",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_second = Tag.objects.create(
name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
name="tag2",
match="second",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(
name="tagauto", matching_algorithm=Tag.MATCH_AUTO
name="tagauto",
matching_algorithm=Tag.MATCH_AUTO,
)

self.d3.tags.add(self.tag_inbox)
@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.d4.tags.add(self.tag_auto)

self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
name="c1",
match="first",
matching_algorithm=Correspondent.MATCH_ANY,
)
self.correspondent_second = Correspondent.objects.create(
name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
name="c2",
match="second",
matching_algorithm=Correspondent.MATCH_ANY,
)

self.doctype_first = DocumentType.objects.create(
name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
name="dt1",
match="first",
matching_algorithm=DocumentType.MATCH_ANY,
)
self.doctype_second = DocumentType.objects.create(
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
name="dt2",
match="second",
matching_algorithm=DocumentType.MATCH_ANY,
)

def get_updated_docs(self):
@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

self.assertCountEqual(
[tag.id for tag in d_first.tags.all()], [self.tag_first.id]
[tag.id for tag in d_first.tags.all()],
[self.tag_first.id],
)
self.assertCountEqual(
[tag.id for tag in d_second.tags.all()], [self.tag_second.id]
[tag.id for tag in d_second.tags.all()],
[self.tag_second.id],
)
self.assertCountEqual(
[tag.id for tag in d_unrelated.tags.all()],
@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase):

def test_add_tags_suggest_url(self):
call_command(
"document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
"document_retagger",
"--tags",
"--suggest",
"--base-url=http://localhost",
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

@ -5,9 +5,11 @@ from unittest import mock
from django.contrib.auth.models import User
from django.core.management import call_command
from django.test import TestCase

from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin

@ -4,9 +4,11 @@ from unittest import mock

from django.core.management import call_command
from django.test import TestCase

from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin

@ -4,10 +4,14 @@ from random import randint

from django.contrib.admin.models import LogEntry
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from django.test import override_settings
from django.test import TestCase

from .. import matching
from ..models import Correspondent, Document, Tag, DocumentType
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from ..signals import document_consumption_finished


@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
TestCase.setUp(self)
User.objects.create_user(username="test_consumer", password="12345")
self.doc_contains = Document.objects.create(
content="I contain the keyword.", mime_type="application/pdf"
content="I contain the keyword.",
mime_type="application/pdf",
)

self.index_dir = tempfile.mkdtemp()
@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase):

def test_tag_applied_any(self):
t1 = Tag.objects.create(
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
name="test",
match="keyword",
matching_algorithm=Tag.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])

def test_tag_not_applied(self):
Tag.objects.create(
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
name="test",
match="no-match",
matching_algorithm=Tag.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(list(self.doc_contains.tags.all()) == [])

def test_correspondent_applied(self):
correspondent = Correspondent.objects.create(
name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
name="test",
match="keyword",
matching_algorithm=Correspondent.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertTrue(self.doc_contains.correspondent == correspondent)

def test_correspondent_not_applied(self):
Tag.objects.create(
name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
name="test",
match="no-match",
matching_algorithm=Correspondent.MATCH_ANY,
)
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)
self.assertEqual(self.doc_contains.correspondent, None)

def test_logentry_created(self):
document_consumption_finished.send(
sender=self.__class__, document=self.doc_contains
sender=self.__class__,
document=self.doc_contains,
)

self.assertEqual(LogEntry.objects.count(), 1)

@ -6,9 +6,9 @@ from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import TestMigrations
|
||||
|
||||
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
@ -93,10 +93,18 @@ def make_test_document(
|
||||
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
|
||||
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
simple_pdf2 = os.path.join(
|
||||
os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"documents",
|
||||
"originals",
|
||||
"0000002.pdf",
|
||||
)
|
||||
simple_pdf3 = os.path.join(
|
||||
os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"documents",
|
||||
"originals",
|
||||
"0000003.pdf",
|
||||
)
|
||||
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
|
||||
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
|
||||
@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
simple_pdf,
|
||||
)
|
||||
self.no_text = make_test_document(
|
||||
Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
|
||||
Document,
|
||||
"no-text",
|
||||
"image/png",
|
||||
simple_png2,
|
||||
"no-text.png",
|
||||
simple_pdf,
|
||||
)
|
||||
self.doc_no_archive = make_test_document(
|
||||
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
|
||||
Document,
|
||||
"no_archive",
|
||||
"text/plain",
|
||||
simple_txt,
|
||||
"no_archive.txt",
|
||||
)
|
||||
self.clash1 = make_test_document(
|
||||
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
|
||||
Document,
|
||||
"clash",
|
||||
"application/pdf",
|
||||
simple_pdf,
|
||||
"clash.pdf",
|
||||
simple_pdf,
|
||||
)
|
||||
self.clash2 = make_test_document(
|
||||
Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
|
||||
Document,
|
||||
"clash",
|
||||
"image/jpeg",
|
||||
simple_jpg,
|
||||
"clash.jpg",
|
||||
simple_pdf,
|
||||
)
|
||||
self.clash3 = make_test_document(
|
||||
Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
|
||||
Document,
|
||||
"clash",
|
||||
"image/png",
|
||||
simple_png,
|
||||
"clash.png",
|
||||
simple_pdf,
|
||||
)
|
||||
self.clash4 = make_test_document(
|
||||
Document,
|
||||
@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
|
||||
self.assertNotEqual(
|
||||
archive_path_old(self.clash1), archive_path_old(self.clash4)
|
||||
archive_path_old(self.clash1),
|
||||
archive_path_old(self.clash4),
|
||||
)
|
||||
|
||||
def testArchiveFilesMigrated(self):
|
||||
@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||
|
||||
self.assertEqual(
|
||||
Document.objects.filter(archive_checksum__isnull=False).count(), 6
|
||||
Document.objects.filter(archive_checksum__isnull=False).count(),
|
||||
6,
|
||||
)
|
||||
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename,
|
||||
"unrelated.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||
Document.objects.get(id=self.no_text.id).archive_filename,
|
||||
"no-text.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename,
|
||||
None,
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash1.id).archive_filename,
|
||||
@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
f"{self.clash3.id:07}.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||
Document.objects.get(id=self.clash4.id).archive_filename,
|
||||
"clash.png.pdf",
|
||||
)
|
||||
|
||||
|
||||
@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename,
|
||||
"unrelated.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||
Document.objects.get(id=self.no_text.id).archive_filename,
|
||||
"no-text.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename,
|
||||
None,
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
|
||||
Document.objects.get(id=self.clash1.id).archive_filename,
|
||||
"none/clash.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash2.id).archive_filename,
|
||||
@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||
"none/clash_02.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||
Document.objects.get(id=self.clash4.id).archive_filename,
|
||||
"clash.png.pdf",
|
||||
)
|
||||
|
||||
|
||||
@@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")

doc = make_test_document(
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
Document,
"clash",
"application/pdf",
simple_pdf,
"clash.pdf",
simple_pdf,
)
os.unlink(archive_path_old(doc))

self.assertRaisesMessage(
ValueError, "does not exist at: ", self.performMigration
ValueError,
"does not exist at: ",
self.performMigration,
)

def test_parser_missing(self):
@@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
)

self.assertRaisesMessage(
ValueError, "no parsers are available", self.performMigration
ValueError,
"no parsers are available",
self.performMigration,
)

@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
@@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")

doc1 = make_test_document(
Document, "document", "image/png", simple_png, "document.png", simple_pdf
Document,
"document",
"image/png",
simple_png,
"document.png",
simple_pdf,
)
doc2 = make_test_document(
Document,
@@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
filter(
lambda log: "Parse error, will try again in 5 seconds" in log,
capture.output,
)
)
),
),
),
4,
)
@@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
lambda log: "Unable to regenerate archive document for ID:"
in log,
capture.output,
)
)
),
),
),
2,
)
@@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
Document = self.apps.get_model("documents", "Document")

doc1 = make_test_document(
Document, "document", "image/png", simple_png, "document.png", simple_pdf
Document,
"document",
"image/png",
simple_png,
"document.png",
simple_pdf,
)
doc2 = make_test_document(
Document,
@@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
lambda log: "Parser did not return an archive document for document"
in log,
capture.output,
)
)
),
),
),
2,
)
@@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
"unrelated.pdf",
)
doc_no_archive = make_test_document(
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
Document,
"no_archive",
"text/plain",
simple_txt,
"no_archive.txt",
)
clashB = make_test_document(
Document,
@@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
self.assertEqual(archive_checksum, doc.archive_checksum)

self.assertEqual(
Document.objects.filter(archive_checksum__isnull=False).count(), 2
Document.objects.filter(archive_checksum__isnull=False).count(),
2,
)


@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
TestMigrateArchiveFilesBackwards
TestMigrateArchiveFilesBackwards,
):
pass

@@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
)

self.assertRaisesMessage(
ValueError, "file already exists.", self.performMigration
ValueError,
"file already exists.",
self.performMigration,
)

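Editor's note: the dominant change in the hunks above is mechanical. The add-trailing-comma pre-commit hook appends a comma after the last argument of every call that spans multiple lines, and black's magic trailing comma then keeps one argument per line instead of repacking them. A minimal sketch of the before/after, using a hypothetical make_report() rather than a function from this diff:

    # before: arguments packed onto one wrapped line, no trailing comma
    doc = make_report(
        Document, "clash", "application/pdf", simple_pdf
    )

    # after: trailing comma added, so black keeps one argument per line
    doc = make_report(
        Document,
        "clash",
        "application/pdf",
        simple_pdf,
    )

The payoff is smaller future diffs: appending an argument touches one line instead of rewriting the whole call.
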
@@ -3,9 +3,9 @@ import shutil

from django.conf import settings
from django.test import override_settings

from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations

STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
@@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(
title="test", file_type="pdf", filename="file1.pdf"
title="test",
file_type="pdf",
filename="file1.pdf",
)
self.doc_id = doc.id
shutil.copy(
@@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
)

doc2 = Document.objects.create(
checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
checksum="B",
file_type="pdf",
storage_type=STORAGE_TYPE_GPG,
)
self.doc2_id = doc2.id
shutil.copy(
@@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(
title="test", mime_type="application/pdf", filename="file1.pdf"
title="test",
mime_type="application/pdf",
filename="file1.pdf",
)
self.doc_id = doc.id
shutil.copy(

@@ -1,4 +1,5 @@
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations


class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):

@@ -1,4 +1,5 @@
from documents.tests.utils import DirectoriesMixin, TestMigrations
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations


class TestMigrateTagColor(DirectoriesMixin, TestMigrations):

@@ -1,7 +1,9 @@
from django.test import TestCase

from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent
from ..models import Correspondent
from ..models import Document
from .factories import CorrespondentFactory
from .factories import DocumentFactory


class CorrespondentTestCase(TestCase):

@@ -4,16 +4,14 @@ import tempfile
from tempfile import TemporaryDirectory
from unittest import mock

from django.test import TestCase, override_settings

from documents.parsers import (
get_parser_class,
get_supported_file_extensions,
get_default_file_extension,
get_parser_class_for_mime_type,
DocumentParser,
is_file_ext_supported,
)
from django.test import override_settings
from django.test import TestCase
from documents.parsers import DocumentParser
from documents.parsers import get_default_file_extension
from documents.parsers import get_parser_class
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser

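Editor's note: the import rewrites here and throughout the commit come from the reorder-python-imports hook, which replaces parenthesized and comma-separated import groups with exactly one imported name per line. A minimal sketch of the rule, with illustrative names:

    # before: grouped import; any change touches the whole block
    from documents.parsers import (
        get_parser_class,
        DocumentParser,
    )

    # after: one import per line, deterministically sorted
    from documents.parsers import DocumentParser
    from documents.parsers import get_parser_class

One-per-line imports also merge cleanly: two branches that each add a name no longer conflict inside a shared parenthesized block.
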
@@ -6,9 +6,9 @@ from pathlib import Path
import filelock
from django.conf import settings
from django.test import TestCase

from documents.models import Document
from documents.sanity_checker import check_sanity, SanityCheckMessages
from documents.sanity_checker import check_sanity
from documents.sanity_checker import SanityCheckMessages
from documents.tests.utils import DirectoriesMixin


@@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase):
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(
capture.records[0].message, "Sanity checker detected no issues."
capture.records[0].message,
"Sanity checker detected no issues.",
)

def test_info(self):

@@ -2,8 +2,8 @@ import logging
from unittest import mock

from django.test import TestCase

from paperless.settings import default_task_workers, default_threads_per_worker
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker


class TestSettings(TestCase):
@@ -21,7 +21,7 @@ class TestSettings(TestCase):
def test_workers_threads(self):
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count"
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i

@@ -4,10 +4,13 @@ from unittest import mock
from django.conf import settings
from django.test import TestCase
from django.utils import timezone

from documents import tasks
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.tests.utils import DirectoriesMixin


@@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase):
messages.warning("Some warning")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(), "Sanity check exited with warnings. See log."
tasks.sanity_check(),
"Sanity check exited with warnings. See log.",
)
m.assert_called_once()

@@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase):
messages.info("Some info")
m.return_value = messages
self.assertEqual(
tasks.sanity_check(), "Sanity check exited with infos. See log."
tasks.sanity_check(),
"Sanity check exited with infos. See log.",
)
m.assert_called_once()

@@ -25,7 +25,7 @@ class TestViews(TestCase):
]:
if language_given:
self.client.cookies.load(
{settings.LANGUAGE_COOKIE_NAME: language_given}
{settings.LANGUAGE_COOKIE_NAME: language_given},
)
elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
@@ -51,5 +51,6 @@ class TestViews(TestCase):
f"frontend/{language_actual}/polyfills.js",
)
self.assertEqual(
response.context_data["main_js"], f"frontend/{language_actual}/main.js"
response.context_data["main_js"],
f"frontend/{language_actual}/main.js",
)

@@ -7,7 +7,8 @@ from contextlib import contextmanager
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings, TransactionTestCase
from django.test import override_settings
from django.test import TransactionTestCase


def setup_directories():
@@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase):
assert (
self.migrate_from and self.migrate_to
), "TestCase '{}' must define migrate_from and migrate_to properties".format(
type(self).__name__
type(self).__name__,
)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]

@@ -5,63 +5,70 @@ import uuid
import zipfile
from datetime import datetime
from time import mktime
from urllib.parse import quote_plus
from unicodedata import normalize
from urllib.parse import quote_plus

from django.conf import settings
from django.db.models import Count, Max, Case, When, IntegerField
from django.db.models import Case
from django.db.models import Count
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import When
from django.db.models.functions import Lower
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.http import Http404
from django.http import HttpResponse
from django.http import HttpResponseBadRequest
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.filters import OrderingFilter
from rest_framework.filters import SearchFilter
from rest_framework.generics import GenericAPIView
from rest_framework.mixins import (
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
UpdateModelMixin,
)
from rest_framework.mixins import DestroyModelMixin
from rest_framework.mixins import ListModelMixin
from rest_framework.mixins import RetrieveModelMixin
from rest_framework.mixins import UpdateModelMixin
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet
from rest_framework.viewsets import GenericViewSet
from rest_framework.viewsets import ModelViewSet
from rest_framework.viewsets import ViewSet

from paperless.db import GnuPG
from paperless.views import StandardPagination
from .bulk_download import (
OriginalAndArchiveStrategy,
OriginalsOnlyStrategy,
ArchiveOnlyStrategy,
)
from .bulk_download import ArchiveOnlyStrategy
from .bulk_download import OriginalAndArchiveStrategy
from .bulk_download import OriginalsOnlyStrategy
from .classifier import load_classifier
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
DocumentTypeFilterSet,
)
from .matching import match_correspondents, match_tags, match_document_types
from .models import Correspondent, Document, Tag, DocumentType, SavedView
from .filters import CorrespondentFilterSet
from .filters import DocumentFilterSet
from .filters import DocumentTypeFilterSet
from .filters import TagFilterSet
from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_tags
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
TagSerializerVersion1,
TagSerializer,
DocumentTypeSerializer,
PostDocumentSerializer,
SavedViewSerializer,
BulkEditSerializer,
DocumentListSerializer,
BulkDownloadSerializer,
)
from .serialisers import BulkDownloadSerializer
from .serialisers import BulkEditSerializer
from .serialisers import CorrespondentSerializer
from .serialisers import DocumentListSerializer
from .serialisers import DocumentSerializer
from .serialisers import DocumentTypeSerializer
from .serialisers import PostDocumentSerializer
from .serialisers import SavedViewSerializer
from .serialisers import TagSerializer
from .serialisers import TagSerializerVersion1

logger = logging.getLogger("paperless.api")

@@ -89,16 +96,14 @@ class IndexView(TemplateView):
context["full_name"] = self.request.user.get_full_name()
context["styles_css"] = f"frontend/{self.get_language()}/styles.css"
context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js"
context[
"polyfills_js"
] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501
context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js"
context["main_js"] = f"frontend/{self.get_language()}/main.js"
context[
"webmanifest"
] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
] = f"frontend/{self.get_language()}/manifest.webmanifest"  # noqa: E501
context[
"apple_touch_icon"
] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # NOQA: E501
] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # noqa: E501
return context


@@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet):
model = Correspondent

queryset = Correspondent.objects.annotate(
document_count=Count("documents"), last_correspondence=Max("documents__created")
document_count=Count("documents"),
last_correspondence=Max("documents__created"),
).order_by(Lower("name"))

serializer_class = CorrespondentSerializer
@@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet):
model = Tag

queryset = Tag.objects.annotate(document_count=Count("documents")).order_by(
Lower("name")
Lower("name"),
)

def get_serializer_class(self):
@@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet):
model = DocumentType

queryset = DocumentType.objects.annotate(
document_count=Count("documents")
document_count=Count("documents"),
).order_by(Lower("name"))

serializer_class = DocumentTypeSerializer
@@ -220,9 +226,7 @@ class DocumentViewSet(

def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if (
not self.original_requested(request) and doc.has_archive_version
):  # NOQA: E501
if not self.original_requested(request) and doc.has_archive_version:
file_handle = doc.archive_file
filename = doc.get_public_filename(archive=True)
mime_type = "application/pdf"
@@ -258,7 +262,7 @@ class DocumentViewSet(

try:
return parser.extract_metadata(file, mime_type)
except Exception as e:
except Exception:
# TODO: cover GPG errors, remove later.
return []
else:
@@ -291,7 +295,8 @@ class DocumentViewSet(
if doc.has_archive_version:
meta["archive_size"] = self.get_filesize(doc.archive_path)
meta["archive_metadata"] = self.get_metadata(
doc.archive_path, "application/pdf"
doc.archive_path,
"application/pdf",
)
else:
meta["archive_size"] = None
@@ -315,7 +320,7 @@ class DocumentViewSet(
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
],
}
},
)

@action(methods=["get"], detail=True)
@@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer):
"score": instance.score,
"highlights": instance.highlights("content", text=doc.content)
if doc
else None,  # NOQA: E501
else None,
"rank": instance.rank,
}

@@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

with tempfile.NamedTemporaryFile(
prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False
prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False,
) as f:
f.write(doc_data)
os.utime(f.name, times=(t, t))
@@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView):

correspondents = Correspondent.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)

tags = Tag.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)

types = DocumentType.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
)
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)

r = Response(
@@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView):
"selected_document_types": [
{"id": t.id, "document_count": t.document_count} for t in types
],
}
},
)

return r
@@ -612,7 +619,7 @@ class StatisticsView(APIView):
{
"documents_total": documents_total,
"documents_inbox": documents_inbox,
}
},
)


@@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView):

os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
temp = tempfile.NamedTemporaryFile(
dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False
dir=settings.SCRATCH_DIR,
suffix="-compressed-archive",
delete=False,
)

if content == "both":
@@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView):
with open(temp.name, "rb") as f:
response = HttpResponse(f, content_type="application/zip")
response["Content-Disposition"] = '{}; filename="{}"'.format(
"attachment", "documents.zip"
"attachment",
"documents.zip",
)

return response

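Editor's note: two smaller cleanups ride along in the views.py hunks above. Statements that were split across lines only to dodge the 88-column limit are joined back up once they fit, with their now-stale suppressions removed, and suppressions that remain are normalized from "# NOQA: E501" to the conventional lowercase "# noqa: E501". A minimal sketch, with a hypothetical asset_path() standing in for the real expression:

    # before: split purely to satisfy the line-length check, plus a stale suppression
    context[
        "polyfills_js"
    ] = asset_path("polyfills.js")  # NOQA: E501

    # after: fits within 88 columns, so no suppression is needed
    context["polyfills_js"] = asset_path("polyfills.js")
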
@@ -1 +1,4 @@
from .checks import paths_check, binaries_check
from .checks import binaries_check
from .checks import paths_check

__all__ = ["binaries_check", "paths_check"]

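Editor's note: the package __init__ modules in this commit trade wildcard and comma-grouped re-exports for explicit one-per-line imports plus an __all__ list, which keeps the public surface greppable and tells linters that the names are intentionally re-exported rather than unused. A minimal sketch of the pattern (in some of the __init__ hunks the old line is a star import):

    # before: implicit re-export via a star import
    from .checks import *

    # after: explicit names, with __all__ declaring the intended public API
    from .checks import binaries_check
    from .checks import paths_check

    __all__ = ["binaries_check", "paths_check"]
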
@@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
django_asgi_app = get_asgi_application()

from channels.auth import AuthMiddlewareStack  # NOQA: E402
from channels.routing import ProtocolTypeRouter, URLRouter  # NOQA: E402
from channels.auth import AuthMiddlewareStack  # noqa: E402
from channels.routing import ProtocolTypeRouter, URLRouter  # noqa: E402

from paperless.urls import websocket_urlpatterns  # NOQA: E402
from paperless.urls import websocket_urlpatterns  # noqa: E402

application = ProtocolTypeRouter(
{
"http": get_asgi_application(),
"websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)),
}
},
)

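Editor's note: asgi.py has to call get_asgi_application() before importing anything that pulls in Django models, so its late imports genuinely violate E402 (module level import not at top of file) and keep their suppressions; only the spelling is normalized to the lowercase form flake8's documentation uses. A minimal sketch of such a justified suppression:

    import os

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

    # Django must be configured before channels touches auth models,
    # so this import cannot move to the top of the file.
    from channels.auth import AuthMiddlewareStack  # noqa: E402
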
@@ -1,9 +1,9 @@
from django.conf import settings
from django.contrib import auth
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.contrib.auth.models import User
from django.utils.deprecation import MiddlewareMixin
from rest_framework import authentication
from django.contrib.auth.middleware import RemoteUserMiddleware


class AutoLoginMiddleware(MiddlewareMixin):
@@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
settings.DEBUG
and "Referer" in request.headers
and request.headers["Referer"].startswith("http://localhost:4200/")
):  # NOQA: E501
):
user = User.objects.filter(is_staff=True).first()
print("Auto-Login with user {}".format(user))
return (user, None)

@@ -3,7 +3,9 @@ import shutil
import stat

from django.conf import settings
from django.core.checks import Error, Warning, register
from django.core.checks import Error
from django.core.checks import register
from django.core.checks import Warning

exists_message = "{} is set but doesn't exist."
exists_hint = "Create a directory at {}"
@@ -19,11 +21,12 @@ def path_check(var, directory):
if directory:
if not os.path.isdir(directory):
messages.append(
Error(exists_message.format(var), exists_hint.format(directory))
Error(exists_message.format(var), exists_hint.format(directory)),
)
else:
test_file = os.path.join(
directory, f"__paperless_write_test_{os.getpid()}__"
directory,
f"__paperless_write_test_{os.getpid()}__",
)
try:
with open(test_file, "w"):
@@ -34,9 +37,9 @@ def path_check(var, directory):
writeable_message.format(var),
writeable_hint.format(
f"\n{stat.filemode(os.stat(directory).st_mode)} "
f"{directory}\n"
f"{directory}\n",
),
)
),
)
finally:
if os.path.isfile(test_file):
@@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs):
"security issue, since it puts security overides in place which "
"are meant to be only used during development. This "
"also means that paperless will tell anyone various "
"debugging information when something goes wrong."
)
"debugging information when something goes wrong.",
),
]
else:
return []

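Editor's note: the rewritten django.core.checks imports above land in the order Error, register, Warning — evidence that the import sorter orders names case-insensitively, interleaving classes and functions alphabetically rather than grouping them by kind:

    # case-insensitive sort: error < register < warning
    from django.core.checks import Error
    from django.core.checks import register
    from django.core.checks import Warning
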
@@ -1,7 +1,8 @@
import json

from asgiref.sync import async_to_sync
from channels.exceptions import DenyConnection, AcceptConnection
from channels.exceptions import AcceptConnection
from channels.exceptions import DenyConnection
from channels.generic.websocket import WebsocketConsumer


@@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer):
raise DenyConnection()
else:
async_to_sync(self.channel_layer.group_add)(
"status_updates", self.channel_name
"status_updates",
self.channel_name,
)
raise AcceptConnection()

def disconnect(self, close_code):
async_to_sync(self.channel_layer.group_discard)(
"status_updates", self.channel_name
"status_updates",
self.channel_name,
)

def status_update(self, event):

@@ -1,5 +1,4 @@
import gnupg

from django.conf import settings


@@ -1,5 +1,4 @@
from django.conf import settings

from paperless import version


@@ -5,9 +5,8 @@ import os
import re

from concurrent_log_handler.queue import setup_logging_queues
from dotenv import load_dotenv

from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv

# Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"):
@@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))

CONSUMPTION_DIR = os.getenv(
"PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume")
"PAPERLESS_CONSUMPTION_DIR",
os.path.join(BASE_DIR, "..", "consume"),
)

# This will be created if it doesn't exist
@@ -119,7 +119,7 @@ REST_FRAMEWORK = {

if DEBUG:
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"paperless.auth.AngularApiAuthenticationOverride"
"paperless.auth.AngularApiAuthenticationOverride",
)

MIDDLEWARE = [
@@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME:

ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
HTTP_REMOTE_USER_HEADER_NAME = os.getenv(
"PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER"
"PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
"HTTP_REMOTE_USER",
)

if ENABLE_HTTP_REMOTE_USER:
@@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER:
"django.contrib.auth.backends.ModelBackend",
]
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
"rest_framework.authentication.RemoteUserAuthentication"
"rest_framework.authentication.RemoteUserAuthentication",
)

# X-Frame options for embedded PDF display:
@@ -212,7 +213,7 @@ else:

# We allow CORS from localhost:8080
CORS_ALLOWED_ORIGINS = tuple(
os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",")
os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","),
)

if DEBUG:
@@ -223,7 +224,8 @@ if DEBUG:
# Paperless on a closed network. However, if you're putting this anywhere
# public, you should change the key to something unique and verbose.
SECRET_KEY = os.getenv(
"PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
"PAPERLESS_SECRET_KEY",
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
)

_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")
@@ -268,7 +270,7 @@ DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": os.path.join(DATA_DIR, "db.sqlite3"),
}
},
}

if os.getenv("PAPERLESS_DBHOST"):
@@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers):


THREADS_PER_WORKER = os.getenv(
"PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS)
"PAPERLESS_THREADS_PER_WORKER",
default_threads_per_worker(TASK_WORKERS),
)

###############################################################################
@@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5))

CONSUMER_POLLING_RETRY_COUNT = int(
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5)
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
)

CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
@@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list(
os.getenv(
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*"]',
)
)
),
),
)

CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
@@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")

OCR_ROTATE_PAGES_THRESHOLD = float(
os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0)
os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
)

OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
@@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv(
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
"http://localhost:3000",
)

if PAPERLESS_TIKA_ENABLED:

@@ -1,10 +1,11 @@
import os
import shutil

from django.test import TestCase, override_settings

from django.test import override_settings
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from paperless import binaries_check, paths_check
from paperless import binaries_check
from paperless import paths_check
from paperless.checks import debug_mode_check


@@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase):
self.assertEqual(paths_check(None), [])

@override_settings(
MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare"
MEDIA_ROOT="uuh",
DATA_DIR="whatever",
CONSUMPTION_DIR="idontcare",
)
def test_paths_check_dont_exist(self):
msgs = paths_check(None)

@@ -2,8 +2,8 @@ from unittest import mock

from channels.layers import get_channel_layer
from channels.testing import WebsocketCommunicator
from django.test import TestCase, override_settings

from django.test import override_settings
from django.test import TestCase
from paperless.asgi import application


@@ -46,7 +46,8 @@ class TestWebSockets(TestCase):

channel_layer = get_channel_layer()
await channel_layer.group_send(
"status_updates", {"type": "status_update", "data": message}
"status_updates",
{"type": "status_update", "data": message},
)

response = await communicator.receive_json_from()

@@ -1,34 +1,30 @@
from django.conf import settings
from django.conf.urls import include
from django.contrib import admin
from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
from django.urls import path
from django.urls import re_path
from django.utils.translation import gettext_lazy as _
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from documents.views import BulkDownloadView
from documents.views import BulkEditView
from documents.views import CorrespondentViewSet
from documents.views import DocumentTypeViewSet
from documents.views import IndexView
from documents.views import LogViewSet
from documents.views import PostDocumentView
from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
from documents.views import StatisticsView
from documents.views import TagViewSet
from documents.views import UnifiedSearchViewSet
from paperless.consumers import StatusConsumer
from paperless.views import FaviconView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter

from django.utils.translation import gettext_lazy as _

from django.conf import settings

from paperless.consumers import StatusConsumer
from documents.views import (
CorrespondentViewSet,
UnifiedSearchViewSet,
LogViewSet,
TagViewSet,
DocumentTypeViewSet,
IndexView,
SearchAutoCompleteView,
StatisticsView,
PostDocumentView,
SavedViewViewSet,
BulkEditView,
SelectionDataView,
BulkDownloadView,
)
from paperless.views import FaviconView

api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet)
api_router.register(r"document_types", DocumentTypeViewSet)
@@ -62,7 +58,9 @@ urlpatterns = [
name="post_document",
),
re_path(
r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit"
r"^documents/bulk_edit/",
BulkEditView.as_view(),
name="bulk_edit",
),
re_path(
r"^documents/selection_data/",
@@ -76,7 +74,7 @@ urlpatterns = [
),
path("token/", views.obtain_auth_token),
]
+ api_router.urls
+ api_router.urls,
),
),
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
@@ -88,35 +86,37 @@ urlpatterns = [
re_path(
r"^doc/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/download/"
url=settings.BASE_URL + "api/documents/%(pk)s/download/",
),
),
re_path(
r"^thumb/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/thumb/"
url=settings.BASE_URL + "api/documents/%(pk)s/thumb/",
),
),
re_path(
r"^preview/(?P<pk>\d+)$",
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/%(pk)s/preview/"
url=settings.BASE_URL + "api/documents/%(pk)s/preview/",
),
),
]
],
),
),
re_path(
r"^push$",
csrf_exempt(
RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/")
RedirectView.as_view(
url=settings.BASE_URL + "api/documents/post_document/",
),
),
),
# Frontend assets TODO: this is pretty bad, but it works.
path(
"assets/<path:path>",
RedirectView.as_view(
url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s"
url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s",
),
),
# TODO: with localization, this is even worse! :/

@@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination):
class FaviconView(View):
def get(self, request, *args, **kwargs):
favicon = os.path.join(
os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico"
os.path.dirname(__file__),
"static",
"paperless",
"img",
"favicon.ico",
)
with open(favicon, "rb") as f:
return HttpResponse(f, content_type="image/x-icon")

@@ -1,6 +1,7 @@
import os
from uvicorn.workers import UvicornWorker

from django.conf import settings
from uvicorn.workers import UvicornWorker

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

@@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

@@ -1,8 +1,8 @@
from django.contrib import admin
from django import forms
from paperless_mail.models import MailAccount, MailRule

from django.contrib import admin
from django.utils.translation import gettext_lazy as _
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule


class MailAccountAdminForm(forms.ModelForm):
@@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin):
{
"description": _(
"Paperless will only process mails that match ALL of the "
"filters given below."
"filters given below.",
),
"fields": (
"filter_from",
@@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin):
"description": _(
"The action applied to the mail. This action is only "
"performed when documents were consumed from the mail. "
"Mails without attachments will remain entirely untouched."
"Mails without attachments will remain entirely untouched.",
),
"fields": ("action", "action_parameter"),
},
@@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin):
"Assign metadata to documents consumed from this rule "
"automatically. If you do not assign tags, types or "
"correspondents here, paperless will still process all "
"matching rules that you have defined."
"matching rules that you have defined.",
),
"fields": (
"assign_title_from",

@@ -1,5 +1,4 @@
from django.apps import AppConfig

from django.utils.translation import gettext_lazy as _

@@ -1,6 +1,7 @@
import os
import tempfile
from datetime import timedelta, date
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch

import magic
@@ -8,18 +9,16 @@ import pathvalidate
from django.conf import settings
from django.db import DatabaseError
from django_q.tasks import async_task
from imap_tools import (
MailBox,
MailBoxUnencrypted,
AND,
MailMessageFlags,
MailboxFolderSelectError,
)

from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from paperless_mail.models import MailAccount, MailRule
from imap_tools import AND
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessageFlags
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule


class MailError(Exception):
@@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin):

else:
raise NotImplementedError(
"Unknown title selector."
)  # pragma: nocover  # NOQA: E501
"Unknown title selector.",
)  # pragma: nocover

def get_correspondent(self, message, rule):
c_from = rule.assign_correspondent_from
@@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin):
message.from_values
and "name" in message.from_values
and message.from_values["name"]
):  # NOQA: E501
):
return self._correspondent_from_name(message.from_values["name"])
else:
return self._correspondent_from_name(message.from_)
@@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin):

else:
raise NotImplementedError(
"Unknwown correspondent selector"
)  # pragma: nocover  # NOQA: E501
"Unknwown correspondent selector",
)  # pragma: nocover

def handle_mail_account(self, account):

@@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin):
total_processed_files = 0

with get_mailbox(
account.imap_server, account.imap_port, account.imap_security
account.imap_server,
account.imap_port,
account.imap_security,
) as M:

try:
@@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin):
except MailboxFolderSelectError:
raise MailError(
f"Rule {rule}: Folder {rule.folder} "
f"does not exist in account {rule.account}"
f"does not exist in account {rule.account}",
)

criterias = make_criterias(rule)
@@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin):

try:
get_rule_action(rule).post_consume(
M, post_consume_messages, rule.action_parameter
M,
post_consume_messages,
rule.action_parameter,
)

except Exception as e:
raise MailError(
f"Rule {rule}: Error while processing post-consume actions: " f"{e}"
f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
)

return total_processed_files
@@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin):
if (
not att.content_disposition == "attachment"
and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY
):  # NOQA: E501
):
self.log(
"debug",
f"Rule {rule}: "
@@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin):

os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
_, temp_filename = tempfile.mkstemp(
prefix="paperless-mail-", dir=settings.SCRATCH_DIR
prefix="paperless-mail-",
dir=settings.SCRATCH_DIR,
)
with open(temp_filename, "wb") as f:
f.write(att.payload)
@@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin):
"documents.tasks.consume_file",
path=temp_filename,
override_filename=pathvalidate.sanitize_filename(
att.filename
),  # NOQA: E501
att.filename,
),
override_title=title,
override_correspondent_id=correspondent.id
if correspondent
else None,  # NOQA: E501
override_document_type_id=doc_type.id
if doc_type
else None,  # NOQA: E501
else None,
override_document_type_id=doc_type.id if doc_type else None,
override_tag_ids=[tag.id] if tag else None,
task_name=att.filename[:100],
)

@@ -1,5 +1,4 @@
from django.core.management.base import BaseCommand

from paperless_mail import tasks


@@ -7,7 +6,8 @@ class Command(BaseCommand):

help = """
""".replace(
" ", ""
" ",
"",
)

def handle(self, *args, **options):

@@ -1,7 +1,5 @@
from django.db import models

import documents.models as document_models

from django.db import models
from django.utils.translation import gettext_lazy as _


@@ -30,12 +28,14 @@ class MailAccount(models.Model):
null=True,
help_text=_(
"This is usually 143 for unencrypted and STARTTLS "
"connections, and 993 for SSL connections."
"connections, and 993 for SSL connections.",
),
)

imap_security = models.PositiveIntegerField(
_("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL
_("IMAP security"),
choices=IMAP_SECURITY_OPTIONS,
default=IMAP_SECURITY_SSL,
)

username = models.CharField(_("username"), max_length=256)
@@ -48,7 +48,7 @@ class MailAccount(models.Model):
default="UTF-8",
help_text=_(
"The character set to use when communicating with the "
"mail server, such as 'UTF-8' or 'US-ASCII'."
"mail server, such as 'UTF-8' or 'US-ASCII'.",
),
)

@@ -123,13 +123,22 @@ class MailRule(models.Model):
)

filter_from = models.CharField(
_("filter from"), max_length=256, null=True, blank=True
_("filter from"),
max_length=256,
null=True,
blank=True,
)
filter_subject = models.CharField(
_("filter subject"), max_length=256, null=True, blank=True
_("filter subject"),
max_length=256,
null=True,
blank=True,
)
filter_body = models.CharField(
_("filter body"), max_length=256, null=True, blank=True
_("filter body"),
max_length=256,
null=True,
blank=True,
)

filter_attachment_filename = models.CharField(
@@ -140,12 +149,14 @@ class MailRule(models.Model):
help_text=_(
"Only consume documents which entirely match this "
"filename if specified. Wildcards such as *.pdf or "
"*invoice* are allowed. Case insensitive."
"*invoice* are allowed. Case insensitive.",
),
)

maximum_age = models.PositiveIntegerField(
_("maximum age"), default=30, help_text=_("Specified in days.")
_("maximum age"),
default=30,
help_text=_("Specified in days."),
)

attachment_type = models.PositiveIntegerField(
@@ -154,7 +165,7 @@ class MailRule(models.Model):
default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY,
help_text=_(
"Inline attachments include embedded images, so it's best "
"to combine this option with a filename filter."
"to combine this option with a filename filter.",
),
)

@@ -173,12 +184,14 @@ class MailRule(models.Model):
"Additional parameter for the action selected above, "
"i.e., "
"the target folder of the move to folder action. "
"Subfolders must be separated by dots."
"Subfolders must be separated by dots.",
),
)

assign_title_from = models.PositiveIntegerField(
_("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT
_("assign title from"),
choices=TITLE_SELECTOR,
default=TITLE_FROM_SUBJECT,
)

assign_tag = models.ForeignKey(

@@ -1,6 +1,7 @@
import logging

from paperless_mail.mail import MailAccountHandler, MailError
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount

@@ -7,13 +7,15 @@ from unittest import mock
from django.core.management import call_command
from django.db import DatabaseError
from django.test import TestCase
from imap_tools import MailMessageFlags, MailboxFolderSelectError

from documents.models import Correspondent
from documents.tests.utils import DirectoriesMixin
from imap_tools import MailboxFolderSelectError
from imap_tools import MailMessageFlags
from paperless_mail import tasks
from paperless_mail.mail import MailError, MailAccountHandler
from paperless_mail.models import MailRule, MailAccount
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule


class BogusFolderManager:
@@ -83,7 +85,7 @@ class BogusMailBox(ContextManager):
def move(self, uid_list, folder):
if folder == "spam":
self.messages_spam.append(
filter(lambda m: m.uid in uid_list, self.messages)
filter(lambda m: m.uid in uid_list, self.messages),
)
self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages))
else:
@@ -115,7 +117,9 @@ def create_message(


def create_attachment(
filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document"
filename="the_file.pdf",
content_disposition="attachment",
payload=b"a PDF document",
):
attachment = namedtuple("Attachment", [])
attachment.filename = filename
@@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase):
body="cables",
seen=True,
flagged=False,
)
),
)
self.bogus_mailbox.messages.append(
create_message(
@@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase):
body="from my favorite electronic store",
seen=False,
flagged=True,
)
),
)
self.bogus_mailbox.messages.append(
create_message(
subject="Claim your $10M price now!",
from_="amazon@amazon-some-indian-site.org",
seen=False,
)
),
)

def test_get_correspondent(self):
@@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase):
handler = MailAccountHandler()

rule = MailRule(
name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING
name="a",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING,
)
self.assertIsNone(handler.get_correspondent(message, rule))

rule = MailRule(
name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL
name="b",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL,
)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
@@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase):
self.assertEqual(c.id, me_localhost.id)

rule = MailRule(
name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME
name="c",
assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME,
)
c = handler.get_correspondent(message, rule)
self.assertIsNotNone(c)
@@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase):

def test_handle_message(self):
message = create_message(
subject="the message title", from_="Myself", num_attachments=2
subject="the message title",
from_="Myself",
num_attachments=2,
)

account = MailAccount()
@@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_mark_read(self):

account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)

rule = MailRule.objects.create(
name="testrule", account=account, action=MailRule.ACTION_MARK_READ
name="testrule",
account=account,
action=MailRule.ACTION_MARK_READ,
)

self.assertEqual(len(self.bogus_mailbox.messages), 3)
@@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_handle_mail_account_delete(self):

account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)

rule = MailRule.objects.create(
@@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase):

def test_handle_mail_account_flag(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)

rule = MailRule.objects.create(
@@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase):

def test_handle_mail_account_move(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="secret"
name="test",
imap_server="",
username="admin",
password="secret",
)

rule = MailRule.objects.create(
@@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase):

def test_error_login(self):
account = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="wrong"
name="test",
imap_server="",
username="admin",
password="wrong",
)

try:
@@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase):

def test_error_skip_account(self):
account_faulty = MailAccount.objects.create(
name="test", imap_server="", username="admin", password="wroasdng"
name="test",
imap_server="",
username="admin",
password="wroasdng",
)

account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_skip_rule(self):

account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase):
m.side_effect = get_correspondent_fake

account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_error_create_correspondent(self):

account = MailAccount.objects.create(
name="test2", imap_server="", username="admin", password="secret"
name="test2",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule",
@@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase):
def test_filters(self):

account = MailAccount.objects.create(
name="test3", imap_server="", username="admin", password="secret"
name="test3",
imap_server="",
username="admin",
password="secret",
)
rule = MailRule.objects.create(
name="testrule3",
@@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase):

class TestManagementCommand(TestCase):
@mock.patch(
"paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts"
"paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts",
)
def test_mail_fetcher(self, m):

@@ -644,10 +688,16 @@ class TestTasks(TestCase):
m.side_effect = lambda account: 6

MailAccount.objects.create(
name="A", imap_server="A", username="A", password="A"
name="A",
imap_server="A",
username="A",
password="A",
)
MailAccount.objects.create(
name="B", imap_server="A", username="A", password="A"
name="B",
imap_server="A",
username="A",
password="A",
)

result = tasks.process_mail_accounts()
@@ -663,7 +713,10 @@ class TestTasks(TestCase):
def test_single_accounts(self, m):

MailAccount.objects.create(
name="A", imap_server="A", username="A", password="A"
name="A",
imap_server="A",
username="A",
password="A",
)

tasks.process_mail_account("A")

@ -1,2 +1,5 @@
|
||||
# this is here so that django finds the checks.
|
||||
from .checks import *
|
||||
from .checks import check_default_language_available
|
||||
from .checks import get_tesseract_langs
|
||||
|
||||
__all__ = ["get_tesseract_langs", "check_default_language_available"]
|
||||
|
@ -1,5 +1,4 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
from paperless_tesseract.signals import tesseract_consumer_declaration
|
||||
|
||||
|
||||
|
@ -1,7 +1,9 @@
import subprocess

from django.conf import settings
from django.core.checks import Error, Warning, register
from django.core.checks import Error
from django.core.checks import register
from django.core.checks import Warning


def get_tesseract_langs():
@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs):
return [
Warning(
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
"This means that tesseract will fallback to english."
)
"This means that tesseract will fallback to english.",
),
]

specified_langs = settings.OCR_LANGUAGE.split("+")
@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs):
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE."
)
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
),
]

return []
@ -2,10 +2,11 @@ import json
import os
import re

from PIL import Image
from django.conf import settings

from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
from PIL import Image


class NoTextFoundException(Exception):
@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser):
"prefix": meta.REVERSE_NS[m.group(1)],
"key": m.group(2),
"value": value,
}
},
)
except Exception as e:
self.log(
@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser):

def get_thumbnail(self, document_path, mime_type, file_name=None):
return make_thumbnail_from_pdf(
self.archive_path or document_path, self.tempdir, self.logging_group
self.archive_path or document_path,
self.tempdir,
self.logging_group,
)

def is_image(self, mime_type):
@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser):
return None

from pdfminer.high_level import extract_text as pdfminer_extract_text
from pdfminer.pdftypes import PDFException

try:
stripped = post_process_text(pdfminer_extract_text(pdf_file))
@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser):
return None

def construct_ocrmypdf_parameters(
self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False
self,
input_file,
mime_type,
output_file,
sidecar_file,
safe_fallback=False,
):
ocrmypdf_args = {
"input_file": input_file,
@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
ocrmypdf_args["rotate_pages"] = True
ocrmypdf_args[
"rotate_pages_threshold"
] = settings.OCR_ROTATE_PAGES_THRESHOLD # NOQA: E501
] = settings.OCR_ROTATE_PAGES_THRESHOLD

if settings.OCR_PAGES > 0:
ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}"
@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
raise ParseError(
f"Cannot produce archive PDF for image {input_file}, "
f"no DPI information is present in this image and "
f"OCR_IMAGE_DPI is not set."
f"OCR_IMAGE_DPI is not set.",
)

if settings.OCR_USER_ARGS and not safe_fallback:
@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser):
sidecar_file = os.path.join(self.tempdir, "sidecar.txt")

args = self.construct_ocrmypdf_parameters(
document_path, mime_type, archive_path, sidecar_file
document_path,
mime_type,
archive_path,
sidecar_file,
)

try:
@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser):
# is bigger and blurry due to --force-ocr.

self.text = self.extract_text(
sidecar_file_fallback, archive_path_fallback
sidecar_file_fallback,
archive_path_fallback,
)

except Exception as e:
@ -1,8 +1,8 @@
from unittest import mock

from django.core.checks import ERROR
from django.test import TestCase, override_settings

from django.test import override_settings
from django.test import TestCase
from paperless_tesseract import check_default_language_available


@ -16,8 +16,8 @@ class TestChecks(TestCase):
self.assertEqual(len(msgs), 1)
self.assertTrue(
msgs[0].msg.startswith(
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"
)
"No OCR language has been specified with PAPERLESS_OCR_LANGUAGE",
),
)

@override_settings(OCR_LANGUAGE="ita")
@ -3,11 +3,13 @@ import uuid
from typing import ContextManager
from unittest import mock

from django.test import TestCase, override_settings

from documents.parsers import ParseError, run_convert
from django.test import override_settings
from django.test import TestCase
from documents.parsers import ParseError
from documents.parsers import run_convert
from documents.tests.utils import DirectoriesMixin
from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text
from paperless_tesseract.parsers import post_process_text
from paperless_tesseract.parsers import RasterisedDocumentParser

image_to_string_calls = []

@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase):
result,
actual_result,
"strip_exceess_whitespace({}) != '{}', but '{}'".format(
source, result, actual_result
source,
result,
actual_result,
),
)

@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_get_text_from_pdf(self):
parser = RasterisedDocumentParser(uuid.uuid4())
text = parser.extract_text(
None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf")
None,
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
)

self.assertContainsStrings(text.strip(), ["This is a test document."])
@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_thumbnail(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))

@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase):

parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))

def test_thumbnail_encrypted(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(thumb))

@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

parser.parse(
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
"application/pdf",
)

self.assertTrue(os.path.isfile(parser.archive_path))
@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)

self.assertTrue(os.path.isfile(parser.archive_path))
@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)

self.assertIsNone(parser.archive_path)
@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

parser.parse(
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
"application/pdf",
)

self.assertIsNone(parser.archive_path)
@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_with_form_error_notext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)

self.assertContainsStrings(
@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

parser.parse(
os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
"application/pdf",
)

self.assertContainsStrings(
@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase):
parser = RasterisedDocumentParser(None)

dpi = parser.calculate_a4_dpi(
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png")
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
)

self.assertEqual(dpi, 62)
@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase):

def f():
parser.parse(
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png"
os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
"image/png",
)

self.assertRaises(ParseError, f)
@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(parser.archive_path))

self.assertContainsStrings(
parser.get_text().lower(), ["this is a test document."]
parser.get_text().lower(),
["this is a test document."],
)

def test_multi_page(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_PAGES=2, OCR_MODE="skip")
def test_multi_page_pages_skip(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_PAGES=2, OCR_MODE="redo")
def test_multi_page_pages_redo(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_PAGES=2, OCR_MODE="force")
def test_multi_page_pages_force(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OOCR_MODE="skip")
def test_multi_page_analog_pages_skip(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_PAGES=2, OCR_MODE="redo")
def test_multi_page_analog_pages_redo(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase):
def test_multi_page_analog_pages_force(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase):
def test_skip_noarchive_withtext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_MODE="skip_noarchive")
def test_skip_noarchive_notext(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
parser.get_text().lower(), ["page 1", "page 2", "page 3"]
parser.get_text().lower(),
["page 1", "page 2", "page 3"],
)

@override_settings(OCR_MODE="skip")
def test_multi_page_mixed(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
"application/pdf",
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(
@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase):
def test_multi_page_mixed_no_archive(self):
parser = RasterisedDocumentParser(None)
parser.parse(
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
"application/pdf",
)
self.assertIsNone(parser.archive_path)
self.assertContainsStrings(
parser.get_text().lower(), ["page 4", "page 5", "page 6"]
parser.get_text().lower(),
["page 4", "page 5", "page 6"],
)

@override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
@ -1,5 +1,4 @@
from django.apps import AppConfig

from paperless_text.signals import text_consumer_declaration


@ -1,9 +1,10 @@
import os

from PIL import ImageDraw, ImageFont, Image
from django.conf import settings

from documents.parsers import DocumentParser
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont


class TextDocumentParser(DocumentParser):
@ -1,7 +1,6 @@
import os

from django.test import TestCase

from documents.tests.utils import DirectoriesMixin
from paperless_text.parsers import TextDocumentParser

@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase):

# just make sure that it does not crash
f = parser.get_thumbnail(
os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
"text/plain",
)
self.assertTrue(os.path.isfile(f))

@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
parser = TextDocumentParser(None)

parser.parse(
os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
"text/plain",
)

self.assertEqual(parser.get_text(), "This is a test file.\n")
@ -1,10 +1,11 @@
import os
import requests

import dateutil.parser

import requests
from django.conf import settings

from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
from documents.parsers import DocumentParser
from documents.parsers import make_thumbnail_from_pdf
from documents.parsers import ParseError
from tika import parser


@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser):
self.archive_path = self.convert_to_pdf(document_path, file_name)

return make_thumbnail_from_pdf(
self.archive_path, self.tempdir, self.logging_group
self.archive_path,
self.tempdir,
self.logging_group,
)

def extract_metadata(self, document_path, mime_type):
@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser):
except Exception as err:
raise ParseError(
f"Could not parse {document_path} with tika server at "
f"{tika_server}: {err}"
f"{tika_server}: {err}",
)

self.text = parsed["content"].strip()
@ -74,22 +77,23 @@ class TikaDocumentParser(DocumentParser):
url = gotenberg_server + "/forms/libreoffice/convert"

self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
files = {
"files": (
file_name or os.path.basename(document_path),
open(document_path, "rb"),
)
}
headers = {}
with open(document_path, "rb") as document_handle:
files = {
"files": (
file_name or os.path.basename(document_path),
document_handle,
),
}
headers = {}

try:
response = requests.post(url, files=files, headers=headers)
response.raise_for_status()  # ensure we notice bad responses
except Exception as err:
raise ParseError(f"Error while converting document to PDF: {err}")
try:
response = requests.post(url, files=files, headers=headers)
response.raise_for_status()  # ensure we notice bad responses
except Exception as err:
raise ParseError(f"Error while converting document to PDF: {err}")

file = open(pdf_path, "wb")
file.write(response.content)
file.close()
with open(pdf_path, "wb") as file:
file.write(response.content)
file.close()

return pdf_path
@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
"weight": 10,
"mime_types": {
"application/msword": ".doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # noqa: E501
"application/vnd.ms-excel": ".xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # noqa: E501
"application/vnd.ms-powerpoint": ".ppt",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # NOQA: E501
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # noqa: E501
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # noqa: E501
"application/vnd.oasis.opendocument.presentation": ".odp",
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
"application/vnd.oasis.opendocument.text": ".odt",
@ -4,9 +4,8 @@ from pathlib import Path
from unittest import mock

from django.test import TestCase
from requests import Response

from paperless_tika.parsers import TikaDocumentParser
from requests import Response


class TestTikaParser(TestCase):
@ -42,14 +41,15 @@ class TestTikaParser(TestCase):
@mock.patch("paperless_tika.parsers.parser.from_file")
def test_metadata(self, from_file):
from_file.return_value = {
"metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}
"metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"},
}

file = os.path.join(self.parser.tempdir, "input.odt")
Path(file).touch()

metadata = self.parser.extract_metadata(
file, "application/vnd.oasis.opendocument.text"
file,
"application/vnd.oasis.opendocument.text",
)

self.assertTrue("Creation-Date" in [m["key"] for m in metadata])