Merge remote-tracking branch 'paperless/dev' into feature-consume-eml

@@ -42,6 +42,7 @@ class DocumentAdmin(admin.ModelAdmin):
        "checksum",
        "archive_filename",
        "archive_checksum",
        "original_filename",
    )

    list_display_links = ("title",)

@@ -3,12 +3,16 @@ import os
import shutil
import tempfile
from functools import lru_cache
from typing import List  # for type hinting. Can be removed, if only Python >3.8 is used
from typing import List
from typing import Optional
from typing import Tuple

import magic
from django.conf import settings
from pdf2image import convert_from_path
from pikepdf import Page
from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image
from PIL import ImageSequence
from pyzbar import pyzbar
@@ -16,6 +20,10 @@ from pyzbar import pyzbar
logger = logging.getLogger("paperless.barcodes")


class BarcodeImageFormatError(Exception):
    pass


@lru_cache(maxsize=8)
def supported_file_type(mime_type) -> bool:
    """
@@ -31,7 +39,7 @@ def supported_file_type(mime_type) -> bool:
    return mime_type in supported_mime


def barcode_reader(image) -> List[str]:
def barcode_reader(image: Image) -> List[str]:
    """
    Read any barcodes contained in image
    Returns a list containing all found barcodes
@@ -98,21 +106,66 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
    return newpath


def scan_file_for_separating_barcodes(filepath: str) -> List[int]:
def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
    """
    Scan the provided pdf file for page separating barcodes
    Returns a list of pagenumbers, which separate the file
    Returns a PDF filepath and a list of pagenumbers,
    which separate the file into new files
    """

    def _pikepdf_barcode_scan(pdf_filepath: str):
        with Pdf.open(pdf_filepath) as pdf:
            for page_num, page in enumerate(pdf.pages):
                for image_key in page.images:
                    pdfimage = PdfImage(page.images[image_key])

                    if "/CCITTFaxDecode" in pdfimage.filters:
                        raise BarcodeImageFormatError()

                    # Not all images can be transcoded to a PIL image, which
                    # is what pyzbar expects to receive
                    pillow_img = pdfimage.as_pil_image()

                    detected_barcodes = barcode_reader(pillow_img)

                    if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
                        separator_page_numbers.append(page_num)

    def _pdf2image_barcode_scan(pdf_filepath: str):
        # use a temporary directory in case the file is too big to handle in memory
        with tempfile.TemporaryDirectory() as path:
            pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
            for current_page_number, page in enumerate(pages_from_path):
                current_barcodes = barcode_reader(page)
                if settings.CONSUMER_BARCODE_STRING in current_barcodes:
                    separator_page_numbers.append(current_page_number)

    separator_page_numbers = []
    separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
    # use a temporary directory in case the file is too big to handle in memory
    with tempfile.TemporaryDirectory() as path:
        pages_from_path = convert_from_path(filepath, output_folder=path)
        for current_page_number, page in enumerate(pages_from_path):
            current_barcodes = barcode_reader(page)
            if separator_barcode in current_barcodes:
                separator_page_numbers.append(current_page_number)
    return separator_page_numbers
    pdf_filepath = None

    mime_type = get_file_mime_type(filepath)

    if supported_file_type(mime_type):
        pdf_filepath = filepath
        if mime_type == "image/tiff":
            pdf_filepath = convert_from_tiff_to_pdf(filepath)

        try:
            _pikepdf_barcode_scan(pdf_filepath)
        except Exception as e:

            logger.warning(
                f"Exception using pikepdf for barcodes, falling back to pdf2image: {e}",
            )
            # Reset this in case pikepdf got part way through
            separator_page_numbers = []
            _pdf2image_barcode_scan(pdf_filepath)

    else:
        logger.warning(
            f"Unsupported file format for barcode reader: {str(mime_type)}",
        )
    return pdf_filepath, separator_page_numbers


def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
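The signature change above means callers now get back both the effective PDF path (which differs from the input when a TIFF was converted) and the separator pages, with None signalling an unsupported file type. A minimal sketch of how a caller might consume the new tuple (everything except the two functions from this file is an illustrative placeholder):

    pdf_path, separator_pages = scan_file_for_separating_barcodes(original_path)
    if pdf_path is None:
        # unsupported mime type, nothing was scanned
        consume_unchanged(original_path)  # hypothetical fallback handler
    elif separator_pages:
        for part in separate_pages(pdf_path, separator_pages):
            enqueue(part)  # hypothetical: hand each split file to the consumer
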
@@ -122,47 +175,56 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
    Returns a list of (temporary) filepaths to consume.
    These will need to be deleted later.
    """

    document_paths = []

    if not pages_to_split_on:
        logger.warning("No pages to split on!")
        return document_paths

    os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
    tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
    fname = os.path.splitext(os.path.basename(filepath))[0]
    pdf = Pdf.open(filepath)
    document_paths = []
    logger.debug(f"Temp dir is {str(tempdir)}")
    if not pages_to_split_on:
        logger.warning("No pages to split on!")
    else:
        # go from the first page to the first separator page

    # A list of documents, ie a list of lists of pages
    documents: List[List[Page]] = []
    # A single document, ie a list of pages
    document: List[Page] = []

    for idx, page in enumerate(pdf.pages):
        # Keep building the new PDF as long as it is not a
        # separator index
        if idx not in pages_to_split_on:
            document.append(page)
            # Make sure to append the very last document to the documents
            if idx == (len(pdf.pages) - 1):
                documents.append(document)
                document = []
        else:
            # This is a split index, save the current PDF pages, and restart
            # a new destination page listing
            logger.debug(f"Starting new document at idx {idx}")
            documents.append(document)
            document = []

    documents = [x for x in documents if len(x)]

    logger.debug(f"Split into {len(documents)} new documents")

    # Write the new documents out
    for doc_idx, document in enumerate(documents):
        dst = Pdf.new()
        for n, page in enumerate(pdf.pages):
            if n < pages_to_split_on[0]:
                dst.pages.append(page)
        output_filename = f"{fname}_document_0.pdf"
        dst.pages.extend(document)

        output_filename = f"{fname}_document_{doc_idx}.pdf"

        logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
        savepath = os.path.join(tempdir, output_filename)
        with open(savepath, "wb") as out:
            dst.save(out)
        document_paths = [savepath]
        document_paths.append(savepath)

    # iterate through the rest of the document
    for count, page_number in enumerate(pages_to_split_on):
        logger.debug(f"Count: {str(count)} page_number: {str(page_number)}")
        dst = Pdf.new()
        try:
            next_page = pages_to_split_on[count + 1]
        except IndexError:
            next_page = len(pdf.pages)
        # skip the first page_number. This contains the barcode page
        for page in range(page_number + 1, next_page):
            logger.debug(
                f"page_number: {str(page_number)} next_page: {str(next_page)}",
            )
            dst.pages.append(pdf.pages[page])
        output_filename = f"{fname}_document_{str(count + 1)}.pdf"
        logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages")
        savepath = os.path.join(tempdir, output_filename)
        with open(savepath, "wb") as out:
            dst.save(out)
        document_paths.append(savepath)
    logger.debug(f"Temp files are {str(document_paths)}")
    return document_paths
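Worked example of the rewritten grouping loop: for a six-page PDF (0-indexed pages 0-5) with separator barcodes on pages 2 and 4, pages_to_split_on is [2, 4]; the loop collects [0, 1], drops separator page 2, collects [3], drops page 4, and collects [5], so three output files are written. The grouping logic can be sanity-checked in isolation (illustrative only, using ints in place of pikepdf Page objects):

    pages_to_split_on = [2, 4]
    documents, document = [], []
    for idx in range(6):  # stand-in for enumerate(pdf.pages)
        if idx not in pages_to_split_on:
            document.append(idx)
            if idx == 5:  # last page: flush the final document
                documents.append(document)
        else:
            documents.append(document)
            document = []
    assert [d for d in documents if d] == [[0, 1], [3], [5]]
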
@@ -1,11 +1,12 @@
import itertools

from django.db.models import Q
from django_q.tasks import async_task
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.tasks import bulk_update_documents
from documents.tasks import update_document_archive_file


def set_correspondent(doc_ids, correspondent):
@@ -16,7 +17,7 @@ def set_correspondent(doc_ids, correspondent):
    affected_docs = [doc.id for doc in qs]
    qs.update(correspondent=correspondent)

    async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"

@@ -31,8 +32,7 @@ def set_storage_path(doc_ids, storage_path):
    affected_docs = [doc.id for doc in qs]
    qs.update(storage_path=storage_path)

    async_task(
        "documents.tasks.bulk_update_documents",
    bulk_update_documents.delay(
        document_ids=affected_docs,
    )

@@ -47,7 +47,7 @@ def set_document_type(doc_ids, document_type):
    affected_docs = [doc.id for doc in qs]
    qs.update(document_type=document_type)

    async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"

@@ -63,7 +63,7 @@ def add_tag(doc_ids, tag):
        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
    )

    async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"

@@ -79,7 +79,7 @@ def remove_tag(doc_ids, tag):
        Q(document_id__in=affected_docs) & Q(tag_id=tag),
    ).delete()

    async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"

@@ -103,7 +103,7 @@ def modify_tags(doc_ids, add_tags, remove_tags):
        ignore_conflicts=True,
    )

    async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
    bulk_update_documents.delay(document_ids=affected_docs)

    return "OK"

@@ -122,6 +122,9 @@ def delete(doc_ids):

def redo_ocr(doc_ids):

    async_task("documents.tasks.redo_ocr", document_ids=doc_ids)
    for document_id in doc_ids:
        update_document_archive_file.delay(
            document_id=document_id,
        )

    return "OK"
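Every change in this file follows the same django-q to Celery migration pattern: the string-based async_task("documents.tasks.X", ...) call becomes an import of the task plus X.delay(...). A minimal sketch of the receiving side, assuming the tasks are declared roughly like this in documents/tasks.py (the decorator details are illustrative, not copied from the PR):

    from celery import shared_task

    @shared_task
    def bulk_update_documents(document_ids):
        ...  # re-index the affected documents

    # callers then enqueue work with keyword arguments, exactly as above:
    # bulk_update_documents.delay(document_ids=affected_docs)

Besides being easier to type-check, the direct import means a typo in the task path fails at import time instead of silently at enqueue time.
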
@@ -5,12 +5,15 @@ import pickle
import re
import shutil
import warnings
from typing import List
from typing import Optional

from django.conf import settings
from documents.models import Document
from documents.models import MatchingModel

logger = logging.getLogger("paperless.classifier")


class IncompatibleClassifierVersionError(Exception):
    pass
@@ -20,15 +23,6 @@ class ClassifierModelCorruptError(Exception):
    pass


logger = logging.getLogger("paperless.classifier")


def preprocess_content(content: str) -> str:
    content = content.lower().strip()
    content = re.sub(r"\s+", " ", content)
    return content


def load_classifier() -> Optional["DocumentClassifier"]:
    if not os.path.isfile(settings.MODEL_FILE):
        logger.debug(
@@ -81,6 +75,9 @@ class DocumentClassifier:
        self.document_type_classifier = None
        self.storage_path_classifier = None

        self._stemmer = None
        self._stop_words = None

    def load(self):
        # Catch warnings for processing
        with warnings.catch_warnings(record=True) as w:
@@ -101,8 +98,8 @@ class DocumentClassifier:
                self.correspondent_classifier = pickle.load(f)
                self.document_type_classifier = pickle.load(f)
                self.storage_path_classifier = pickle.load(f)
            except Exception:
                raise ClassifierModelCorruptError()
            except Exception as err:
                raise ClassifierModelCorruptError() from err

        # Check for the warning about unpickling from differing versions
        # and consider it incompatible
@@ -139,11 +136,11 @@ class DocumentClassifier:

    def train(self):

        data = list()
        labels_tags = list()
        labels_correspondent = list()
        labels_document_type = list()
        labels_storage_path = list()
        data = []
        labels_tags = []
        labels_correspondent = []
        labels_document_type = []
        labels_storage_path = []

        # Step 1: Extract and preprocess training data from the database.
        logger.debug("Gathering data from database...")
@@ -151,7 +148,7 @@ class DocumentClassifier:
        for doc in Document.objects.order_by("pk").exclude(
            tags__is_inbox_tag=True,
        ):
            preprocessed_content = preprocess_content(doc.content)
            preprocessed_content = self.preprocess_content(doc.content)
            m.update(preprocessed_content.encode("utf-8"))
            data.append(preprocessed_content)

@@ -231,6 +228,11 @@ class DocumentClassifier:
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)

        # See the notes here:
        # https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html # noqa: 501
        # This attribute isn't needed to function and can be large
        self.data_vectorizer.stop_words_ = None

        # Step 3: train the classifiers
        if num_tags > 0:
            logger.debug("Training tags classifier...")
@@ -296,9 +298,52 @@ class DocumentClassifier:

        return True

    def preprocess_content(self, content: str) -> str:
        """
        Process the contents of a document, distilling it down into
        words which are meaningful to the content
        """

        # Lower case the document
        content = content.lower().strip()
        # Reduce spaces
        content = re.sub(r"\s+", " ", content)
        # Get only the letters
        content = re.sub(r"[^\w\s]", " ", content)

        # If the NLTK language is supported, do further processing
        if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:

            import nltk

            from nltk.tokenize import word_tokenize
            from nltk.corpus import stopwords
            from nltk.stem import SnowballStemmer

            # Not really hacky, since it isn't private and is documented, but
            # set the search path for NLTK data to the single location it should be in
            nltk.data.path = [settings.NLTK_DIR]

            # Do some one time setup
            if self._stemmer is None:
                self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
            if self._stop_words is None:
                self._stop_words = set(stopwords.words(settings.NLTK_LANGUAGE))

            # Tokenize
            words: List[str] = word_tokenize(content, language=settings.NLTK_LANGUAGE)
            # Remove stop words
            meaningful_words = [w for w in words if w not in self._stop_words]
            # Stem words
            meaningful_words = [self._stemmer.stem(w) for w in meaningful_words]

            return " ".join(meaningful_words)

        return content

    def predict_correspondent(self, content):
        if self.correspondent_classifier:
            X = self.data_vectorizer.transform([preprocess_content(content)])
            X = self.data_vectorizer.transform([self.preprocess_content(content)])
            correspondent_id = self.correspondent_classifier.predict(X)
            if correspondent_id != -1:
                return correspondent_id
@@ -309,7 +354,7 @@ class DocumentClassifier:

    def predict_document_type(self, content):
        if self.document_type_classifier:
            X = self.data_vectorizer.transform([preprocess_content(content)])
            X = self.data_vectorizer.transform([self.preprocess_content(content)])
            document_type_id = self.document_type_classifier.predict(X)
            if document_type_id != -1:
                return document_type_id
@@ -322,7 +367,7 @@ class DocumentClassifier:
        from sklearn.utils.multiclass import type_of_target

        if self.tags_classifier:
            X = self.data_vectorizer.transform([preprocess_content(content)])
            X = self.data_vectorizer.transform([self.preprocess_content(content)])
            y = self.tags_classifier.predict(X)
            tags_ids = self.tags_binarizer.inverse_transform(y)[0]
            if type_of_target(y).startswith("multilabel"):
@@ -341,7 +386,7 @@ class DocumentClassifier:

    def predict_storage_path(self, content):
        if self.storage_path_classifier:
            X = self.data_vectorizer.transform([preprocess_content(content)])
            X = self.data_vectorizer.transform([self.preprocess_content(content)])
            storage_path_id = self.storage_path_classifier.predict(X)
            if storage_path_id != -1:
                return storage_path_id
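A rough illustration of what the new preprocess_content method produces when the NLTK branch is active (assuming NLTK_ENABLED is true and NLTK_LANGUAGE = "english"; exact tokens depend on the installed NLTK data):

    classifier = DocumentClassifier()
    text = "The invoices were paid by the customers."
    print(classifier.preprocess_content(text))
    # stop words ("the", "were", "by") are dropped and the rest stemmed,
    # yielding approximately: "invoic paid custom"

Caching the stemmer and stop-word set on the instance (self._stemmer, self._stop_words) avoids rebuilding them for every document during training.
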
@@ -78,10 +78,16 @@ class Consumer(LoggingMixin):
            {"type": "status_update", "data": payload},
        )

    def _fail(self, message, log_message=None, exc_info=None):
    def _fail(
        self,
        message,
        log_message=None,
        exc_info=None,
        exception: Optional[Exception] = None,
    ):
        self._send_progress(100, 100, "FAILED", message)
        self.log("error", log_message or message, exc_info=exc_info)
        raise ConsumerError(f"{self.filename}: {log_message or message}")
        raise ConsumerError(f"{self.filename}: {log_message or message}") from exception

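The new exception parameter exists so the ConsumerError can be chained to its cause. A minimal illustration of what raise ... from gives you (generic Python, not project code):

    try:
        raise ValueError("parser failed")
    except ValueError as e:
        raise RuntimeError("consuming failed") from e
    # The traceback now shows both errors, joined by
    # "The above exception was the direct cause of the following exception",
    # and the cause stays inspectable as the new error's __cause__.
    # Note: raise ... from None (the default here) instead suppresses
    # implicit context chaining (PEP 409).
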
    def __init__(self):
        super().__init__()
@@ -105,14 +111,16 @@ class Consumer(LoggingMixin):
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        if Document.objects.filter(
        existing_doc = Document.objects.filter(
            Q(checksum=checksum) | Q(archive_checksum=checksum),
        ).exists():
        )
        if existing_doc.exists():
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
            self._fail(
                MESSAGE_DOCUMENT_ALREADY_EXISTS,
                f"Not consuming {self.filename}: It is a duplicate.",
                f"Not consuming {self.filename}: It is a duplicate of"
                f" {existing_doc.get().title} (#{existing_doc.get().pk})",
            )

    def pre_check_directories(self):
@@ -134,13 +142,25 @@ class Consumer(LoggingMixin):

        self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")

        filepath_arg = os.path.normpath(self.path)

        script_env = os.environ.copy()
        script_env["DOCUMENT_SOURCE_PATH"] = filepath_arg

        try:
            Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
            Popen(
                (
                    settings.PRE_CONSUME_SCRIPT,
                    filepath_arg,
                ),
                env=script_env,
            ).wait()
        except Exception as e:
            self._fail(
                MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
                f"Error while executing pre-consume script: {e}",
                exc_info=True,
                exception=e,
            )

    def run_post_consume_script(self, document):
@@ -159,6 +179,34 @@ class Consumer(LoggingMixin):
            f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
        )

        script_env = os.environ.copy()

        script_env["DOCUMENT_ID"] = str(document.pk)
        script_env["DOCUMENT_CREATED"] = str(document.created)
        script_env["DOCUMENT_MODIFIED"] = str(document.modified)
        script_env["DOCUMENT_ADDED"] = str(document.added)
        script_env["DOCUMENT_FILE_NAME"] = document.get_public_filename()
        script_env["DOCUMENT_SOURCE_PATH"] = os.path.normpath(document.source_path)
        script_env["DOCUMENT_ARCHIVE_PATH"] = os.path.normpath(
            str(document.archive_path),
        )
        script_env["DOCUMENT_THUMBNAIL_PATH"] = os.path.normpath(
            document.thumbnail_path,
        )
        script_env["DOCUMENT_DOWNLOAD_URL"] = reverse(
            "document-download",
            kwargs={"pk": document.pk},
        )
        script_env["DOCUMENT_THUMBNAIL_URL"] = reverse(
            "document-thumb",
            kwargs={"pk": document.pk},
        )
        script_env["DOCUMENT_CORRESPONDENT"] = str(document.correspondent)
        script_env["DOCUMENT_TAGS"] = str(
            ",".join(document.tags.all().values_list("name", flat=True)),
        )
        script_env["DOCUMENT_ORIGINAL_FILENAME"] = str(document.original_filename)

        try:
            Popen(
                (
@@ -172,12 +220,14 @@ class Consumer(LoggingMixin):
                    str(document.correspondent),
                    str(",".join(document.tags.all().values_list("name", flat=True))),
                ),
                env=script_env,
            ).wait()
        except Exception as e:
            self._fail(
                MESSAGE_POST_CONSUME_SCRIPT_ERROR,
                f"Error while executing post-consume script: {e}",
                exc_info=True,
                exception=e,
            )

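With the environment populated as above, a post-consume script no longer has to rely on positional arguments. A sketch of a script reading the new variables (hypothetical script, shown in Python; any executable works):

    #!/usr/bin/env python3
    import os

    doc_id = os.environ["DOCUMENT_ID"]
    source = os.environ["DOCUMENT_SOURCE_PATH"]
    tags = os.environ.get("DOCUMENT_TAGS", "").split(",")
    print(f"Post-processing document {doc_id} at {source}, tags: {tags}")

The positional arguments are still passed in the Popen call above, so existing scripts keep working.
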
    def try_consume_file(
@@ -292,6 +342,7 @@ class Consumer(LoggingMixin):
                str(e),
                f"Error while consuming document {self.filename}: {e}",
                exc_info=True,
                exception=e,
            )

        # Prepare the document classifier.
@@ -376,6 +427,7 @@ class Consumer(LoggingMixin):
                f"The following error occurred while consuming "
                f"{self.filename}: {e}",
                exc_info=True,
                exception=e,
            )
        finally:
            document_parser.cleanup()
@@ -426,6 +478,7 @@ class Consumer(LoggingMixin):
            created=create_date,
            modified=create_date,
            storage_type=storage_type,
            original_filename=self.filename,
        )

        self.apply_overrides(document)

@@ -1,85 +1,18 @@
import hashlib
import logging
import multiprocessing
import os
import shutil
import uuid

import tqdm
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from documents.models import Document
from filelock import FileLock

from ... import index
from ...file_handling import create_source_path_directory
from ...file_handling import generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
from documents.tasks import update_document_archive_file


logger = logging.getLogger("paperless.management.archiver")


def handle_document(document_id):
    document = Document.objects.get(id=document_id)

    mime_type = document.mime_type

    parser_class = get_parser_class_for_mime_type(mime_type)

    if not parser_class:
        logger.error(
            f"No parser found for mime type {mime_type}, cannot "
            f"archive document {document} (ID: {document_id})",
        )
        return

    parser = parser_class(logging_group=uuid.uuid4())

    try:
        parser.parse(document.source_path, mime_type, document.get_public_filename())

        thumbnail = parser.get_thumbnail(
            document.source_path,
            mime_type,
            document.get_public_filename(),
        )

        if parser.get_archive_path():
            with transaction.atomic():
                with open(parser.get_archive_path(), "rb") as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
                # I'm going to save first so that in case the file move
                # fails, the database is rolled back.
                # We also don't use save() since that triggers the filehandling
                # logic, and we don't want that yet (file not yet in place)
                document.archive_filename = generate_unique_filename(
                    document,
                    archive_filename=True,
                )
                Document.objects.filter(pk=document.pk).update(
                    archive_checksum=checksum,
                    content=parser.get_text(),
                    archive_filename=document.archive_filename,
                )
            with FileLock(settings.MEDIA_LOCK):
                create_source_path_directory(document.archive_path)
                shutil.move(parser.get_archive_path(), document.archive_path)
                shutil.move(thumbnail, document.thumbnail_path)

        with index.open_index_writer() as writer:
            index.update_document(writer, document)

    except Exception:
        logger.exception(
            f"Error while parsing document {document} " f"(ID: {document_id})",
        )
    finally:
        parser.cleanup()


class Command(BaseCommand):

    help = """
@@ -146,7 +79,7 @@ class Command(BaseCommand):
        with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
            list(
                tqdm.tqdm(
                    pool.imap_unordered(handle_document, document_ids),
                    pool.imap_unordered(update_document_archive_file, document_ids),
                    total=len(document_ids),
                    disable=options["no_progress_bar"],
                ),

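One detail worth noting in the hunk above: under standard Celery semantics a task object is still an ordinary callable, so the worker pool can run update_document_archive_file synchronously via imap_unordered instead of queueing it. The distinction, sketched (the document id is illustrative):

    update_document_archive_file(document_id=42)        # runs in-process, right now
    update_document_archive_file.delay(document_id=42)  # serialized and run by a Celery worker

This lets the management command and the task queue share one implementation, replacing the duplicated handle_document above.
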
@@ -2,6 +2,7 @@ import logging
import os
from pathlib import Path
from pathlib import PurePath
from threading import Event
from threading import Thread
from time import monotonic
from time import sleep
@@ -10,9 +11,9 @@ from typing import Final
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django_q.tasks import async_task
from documents.models import Tag
from documents.parsers import is_file_ext_supported
from documents.tasks import consume_file
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver

@@ -91,11 +92,9 @@ def _consume(filepath):

    try:
        logger.info(f"Adding {filepath} to the task queue.")
        async_task(
            "documents.tasks.consume_file",
        consume_file.delay(
            filepath,
            override_tag_ids=tag_ids if tag_ids else None,
            task_name=os.path.basename(filepath)[:100],
        )
    except Exception:
        # Catch all so that the consumer won't crash.
@@ -148,9 +147,11 @@ class Command(BaseCommand):
    """

    # This is here primarily for the tests and is irrelevant in production.
    stop_flag = False

    observer = None
    stop_flag = Event()
    # Also only for testing, configures in one place the timeout used before checking
    # the stop flag
    testing_timeout_s: Final[float] = 0.5
    testing_timeout_ms: Final[float] = testing_timeout_s * 1000.0

    def add_arguments(self, parser):
        parser.add_argument(
@@ -161,6 +162,16 @@ class Command(BaseCommand):
        )
        parser.add_argument("--oneshot", action="store_true", help="Run only once.")

        # Only use during unit testing, will configure a timeout
        # Leaving it unset or false and the consumer will exit when it
        # receives SIGINT
        parser.add_argument(
            "--testing",
            action="store_true",
            help="Flag used only for unit testing",
            default=False,
        )

    def handle(self, *args, **options):
        directory = options["directory"]
        recursive = settings.CONSUMER_RECURSIVE
@@ -186,29 +197,40 @@ class Command(BaseCommand):
            return

        if settings.CONSUMER_POLLING == 0 and INotify:
            self.handle_inotify(directory, recursive)
            self.handle_inotify(directory, recursive, options["testing"])
        else:
            self.handle_polling(directory, recursive)
            self.handle_polling(directory, recursive, options["testing"])

        logger.debug("Consumer exiting.")

    def handle_polling(self, directory, recursive):
    def handle_polling(self, directory, recursive, is_testing: bool):
        logger.info(f"Polling directory for changes: {directory}")
        self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        self.observer.schedule(Handler(), directory, recursive=recursive)
        self.observer.start()
        try:
            while self.observer.is_alive():
                self.observer.join(1)
                if self.stop_flag:
                    self.observer.stop()
        except KeyboardInterrupt:
            self.observer.stop()
        self.observer.join()

    def handle_inotify(self, directory, recursive):
        timeout = None
        if is_testing:
            timeout = self.testing_timeout_s
            logger.debug(f"Configuring timeout to {timeout}s")

        observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        observer.schedule(Handler(), directory, recursive=recursive)
        observer.start()
        try:
            while observer.is_alive():
                observer.join(timeout)
                if self.stop_flag.is_set():
                    observer.stop()
        except KeyboardInterrupt:
            observer.stop()
        observer.join()

    def handle_inotify(self, directory, recursive, is_testing: bool):
        logger.info(f"Using inotify to watch directory for changes: {directory}")

        timeout = None
        if is_testing:
            timeout = self.testing_timeout_ms
            logger.debug(f"Configuring timeout to {timeout}ms")

        inotify = INotify()
        inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
        if recursive:
@@ -216,14 +238,15 @@ class Command(BaseCommand):
        else:
            descriptor = inotify.add_watch(directory, inotify_flags)

        try:
            inotify_debounce: Final[float] = settings.CONSUMER_INOTIFY_DELAY

        inotify_debounce: Final[float] = settings.CONSUMER_INOTIFY_DELAY
        notified_files = {}
        finished = False

        while not self.stop_flag:
            notified_files = {}

            for event in inotify.read(timeout=1000):
        while not finished:
            try:
                for event in inotify.read(timeout=timeout):
                    if recursive:
                        path = inotify.get_path(event.wd)
                    else:
@@ -256,8 +279,22 @@ class Command(BaseCommand):
                # These files are still waiting to hit the timeout
                notified_files = still_waiting

        except KeyboardInterrupt:
            pass
                # If files are waiting, need to exit read() to check them
                # Otherwise, go back to infinite sleep time, but only if not testing
                if len(notified_files) > 0:
                    timeout = inotify_debounce
                elif is_testing:
                    timeout = self.testing_timeout_ms
                else:
                    timeout = None

                if self.stop_flag.is_set():
                    logger.debug("Finishing because event is set")
                    finished = True

            except KeyboardInterrupt:
                logger.info("Received SIGINT, stopping inotify")
                finished = True

        inotify.rm_watch(descriptor)
        inotify.close()

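The stop_flag rewrite from a class-level bool to threading.Event is what lets tests stop either watch loop without races. The core pattern in isolation (standard library only; the sleep stands in for observer.join() / inotify.read()):

    from threading import Event, Thread
    import time

    stop_flag = Event()

    def loop():
        while not stop_flag.is_set():
            time.sleep(0.1)  # wake periodically, then re-check the flag

    t = Thread(target=loop)
    t.start()
    stop_flag.set()  # e.g. called from a test's teardown
    t.join()

Unlike a plain attribute, Event.set() is thread-safe, and the configurable read timeout bounds how long the loop can sleep before it rechecks the flag.
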
@@ -12,11 +12,13 @@ from django.core import serializers
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from documents.models import Comment
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.settings import EXPORTER_ARCHIVE_NAME
@@ -113,8 +115,8 @@ class Command(BaseCommand):
            map(lambda f: os.path.abspath(os.path.join(root, f)), files),
        )

        # 2. Create manifest, containing all correspondents, types, tags,
        # documents and ui_settings
        # 2. Create manifest, containing all correspondents, types, tags, storage paths
        # comments, documents and ui_settings
        with transaction.atomic():
            manifest = json.loads(
                serializers.serialize("json", Correspondent.objects.all()),
@@ -126,6 +128,14 @@ class Command(BaseCommand):
                serializers.serialize("json", DocumentType.objects.all()),
            )

            manifest += json.loads(
                serializers.serialize("json", StoragePath.objects.all()),
            )

            manifest += json.loads(
                serializers.serialize("json", Comment.objects.all()),
            )

            documents = Document.objects.order_by("id")
            document_map = {d.pk: d for d in documents}
            document_manifest = json.loads(serializers.serialize("json", documents))

@@ -3,6 +3,7 @@ import logging
import os
import shutil
from contextlib import contextmanager
from pathlib import Path

import tqdm
from django.conf import settings
@@ -14,6 +15,7 @@ from django.core.serializers.base import DeserializationError
from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from documents.models import Document
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
@@ -192,7 +194,7 @@ class Command(BaseCommand):
            document_path = os.path.join(self.source, doc_file)

            thumb_file = record[EXPORTER_THUMBNAIL_NAME]
            thumbnail_path = os.path.join(self.source, thumb_file)
            thumbnail_path = Path(os.path.join(self.source, thumb_file)).resolve()

            if EXPORTER_ARCHIVE_NAME in record:
                archive_file = record[EXPORTER_ARCHIVE_NAME]
@@ -209,7 +211,20 @@ class Command(BaseCommand):
                create_source_path_directory(document.source_path)

                shutil.copy2(document_path, document.source_path)
                shutil.copy2(thumbnail_path, document.thumbnail_path)

                if thumbnail_path.suffix in {".png", ".PNG"}:
                    run_convert(
                        density=300,
                        scale="500x5000>",
                        alpha="remove",
                        strip=True,
                        trim=False,
                        auto_orient=True,
                        input_file=f"{thumbnail_path}[0]",
                        output_file=str(document.thumbnail_path),
                    )
                else:
                    shutil.copy2(thumbnail_path, document.thumbnail_path)
                if archive_path:
                    create_source_path_directory(document.archive_path)
                    # TODO: this assumes that the export is valid and

@@ -1,35 +0,0 @@
import tqdm
from django.core.management.base import BaseCommand
from documents.tasks import redo_ocr


class Command(BaseCommand):

    help = """
        This will rename all documents to match the latest filename format.
    """.replace(
        "    ",
        "",
    )

    def add_arguments(self, parser):

        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
            help="If set, the progress bar will not be shown",
        )

        parser.add_argument(
            "documents",
            nargs="+",
            help="Document primary keys for re-processing OCR on",
        )

    def handle(self, *args, **options):
        doc_pks = tqdm.tqdm(
            options["documents"],
            disable=options["no_progress_bar"],
        )
        redo_ocr(doc_pks)
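This management command is deleted rather than ported: with django-q gone, re-running OCR goes through the same Celery path as everything else, for example via the bulk-edit helper shown earlier in this diff (document IDs here are illustrative):

    from documents import bulk_edit
    bulk_edit.redo_ocr([10, 11, 12])  # queues update_document_archive_file.delay() per document
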
@@ -7,6 +7,7 @@ from documents.models import Document

from ...signals.handlers import set_correspondent
from ...signals.handlers import set_document_type
from ...signals.handlers import set_storage_path
from ...signals.handlers import set_tags


@@ -29,6 +30,7 @@ class Command(BaseCommand):
        parser.add_argument("-c", "--correspondent", default=False, action="store_true")
        parser.add_argument("-T", "--tags", default=False, action="store_true")
        parser.add_argument("-t", "--document_type", default=False, action="store_true")
        parser.add_argument("-s", "--storage_path", default=False, action="store_true")
        parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
        parser.add_argument(
            "--use-first",
@@ -112,3 +114,14 @@ class Command(BaseCommand):
                base_url=options["base_url"],
                color=color,
            )
        if options["storage_path"]:
            set_storage_path(
                sender=None,
                document=document,
                classifier=classifier,
                replace=options["overwrite"],
                use_first=options["use_first"],
                suggest=options["suggest"],
                base_url=options["base_url"],
                color=color,
            )

@@ -1,34 +1,14 @@
# Generated by Django 3.1.3 on 2020-11-09 16:36

from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule


def add_schedules(apps, schema_editor):
    schedule(
        "documents.tasks.train_classifier",
        name="Train the classifier",
        schedule_type=Schedule.HOURLY,
    )
    schedule(
        "documents.tasks.index_optimize",
        name="Optimize the index",
        schedule_type=Schedule.DAILY,
    )


def remove_schedules(apps, schema_editor):
    Schedule.objects.filter(func="documents.tasks.train_classifier").delete()
    Schedule.objects.filter(func="documents.tasks.index_optimize").delete()


class Migration(migrations.Migration):

    dependencies = [
        ("documents", "1000_update_paperless_all"),
        ("django_q", "0013_task_attempt_count"),
    ]

    operations = [RunPython(add_schedules, remove_schedules)]
    operations = [
        migrations.RunPython(migrations.RunPython.noop, migrations.RunPython.noop)
    ]

@@ -2,27 +2,12 @@

from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule


def add_schedules(apps, schema_editor):
    schedule(
        "documents.tasks.sanity_check",
        name="Perform sanity check",
        schedule_type=Schedule.WEEKLY,
    )


def remove_schedules(apps, schema_editor):
    Schedule.objects.filter(func="documents.tasks.sanity_check").delete()


class Migration(migrations.Migration):

    dependencies = [
        ("documents", "1003_mime_types"),
        ("django_q", "0013_task_attempt_count"),
    ]

    operations = [RunPython(add_schedules, remove_schedules)]
    operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

@@ -4,28 +4,9 @@ from django.db import migrations, models
import django.db.models.deletion


def init_paperless_tasks(apps, schema_editor):
    PaperlessTask = apps.get_model("documents", "PaperlessTask")
    Task = apps.get_model("django_q", "Task")

    for task in Task.objects.filter(func="documents.tasks.consume_file"):
        if not hasattr(task, "paperlesstask"):
            paperlesstask = PaperlessTask.objects.create(
                attempted_task=task,
                task_id=task.id,
                name=task.name,
                created=task.started,
                started=task.started,
                acknowledged=True,
            )
            task.paperlesstask = paperlesstask
            task.save()


class Migration(migrations.Migration):

    dependencies = [
        ("django_q", "0014_schedule_cluster"),
        ("documents", "1021_webp_thumbnail_conversion"),
    ]

@@ -60,10 +41,12 @@ class Migration(migrations.Migration):
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="attempted_task",
                        to="django_q.task",
                        # This is a dummy field, 1026 will fix up the column
                        # This manual change is required, as django doesn't really support
                        # removing an app which has migration deps like this
                        to="documents.document",
                    ),
                ),
            ],
        ),
        migrations.RunPython(init_paperless_tasks, migrations.RunPython.noop),
        )
    ]

src/documents/migrations/1023_add_comments.py (new file, 69 lines)
@@ -0,0 +1,69 @@
from django.db import migrations, models
import django.utils.timezone
from django.conf import settings


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "1022_paperlesstask"),
    ]

    operations = [
        migrations.CreateModel(
            name="Comment",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "comment",
                    models.TextField(
                        blank=True,
                        help_text="Comment for the document",
                        verbose_name="content",
                    ),
                ),
                (
                    "created",
                    models.DateTimeField(
                        db_index=True,
                        default=django.utils.timezone.now,
                        verbose_name="created",
                    ),
                ),
                (
                    "document",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="documents",
                        to="documents.document",
                        verbose_name="document",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="users",
                        to=settings.AUTH_USER_MODEL,
                        verbose_name="user",
                    ),
                ),
            ],
            options={
                "verbose_name": "comment",
                "verbose_name_plural": "comments",
                "ordering": ("created",),
            },
        ),
    ]

src/documents/migrations/1024_document_original_filename.py (new file, 25 lines)
@@ -0,0 +1,25 @@
# Generated by Django 4.0.6 on 2022-07-25 06:34

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("documents", "1023_add_comments"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="original_filename",
            field=models.CharField(
                default=None,
                editable=False,
                help_text="The original name of the file when it was uploaded",
                max_length=1024,
                null=True,
                verbose_name="original filename",
            ),
        ),
    ]

@@ -0,0 +1,48 @@
# Generated by Django 4.0.5 on 2022-08-26 16:49

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("documents", "1024_document_original_filename"),
    ]

    operations = [
        migrations.AlterField(
            model_name="savedviewfilterrule",
            name="rule_type",
            field=models.PositiveIntegerField(
                choices=[
                    (0, "title contains"),
                    (1, "content contains"),
                    (2, "ASN is"),
                    (3, "correspondent is"),
                    (4, "document type is"),
                    (5, "is in inbox"),
                    (6, "has tag"),
                    (7, "has any tag"),
                    (8, "created before"),
                    (9, "created after"),
                    (10, "created year is"),
                    (11, "created month is"),
                    (12, "created day is"),
                    (13, "added before"),
                    (14, "added after"),
                    (15, "modified before"),
                    (16, "modified after"),
                    (17, "does not have tag"),
                    (18, "does not have ASN"),
                    (19, "title or content contains"),
                    (20, "fulltext query"),
                    (21, "more like this"),
                    (22, "has tags in"),
                    (23, "ASN greater than"),
                    (24, "ASN less than"),
                    (25, "storage path is"),
                ],
                verbose_name="rule type",
            ),
        ),
    ]

src/documents/migrations/1026_transition_to_celery.py (new file, 57 lines)
@@ -0,0 +1,57 @@
# Generated by Django 4.1.1 on 2022-09-27 19:31

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ("django_celery_results", "0011_taskresult_periodic_task_name"),
        ("documents", "1025_alter_savedviewfilterrule_rule_type"),
    ]

    operations = [
        migrations.RemoveField(
            model_name="paperlesstask",
            name="created",
        ),
        migrations.RemoveField(
            model_name="paperlesstask",
            name="name",
        ),
        migrations.RemoveField(
            model_name="paperlesstask",
            name="started",
        ),
        # Remove the field from the model
        migrations.RemoveField(
            model_name="paperlesstask",
            name="attempted_task",
        ),
        # Add the field back, pointing to the correct model
        # This resolves a problem where the temporary change in 1022
        # results in a type mismatch
        migrations.AddField(
            model_name="paperlesstask",
            name="attempted_task",
            field=models.OneToOneField(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="attempted_task",
                to="django_celery_results.taskresult",
            ),
        ),
        # Drop the django-q tables entirely
        # Must be done last or there could be references here
        migrations.RunSQL(
            "DROP TABLE IF EXISTS django_q_ormq", reverse_sql=migrations.RunSQL.noop
        ),
        migrations.RunSQL(
            "DROP TABLE IF EXISTS django_q_schedule", reverse_sql=migrations.RunSQL.noop
        ),
        migrations.RunSQL(
            "DROP TABLE IF EXISTS django_q_task", reverse_sql=migrations.RunSQL.noop
        ),
    ]

@@ -12,7 +12,7 @@ from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from django_q.tasks import Task
from django_celery_results.models import TaskResult
from documents.parsers import get_default_file_extension


@@ -214,6 +214,16 @@ class Document(models.Model):
        help_text=_("Current archive filename in storage"),
    )

    original_filename = models.CharField(
        _("original filename"),
        max_length=1024,
        editable=False,
        default=None,
        unique=False,
        null=True,
        help_text=_("The original name of the file when it was uploaded"),
    )

    archive_serial_number = models.IntegerField(
        _("archive serial number"),
        blank=True,
@@ -394,6 +404,9 @@ class SavedViewFilterRule(models.Model):
        (20, _("fulltext query")),
        (21, _("more like this")),
        (22, _("has tags in")),
        (23, _("ASN greater than")),
        (24, _("ASN less than")),
        (25, _("storage path is")),
    ]

    saved_view = models.ForeignKey(
@@ -514,16 +527,53 @@ class UiSettings(models.Model):


class PaperlessTask(models.Model):

    task_id = models.CharField(max_length=128)
    name = models.CharField(max_length=256)
    created = models.DateTimeField(_("created"), auto_now=True)
    started = models.DateTimeField(_("started"), null=True)
    acknowledged = models.BooleanField(default=False)

    attempted_task = models.OneToOneField(
        Task,
        TaskResult,
        on_delete=models.CASCADE,
        related_name="attempted_task",
        null=True,
        blank=True,
    )
    acknowledged = models.BooleanField(default=False)


class Comment(models.Model):
    comment = models.TextField(
        _("content"),
        blank=True,
        help_text=_("Comment for the document"),
    )

    created = models.DateTimeField(
        _("created"),
        default=timezone.now,
        db_index=True,
    )

    document = models.ForeignKey(
        Document,
        blank=True,
        null=True,
        related_name="documents",
        on_delete=models.CASCADE,
        verbose_name=_("document"),
    )

    user = models.ForeignKey(
        User,
        blank=True,
        null=True,
        related_name="users",
        on_delete=models.SET_NULL,
        verbose_name=_("user"),
    )

    class Meta:
        ordering = ("created",)
        verbose_name = _("comment")
        verbose_name_plural = _("comments")

    def __str__(self):
        return self.comment

@@ -6,6 +6,8 @@ import re
import shutil
import subprocess
import tempfile
from typing import Iterator
from typing import Match
from typing import Optional
from typing import Set

@@ -216,6 +218,10 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:


def parse_date(filename, text) -> Optional[datetime.datetime]:
    return next(parse_date_generator(filename, text), None)


def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
    """
    Returns the date of the document.
    """
@@ -246,38 +252,32 @@
        return date
    return None

    date = None
    def __process_match(
        match: Match[str],
        date_order: str,
    ) -> Optional[datetime.datetime]:
        date_string = match.group(0)

        try:
            date = __parser(date_string, date_order)
        except (TypeError, ValueError):
            # Skip all matches that do not parse to a proper date
            date = None

        return __filter(date)

    def __process_content(content: str, date_order: str) -> Iterator[datetime.datetime]:
        for m in re.finditer(DATE_REGEX, content):
            date = __process_match(m, date_order)
            if date is not None:
                yield date

    # if filename date parsing is enabled, search there first:
    if settings.FILENAME_DATE_ORDER:
        for m in re.finditer(DATE_REGEX, filename):
            date_string = m.group(0)

            try:
                date = __parser(date_string, settings.FILENAME_DATE_ORDER)
            except (TypeError, ValueError):
                # Skip all matches that do not parse to a proper date
                continue

            date = __filter(date)
            if date is not None:
                return date
        yield from __process_content(filename, settings.FILENAME_DATE_ORDER)

    # Iterate through all regex matches in text and try to parse the date
    for m in re.finditer(DATE_REGEX, text):
        date_string = m.group(0)

        try:
            date = __parser(date_string, settings.DATE_ORDER)
        except (TypeError, ValueError):
            # Skip all matches that do not parse to a proper date
            continue

        date = __filter(date)
        if date is not None:
            return date

    return date
    yield from __process_content(text, settings.DATE_ORDER)


class ParseError(Exception):

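A note on the parse_date refactor above: the public contract is unchanged, because parse_date simply drains the first value of the new generator via next(..., None), while callers that want every candidate date can now iterate parse_date_generator directly (filename is illustrative):

    # First match only, as before:
    date = parse_date("scan_2022-09-27.pdf", text)

    # New: inspect every parseable candidate, filename first, then content
    for candidate in parse_date_generator("scan_2022-09-27.pdf", text):
        print(candidate.isoformat())

The duplicated try/except blocks for filename and content also collapse into the shared __process_match/__process_content helpers.
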
@@ -1,6 +1,14 @@
|
||||
import datetime
|
||||
import math
|
||||
import re
|
||||
from ast import literal_eval
|
||||
from asyncio.log import logger
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
from typing import Optional
|
||||
from typing import Tuple
|
||||
|
||||
from celery import states
|
||||
|
||||
try:
|
||||
import zoneinfo
|
||||
@@ -18,12 +26,12 @@ from .models import Correspondent
|
||||
from .models import Document
|
||||
from .models import DocumentType
|
||||
from .models import MatchingModel
|
||||
from .models import PaperlessTask
|
||||
from .models import SavedView
|
||||
from .models import SavedViewFilterRule
|
||||
from .models import StoragePath
|
||||
from .models import Tag
|
||||
from .models import UiSettings
|
||||
from .models import PaperlessTask
|
||||
from .parsers import is_mime_type_supported
|
||||
|
||||
|
||||
@@ -240,7 +248,8 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
|
||||
)
|
||||
instance.created = new_datetime
|
||||
instance.save()
|
||||
validated_data.pop("created_date")
|
||||
if "created_date" in validated_data:
|
||||
validated_data.pop("created_date")
|
||||
super().update(instance, validated_data)
|
||||
return instance
|
||||
|
||||
@@ -607,6 +616,15 @@ class UiSettingsViewSerializer(serializers.ModelSerializer):
|
||||
"settings",
|
||||
]
|
||||
|
||||
def validate_settings(self, settings):
|
||||
# we never save update checking backend setting
|
||||
if "update_checking" in settings:
|
||||
try:
|
||||
settings["update_checking"].pop("backend_setting")
|
||||
except KeyError:
|
||||
pass
|
||||
return settings
|
||||
|
||||
def create(self, validated_data):
|
||||
ui_settings = UiSettings.objects.update_or_create(
|
||||
user=validated_data.get("user"),
|
||||
@@ -619,7 +637,19 @@ class TasksViewSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
depth = 1
|
||||
fields = "__all__"
|
||||
fields = (
|
||||
"id",
|
||||
"task_id",
|
||||
"date_created",
|
||||
"date_done",
|
||||
"type",
|
||||
"status",
|
||||
"result",
|
||||
"acknowledged",
|
||||
"task_name",
|
||||
"name",
|
||||
"related_document",
|
||||
)
|
||||
|
||||
type = serializers.SerializerMethodField()
|
||||
|
||||
@@ -631,24 +661,108 @@ class TasksViewSerializer(serializers.ModelSerializer):
|
||||
|
||||
def get_result(self, obj):
|
||||
result = ""
|
||||
if hasattr(obj, "attempted_task") and obj.attempted_task:
|
||||
result = obj.attempted_task.result
|
||||
if (
|
||||
hasattr(obj, "attempted_task")
|
||||
and obj.attempted_task
|
||||
and obj.attempted_task.result
|
||||
):
|
||||
try:
|
||||
result: str = obj.attempted_task.result
|
||||
if "exc_message" in result:
|
||||
# This is a dict in this case
|
||||
result: Dict = literal_eval(result)
|
||||
# This is a list, grab the first item (most recent)
|
||||
result = result["exc_message"][0]
|
||||
except Exception as e: # pragma: no cover
|
||||
# Extra security if something is malformed
|
||||
logger.warn(f"Error getting task result: {e}", exc_info=True)
|
||||
return result

    status = serializers.SerializerMethodField()

    def get_status(self, obj):
        if obj.attempted_task is None:
            if obj.started:
                return "started"
            else:
                return "queued"
        elif obj.attempted_task.success:
            return "complete"
        elif not obj.attempted_task.success:
            return "failed"
        else:
            return "unknown"
        result = "unknown"
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            result = obj.attempted_task.status
        return result

    date_created = serializers.SerializerMethodField()

    def get_date_created(self, obj):
        result = ""
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            result = obj.attempted_task.date_created
        return result

    date_done = serializers.SerializerMethodField()

    def get_date_done(self, obj):
        result = ""
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            result = obj.attempted_task.date_done
        return result

    task_id = serializers.SerializerMethodField()

    def get_task_id(self, obj):
        result = ""
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            result = obj.attempted_task.task_id
        return result

    task_name = serializers.SerializerMethodField()

    def get_task_name(self, obj):
        result = ""
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            result = obj.attempted_task.task_name
        return result

    name = serializers.SerializerMethodField()

    def get_name(self, obj):
        result = ""
        if hasattr(obj, "attempted_task") and obj.attempted_task:
            try:
                task_kwargs: Optional[str] = obj.attempted_task.task_kwargs
                # Try the override filename first (this is a webui created task?)
                if task_kwargs is not None:
                    # It's a string, string of a dict. Who knows why...
                    kwargs = literal_eval(literal_eval(task_kwargs))
                    if "override_filename" in kwargs:
                        result = kwargs["override_filename"]

                # Nothing was found, report the task first argument
                if not len(result):
                    # There are always some arguments to the consume
                    task_args: Tuple = literal_eval(
                        literal_eval(obj.attempted_task.task_args),
                    )
                    filepath = Path(task_args[0])
                    result = filepath.name
            except Exception as e:  # pragma: no cover
                # Extra security if something is malformed
                logger.warning(f"Error getting file name from task: {e}", exc_info=True)

        return result
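
A self-contained illustration of the double literal_eval above; django-celery-results stores task_kwargs as the repr of a repr (the value here is invented):

    from ast import literal_eval

    stored = "\"{'override_filename': 'test.pdf'}\""
    inner = literal_eval(stored)  # first pass still yields a string
    kwargs = literal_eval(inner)  # second pass yields the actual dict
    assert kwargs["override_filename"] == "test.pdf"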

    related_document = serializers.SerializerMethodField()

    def get_related_document(self, obj):
        result = ""
        regexp = r"New document id (\d+) created"
        if (
            hasattr(obj, "attempted_task")
            and obj.attempted_task
            and obj.attempted_task.result
            and obj.attempted_task.status == states.SUCCESS
        ):
            try:
                result = re.search(regexp, obj.attempted_task.result).group(1)
            except Exception:
                pass

        return result
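
The extraction above relies only on the consumer's success message format; a quick sketch:

    import re

    match = re.search(r"New document id (\d+) created", "Success. New document id 42 created")
    assert match is not None and match.group(1) == "42"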


class AcknowledgeTasksViewSerializer(serializers.Serializer):

@@ -2,7 +2,6 @@ import logging
import os
import shutil

import django_q
from django.conf import settings
from django.contrib.admin.models import ADDITION
from django.contrib.admin.models import LogEntry
@@ -14,6 +13,7 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import termcolors
from django.utils import timezone
from django_celery_results.models import TaskResult
from filelock import FileLock

from .. import matching
@@ -25,7 +25,6 @@ from ..models import MatchingModel
from ..models import PaperlessTask
from ..models import Tag


logger = logging.getLogger("paperless.handlers")


@@ -291,7 +290,7 @@ def set_storage_path(
            )
            + f" [{document.pk}]",
        )
        print(f"Sugest storage directory {selected}")
        print(f"Suggest storage directory {selected}")
    else:
        logger.info(
            f"Assigning storage path {selected} to {document}",
@@ -503,34 +502,19 @@ def add_to_index(sender, document, **kwargs):
    index.add_or_update_document(document)


@receiver(django_q.signals.pre_enqueue)
def init_paperless_task(sender, task, **kwargs):
    if task["func"] == "documents.tasks.consume_file":
        paperless_task, created = PaperlessTask.objects.get_or_create(
            task_id=task["id"],
        )
        paperless_task.name = task["name"]
        paperless_task.created = task["started"]
        paperless_task.save()


@receiver(django_q.signals.pre_execute)
def paperless_task_started(sender, task, **kwargs):
@receiver(models.signals.post_save, sender=TaskResult)
def update_paperless_task(sender, instance: TaskResult, **kwargs):
    try:
        if task["func"] == "documents.tasks.consume_file":
            paperless_task = PaperlessTask.objects.get(task_id=task["id"])
            paperless_task.started = timezone.now()
            paperless_task.save()
    except PaperlessTask.DoesNotExist:
        pass


@receiver(models.signals.post_save, sender=django_q.models.Task)
def update_paperless_task(sender, instance, **kwargs):
    try:
        if instance.func == "documents.tasks.consume_file":
            paperless_task = PaperlessTask.objects.get(task_id=instance.id)
        if instance.task_name == "documents.tasks.consume_file":
            paperless_task, _ = PaperlessTask.objects.get_or_create(
                task_id=instance.task_id,
            )
            paperless_task.name = instance.task_name
            paperless_task.created = instance.date_created
            paperless_task.completed = instance.date_done
            paperless_task.attempted_task = instance
            paperless_task.save()
    except PaperlessTask.DoesNotExist:
        pass
    except Exception as e:
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.error(f"Creating PaperlessTask failed: {e}")

@@ -1,14 +1,17 @@
import hashlib
import logging
import os
import shutil
import uuid
from pathlib import Path
from typing import Type

import tqdm
from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models.signals import post_save
from documents import barcodes
from documents import index
@@ -17,6 +20,8 @@ from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
from documents.consumer import Consumer
from documents.consumer import ConsumerError
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@@ -24,14 +29,16 @@ from documents.models import StoragePath
from documents.models import Tag
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import ParseError
from documents.sanity_checker import SanityCheckFailedException
from filelock import FileLock
from redis.exceptions import ConnectionError
from whoosh.writing import AsyncWriter


logger = logging.getLogger("paperless.tasks")


@shared_task
def index_optimize():
    ix = index.open_index()
    writer = AsyncWriter(ix)
@@ -48,6 +55,7 @@ def index_reindex(progress_bar_disable=False):
        index.update_document(writer, document)


@shared_task
def train_classifier():
    if (
        not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
@@ -76,6 +84,7 @@ def train_classifier():
        logger.warning("Classifier error: " + str(e))


@shared_task
def consume_file(
    path,
    override_filename=None,
@@ -87,32 +96,18 @@ def consume_file(
    override_created=None,
):

    path = Path(path).resolve()

    # check for separators in current document
    if settings.CONSUMER_ENABLE_BARCODES:

        mime_type = barcodes.get_file_mime_type(path)
        pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)

        if not barcodes.supported_file_type(mime_type):
            # if not supported, skip this routine
            logger.warning(
                f"Unsupported file format for barcode reader: {str(mime_type)}",
        if separators:
            logger.debug(
                f"Pages with separators found in: {str(path)}",
            )
        else:
            separators = []
        document_list = []

        if mime_type == "image/tiff":
            file_to_process = barcodes.convert_from_tiff_to_pdf(path)
        else:
            file_to_process = path

        separators = barcodes.scan_file_for_separating_barcodes(file_to_process)

        if separators:
            logger.debug(
                f"Pages with separators found in: {str(path)}",
            )
            document_list = barcodes.separate_pages(file_to_process, separators)
            document_list = barcodes.separate_pages(pdf_filepath, separators)

        if document_list:
            for n, document in enumerate(document_list):
@@ -122,17 +117,31 @@ def consume_file(
                    newname = f"{str(n)}_" + override_filename
                else:
                    newname = None
                barcodes.save_to_dir(document, newname=newname)

            # if we got here, the document was successfully split
            # and can safely be deleted
            if mime_type == "image/tiff":
                # Remove the TIFF converted to PDF file
                logger.debug(f"Deleting file {file_to_process}")
                os.unlink(file_to_process)
            # Remove the original file (new file is saved above)
            logger.debug(f"Deleting file {path}")
            os.unlink(path)
                # If the file is an upload, it's in the scratch directory
                # Move it to consume directory to be picked up
                # Otherwise, use the current parent to keep possible tags
                # from subdirectories
                try:
                    # is_relative_to would be nicer, but new in 3.9
                    _ = path.relative_to(settings.SCRATCH_DIR)
                    save_to_dir = settings.CONSUMPTION_DIR
                except ValueError:
                    save_to_dir = path.parent

                barcodes.save_to_dir(
                    document,
                    newname=newname,
                    target_dir=save_to_dir,
                )

            # Delete the PDF file which was split
            os.remove(pdf_filepath)

            # If the original was a TIFF, remove the original file as well
            if str(pdf_filepath) != str(path):
                logger.debug(f"Deleting file {path}")
                os.unlink(path)

            # notify the sender, otherwise the progress bar
            # in the UI stays stuck
@@ -149,11 +158,8 @@ def consume_file(
                    "status_updates",
                    {"type": "status_update", "data": payload},
                )
            except OSError as e:
                logger.warning(
                    "OSError. It could be, the broker cannot be reached.",
                )
                logger.warning(str(e))
            except ConnectionError as e:
                logger.warning(f"ConnectionError on status send: {str(e)}")
            # consuming stops here, since the original document with
            # the barcodes has been split and will be consumed separately
            return "File successfully split"

@@ -179,6 +185,7 @@ def consume_file(
    )


@shared_task
def sanity_check():
    messages = sanity_checker.check_sanity()

@@ -194,6 +201,7 @@ def sanity_check():
    return "No issues detected."


@shared_task
def bulk_update_documents(document_ids):
    documents = Document.objects.filter(id__in=document_ids)

@@ -207,44 +215,63 @@ def bulk_update_documents(document_ids):
            index.update_document(writer, doc)


def redo_ocr(document_ids):
    all_docs = Document.objects.all()
@shared_task
def update_document_archive_file(document_id):
    """
    Re-creates the archive file of a document, including new OCR content and thumbnail
    """
    document = Document.objects.get(id=document_id)

    for doc_pk in document_ids:
        try:
            logger.info(f"Parsing document {doc_pk}")
            doc: Document = all_docs.get(pk=doc_pk)
        except ObjectDoesNotExist:
            logger.error(f"Document {doc_pk} does not exist")
            continue
    mime_type = document.mime_type

        # Get the correct parser for this mime type
        parser_class: Type[DocumentParser] = get_parser_class_for_mime_type(
            doc.mime_type,
    parser_class: Type[DocumentParser] = get_parser_class_for_mime_type(mime_type)

    if not parser_class:
        logger.error(
            f"No parser found for mime type {mime_type}, cannot "
            f"archive document {document} (ID: {document_id})",
        )
        document_parser: DocumentParser = parser_class(
            "redo-ocr",
        return

    parser: DocumentParser = parser_class(logging_group=uuid.uuid4())

    try:
        parser.parse(document.source_path, mime_type, document.get_public_filename())

        thumbnail = parser.get_thumbnail(
            document.source_path,
            mime_type,
            document.get_public_filename(),
        )

        # Create a file path to copy the original file to for working on
        temp_file = (Path(document_parser.tempdir) / Path("new-ocr-file")).resolve()
        if parser.get_archive_path():
            with transaction.atomic():
                with open(parser.get_archive_path(), "rb") as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
                # I'm going to save first so that in case the file move
                # fails, the database is rolled back.
                # We also don't use save() since that triggers the filehandling
                # logic, and we don't want that yet (file not yet in place)
                document.archive_filename = generate_unique_filename(
                    document,
                    archive_filename=True,
                )
                Document.objects.filter(pk=document.pk).update(
                    archive_checksum=checksum,
                    content=parser.get_text(),
                    archive_filename=document.archive_filename,
                )
                with FileLock(settings.MEDIA_LOCK):
                    create_source_path_directory(document.archive_path)
                    shutil.move(parser.get_archive_path(), document.archive_path)
                    shutil.move(thumbnail, document.thumbnail_path)

        shutil.copy(doc.source_path, temp_file)
            with index.open_index_writer() as writer:
                index.update_document(writer, document)

        try:
            logger.info(
                f"Using {type(document_parser).__name__} for document",
            )
            # Try to re-parse the document into text
            document_parser.parse(str(temp_file), doc.mime_type)

            doc.content = document_parser.get_text()
            doc.save()
            logger.info("Document OCR updated")

        except ParseError as e:
            logger.error(f"Error parsing document: {e}")
        finally:
            # Remove the file path if it was created
            if temp_file.exists() and temp_file.is_file():
                temp_file.unlink()
    except Exception:
        logger.exception(
            f"Error while parsing document {document} " f"(ID: {document_id})",
        )
    finally:
        parser.cleanup()
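
Side note: the archive checksum written above is a plain MD5 over the file bytes; an equivalent standalone helper (name invented) would be:

    import hashlib

    def file_md5(path: str) -> str:
        # Matches the hexdigest stored in archive_checksum
        with open(path, "rb") as f:
            return hashlib.md5(f.read()).hexdigest()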
BIN
src/documents/tests/samples/barcodes/barcode-fax-image.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/barcodes/patch-code-t-double.pdf
Normal file
Binary file not shown.
@@ -10,6 +10,8 @@ import zipfile
from unittest import mock
from unittest.mock import MagicMock

import celery

try:
    import zoneinfo
except ImportError:
@@ -20,7 +22,6 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from django_q.models import Task
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
@@ -31,7 +32,8 @@ from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from django_celery_results.models import TaskResult
from documents.models import Comment
from documents.models import StoragePath
from documents.tests.utils import DirectoriesMixin
from paperless import version
@@ -789,7 +791,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data["documents_inbox"], None)

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload(self, m):

        with open(
@@ -812,7 +814,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertIsNone(kwargs["override_document_type_id"])
        self.assertIsNone(kwargs["override_tag_ids"])

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_empty_metadata(self, m):

        with open(
@@ -835,7 +837,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertIsNone(kwargs["override_document_type_id"])
        self.assertIsNone(kwargs["override_tag_ids"])

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_invalid_form(self, m):

        with open(
@@ -849,7 +851,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_invalid_file(self, m):

        with open(
@@ -863,7 +865,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 400)
        m.assert_not_called()

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_title(self, async_task):
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -881,7 +883,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        self.assertEqual(kwargs["override_title"], "my custom title")

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_correspondent(self, async_task):
        c = Correspondent.objects.create(name="test-corres")
        with open(
@@ -900,7 +902,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        self.assertEqual(kwargs["override_correspondent_id"], c.id)

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_correspondent(self, async_task):
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -914,7 +916,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        async_task.assert_not_called()

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_document_type(self, async_task):
        dt = DocumentType.objects.create(name="invoice")
        with open(
@@ -933,7 +935,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        self.assertEqual(kwargs["override_document_type_id"], dt.id)

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_document_type(self, async_task):
        with open(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -947,7 +949,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        async_task.assert_not_called()

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_tags(self, async_task):
        t1 = Tag.objects.create(name="tag1")
        t2 = Tag.objects.create(name="tag2")
@@ -967,7 +969,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        self.assertCountEqual(kwargs["override_tag_ids"], [t1.id, t2.id])

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_invalid_tags(self, async_task):
        t1 = Tag.objects.create(name="tag1")
        t2 = Tag.objects.create(name="tag2")
@@ -983,7 +985,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):

        async_task.assert_not_called()

    @mock.patch("documents.views.async_task")
    @mock.patch("documents.views.consume_file.delay")
    def test_upload_with_created(self, async_task):
        created = datetime.datetime(
            2022,
@@ -1107,6 +1109,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
                "tags": [],
                "document_types": [],
                "storage_paths": [],
                "dates": [],
            },
        )

@@ -1118,6 +1121,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
    @mock.patch("documents.views.match_document_types")
    @mock.patch("documents.views.match_tags")
    @mock.patch("documents.views.match_correspondents")
    @override_settings(NUMBER_OF_SUGGESTED_DATES=10)
    def test_get_suggestions(
        self,
        match_correspondents,
@@ -1128,7 +1132,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
        doc = Document.objects.create(
            title="test",
            mime_type="application/pdf",
            content="this is an invoice!",
            content="this is an invoice from 12.04.2022!",
        )

        match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
@@ -1144,6 +1148,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
                "tags": [56, 123],
                "document_types": [23],
                "storage_paths": [99, 77],
                "dates": ["2022-04-12"],
            },
        )

@@ -1354,6 +1359,133 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
            1,
        )

    def test_get_existing_comments(self):
        """
        GIVEN:
            - A document with a single comment
        WHEN:
            - API request for document comments is made
        THEN:
            - The associated comment is returned
        """
        doc = Document.objects.create(
            title="test",
            mime_type="application/pdf",
            content="this is a document which will have comments!",
        )
        comment = Comment.objects.create(
            comment="This is a comment.",
            document=doc,
            user=self.user,
        )

        response = self.client.get(
            f"/api/documents/{doc.pk}/comments/",
            format="json",
        )

        self.assertEqual(response.status_code, 200)

        resp_data = response.json()

        self.assertEqual(len(resp_data), 1)

        resp_data = resp_data[0]
        del resp_data["created"]

        self.assertDictEqual(
            resp_data,
            {
                "id": comment.id,
                "comment": comment.comment,
                "user": {
                    "id": comment.user.id,
                    "username": comment.user.username,
                    "firstname": comment.user.first_name,
                    "lastname": comment.user.last_name,
                },
            },
        )

    def test_create_comment(self):
        """
        GIVEN:
            - Existing document
        WHEN:
            - API request is made to add a comment
        THEN:
            - Comment is created and associated with document
        """
        doc = Document.objects.create(
            title="test",
            mime_type="application/pdf",
            content="this is a document which will have comments added",
        )
        resp = self.client.post(
            f"/api/documents/{doc.pk}/comments/",
            data={"comment": "this is a posted comment"},
        )
        self.assertEqual(resp.status_code, 200)

        response = self.client.get(
            f"/api/documents/{doc.pk}/comments/",
            format="json",
        )

        self.assertEqual(response.status_code, 200)

        resp_data = response.json()

        self.assertEqual(len(resp_data), 1)

        resp_data = resp_data[0]

        self.assertEqual(resp_data["comment"], "this is a posted comment")

    def test_delete_comment(self):
        """
        GIVEN:
            - Existing document with a comment
        WHEN:
            - API request is made to delete the comment
        THEN:
            - Comment is deleted and no longer associated with the document
        """
        doc = Document.objects.create(
            title="test",
            mime_type="application/pdf",
            content="this is a document which will have comments!",
        )
        comment = Comment.objects.create(
            comment="This is a comment.",
            document=doc,
            user=self.user,
        )

        response = self.client.delete(
            f"/api/documents/{doc.pk}/comments/?id={comment.pk}",
            format="json",
        )

        self.assertEqual(response.status_code, 200)

        self.assertEqual(len(Comment.objects.all()), 0)

    def test_get_comments_no_doc(self):
        """
        GIVEN:
            - A request to get comments from a non-existent document
        WHEN:
            - API request for document comments is made
        THEN:
            - HTTP 404 is returned
        """
        response = self.client.get(
            "/api/documents/500/comments/",
            format="json",
        )
        self.assertEqual(response.status_code, 404)


class TestDocumentApiV2(DirectoriesMixin, APITestCase):
    def setUp(self):
@@ -1450,7 +1582,11 @@ class TestApiUiSettings(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, 200)
        self.assertDictEqual(
            response.data["settings"],
            {},
            {
                "update_checking": {
                    "backend_setting": "default",
                },
            },
        )

    def test_api_set_ui_settings(self):
@@ -1484,7 +1620,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
        user = User.objects.create_superuser(username="temp_admin")
        self.client.force_authenticate(user=user)

        patcher = mock.patch("documents.bulk_edit.async_task")
        patcher = mock.patch("documents.bulk_edit.bulk_update_documents.delay")
        self.async_task = patcher.start()
        self.addCleanup(patcher.stop)
        self.c1 = Correspondent.objects.create(name="c1")
@@ -2411,38 +2547,6 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
    def setUp(self):
        super().setUp()

    def test_remote_version_default(self):
        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertDictEqual(
            response.data,
            {
                "version": "0.0.0",
                "update_available": False,
                "feature_is_set": False,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=False,
    )
    def test_remote_version_disabled(self):
        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertDictEqual(
            response.data,
            {
                "version": "0.0.0",
                "update_available": False,
                "feature_is_set": True,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=True,
    )
    @mock.patch("urllib.request.urlopen")
    def test_remote_version_enabled_no_update_prefix(self, urlopen_mock):

@@ -2460,13 +2564,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
            {
                "version": "1.6.0",
                "update_available": False,
                "feature_is_set": True,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=True,
    )
    @mock.patch("urllib.request.urlopen")
    def test_remote_version_enabled_no_update_no_prefix(self, urlopen_mock):

@@ -2486,13 +2586,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
            {
                "version": version.__full_version_str__,
                "update_available": False,
                "feature_is_set": True,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=True,
    )
    @mock.patch("urllib.request.urlopen")
    def test_remote_version_enabled_update(self, urlopen_mock):

@@ -2519,13 +2615,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
            {
                "version": new_version_str,
                "update_available": True,
                "feature_is_set": True,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=True,
    )
    @mock.patch("urllib.request.urlopen")
    def test_remote_version_bad_json(self, urlopen_mock):

@@ -2543,13 +2635,9 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
            {
                "version": "0.0.0",
                "update_available": False,
                "feature_is_set": True,
            },
        )

    @override_settings(
        ENABLE_UPDATE_CHECK=True,
    )
    @mock.patch("urllib.request.urlopen")
    def test_remote_version_exception(self, urlopen_mock):

@@ -2567,7 +2655,6 @@ class TestApiRemoteVersion(DirectoriesMixin, APITestCase):
            {
                "version": "0.0.0",
                "update_available": False,
                "feature_is_set": True,
            },
        )

@@ -2652,7 +2739,7 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):

class TestTasks(APITestCase):
    ENDPOINT = "/api/tasks/"
    ENDPOINT_ACKOWLEDGE = "/api/acknowledge_tasks/"
    ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"

    def setUp(self):
        super().setUp()
@@ -2661,16 +2748,27 @@ class TestTasks(APITestCase):
        self.client.force_authenticate(user=self.user)

    def test_get_tasks(self):
        task_id1 = str(uuid.uuid4())
        PaperlessTask.objects.create(task_id=task_id1)
        Task.objects.create(
            id=task_id1,
            started=timezone.now() - datetime.timedelta(seconds=30),
            stopped=timezone.now(),
            func="documents.tasks.consume_file",
        """
        GIVEN:
            - Attempted celery tasks
        WHEN:
            - API call is made to get tasks
        THEN:
            - Attempted and pending tasks are serialized and provided
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_great_task",
            status=celery.states.PENDING,
        )
        task_id2 = str(uuid.uuid4())
        PaperlessTask.objects.create(task_id=task_id2)
        PaperlessTask.objects.create(attempted_task=result1)

        result2 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_awesome_task",
            status=celery.states.STARTED,
        )
        PaperlessTask.objects.create(attempted_task=result2)

        response = self.client.get(self.ENDPOINT)

@@ -2678,25 +2776,155 @@ class TestTasks(APITestCase):
        self.assertEqual(len(response.data), 2)
        returned_task1 = response.data[1]
        returned_task2 = response.data[0]
        self.assertEqual(returned_task1["task_id"], task_id1)
        self.assertEqual(returned_task1["status"], "complete")
        self.assertIsNotNone(returned_task1["attempted_task"])
        self.assertEqual(returned_task2["task_id"], task_id2)
        self.assertEqual(returned_task2["status"], "queued")
        self.assertIsNone(returned_task2["attempted_task"])

        self.assertEqual(returned_task1["task_id"], result1.task_id)
        self.assertEqual(returned_task1["status"], celery.states.PENDING)
        self.assertEqual(returned_task1["task_name"], result1.task_name)

        self.assertEqual(returned_task2["task_id"], result2.task_id)
        self.assertEqual(returned_task2["status"], celery.states.STARTED)
        self.assertEqual(returned_task2["task_name"], result2.task_name)

    def test_acknowledge_tasks(self):
        task_id = str(uuid.uuid4())
        task = PaperlessTask.objects.create(task_id=task_id)
        """
        GIVEN:
            - Attempted celery tasks
        WHEN:
            - API call is made to mark a task as acknowledged
        THEN:
            - Task is marked as acknowledged
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_task",
            status=celery.states.PENDING,
        )
        task = PaperlessTask.objects.create(attempted_task=result1)

        response = self.client.get(self.ENDPOINT)
        self.assertEqual(len(response.data), 1)

        response = self.client.post(
            self.ENDPOINT_ACKOWLEDGE,
            self.ENDPOINT_ACKNOWLEDGE,
            {"tasks": [task.id]},
        )
        self.assertEqual(response.status_code, 200)

        response = self.client.get(self.ENDPOINT)
        self.assertEqual(len(response.data), 0)

    def test_task_result_no_error(self):
        """
        GIVEN:
            - A celery task completed without error
        WHEN:
            - API call is made to get tasks
        THEN:
            - The returned data includes the task result
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_task",
            status=celery.states.SUCCESS,
            result="Success. New document id 1 created",
        )
        _ = PaperlessTask.objects.create(attempted_task=result1)

        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)

        returned_data = response.data[0]

        self.assertEqual(returned_data["result"], "Success. New document id 1 created")
        self.assertEqual(returned_data["related_document"], "1")

    def test_task_result_with_error(self):
        """
        GIVEN:
            - A celery task completed with an exception
        WHEN:
            - API call is made to get tasks
        THEN:
            - The returned result is the exception info
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_task",
            status=celery.states.SUCCESS,
            result={
                "exc_type": "ConsumerError",
                "exc_message": ["test.pdf: Not consuming test.pdf: It is a duplicate."],
                "exc_module": "documents.consumer",
            },
        )
        _ = PaperlessTask.objects.create(attempted_task=result1)

        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)

        returned_data = response.data[0]

        self.assertEqual(
            returned_data["result"],
            "test.pdf: Not consuming test.pdf: It is a duplicate.",
        )

    def test_task_name_webui(self):
        """
        GIVEN:
            - Attempted celery task
            - Task was created through the webui
        WHEN:
            - API call is made to get tasks
        THEN:
            - Returned data includes the filename
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_task",
            status=celery.states.SUCCESS,
            task_args="\"('/tmp/paperless/paperless-upload-5iq7skzc',)\"",
            task_kwargs="\"{'override_filename': 'test.pdf', 'override_title': None, 'override_correspondent_id': None, 'override_document_type_id': None, 'override_tag_ids': None, 'task_id': '466e8fe7-7193-4698-9fff-72f0340e2082', 'override_created': None}\"",
        )
        _ = PaperlessTask.objects.create(attempted_task=result1)

        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)

        returned_data = response.data[0]

        self.assertEqual(returned_data["name"], "test.pdf")

    def test_task_name_consume_folder(self):
        """
        GIVEN:
            - Attempted celery task
            - Task was created through the consume folder
        WHEN:
            - API call is made to get tasks
        THEN:
            - Returned data includes the filename
        """
        result1 = TaskResult.objects.create(
            task_id=str(uuid.uuid4()),
            task_name="documents.tasks.some_task",
            status=celery.states.SUCCESS,
            task_args="\"('/consume/anothertest.pdf',)\"",
            task_kwargs="\"{'override_tag_ids': None}\"",
        )
        _ = PaperlessTask.objects.create(attempted_task=result1)

        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)

        returned_data = response.data[0]

        self.assertEqual(returned_data["name"], "anothertest.pdf")

@@ -3,6 +3,7 @@ import shutil
import tempfile
from unittest import mock

import pikepdf
from django.conf import settings
from django.test import override_settings
from django.test import TestCase
@@ -13,22 +14,23 @@ from PIL import Image


class TestBarcode(DirectoriesMixin, TestCase):

    SAMPLE_DIR = os.path.join(
        os.path.dirname(__file__),
        "samples",
    )

    BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")

    def test_barcode_reader(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            "barcode-39-PATCHT.png",
        )
        test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
        img = Image.open(test_file)
        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])

    def test_barcode_reader2(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t.pbm",
        )
        img = Image.open(test_file)
@@ -37,9 +39,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_distorsion(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-PATCHT-distorsion.png",
        )
        img = Image.open(test_file)
@@ -48,9 +48,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_distorsion2(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-PATCHT-distorsion2.png",
        )
        img = Image.open(test_file)
@@ -59,9 +57,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_unreadable(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-PATCHT-unreadable.png",
        )
        img = Image.open(test_file)
@@ -69,9 +65,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_qr(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "qr-code-PATCHT.png",
        )
        img = Image.open(test_file)
@@ -80,9 +74,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_128(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-128-PATCHT.png",
        )
        img = Image.open(test_file)
@@ -90,15 +82,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])

    def test_barcode_reader_no_barcode(self):
        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
        test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
        img = Image.open(test_file)
        self.assertEqual(barcodes.barcode_reader(img), [])

    def test_barcode_reader_custom_separator(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-custom.png",
        )
        img = Image.open(test_file)
@@ -106,9 +96,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_custom_qr_separator(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-qr-custom.png",
        )
        img = Image.open(test_file)
@@ -116,9 +104,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_reader_custom_128_separator(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-128-custom.png",
        )
        img = Image.open(test_file)
@@ -126,19 +112,15 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_get_mime_type(self):
        tiff_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            self.SAMPLE_DIR,
            "simple.tiff",
        )
        pdf_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            self.SAMPLE_DIR,
            "simple.pdf",
        )
        png_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-128-custom.png",
        )
        tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
@@ -173,8 +155,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_convert_error_from_pdf_to_pdf(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            self.SAMPLE_DIR,
            "simple.pdf",
        )
        dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
@@ -183,117 +164,235 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_scan_file_for_separating_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [0])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [0])

    def test_scan_file_for_separating_barcodes2(self):
        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [])
        test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [])

    def test_scan_file_for_separating_barcodes3(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [1])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [1])

    def test_scan_file_for_separating_barcodes4(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "several-patcht-codes.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [2, 5])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [2, 5])

    def test_scan_file_for_separating_barcodes_upsidedown(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle_reverse.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [1])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [1])

    def test_scan_file_for_separating_barcodes_pillow_transcode_error(self):
        """
        GIVEN:
            - A PDF containing an image which cannot be transcoded to a PIL image
        WHEN:
            - The image tries to be transcoded to a PIL image, but fails
        THEN:
            - The barcode reader is still called
        """

        def _build_device_n_pdf(self, save_path: str):
            # Based on the pikepdf tests
            # https://github.com/pikepdf/pikepdf/blob/abb35ebe17d579d76abe08265e00cf8890a12a95/tests/test_image_access.py
            pdf = pikepdf.new()
            pdf.add_blank_page(page_size=(72, 72))
            imobj = pikepdf.Stream(
                pdf,
                bytes(range(0, 256)),
                BitsPerComponent=8,
                ColorSpace=pikepdf.Array(
                    [
                        pikepdf.Name.DeviceN,
                        pikepdf.Array([pikepdf.Name.Black]),
                        pikepdf.Name.DeviceCMYK,
                        pikepdf.Stream(
                            pdf,
                            b"{0 0 0 4 -1 roll}",  # Colorspace conversion function
                            FunctionType=4,
                            Domain=[0.0, 1.0],
                            Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
                        ),
                    ],
                ),
                Width=16,
                Height=16,
                Type=pikepdf.Name.XObject,
                Subtype=pikepdf.Name.Image,
            )
            pim = pikepdf.PdfImage(imobj)
            self.assertEqual(pim.mode, "DeviceN")
            self.assertTrue(pim.is_device_n)

            pdf.pages[0].Contents = pikepdf.Stream(pdf, b"72 0 0 72 0 0 cm /Im0 Do")
            pdf.pages[0].Resources = pikepdf.Dictionary(
                XObject=pikepdf.Dictionary(Im0=imobj),
            )
            pdf.save(save_path)

        with tempfile.NamedTemporaryFile(suffix="pdf") as device_n_pdf:
            # Build an offending file
            _build_device_n_pdf(self, str(device_n_pdf.name))
            with mock.patch("documents.barcodes.barcode_reader") as reader:
                reader.return_value = list()

                _, _ = barcodes.scan_file_for_separating_barcodes(
                    str(device_n_pdf.name),
                )

                reader.assert_called()

    def test_scan_file_for_separating_barcodes_fax_decode(self):
        """
        GIVEN:
            - A PDF containing an image encoded as CCITT Group 4 encoding
        WHEN:
            - Barcode processing happens with the file
        THEN:
            - The barcode is still detected
        """
        test_file = os.path.join(
            self.BARCODE_SAMPLE_DIR,
            "barcode-fax-image.pdf",
        )
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [1])
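
For reference, a sketch of how a CCITT-encoded image can be spotted with pikepdf, which is the condition that forces the pdf2image fallback during scanning (file name taken from the test above):

    import pikepdf

    with pikepdf.open("barcode-fax-image.pdf") as pdf:
        for key, raw_image in pdf.pages[0].images.items():
            image = pikepdf.PdfImage(raw_image)
            if "/CCITTFaxDecode" in image.filters:
                print(f"{key}: CCITT Group 4, cannot be transcoded via PIL")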

    def test_scan_file_for_separating_qr_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-qr.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [0])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [0])

    @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
    def test_scan_file_for_separating_custom_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-custom.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [0])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [0])

    @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
    def test_scan_file_for_separating_custom_qr_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-qr-custom.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [0])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [0])

    @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
    def test_scan_file_for_separating_custom_128_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-128-custom.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [0])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [0])

    def test_scan_file_for_separating_wrong_qr_barcodes(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "barcode-39-custom.pdf",
        )
        pages = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertEqual(pages, [])
        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(pdf_file, test_file)
        self.assertListEqual(separator_page_numbers, [])

    def test_separate_pages(self):
        test_file = os.path.join(
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.pdf",
        )
        pages = barcodes.separate_pages(test_file, [1])

        self.assertEqual(len(pages), 2)

    def test_separate_pages_double_code(self):
        """
        GIVEN:
            - Input PDF with two patch code pages in a row
        WHEN:
            - The input file is split
        THEN:
            - Only two files are output
        """
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            "patch-code-t-middle.pdf",
            "patch-code-t-double.pdf",
        )
        pages = barcodes.separate_pages(test_file, [1])
        pages = barcodes.separate_pages(test_file, [1, 2])

        self.assertEqual(len(pages), 2)

    def test_separate_pages_no_list(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.pdf",
        )
        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -308,9 +407,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_save_to_dir(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t.pdf",
        )
        tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -320,9 +417,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_save_to_dir2(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t.pdf",
        )
        nonexistingdir = "/nowhere"
@@ -340,9 +435,7 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_save_to_dir3(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t.pdf",
        )
        tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -352,35 +445,41 @@ class TestBarcode(DirectoriesMixin, TestCase):

    def test_barcode_splitter(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.pdf",
        )
        tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
        separators = barcodes.scan_file_for_separating_barcodes(test_file)
        self.assertTrue(separators)
        document_list = barcodes.separate_pages(test_file, separators)

        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
            test_file,
        )

        self.assertEqual(test_file, pdf_file)
        self.assertTrue(len(separator_page_numbers) > 0)

        document_list = barcodes.separate_pages(test_file, separator_page_numbers)
        self.assertTrue(document_list)
        for document in document_list:
            barcodes.save_to_dir(document, target_dir=tempdir)

        target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
        target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")

        self.assertTrue(os.path.isfile(target_file1))
        self.assertTrue(os.path.isfile(target_file2))

    @override_settings(CONSUMER_ENABLE_BARCODES=True)
    def test_consume_barcode_file(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.pdf",
        )

        dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
        shutil.copy(test_file, dst)

        self.assertEqual(tasks.consume_file(dst), "File successfully split")
        with mock.patch("documents.tasks.async_to_sync"):
            self.assertEqual(tasks.consume_file(dst), "File successfully split")

    @override_settings(
        CONSUMER_ENABLE_BARCODES=True,
@@ -388,15 +487,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
    )
    def test_consume_barcode_tiff_file(self):
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.tiff",
        )
        dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
        shutil.copy(test_file, dst)

        self.assertEqual(tasks.consume_file(dst), "File successfully split")
        with mock.patch("documents.tasks.async_to_sync"):
            self.assertEqual(tasks.consume_file(dst), "File successfully split")

    @override_settings(
        CONSUMER_ENABLE_BARCODES=True,
@@ -412,18 +510,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
        and continue archiving the file as is.
        """
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            self.SAMPLE_DIR,
            "simple.jpg",
        )
        dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
        shutil.copy(test_file, dst)
        with self.assertLogs("paperless.tasks", level="WARNING") as cm:
        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
            self.assertIn("Success", tasks.consume_file(dst))
            self.assertListEqual(
                cm.output,
                [
                    "WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
                    "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
                ],
            )
        m.assert_called_once()
@@ -445,12 +542,11 @@ class TestBarcode(DirectoriesMixin, TestCase):
        the user uploads a supported image file, but without extension
        """
        test_file = os.path.join(
            os.path.dirname(__file__),
            "samples",
            "barcodes",
            self.BARCODE_SAMPLE_DIR,
            "patch-code-t-middle.tiff",
        )
        dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
        shutil.copy(test_file, dst)

        self.assertEqual(tasks.consume_file(dst), "File successfully split")
        with mock.patch("documents.tasks.async_to_sync"):
            self.assertEqual(tasks.consume_file(dst), "File successfully split")

@@ -1,9 +1,9 @@
import os
import re
import tempfile
from pathlib import Path
from unittest import mock

import documents
import pytest
from django.conf import settings
from django.test import override_settings
@@ -20,10 +20,19 @@ from documents.models import Tag
from documents.tests.utils import DirectoriesMixin


def dummy_preprocess(content: str):
    content = content.lower().strip()
    content = re.sub(r"\s+", " ", content)
    return content
|
||||
|
||||
|
||||
class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.classifier = DocumentClassifier()
|
||||
self.classifier.preprocess_content = mock.MagicMock(
|
||||
side_effect=dummy_preprocess,
|
||||
)
|
||||
|
||||
def generate_test_data(self):
|
||||
self.c1 = Correspondent.objects.create(
|
||||
@@ -192,6 +201,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
|
||||
new_classifier = DocumentClassifier()
|
||||
new_classifier.load()
|
||||
new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess)
|
||||
|
||||
self.assertFalse(new_classifier.train())
|
||||
|
||||
# @override_settings(
|
||||
@@ -215,6 +226,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
|
||||
new_classifier = DocumentClassifier()
|
||||
new_classifier.load()
|
||||
new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess)
|
||||
|
||||
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from django.conf import settings
from django.test import override_settings
from django.test import TestCase
from documents.parsers import parse_date
from documents.parsers import parse_date_generator
from paperless.settings import DATE_ORDER


@@ -161,6 +162,25 @@ class TestDate(TestCase):
    def test_crazy_date_with_spaces(self, *args):
        self.assertIsNone(parse_date("", "20 408000l 2475"))

    def test_multiple_dates(self):
        text = """This text has multiple dates.
For example 02.02.2018, 22 July 2022 and Dezember 2021.
But not 24-12-9999 because its in the future..."""
        dates = list(parse_date_generator("", text))
        self.assertEqual(len(dates), 3)
        self.assertEqual(
            dates[0],
            datetime.datetime(2018, 2, 2, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )
        self.assertEqual(
            dates[1],
            datetime.datetime(2022, 7, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )
        self.assertEqual(
            dates[2],
            datetime.datetime(2021, 12, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
        )

    @override_settings(FILENAME_DATE_ORDER="YMD")
    def test_filename_date_parse_valid_ymd(self, *args):
        """

@@ -10,8 +10,8 @@ from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
from documents.tasks import update_document_archive_file
from documents.tests.utils import DirectoriesMixin


@@ -46,7 +46,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
            os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
        )

        handle_document(doc.pk)
        update_document_archive_file(doc.pk)

        doc = Document.objects.get(id=doc.id)

@@ -63,7 +63,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
        doc.save()
        shutil.copy(sample_file, doc.source_path)

        handle_document(doc.pk)
        update_document_archive_file(doc.pk)

        doc = Document.objects.get(id=doc.id)

@@ -94,8 +94,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
            os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
        )

        handle_document(doc2.pk)
        handle_document(doc1.pk)
        update_document_archive_file(doc2.pk)
        update_document_archive_file(doc1.pk)

        doc1 = Document.objects.get(id=doc1.id)
        doc2 = Document.objects.get(id=doc2.id)

@@ -20,13 +20,14 @@ class ConsumerThread(Thread):
    def __init__(self):
        super().__init__()
        self.cmd = document_consumer.Command()
        self.cmd.stop_flag.clear()

    def run(self) -> None:
        self.cmd.handle(directory=settings.CONSUMPTION_DIR, oneshot=False)
        self.cmd.handle(directory=settings.CONSUMPTION_DIR, oneshot=False, testing=True)

    def stop(self):
        # Consumer checks this every second.
        self.cmd.stop_flag = True
        self.cmd.stop_flag.set()


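The hunk above replaces the plain boolean stop flag with a threading.Event, which the test thread can set while the consumer thread polls it, without races. A minimal standalone sketch of the pattern follows; the Worker class and the timings in it are illustrative, not Paperless code:

import threading
import time


class Worker(threading.Thread):
    def __init__(self):
        super().__init__()
        # Event supports clear()/set()/is_set() safely across threads,
        # unlike rebinding a plain boolean attribute on the object.
        self.stop_flag = threading.Event()

    def run(self):
        while not self.stop_flag.is_set():
            # ... one unit of work per loop iteration ...
            time.sleep(1)  # poll the flag roughly once per second


worker = Worker()
worker.start()
time.sleep(3)
worker.stop_flag.set()  # request shutdown; run() exits on the next check
worker.join()
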
def chunked(size, source):
@@ -42,7 +43,7 @@ class ConsumerMixin:
        super().setUp()
        self.t = None
        patcher = mock.patch(
            "documents.management.commands.document_consumer.async_task",
            "documents.tasks.consume_file.delay",
        )
        self.task_mock = patcher.start()
        self.addCleanup(patcher.stop)
@@ -59,13 +60,14 @@ class ConsumerMixin:
            self.t.stop()
            # wait for the consumer to exit.
            self.t.join()
            self.t = None

        super().tearDown()

    def wait_for_task_mock_call(self, excpeted_call_count=1):
    def wait_for_task_mock_call(self, expected_call_count=1):
        n = 0
        while n < 100:
            if self.task_mock.call_count >= excpeted_call_count:
        while n < 50:
            if self.task_mock.call_count >= expected_call_count:
                # give task_mock some time to finish and raise errors
                sleep(1)
                return
@@ -74,7 +76,7 @@ class ConsumerMixin:

    # A bogus async_task that will simply check the file for
    # completeness and raise an exception otherwise.
    def bogus_task(self, func, filename, **kwargs):
    def bogus_task(self, filename, **kwargs):
        eq = filecmp.cmp(filename, self.sample_file, shallow=False)
        if not eq:
            print("Consumed an INVALID file.")
@@ -113,7 +115,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
        self.task_mock.assert_called_once()

        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], f)
        self.assertEqual(args[0], f)

    def test_consume_file_invalid_ext(self):
        self.t_start()
@@ -133,7 +135,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
        self.task_mock.assert_called_once()

        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], f)
        self.assertEqual(args[0], f)

    @mock.patch("documents.management.commands.document_consumer.logger.error")
    def test_slow_write_pdf(self, error_logger):
@@ -153,7 +155,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
        self.task_mock.assert_called_once()

        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], fname)
        self.assertEqual(args[0], fname)

    @mock.patch("documents.management.commands.document_consumer.logger.error")
    def test_slow_write_and_move(self, error_logger):
@@ -173,7 +175,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
        self.task_mock.assert_called_once()

        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], fname2)
        self.assertEqual(args[0], fname2)

        error_logger.assert_not_called()

@@ -191,7 +193,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

        self.task_mock.assert_called_once()
        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], fname)
        self.assertEqual(args[0], fname)

        # assert that we have an error logged with this invalid file.
        error_logger.assert_called_once()
@@ -234,12 +236,12 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

        sleep(5)

        self.wait_for_task_mock_call(excpeted_call_count=2)
        self.wait_for_task_mock_call(expected_call_count=2)

        self.assertEqual(2, self.task_mock.call_count)

        fnames = [
            os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list
            os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
        ]
        self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])

@@ -281,6 +283,8 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

    @override_settings(
        CONSUMER_POLLING=1,
        # please leave the delay here and down below
        # see https://github.com/paperless-ngx/paperless-ngx/pull/66
        CONSUMER_POLLING_DELAY=3,
        CONSUMER_POLLING_RETRY_COUNT=20,
    )
@@ -307,8 +311,7 @@ class TestConsumerRecursivePolling(TestConsumer):


class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
    @override_settings(CONSUMER_RECURSIVE=True)
    @override_settings(CONSUMER_SUBDIRS_AS_TAGS=True)
    @override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIRS_AS_TAGS=True)
    def test_consume_file_with_path_tags(self):

        tag_names = ("existingTag", "Space Tag")
@@ -335,7 +338,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
        tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)

        args, kwargs = self.task_mock.call_args
        self.assertEqual(args[1], f)
        self.assertEqual(args[0], f)

        # assertCountEqual has a bad name, but test that the first
        # sequence contains the same elements as second, regardless of
@@ -344,7 +347,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

    @override_settings(
        CONSUMER_POLLING=1,
        CONSUMER_POLLING_DELAY=1,
        CONSUMER_POLLING_DELAY=3,
        CONSUMER_POLLING_RETRY_COUNT=20,
    )
    def test_consume_file_with_path_tags_polling(self):

@@ -10,10 +10,13 @@ from django.core.management import call_command
from django.test import override_settings
from django.test import TestCase
from documents.management.commands import document_exporter
from documents.models import Comment
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.models import User
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin
@@ -25,6 +28,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
        self.target = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.target)

        self.user = User.objects.create(username="temp_admin")

        self.d1 = Document.objects.create(
            content="Content",
            checksum="42995833e01aea9b3edee44bbfdd7ce1",
@@ -57,14 +62,23 @@ class TestExportImport(DirectoriesMixin, TestCase):
            storage_type=Document.STORAGE_TYPE_GPG,
        )

        self.comment = Comment.objects.create(
            comment="This is a comment. amaze.",
            document=self.d1,
            user=self.user,
        )

        self.t1 = Tag.objects.create(name="t")
        self.dt1 = DocumentType.objects.create(name="dt")
        self.c1 = Correspondent.objects.create(name="c")
        self.sp1 = StoragePath.objects.create(path="{created_year}-{title}")

        self.d1.tags.add(self.t1)
        self.d1.correspondent = self.c1
        self.d1.document_type = self.dt1
        self.d1.save()
        self.d4.storage_path = self.sp1
        self.d4.save()
        super().setUp()

    def _get_document_from_manifest(self, manifest, id):
@@ -110,7 +124,7 @@ class TestExportImport(DirectoriesMixin, TestCase):

        manifest = self._do_export(use_filename_format=use_filename_format)

        self.assertEqual(len(manifest), 8)
        self.assertEqual(len(manifest), 11)
        self.assertEqual(
            len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
            4,
@@ -171,6 +185,11 @@ class TestExportImport(DirectoriesMixin, TestCase):
                checksum = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(checksum, element["fields"]["archive_checksum"])

            elif element["model"] == "documents.comment":
                self.assertEqual(element["fields"]["comment"], self.comment.comment)
                self.assertEqual(element["fields"]["document"], self.d1.id)
                self.assertEqual(element["fields"]["user"], self.user.id)

        with paperless_environment() as dirs:
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
@@ -184,6 +203,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
            self.assertEqual(Tag.objects.count(), 1)
            self.assertEqual(Correspondent.objects.count(), 1)
            self.assertEqual(DocumentType.objects.count(), 1)
            self.assertEqual(StoragePath.objects.count(), 1)
            self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
            self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
            self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")

@@ -3,12 +3,34 @@ from django.test import TestCase
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin


class TestRetagger(DirectoriesMixin, TestCase):
    def make_models(self):

        self.sp1 = StoragePath.objects.create(
            name="dummy a",
            path="{created_data}/{title}",
            match="auto document",
            matching_algorithm=StoragePath.MATCH_LITERAL,
        )
        self.sp2 = StoragePath.objects.create(
            name="dummy b",
            path="{title}",
            match="^first|^unrelated",
            matching_algorithm=StoragePath.MATCH_REGEX,
        )

        self.sp3 = StoragePath.objects.create(
            name="dummy c",
            path="{title}",
            match="^blah",
            matching_algorithm=StoragePath.MATCH_REGEX,
        )

        self.d1 = Document.objects.create(
            checksum="A",
            title="A",
@@ -23,6 +45,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
            checksum="C",
            title="C",
            content="unrelated document",
            storage_path=self.sp3,
        )
        self.d4 = Document.objects.create(
            checksum="D",
@@ -146,15 +169,15 @@ class TestRetagger(DirectoriesMixin, TestCase):
        call_command("document_retagger", "--document_type", "--suggest")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.document_type, None)
        self.assertEqual(d_second.document_type, None)
        self.assertIsNone(d_first.document_type)
        self.assertIsNone(d_second.document_type)

    def test_add_correspondent_suggest(self):
        call_command("document_retagger", "--correspondent", "--suggest")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, None)
        self.assertEqual(d_second.correspondent, None)
        self.assertIsNone(d_first.correspondent)
        self.assertIsNone(d_second.correspondent)

    def test_add_tags_suggest_url(self):
        call_command(
@@ -178,8 +201,8 @@ class TestRetagger(DirectoriesMixin, TestCase):
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.document_type, None)
        self.assertEqual(d_second.document_type, None)
        self.assertIsNone(d_first.document_type)
        self.assertIsNone(d_second.document_type)

    def test_add_correspondent_suggest_url(self):
        call_command(
@@ -190,5 +213,48 @@ class TestRetagger(DirectoriesMixin, TestCase):
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, None)
        self.assertEqual(d_second.correspondent, None)
        self.assertIsNone(d_first.correspondent)
        self.assertIsNone(d_second.correspondent)

    def test_add_storage_path(self):
        """
        GIVEN:
            - 2 storage paths with documents which match them
            - 1 document which matches but has a storage path
        WHEN:
            - document retagger is called
        THEN:
            - Matching document's storage paths updated
            - Non-matching documents have no storage path
            - Existing storage path left unchanged
        """
        call_command(
            "document_retagger",
            "--storage_path",
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.storage_path, self.sp2)
        self.assertEqual(d_auto.storage_path, self.sp1)
        self.assertIsNone(d_second.storage_path)
        self.assertEqual(d_unrelated.storage_path, self.sp3)

    def test_overwrite_storage_path(self):
        """
        GIVEN:
            - 2 storage paths with documents which match them
            - 1 document which matches but has a storage path
        WHEN:
            - document retagger is called with overwrite
        THEN:
            - Matching document's storage paths updated
            - Non-matching documents have no storage path
            - Existing storage path overwritten
        """
        call_command("document_retagger", "--storage_path", "--overwrite")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.storage_path, self.sp2)
        self.assertEqual(d_auto.storage_path, self.sp1)
        self.assertIsNone(d_second.storage_path)
        self.assertEqual(d_unrelated.storage_path, self.sp2)

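The two docstrings above pin down the retagger's storage-path behavior. A minimal sketch of driving the same command from a Django shell, assuming a configured paperless environment; both invocations appear verbatim in the tests above:

from django.core.management import call_command

# Assign a storage path only to documents that match a rule and have none yet:
call_command("document_retagger", "--storage_path")

# Re-evaluate every document, replacing storage paths that no longer match:
call_command("document_retagger", "--storage_path", "--overwrite")
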
@@ -1,35 +0,0 @@
import logging
from unittest import mock

from django.test import TestCase
from paperless.settings import default_task_workers
from paperless.settings import default_threads_per_worker


class TestSettings(TestCase):
    @mock.patch("paperless.settings.multiprocessing.cpu_count")
    def test_single_core(self, cpu_count):
        cpu_count.return_value = 1

        default_workers = default_task_workers()

        default_threads = default_threads_per_worker(default_workers)

        self.assertEqual(default_workers, 1)
        self.assertEqual(default_threads, 1)

    def test_workers_threads(self):
        for i in range(1, 64):
            with mock.patch(
                "paperless.settings.multiprocessing.cpu_count",
            ) as cpu_count:
                cpu_count.return_value = i

                default_workers = default_task_workers()

                default_threads = default_threads_per_worker(default_workers)

                self.assertTrue(default_workers >= 1)
                self.assertTrue(default_threads >= 1)

                self.assertTrue(default_workers * default_threads <= i, f"{i}")

@@ -11,6 +11,7 @@ from documents.models import DocumentType
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin


@@ -75,21 +76,26 @@ class TestClassifier(DirectoriesMixin, TestCase):
        doc = Document.objects.create(correspondent=c, content="test", title="test")
        self.assertFalse(os.path.isfile(settings.MODEL_FILE))

        tasks.train_classifier()
        self.assertTrue(os.path.isfile(settings.MODEL_FILE))
        mtime = os.stat(settings.MODEL_FILE).st_mtime
        with mock.patch(
            "documents.classifier.DocumentClassifier.preprocess_content",
        ) as pre_proc_mock:
            pre_proc_mock.side_effect = dummy_preprocess

        tasks.train_classifier()
        self.assertTrue(os.path.isfile(settings.MODEL_FILE))
        mtime2 = os.stat(settings.MODEL_FILE).st_mtime
        self.assertEqual(mtime, mtime2)
            tasks.train_classifier()
            self.assertTrue(os.path.isfile(settings.MODEL_FILE))
            mtime = os.stat(settings.MODEL_FILE).st_mtime

        doc.content = "test2"
        doc.save()
        tasks.train_classifier()
        self.assertTrue(os.path.isfile(settings.MODEL_FILE))
        mtime3 = os.stat(settings.MODEL_FILE).st_mtime
        self.assertNotEqual(mtime2, mtime3)
            tasks.train_classifier()
            self.assertTrue(os.path.isfile(settings.MODEL_FILE))
            mtime2 = os.stat(settings.MODEL_FILE).st_mtime
            self.assertEqual(mtime, mtime2)

            doc.content = "test2"
            doc.save()
            tasks.train_classifier()
            self.assertTrue(os.path.isfile(settings.MODEL_FILE))
            mtime3 = os.stat(settings.MODEL_FILE).st_mtime
            self.assertNotEqual(mtime2, mtime3)


class TestSanityCheck(DirectoriesMixin, TestCase):

@@ -1,3 +1,4 @@
import itertools
import json
import logging
import os
@@ -21,12 +22,13 @@ from django.db.models.functions import Lower
from django.http import Http404
from django.http import HttpResponse
from django.http import HttpResponseBadRequest
from django.shortcuts import get_object_or_404
from django.utils.decorators import method_decorator
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from documents.tasks import consume_file
from packaging import version as packaging_version
from paperless import version
from paperless.db import GnuPG
@@ -62,6 +64,7 @@ from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_storage_paths
from .matching import match_tags
from .models import Comment
from .models import Correspondent
from .models import Document
from .models import DocumentType
@@ -70,6 +73,7 @@ from .models import SavedView
from .models import StoragePath
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date_generator
from .serialisers import AcknowledgeTasksViewSerializer
from .serialisers import BulkDownloadSerializer
from .serialisers import BulkEditSerializer
@@ -257,6 +261,9 @@ class DocumentViewSet(
        file_handle = doc.source_file
        filename = doc.get_public_filename()
        mime_type = doc.mime_type
        # Support browser previewing csv files by using text mime type
        if mime_type in {"application/csv", "text/csv"} and disposition == "inline":
            mime_type = "text/plain"

        if doc.storage_type == Document.STORAGE_TYPE_GPG:
            file_handle = GnuPG.decrypted(file_handle)
@@ -313,6 +320,7 @@ class DocumentViewSet(
            "original_metadata": self.get_metadata(doc.source_path, doc.mime_type),
            "archive_checksum": doc.archive_checksum,
            "archive_media_filename": doc.archive_filename,
            "original_filename": doc.original_filename,
        }

        if doc.has_archive_version:
@@ -329,13 +337,15 @@ class DocumentViewSet(

    @action(methods=["get"], detail=True)
    def suggestions(self, request, pk=None):
        try:
            doc = Document.objects.get(pk=pk)
        except Document.DoesNotExist:
            raise Http404()
        doc = get_object_or_404(Document, pk=pk)

        classifier = load_classifier()

        gen = parse_date_generator(doc.filename, doc.content)
        dates = sorted(
            {i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
        )

        return Response(
            {
                "correspondents": [c.id for c in match_correspondents(doc, classifier)],
@@ -344,6 +354,9 @@ class DocumentViewSet(
                    dt.id for dt in match_document_types(doc, classifier)
                ],
                "storage_paths": [dt.id for dt in match_storage_paths(doc, classifier)],
                "dates": [
                    date.strftime("%Y-%m-%d") for date in dates if date is not None
                ],
            },
        )

@@ -378,6 +391,67 @@ class DocumentViewSet(
        except (FileNotFoundError, Document.DoesNotExist):
            raise Http404()

    def getComments(self, doc):
        return [
            {
                "id": c.id,
                "comment": c.comment,
                "created": c.created,
                "user": {
                    "id": c.user.id,
                    "username": c.user.username,
                    "firstname": c.user.first_name,
                    "lastname": c.user.last_name,
                },
            }
            for c in Comment.objects.filter(document=doc).order_by("-created")
        ]

    @action(methods=["get", "post", "delete"], detail=True)
    def comments(self, request, pk=None):
        try:
            doc = Document.objects.get(pk=pk)
        except Document.DoesNotExist:
            raise Http404()

        currentUser = request.user

        if request.method == "GET":
            try:
                return Response(self.getComments(doc))
            except Exception as e:
                logger.warning(f"An error occurred retrieving comments: {str(e)}")
                return Response(
                    {"error": "Error retrieving comments, check logs for more detail."},
                )
        elif request.method == "POST":
            try:
                c = Comment.objects.create(
                    document=doc,
                    comment=request.data["comment"],
                    user=currentUser,
                )
                c.save()

                return Response(self.getComments(doc))
            except Exception as e:
                logger.warning(f"An error occurred saving comment: {str(e)}")
                return Response(
                    {
                        "error": "Error saving comment, check logs for more detail.",
                    },
                )
        elif request.method == "DELETE":
            comment = Comment.objects.get(id=int(request.GET.get("id")))
            comment.delete()
            return Response(self.getComments(doc))

        return Response(
            {
                "error": "error",
            },
        )

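For reference, a client-side sketch of the comments endpoint added above, using the requests library. The base URL and credentials are assumptions, and the /api/documents/<pk>/comments/ route follows DRF's default @action naming; the request shapes (the "comment" field and the "id" query parameter) come straight from the view:

import requests

BASE = "http://localhost:8000"  # assumption: local development server
AUTH = ("user", "pass")  # assumption: basic auth credentials

# GET: list comments for document 1
comments = requests.get(f"{BASE}/api/documents/1/comments/", auth=AUTH).json()

# POST: add a comment; the view reads request.data["comment"]
requests.post(
    f"{BASE}/api/documents/1/comments/",
    data={"comment": "Needs review"},
    auth=AUTH,
)

# DELETE: the view looks the comment up via the "id" query parameter
requests.delete(f"{BASE}/api/documents/1/comments/?id=3", auth=AUTH)
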

class SearchResultSerializer(DocumentSerializer):
    def to_representation(self, instance):
@@ -541,8 +615,7 @@ class PostDocumentView(GenericAPIView):

        task_id = str(uuid.uuid4())

        async_task(
            "documents.tasks.consume_file",
        consume_file.delay(
            temp_filename,
            override_filename=doc_name,
            override_title=title,
@@ -550,7 +623,6 @@ class PostDocumentView(GenericAPIView):
            override_document_type_id=document_type_id,
            override_tag_ids=tag_ids,
            task_id=task_id,
            task_name=os.path.basename(doc_name)[:100],
            override_created=created,
        )

@@ -709,42 +781,38 @@ class RemoteVersionView(GenericAPIView):
        remote_version = "0.0.0"
        is_greater_than_current = False
        current_version = packaging_version.parse(version.__full_version_str__)
        # TODO: this can likely be removed when frontend settings are saved to DB
        feature_is_set = settings.ENABLE_UPDATE_CHECK != "default"
        if feature_is_set and settings.ENABLE_UPDATE_CHECK:
            try:
                req = urllib.request.Request(
                    "https://api.github.com/repos/paperless-ngx/"
                    "paperless-ngx/releases/latest",
                )
                # Ensure a JSON response
                req.add_header("Accept", "application/json")

                with urllib.request.urlopen(req) as response:
                    remote = response.read().decode("utf-8")
                    try:
                        remote_json = json.loads(remote)
                        remote_version = remote_json["tag_name"]
                        # Basically PEP 616 but that only went in 3.9
                        if remote_version.startswith("ngx-"):
                            remote_version = remote_version[len("ngx-") :]
                    except ValueError:
                        logger.debug("An error occurred parsing remote version json")
            except urllib.error.URLError:
                logger.debug("An error occurred checking for available updates")

            is_greater_than_current = (
                packaging_version.parse(
                    remote_version,
                )
                > current_version
        try:
            req = urllib.request.Request(
                "https://api.github.com/repos/paperless-ngx/"
                "paperless-ngx/releases/latest",
            )
            # Ensure a JSON response
            req.add_header("Accept", "application/json")

            with urllib.request.urlopen(req) as response:
                remote = response.read().decode("utf-8")
                try:
                    remote_json = json.loads(remote)
                    remote_version = remote_json["tag_name"]
                    # Basically PEP 616 but that only went in 3.9
                    if remote_version.startswith("ngx-"):
                        remote_version = remote_version[len("ngx-") :]
                except ValueError:
                    logger.debug("An error occurred parsing remote version json")
        except urllib.error.URLError:
            logger.debug("An error occurred checking for available updates")

        is_greater_than_current = (
            packaging_version.parse(
                remote_version,
            )
            > current_version
        )

        return Response(
            {
                "version": remote_version,
                "update_available": is_greater_than_current,
                "feature_is_set": feature_is_set,
            },
        )

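The update check above leans on packaging.version for the comparison and on a plain slice to drop the "ngx-" tag prefix (str.removeprefix would do the same, but needs Python 3.9). A minimal standalone sketch of just that logic; the version strings are illustrative:

from packaging import version as packaging_version

current_version = packaging_version.parse("1.8.0")  # illustrative value

remote_version = "ngx-1.9.2"  # GitHub release tags carry an "ngx-" prefix
if remote_version.startswith("ngx-"):
    remote_version = remote_version[len("ngx-") :]

update_available = packaging_version.parse(remote_version) > current_version
print(update_available)  # True for the values above
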
@@ -777,15 +845,23 @@ class UiSettingsView(GenericAPIView):
        displayname = user.username
        if user.first_name or user.last_name:
            displayname = " ".join([user.first_name, user.last_name])
        settings = {}
        ui_settings = {}
        if hasattr(user, "ui_settings"):
            settings = user.ui_settings.settings
            ui_settings = user.ui_settings.settings
        if "update_checking" in ui_settings:
            ui_settings["update_checking"][
                "backend_setting"
            ] = settings.ENABLE_UPDATE_CHECK
        else:
            ui_settings["update_checking"] = {
                "backend_setting": settings.ENABLE_UPDATE_CHECK,
            }
        return Response(
            {
                "user_id": user.id,
                "username": user.username,
                "display_name": displayname,
                "settings": settings,
                "settings": ui_settings,
            },
        )

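The effect of the merge above is that the frontend always receives an update_checking block mirroring the backend setting. A sketch of the resulting payload; all values are illustrative, not taken from the source:

example_response = {
    "user_id": 1,
    "username": "admin",
    "display_name": "Jane Doe",
    "settings": {
        "update_checking": {
            # mirrors settings.ENABLE_UPDATE_CHECK on the backend
            "backend_setting": "default",
        },
        # ...any other UI settings the user has stored...
    },
}
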
@@ -810,8 +886,9 @@ class TasksViewSet(ReadOnlyModelViewSet):
    queryset = (
        PaperlessTask.objects.filter(
            acknowledged=False,
            attempted_task__isnull=False,
        )
        .order_by("created")
        .order_by("attempted_task__date_created")
        .reverse()
    )

@@ -5,15 +5,15 @@ msgstr ""
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"Last-Translator: \n"
"Language-Team: Arabic, Saudi Arabia\n"
"Language: ar_SA\n"
"Language-Team: Arabic, Arabic\n"
"Language: ar_AR\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);\n"
"X-Crowdin-Project: paperless-ngx\n"
"X-Crowdin-Project-ID: 500308\n"
"X-Crowdin-Language: ar-SA\n"
"X-Crowdin-Language: ar-AR\n"
"X-Crowdin-File: /dev/src/locale/en_US/LC_MESSAGES/django.po\n"
"X-Crowdin-File-ID: 14\n"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-07-29 20:44\n"
"Last-Translator: \n"
"Language-Team: Belarusian\n"
"Language: be_BY\n"
@@ -100,7 +100,7 @@ msgstr "тыпы дакументаў"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "шлях"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-04 11:44\n"
"Last-Translator: \n"
"Language-Team: German\n"
"Language: de_DE\n"
@@ -376,7 +376,7 @@ msgstr "Filterregeln"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "gestartet"

#: documents/serialisers.py:70
#, python-format
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx wird geladen..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr "Du bist noch hier?! Hmm, da muss wohl etwas schief gelaufen sein."
msgstr "Du bist noch hier? Hmm, da muss wohl etwas schiefgelaufen sein."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -654,7 +654,7 @@ msgstr "Als wichtig markieren, markierte E-Mails nicht verarbeiten"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Markiere die Mail mit dem angegebenen Tag, verarbeite nicht markierte Mails"

#: paperless_mail/models.py:71
msgid "Use subject as title"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-06 20:21\n"
"Last-Translator: \n"
"Language-Team: Finnish\n"
"Language: fi_FI\n"
@@ -376,7 +376,7 @@ msgstr "suodatussäännöt"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "aloitettu"

#: documents/serialisers.py:70
#, python-format
@@ -638,11 +638,11 @@ msgstr "Prosessoi kaikki tiedostot, sisältäen \"inline\"-liitteet."

#: paperless_mail/models.py:64
msgid "Delete"
msgstr ""
msgstr "Poista"

#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
msgstr "Siirrä määritettyyn kansioon"

#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
@@ -650,117 +650,117 @@ msgstr "Merkitse luetuksi, älä prosessoi luettuja sähköposteja"

#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
msgstr "Liputa sähköposti, älä käsittele liputettuja sähköposteja"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Merkitse viesti määrätyllä tagilla, älä käsittele tageja"

#: paperless_mail/models.py:71
msgid "Use subject as title"
msgstr ""
msgstr "Käytä aihetta otsikkona"

#: paperless_mail/models.py:72
msgid "Use attachment filename as title"
msgstr ""
msgstr "Käytä liitteen tiedostonimeä otsikkona"

#: paperless_mail/models.py:75
msgid "Do not assign a correspondent"
msgstr ""
msgstr "Älä määritä yhteyshenkilöä"

#: paperless_mail/models.py:76
msgid "Use mail address"
msgstr ""
msgstr "Käytä sähköpostiosoitetta"

#: paperless_mail/models.py:77
msgid "Use name (or mail address if not available)"
msgstr ""
msgstr "Käytä nimeä (tai sähköpostiosoitetta, jos ei ole saatavilla)"

#: paperless_mail/models.py:78
msgid "Use correspondent selected below"
msgstr ""
msgstr "Käytä alla valittua yhteyshenkilöä"

#: paperless_mail/models.py:82
msgid "order"
msgstr ""
msgstr "järjestys"

#: paperless_mail/models.py:88
msgid "account"
msgstr ""
msgstr "tili"

#: paperless_mail/models.py:92
msgid "folder"
msgstr ""
msgstr "kansio"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Alikansiot on erotettava erottimella, usein pisteellä ('.') tai kauttaviivalla ('/'), mutta se vaihtelee postipalvelimen mukaan."

#: paperless_mail/models.py:102
msgid "filter from"
msgstr ""
msgstr "suodata lähettäjä-kenttä"

#: paperless_mail/models.py:108
msgid "filter subject"
msgstr ""
msgstr "suodata aihe"

#: paperless_mail/models.py:114
msgid "filter body"
msgstr ""
msgstr "suodata runko"

#: paperless_mail/models.py:121
msgid "filter attachment filename"
msgstr ""
msgstr "suodata liitteen tiedostonimi"

#: paperless_mail/models.py:126
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
msgstr "Tuo vain dokumentit jotka täsmäävät täysin tiedostonimen suhteen. Jokerimerkit kuten *.pdf tai *lasku* ovat sallittuja. Kirjainkoko ei merkitse."

#: paperless_mail/models.py:133
msgid "maximum age"
msgstr ""
msgstr "ikä enintään"

#: paperless_mail/models.py:135
msgid "Specified in days."
msgstr ""
msgstr "Määritetty päivinä."

#: paperless_mail/models.py:139
msgid "attachment type"
msgstr ""
msgstr "liitteen tyyppi"

#: paperless_mail/models.py:143
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
msgstr ""
msgstr "Sisäiset liitteet sisältävät upotettuja kuvia, joten on parasta yhdistää tämä vaihtoehto tiedostonimen suodattimeen."

#: paperless_mail/models.py:149
msgid "action"
msgstr ""
msgstr "toiminto"

#: paperless_mail/models.py:155
msgid "action parameter"
msgstr ""
msgstr "toiminnon parametrit"

#: paperless_mail/models.py:160
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
msgstr ""
msgstr "Yllä valitun toiminnon lisäparametri eli siirrä hakemistoon -toiminnon kohdehakemisto. Alikansiot on erotettava toisistaan pisteillä."

#: paperless_mail/models.py:168
msgid "assign title from"
msgstr ""
msgstr "aseta otsikko kohteesta"

#: paperless_mail/models.py:176
msgid "assign this tag"
msgstr ""
msgstr "määritä tämä tunniste"

#: paperless_mail/models.py:184
msgid "assign this document type"
msgstr ""
msgstr "määritä tämä asiakirjatyyppi"

#: paperless_mail/models.py:188
msgid "assign correspondent from"
msgstr ""
msgstr "määritä kirjeenvaihtaja kohteesta"

#: paperless_mail/models.py:198
msgid "assign this correspondent"
msgstr ""
msgstr "määritä tämä kirjeenvaihtaja"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-09-07 21:41\n"
"Last-Translator: \n"
"Language-Team: French\n"
"Language: fr_FR\n"
@@ -100,15 +100,15 @@ msgstr "types de document"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "chemin"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "chemin de stockage"

#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "chemins de stockage"

#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "règles de filtrage"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "démarré"

#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Type de fichier %(type)s non pris en charge"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Variable non valide détectée."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx est en cours de chargement..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Toujours ici ? Hum, quelque chose a dû mal se passer."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Lien vers la documentation."

#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Anglais (US)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Biélorusse"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Russe"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovène"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Serbe"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Suédois"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turc"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Chinois simplifié"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Marquer le courriel, ne pas traiter les courriels marqués"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Affecter l’étiquette spécifée au courrier, ne pas traiter les courriels étiquetés"

#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "répertoire"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Les sous-dossiers doivent être séparés par un délimiteurs, souvent un point ('.') ou un slash ('/'), en fonction du serveur de messagerie."

#: paperless_mail/models.py:102
msgid "filter from"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 11:24\n"
"Last-Translator: \n"
"Language-Team: Italian\n"
"Language: it_IT\n"
@@ -376,7 +376,7 @@ msgstr "regole filtro"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "avviato"

#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "Contrassegna la email, non elaborare le email elaborate."

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Etichetta la posta con il tag specificato, non processare le email etichettate"

#: paperless_mail/models.py:71
msgid "Use subject as title"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-26 20:54\n"
"Last-Translator: \n"
"Language-Team: Dutch\n"
"Language: nl_NL\n"
@@ -100,15 +100,15 @@ msgstr "documenttypen"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "pad"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "opslag pad"

#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "opslag paden"

#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "filterregels"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "gestart"

#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Bestandstype %(type)s niet ondersteund"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Ongeldige variabele ontdekt."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx is aan het laden..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Nog steeds hier?! Hmm, er kan iets mis zijn."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -450,7 +450,7 @@ msgstr "Engels (US)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Wit-Russisch"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Russisch"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Sloveens"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Servisch"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Zweeds"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turks"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Chinees (vereenvoudigd)"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Markeer de mail, verwerk geen mails met markering"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Tag de mail met de opgegeven tag, verwerk geen getagde mails"

#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "map"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Submappen moeten gescheiden worden door een scheidingsteken, vaak een punt ('.') of slash ('/'), maar het varieert per mailserver."

#: paperless_mail/models.py:102
msgid "filter from"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 08:59\n"
"Last-Translator: \n"
"Language-Team: Norwegian\n"
"Language: no_NO\n"
@@ -220,7 +220,7 @@ msgstr "kritisk"

#: documents/models.py:325
msgid "group"
msgstr ""
msgstr "gruppe"

#: documents/models.py:327
msgid "message"
@@ -228,11 +228,11 @@ msgstr "melding"

#: documents/models.py:330
msgid "level"
msgstr ""
msgstr "nivå"

#: documents/models.py:339
msgid "log"
msgstr "log"
msgstr "Logg"

#: documents/models.py:340
msgid "logs"
@@ -240,11 +240,11 @@ msgstr "logger"

#: documents/models.py:350 documents/models.py:403
msgid "saved view"
msgstr ""
msgstr "lagret visning"

#: documents/models.py:351
msgid "saved views"
msgstr ""
msgstr "lagrede visninger"

#: documents/models.py:353
msgid "user"
@@ -252,35 +252,35 @@ msgstr "bruker"

#: documents/models.py:357
msgid "show on dashboard"
msgstr ""
msgstr "vis på dashbordet"

#: documents/models.py:360
msgid "show in sidebar"
msgstr ""
msgstr "vis i sidestolpen"

#: documents/models.py:364
msgid "sort field"
msgstr ""
msgstr "sorter felt"

#: documents/models.py:369
msgid "sort reverse"
msgstr ""
msgstr "sorter på baksiden"

#: documents/models.py:374
msgid "title contains"
msgstr ""
msgstr "tittelen inneholder"

#: documents/models.py:375
msgid "content contains"
msgstr ""
msgstr "innholdet inneholder"

#: documents/models.py:376
msgid "ASN is"
msgstr ""
msgstr "ASN er"

#: documents/models.py:377
msgid "correspondent is"
msgstr ""
msgstr "tilsvarendet er"

#: documents/models.py:378
msgid "document type is"
@@ -288,15 +288,15 @@ msgstr "dokumenttype er"

#: documents/models.py:379
msgid "is in inbox"
msgstr ""
msgstr "er i innboksen"

#: documents/models.py:380
msgid "has tag"
msgstr ""
msgstr "har tagg"

#: documents/models.py:381
msgid "has any tag"
msgstr ""
msgstr "har en tag"

#: documents/models.py:382
msgid "created before"
@@ -304,125 +304,125 @@ msgstr "opprettet før"

#: documents/models.py:383
msgid "created after"
msgstr ""
msgstr "opprettet etter"

#: documents/models.py:384
msgid "created year is"
msgstr ""
msgstr "opprettet år er"

#: documents/models.py:385
msgid "created month is"
msgstr ""
msgstr "opprettet måned er"

#: documents/models.py:386
msgid "created day is"
msgstr ""
msgstr "opprettet dag er"

#: documents/models.py:387
msgid "added before"
msgstr ""
msgstr "lagt til før"

#: documents/models.py:388
msgid "added after"
msgstr ""
msgstr "lagt til etter"

#: documents/models.py:389
msgid "modified before"
msgstr ""
msgstr "endret før"

#: documents/models.py:390
msgid "modified after"
msgstr ""
msgstr "endret etter"

#: documents/models.py:391
msgid "does not have tag"
msgstr ""
msgstr "har ikke tagg"

#: documents/models.py:392
msgid "does not have ASN"
msgstr ""
msgstr "har ikke ASN"

#: documents/models.py:393
msgid "title or content contains"
msgstr ""
msgstr "tittel eller innhold inneholder"

#: documents/models.py:394
msgid "fulltext query"
msgstr ""
msgstr "full tekst spørring"

#: documents/models.py:395
msgid "more like this"
msgstr ""
msgstr "mer som dette"

#: documents/models.py:396
msgid "has tags in"
msgstr ""
msgstr "har tags i"

#: documents/models.py:406
msgid "rule type"
msgstr ""
msgstr "Type regel"

#: documents/models.py:408
msgid "value"
msgstr ""
msgstr "verdi"

#: documents/models.py:411
msgid "filter rule"
msgstr ""
msgstr "filtrer regel"

#: documents/models.py:412
msgid "filter rules"
msgstr ""
msgstr "filtrer regler"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "startet"

#: documents/serialisers.py:70
#, python-format
msgid "Invalid regular expression: %(error)s"
msgstr ""
msgstr "Ugyldig regulært uttrykk: %(error)s"

#: documents/serialisers.py:191
msgid "Invalid color."
msgstr ""
msgstr "Ugyldig farge."

#: documents/serialisers.py:515
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
msgstr "Filtype %(type)s støttes ikke"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Ugyldig variabel oppdaget."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
msgstr ""
msgstr "Paperless-ngx laster..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Fortsatt her?! Hmm, noe kan være galt."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Her er en lenke til dokkene."

#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
msgstr ""
msgstr "Paperless-ngx logget ut"

#: documents/templates/registration/logged_out.html:59
msgid "You have been successfully logged out. Bye!"
msgstr ""
msgstr "Du har blitt logget ut. Av!"

#: documents/templates/registration/logged_out.html:60
msgid "Sign in again"
msgstr ""
msgstr "Logg inn igjen"

#: documents/templates/registration/login.html:15
msgid "Paperless-ngx sign in"
msgstr ""
msgstr "Paperless-ngx-tegn inn"

#: documents/templates/registration/login.html:61
msgid "Please sign in."
@@ -450,63 +450,63 @@ msgstr "Engelsk (US)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr "Belarusian"
msgstr "Hviterussisk"

#: paperless/settings.py:341
msgid "Czech"
msgstr "Czech"
msgstr "Tsjekkisk"

#: paperless/settings.py:342
msgid "Danish"
msgstr "Danish"
msgstr "Dansk"

#: paperless/settings.py:343
msgid "German"
msgstr "German"
msgstr "Tysk"

#: paperless/settings.py:344
msgid "English (GB)"
msgstr "English (GB)"
msgstr "Engelsk (GB)"

#: paperless/settings.py:345
msgid "Spanish"
msgstr "Spanish"
msgstr "Spansk"

#: paperless/settings.py:346
msgid "French"
msgstr "French"
msgstr "Fransk"

#: paperless/settings.py:347
msgid "Italian"
msgstr "Italian"
msgstr "Italiensk"

#: paperless/settings.py:348
msgid "Luxembourgish"
msgstr "Luxembourgish"
msgstr "Luxembourgsk"

#: paperless/settings.py:349
msgid "Dutch"
msgstr "Dutch"
msgstr "Nederlandsk"

#: paperless/settings.py:350
msgid "Polish"
msgstr "Polish"
msgstr "Polsk"

#: paperless/settings.py:351
msgid "Portuguese (Brazil)"
msgstr "Portuguese (Brazil)"
msgstr "Portugisisk (Brasil)"

#: paperless/settings.py:352
msgid "Portuguese"
msgstr "Portuguese"
msgstr "Portugisisk"

#: paperless/settings.py:353
msgid "Romanian"
msgstr "Romanian"
msgstr "Rumensk"

#: paperless/settings.py:354
msgid "Russian"
msgstr "Russian"
msgstr "Russisk"

#: paperless/settings.py:355
msgid "Slovenian"
@@ -514,19 +514,19 @@ msgstr "Slovenian"

#: paperless/settings.py:356
msgid "Serbian"
msgstr "Serbian"
msgstr "Serbisk"

#: paperless/settings.py:357
msgid "Swedish"
msgstr "Swedish"
msgstr "Svensk"

#: paperless/settings.py:358
msgid "Turkish"
msgstr "Turkish"
msgstr "Tyrkisk"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr "Chinese Simplified"
msgstr "Kinesisk forenklet"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -542,7 +542,7 @@ msgstr "Avanserte innstillinger"

#: paperless_mail/admin.py:47
msgid "Filter"
msgstr "Filter"
msgstr "Filtrer"

#: paperless_mail/admin.py:50
msgid "Paperless will only process mails that match ALL of the filters given below."
@@ -554,19 +554,19 @@ msgstr "Handlinger"

#: paperless_mail/admin.py:67
msgid "The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched."
msgstr ""
msgstr "Handlingen som brukes på e-posten. Denne handlingen blir bare utført når dokumenter blir forbrukt av e-posten. Mailer uten vedlegg forblir helt urørte."

#: paperless_mail/admin.py:75
msgid "Metadata"
msgstr "Metadata"
msgstr "Nøkkeldata"

#: paperless_mail/admin.py:78
msgid "Assign metadata to documents consumed from this rule automatically. If you do not assign tags, types or correspondents here, paperless will still process all matching rules that you have defined."
msgstr ""
msgstr "Tilordne metadata til dokumenter som brukes fra denne regelen automatisk. Hvis du ikke tilordner etiketter, typer eller korrespondenter her, vil papirløs fremdeles behandle alle matchende regler som du har definert."

#: paperless_mail/apps.py:8
msgid "Paperless mail"
msgstr ""
msgstr "Paperløst e-post"

#: paperless_mail/models.py:8
msgid "mail account"
@@ -586,23 +586,23 @@ msgstr "Bruk SSL"

#: paperless_mail/models.py:14
msgid "Use STARTTLS"
msgstr ""
msgstr "Bruk STARTTLS"

#: paperless_mail/models.py:18
msgid "IMAP server"
msgstr ""
msgstr "IMAP tjener"

#: paperless_mail/models.py:21
msgid "IMAP port"
msgstr ""
msgstr "IMAP port"

#: paperless_mail/models.py:25
msgid "This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections."
msgstr ""
msgstr "Dette er vanligvis 143 for ukrypterte og STARTTLS-tilkoblinger, og 993 for SSL-tilkoblinger."

#: paperless_mail/models.py:31
msgid "IMAP security"
msgstr ""
msgstr "IMAP sikkerhet"

#: paperless_mail/models.py:36
msgid "username"
@@ -618,7 +618,7 @@ msgstr "tegnsett"

#: paperless_mail/models.py:45
msgid "The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'."
msgstr ""
msgstr "Tegnet som skal brukes ved kommunikasjon med e-posttjeneren, som for eksempel 'UTF-8' eller 'US-ASCII'."
|
||||
|
||||
#: paperless_mail/models.py:56
|
||||
msgid "mail rule"
|
||||
@@ -626,141 +626,141 @@ msgstr "e-post regel"
|
||||
|
||||
#: paperless_mail/models.py:57
|
||||
msgid "mail rules"
|
||||
msgstr ""
|
||||
msgstr "Epost regler"
|
||||
|
||||
#: paperless_mail/models.py:60
|
||||
msgid "Only process attachments."
|
||||
msgstr ""
|
||||
msgstr "Bare behandle vedlegg."
|
||||
|
||||
#: paperless_mail/models.py:61
|
||||
msgid "Process all files, including 'inline' attachments."
|
||||
msgstr ""
|
||||
msgstr "Behandle alle filer, inkludert \"inline\"-vedlegg."
|
||||
|
||||
#: paperless_mail/models.py:64
|
||||
msgid "Delete"
|
||||
msgstr ""
|
||||
msgstr "Slett"
|
||||
|
||||
#: paperless_mail/models.py:65
|
||||
msgid "Move to specified folder"
|
||||
msgstr ""
|
||||
msgstr "Flytt til angitt mappe"
|
||||
|
||||
#: paperless_mail/models.py:66
|
||||
msgid "Mark as read, don't process read mails"
|
||||
msgstr ""
|
||||
msgstr "Merk som lest og ikke behandle e-post"
|
||||
|
||||
#: paperless_mail/models.py:67
|
||||
msgid "Flag the mail, don't process flagged mails"
|
||||
msgstr ""
|
||||
msgstr "Marker posten, ikke behandle flaggede meldinger"
|
||||
|
||||
#: paperless_mail/models.py:68
|
||||
msgid "Tag the mail with specified tag, don't process tagged mails"
|
||||
msgstr ""
|
||||
msgstr "Merk e-post med angitte tag, ikke bruk merkede meldinger"
|
||||
|
||||
#: paperless_mail/models.py:71
|
||||
msgid "Use subject as title"
|
||||
msgstr ""
|
||||
msgstr "Bruk emne som tittel"
|
||||
|
||||
#: paperless_mail/models.py:72
|
||||
msgid "Use attachment filename as title"
|
||||
msgstr ""
|
||||
msgstr "Bruk vedlagte filnavn som tittel"
|
||||
|
||||
#: paperless_mail/models.py:75
|
||||
msgid "Do not assign a correspondent"
|
||||
msgstr ""
|
||||
msgstr "Ikke tildel en korrespondent"
|
||||
|
||||
#: paperless_mail/models.py:76
|
||||
msgid "Use mail address"
|
||||
msgstr ""
|
||||
msgstr "Bruk e-postadresse"
|
||||
|
||||
#: paperless_mail/models.py:77
|
||||
msgid "Use name (or mail address if not available)"
|
||||
msgstr ""
|
||||
msgstr "Bruk navn (eller e-postadresse hvis det ikke er tilgjengelig)"
|
||||
|
||||
#: paperless_mail/models.py:78
|
||||
msgid "Use correspondent selected below"
|
||||
msgstr ""
|
||||
msgstr "Bruk tilsvarende valgt nedenfor"
|
||||
|
||||
#: paperless_mail/models.py:82
|
||||
msgid "order"
|
||||
msgstr ""
|
||||
msgstr "ordre"
|
||||
|
||||
#: paperless_mail/models.py:88
|
||||
msgid "account"
|
||||
msgstr ""
|
||||
msgstr "konto"
|
||||
|
||||
#: paperless_mail/models.py:92
|
||||
msgid "folder"
|
||||
msgstr ""
|
||||
msgstr "mappe"
|
||||
|
||||
#: paperless_mail/models.py:96
|
||||
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
|
||||
msgstr ""
|
||||
msgstr "Undermapper må være atskilt av en skilletegn, ofte en punktum ('.') eller skråstrek ('/'), men den varierer fra e-postserver."
|
||||
|
||||
#: paperless_mail/models.py:102
|
||||
msgid "filter from"
|
||||
msgstr ""
|
||||
msgstr "filtrer fra"
|
||||
|
||||
#: paperless_mail/models.py:108
|
||||
msgid "filter subject"
|
||||
msgstr ""
|
||||
msgstr "filtrer emne"
|
||||
|
||||
#: paperless_mail/models.py:114
|
||||
msgid "filter body"
|
||||
msgstr ""
|
||||
msgstr "filtrer innhold"
|
||||
|
||||
#: paperless_mail/models.py:121
|
||||
msgid "filter attachment filename"
|
||||
msgstr ""
|
||||
msgstr "filtrer vedlagte filnavn"
|
||||
|
||||
#: paperless_mail/models.py:126
|
||||
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
|
||||
msgstr ""
|
||||
msgstr "Bare bruke dokumenter som samsvarer med dette filnavnet hvis angitt. Jokertegn som *.pdf eller *faktura* er tillatt. Saksfortegnet."
|
||||
|
||||
#: paperless_mail/models.py:133
|
||||
msgid "maximum age"
|
||||
msgstr ""
|
||||
msgstr "maksimal alder"
|
||||
|
||||
#: paperless_mail/models.py:135
|
||||
msgid "Specified in days."
|
||||
msgstr ""
|
||||
msgstr "Spesifisert i dager"
|
||||
|
||||
#: paperless_mail/models.py:139
|
||||
msgid "attachment type"
|
||||
msgstr ""
|
||||
msgstr "vedlegg type"
|
||||
|
||||
#: paperless_mail/models.py:143
|
||||
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
|
||||
msgstr ""
|
||||
msgstr "Innebygde vedlegg inkluderer innebygde bilder, så det er best å kombinere dette alternativet med et filter."
|
||||
|
||||
#: paperless_mail/models.py:149
|
||||
msgid "action"
|
||||
msgstr ""
|
||||
msgstr "handling"
|
||||
|
||||
#: paperless_mail/models.py:155
|
||||
msgid "action parameter"
|
||||
msgstr ""
|
||||
msgstr "parameter for handling"
|
||||
|
||||
#: paperless_mail/models.py:160
|
||||
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
|
||||
msgstr ""
|
||||
msgstr "Ytterligere parameter for handlingen valgt ovenfor, dvs. målmappen for flytting til mappehandling. Undermapper må separeres med punkter."
|
||||
|
||||
#: paperless_mail/models.py:168
|
||||
msgid "assign title from"
|
||||
msgstr ""
|
||||
msgstr "tilordne tittel fra"
|
||||
|
||||
#: paperless_mail/models.py:176
|
||||
msgid "assign this tag"
|
||||
msgstr ""
|
||||
msgstr "tilordne denne taggen"
|
||||
|
||||
#: paperless_mail/models.py:184
|
||||
msgid "assign this document type"
|
||||
msgstr ""
|
||||
msgstr "tilordne denne dokumenttypen"
|
||||
|
||||
#: paperless_mail/models.py:188
|
||||
msgid "assign correspondent from"
|
||||
msgstr ""
|
||||
msgstr "Tildel korrespondent fra"
|
||||
|
||||
#: paperless_mail/models.py:198
|
||||
msgid "assign this correspondent"
|
||||
msgstr ""
|
||||
msgstr "Tildel denne korrespondenten"
|
||||
|
||||
|
@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-17 11:20\n"
"Last-Translator: \n"
"Language-Team: Polish\n"
"Language: pl_PL\n"
@@ -376,7 +376,7 @@ msgstr "reguły filtrowania"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "start"

#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "Oznacz wiadomość, nie przetwarzaj oznaczonych wiadomości"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Oznacz pocztę z podanym tagiem, nie przetwarzaj otagowanych wiadomości"

#: paperless_mail/models.py:71
msgid "Use subject as title"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-03 16:12\n"
"Last-Translator: \n"
"Language-Team: Russian\n"
"Language: ru_RU\n"
@@ -100,15 +100,15 @@ msgstr "типы документов"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "путь"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "путь к хранилищу"

#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "пути хранения"

#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "правила фильтрации"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "запущено"

#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Тип файла %(type)s не поддерживается"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Обнаружена неверная переменная."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx загружается..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Все еще здесь?! Хмм, возможно что-то не так."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Вот ссылка на документацию."

#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Английский (США)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Белорусский"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Русский"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Словенский"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Сербский"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Шведский"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Турецкий"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Китайский упрощенный"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Пометить почту, не обрабатывать помече

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Отметить почту указанным тегом, не обрабатывать помеченные письма"

#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "каталог"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Подпапки должны быть отделены разделителем, часто точкой ('.') или косой чертой ('/'), но это зависит от почтового сервера."

#: paperless_mail/models.py:102
msgid "filter from"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-25 12:46\n"
"Last-Translator: \n"
"Language-Team: Slovenian\n"
"Language: sl_SI\n"
@@ -100,15 +100,15 @@ msgstr "vrste dokumentov"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "pot"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "pot do shrambe"

#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "poti do shrambe"

#: documents/models.py:105
msgid "Unencrypted"
@@ -376,7 +376,7 @@ msgstr "filtriraj pravila"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "zagnano"

#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Vrsta datoteke %(type)s ni podprta"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Zaznani neveljavni znaki."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,11 +402,11 @@ msgstr "Paperless-ngx se nalaga..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Še vedno tam? Hmm, kot kaže je šlo nekaj narobe."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Tu je povezava do dokumentacije."

#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
@@ -450,7 +450,7 @@ msgstr "Angleščina (ZDA)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Beloruščina"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Ruščina"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovenščina"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Srbščina"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Švedščina"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turščina"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Poenostavljena kitajščina"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -654,7 +654,7 @@ msgstr "Označite pošto z zastavico, ne obdelujte označene pošte"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Označi pošto s določeno oznako, ne procesiraj označene pošte"

#: paperless_mail/models.py:71
msgid "Use subject as title"
@@ -694,7 +694,7 @@ msgstr "mapa"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Podmape morajo biti ločene s znakom, običajno je to pika (.) ali slash ('/'), je pa odvisno od poštnega strežnika."

#: paperless_mail/models.py:102
msgid "filter from"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-04 23:55\n"
"Last-Translator: \n"
"Language-Team: Serbian (Latin)\n"
"Language: sr_CS\n"
@@ -60,15 +60,15 @@ msgstr "algoritam podudaranja"

#: documents/models.py:47
msgid "is insensitive"
msgstr ""
msgstr "bez razlike veliko/malo slovo"

#: documents/models.py:60 documents/models.py:115
msgid "correspondent"
msgstr "dopisnik"
msgstr "korespodent"

#: documents/models.py:61
msgid "correspondents"
msgstr "dopisnici"
msgstr "korespodenti"

#: documents/models.py:66
msgid "color"
@@ -80,7 +80,7 @@ msgstr "je oznaka prijemnog sandučeta"

#: documents/models.py:72
msgid "Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags."
msgstr ""
msgstr "Označava ovu oznaku kao oznaku prijemnog sandučeta (inbox): Svi novoobrađeni dokumenti će biti označeni oznakama prijemnog sandučeta (inbox)."

#: documents/models.py:78
msgid "tag"
@@ -100,23 +100,23 @@ msgstr "tipovi dokumenta"

#: documents/models.py:90
msgid "path"
msgstr ""
msgstr "putanja"

#: documents/models.py:96 documents/models.py:124
msgid "storage path"
msgstr ""
msgstr "putanja skladišta"

#: documents/models.py:97
msgid "storage paths"
msgstr ""
msgstr "putanja skladišta"

#: documents/models.py:105
msgid "Unencrypted"
msgstr ""
msgstr "Nešifrovano"

#: documents/models.py:106
msgid "Encrypted with GNU Privacy Guard"
msgstr ""
msgstr "Šifrovano pomoću GNU Privacy Guard"

#: documents/models.py:127
msgid "title"
@@ -128,7 +128,7 @@ msgstr "sadržaj"

#: documents/models.py:142
msgid "The raw, text-only data of the document. This field is primarily used for searching."
msgstr ""
msgstr "Neobrađeni tekstualni podaci dokumenta. Ovo se polje koristi prvenstveno za pretraživanje."

#: documents/models.py:147
msgid "mime type"
@@ -172,7 +172,7 @@ msgstr "naziv fajla"

#: documents/models.py:204
msgid "Current filename in storage"
msgstr ""
msgstr "Trenutni naziv sačuvane datoteke"

#: documents/models.py:208
msgid "archive filename"
@@ -180,7 +180,7 @@ msgstr "naziv fajla arhive"

#: documents/models.py:214
msgid "Current archive filename in storage"
msgstr ""
msgstr "Trenutni naziv arhivirane sačuvane datoteke"

#: documents/models.py:218
msgid "archive serial number"
@@ -188,7 +188,7 @@ msgstr "arhivski serijski broj"

#: documents/models.py:224
msgid "The position of this document in your physical document archive."
msgstr ""
msgstr "Položaj ovog dokumenta u vašoj fizičkoj arhivi dokumenata."

#: documents/models.py:230
msgid "document"
@@ -264,7 +264,7 @@ msgstr "polje za sortiranje"

#: documents/models.py:369
msgid "sort reverse"
msgstr ""
msgstr "obrnuto sortiranje"

#: documents/models.py:374
msgid "title contains"
@@ -280,7 +280,7 @@ msgstr "ASN je"

#: documents/models.py:377
msgid "correspondent is"
msgstr "dopisnik je"
msgstr "korespodent je"

#: documents/models.py:378
msgid "document type is"
@@ -348,7 +348,7 @@ msgstr "naslov i sadržaj sadrži"

#: documents/models.py:394
msgid "fulltext query"
msgstr ""
msgstr "upit za ceo tekst"

#: documents/models.py:395
msgid "more like this"
@@ -376,12 +376,12 @@ msgstr "filter pravila"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "pokrenuto"

#: documents/serialisers.py:70
#, python-format
msgid "Invalid regular expression: %(error)s"
msgstr ""
msgstr "Nevažeći regularni izraz: %(error)s"

#: documents/serialisers.py:191
msgid "Invalid color."
@@ -390,11 +390,11 @@ msgstr "Nevažeća boja."
#: documents/serialisers.py:515
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
msgstr "Vrsta datoteke %(type)s nije podržana"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Otkrivena je nevažeća promenljiva."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,19 +402,19 @@ msgstr "Paperless-ngx se učitava..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Još uvek si ovde?! Hmm, možda nešto nije u redu."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
msgstr "Veze ka dokumentima."

#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
msgstr ""
msgstr "Paperless-ngx odjavljen"

#: documents/templates/registration/logged_out.html:59
msgid "You have been successfully logged out. Bye!"
msgstr ""
msgstr "Uspešno ste se odjavili!"

#: documents/templates/registration/logged_out.html:60
msgid "Sign in again"
@@ -422,7 +422,7 @@ msgstr "Prijavitе sе ponovo"

#: documents/templates/registration/login.html:15
msgid "Paperless-ngx sign in"
msgstr ""
msgstr "Paperless-ngx prijava"

#: documents/templates/registration/login.html:61
msgid "Please sign in."
@@ -430,7 +430,7 @@ msgstr "Prijavite se."

#: documents/templates/registration/login.html:64
msgid "Your username and password didn't match. Please try again."
msgstr ""
msgstr "Vaše korisničko ime i lozinka ne odgovaraju. Molimo pokušajte ponovo."

#: documents/templates/registration/login.html:67
msgid "Username"
@@ -450,7 +450,7 @@ msgstr "Engleski (US)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Beloruski"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Ruski"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovenački"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Srpski"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "Švedski"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Turski"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Kineski pojednostavljen"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"
@@ -534,7 +534,7 @@ msgstr "Paperless-ngx administracija"

#: paperless_mail/admin.py:29
msgid "Authentication"
msgstr ""
msgstr "Autentifikacija"

#: paperless_mail/admin.py:30
msgid "Advanced settings"
@@ -546,7 +546,7 @@ msgstr "Filter"

#: paperless_mail/admin.py:50
msgid "Paperless will only process mails that match ALL of the filters given below."
msgstr ""
msgstr "Paperless-ngx će obrađivati samo e-poštu koja odgovara SVIM filterima navedenim u nastavku."

#: paperless_mail/admin.py:64
msgid "Actions"
@@ -554,7 +554,7 @@ msgstr "Radnje"

#: paperless_mail/admin.py:67
msgid "The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched."
msgstr ""
msgstr "Akcija se odnosi na e-poštu. Ova se radnja izvodi samo ako su dokumenti konzumirani iz e-pošte. E-pošta bez priloga ostat će u potpunosti netaknuta."

#: paperless_mail/admin.py:75
msgid "Metadata"
@@ -562,7 +562,7 @@ msgstr "Metapodaci"

#: paperless_mail/admin.py:78
msgid "Assign metadata to documents consumed from this rule automatically. If you do not assign tags, types or correspondents here, paperless will still process all matching rules that you have defined."
msgstr ""
msgstr "Automatski dodelite metapodatke dokumentima koji se koriste iz ovog pravila. Ako ne dodelite oznaku, vrstu ili korespodenta, Paperless-ngx će i dalje obraditi sva pravila podudaranja koja ste definisali."

#: paperless_mail/apps.py:8
msgid "Paperless mail"
@@ -578,7 +578,7 @@ msgstr "mejl nalozi"

#: paperless_mail/models.py:12
msgid "No encryption"
msgstr ""
msgstr "Nema enkripcije"

#: paperless_mail/models.py:13
msgid "Use SSL"
@@ -598,7 +598,7 @@ msgstr "IMAP port"

#: paperless_mail/models.py:25
msgid "This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections."
msgstr ""
msgstr "Uobičajno 143 za nešifrovane i STARTTLS veze, a 993 za SSL veze."

#: paperless_mail/models.py:31
msgid "IMAP security"
@@ -618,23 +618,23 @@ msgstr "karakter set"

#: paperless_mail/models.py:45
msgid "The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'."
msgstr ""
msgstr "Skup znakova koji se koristi pri komunikaciji sa mejl serverom, poput 'UTF-8' ili 'US-ASCII'."

#: paperless_mail/models.py:56
msgid "mail rule"
msgstr ""
msgstr "pravilo e-pošte"

#: paperless_mail/models.py:57
msgid "mail rules"
msgstr ""
msgstr "pravila e-pošte"

#: paperless_mail/models.py:60
msgid "Only process attachments."
msgstr ""
msgstr "Obradi samo priloge."

#: paperless_mail/models.py:61
msgid "Process all files, including 'inline' attachments."
msgstr ""
msgstr "Obradite sve datoteke, uključujući \"umetnute\" priloge."

#: paperless_mail/models.py:64
msgid "Delete"
@@ -642,31 +642,31 @@ msgstr "Obriši"

#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
msgstr "Premesti u određen folder"

#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
msgstr ""
msgstr "Označi kao pročitano. Ne obrađuj pročitanu e-poštu"

#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
msgstr "Označi poštu zastavicom. Ne obrađuj e-poštu sa zastavicom"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "Označite poštu specifičnom oznakom. Ne obrađuj e-poštu s specifičnom oznakom"

#: paperless_mail/models.py:71
msgid "Use subject as title"
msgstr ""
msgstr "Koristi predmet kao naziv"

#: paperless_mail/models.py:72
msgid "Use attachment filename as title"
msgstr ""
msgstr "Koristi naziv datoteke priloga kao naziv"

#: paperless_mail/models.py:75
msgid "Do not assign a correspondent"
msgstr "Ne dodeljuj dopisnika"
msgstr "Ne dodeljuj korespodenta"

#: paperless_mail/models.py:76
msgid "Use mail address"
@@ -678,7 +678,7 @@ msgstr "Koristi naziv (ili mejl adresu ako nije dostupno)"

#: paperless_mail/models.py:78
msgid "Use correspondent selected below"
msgstr "Koristi dopisnika ispod"
msgstr "Koristi koreespodenta ispod"

#: paperless_mail/models.py:82
msgid "order"
@@ -694,7 +694,7 @@ msgstr "folder"

#: paperless_mail/models.py:96
msgid "Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server."
msgstr ""
msgstr "Podfolderi moraju biti odvojeni separatorom, često tačkom ('.') ili kosom crtom ('/'), ali to se razlikuje zavisno od servera e-pošte."

#: paperless_mail/models.py:102
msgid "filter from"
@@ -714,15 +714,15 @@ msgstr "filter naziv fajla priloga"

#: paperless_mail/models.py:126
msgid "Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
msgstr "Konzumirajte samo dokumente koji u potpunosti odgovaraju ovom nazivu datoteke ako je navedeno. Dopušteni su zamenski znakovi kao što su *.pdf ili *faktura*. Neosetljivo je na mala i mala slova."

#: paperless_mail/models.py:133
msgid "maximum age"
msgstr ""
msgstr "maksimalna starost"

#: paperless_mail/models.py:135
msgid "Specified in days."
msgstr ""
msgstr "Navedeno u danima."

#: paperless_mail/models.py:139
msgid "attachment type"
@@ -730,7 +730,7 @@ msgstr "tip priloga"

#: paperless_mail/models.py:143
msgid "Inline attachments include embedded images, so it's best to combine this option with a filename filter."
msgstr ""
msgstr "Ugrađeni prilozi uključuju ugrađene slike, pa je najbolje kombinovati ovu opciju s filterom naziva datoteke."

#: paperless_mail/models.py:149
msgid "action"
@@ -738,11 +738,11 @@ msgstr "radnja"

#: paperless_mail/models.py:155
msgid "action parameter"
msgstr ""
msgstr "parametar akcije"

#: paperless_mail/models.py:160
msgid "Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots."
msgstr ""
msgstr "Dodatni parametar za gore odabranu akciju, tj. ciljani folder za premeštanje u folder akcije. Podfolderi moraju biti odvojeni tačkama."

#: paperless_mail/models.py:168
msgid "assign title from"
@@ -758,9 +758,9 @@ msgstr "dodeli ovaj tip dokumenta"

#: paperless_mail/models.py:188
msgid "assign correspondent from"
msgstr "dodeli dopisnika iz"
msgstr "dodeli korespodenta iz"

#: paperless_mail/models.py:198
msgid "assign this correspondent"
msgstr "dodeli ovog dopisnika"
msgstr "dodeli ovog korspodenta"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-08-01 19:02\n"
"Last-Translator: \n"
"Language-Team: Turkish\n"
"Language: tr_TR\n"
@@ -80,7 +80,7 @@ msgstr "gelen kutu etiketidir"

#: documents/models.py:72
msgid "Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags."
msgstr "Bu etiketi, gelen kutusu etiketi olarak işaretle: Tüm yeni olarak tüketilen dökümanlar gelen kutusu etiketi ile etiketlendirileceklerdir."
msgstr "Bu etiketi, gelen kutusu etiketi olarak işaretle: Yeni aktarılan tüm dokümanlar gelen kutusu etiketi ile etiketlendirileceklerdir."

#: documents/models.py:78
msgid "tag"
@@ -376,7 +376,7 @@ msgstr "filtreleme kuralları"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "başladı"

#: documents/serialisers.py:70
#, python-format
@@ -394,7 +394,7 @@ msgstr "Dosya türü %(type)s desteklenmiyor"

#: documents/serialisers.py:596
msgid "Invalid variable detected."
msgstr ""
msgstr "Geçersiz değişken algılandı."

#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
@@ -402,7 +402,7 @@ msgstr "Paperless-ngx yükleniyor..."

#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
msgstr "Hâlâ burada mısınız? Hmm, bir şeyler yanlış olabilir."

#: documents/templates/index.html:79
msgid "Here's a link to the docs."
@@ -450,7 +450,7 @@ msgstr "İngilizce (Birleşik Devletler)"

#: paperless/settings.py:340
msgid "Belarusian"
msgstr ""
msgstr "Belarusça"

#: paperless/settings.py:341
msgid "Czech"
@@ -510,11 +510,11 @@ msgstr "Rusça"

#: paperless/settings.py:355
msgid "Slovenian"
msgstr ""
msgstr "Slovakça"

#: paperless/settings.py:356
msgid "Serbian"
msgstr ""
msgstr "Sırpça"

#: paperless/settings.py:357
msgid "Swedish"
@@ -522,11 +522,11 @@ msgstr "İsveççe"

#: paperless/settings.py:358
msgid "Turkish"
msgstr ""
msgstr "Türkçe"

#: paperless/settings.py:359
msgid "Chinese Simplified"
msgstr ""
msgstr "Basitleştirilmiş Çince"

#: paperless/urls.py:161
msgid "Paperless-ngx administration"

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-07-08 14:11-0700\n"
"PO-Revision-Date: 2022-07-08 22:07\n"
"PO-Revision-Date: 2022-07-15 04:02\n"
"Last-Translator: \n"
"Language-Team: Chinese Simplified\n"
"Language: zh_CN\n"
@@ -376,7 +376,7 @@ msgstr "过滤规则"

#: documents/models.py:521
msgid "started"
msgstr ""
msgstr "已开始"

#: documents/serialisers.py:70
#, python-format
@@ -654,7 +654,7 @@ msgstr "标记邮件,不处理已标记的邮件"

#: paperless_mail/models.py:68
msgid "Tag the mail with specified tag, don't process tagged mails"
msgstr ""
msgstr "用指定标签标记邮件,不要处理已标记的邮件"

#: paperless_mail/models.py:71
msgid "Use subject as title"

@@ -1,4 +1,11 @@
from .celery import app as celery_app
from .checks import binaries_check
from .checks import paths_check
from .checks import settings_values_check

__all__ = ["binaries_check", "paths_check"]
__all__ = [
    "celery_app",
    "binaries_check",
    "paths_check",
    "settings_values_check",
]

src/paperless/celery.py (new file, 17 lines)
@@ -0,0 +1,17 @@
import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

app = Celery("paperless")

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object("django.conf:settings", namespace="CELERY")

# Load task modules from all registered Django apps.
app.autodiscover_tasks()
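
Editor's note on the new module above: config_from_object() pulls every CELERY_*-prefixed key out of paperless.settings, and autodiscover_tasks() then imports a tasks module from each installed Django app. A minimal sketch of a task this wiring would pick up (a hypothetical body; the name mirrors the tasks referenced in the beat schedule later in this diff):

# documents/tasks.py (sketch, not part of this commit)
from celery import shared_task

@shared_task
def train_classifier():
    # Bound to the "paperless" app at import time because
    # paperless/__init__.py re-exports celery_app (see the hunk above).
    ...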

@@ -1,4 +1,6 @@
import grp
import os
import pwd
import shutil
import stat

@@ -32,12 +34,15 @@ def path_check(var, directory):
            with open(test_file, "w"):
                pass
        except PermissionError:
            dir_stat = os.stat(directory)
            dir_mode = stat.filemode(dir_stat.st_mode)
            dir_owner = pwd.getpwuid(dir_stat.st_uid).pw_name
            dir_group = grp.getgrgid(dir_stat.st_gid).gr_name
            messages.append(
                Error(
                    writeable_message.format(var),
                    writeable_hint.format(
                        f"\n{stat.filemode(os.stat(directory).st_mode)} "
                        f"{directory}\n",
                        f"\n{dir_mode} {dir_owner} {dir_group} " f"{directory}\n",
                    ),
                ),
            )
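
For context (editor's illustration, not part of the commit): stat.filemode() renders a raw st_mode ls-style, so the improved hint now shows mode, owner, and group much like `ls -ld` would.

import stat

# 0o40755 is a directory with rwxr-xr-x permissions
print(stat.filemode(0o40755))  # -> "drwxr-xr-x"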

@@ -96,3 +101,52 @@ def debug_mode_check(app_configs, **kwargs):
        ]
    else:
        return []


@register()
def settings_values_check(app_configs, **kwargs):
    """
    Validates at least some of the user provided settings
    """

    def _ocrmypdf_settings_check():
        """
        Validates some of the arguments which will be provided to ocrmypdf
        against the valid options. Use "ocrmypdf --help" to see the valid
        inputs
        """
        msgs = []
        if settings.OCR_OUTPUT_TYPE not in {
            "pdfa",
            "pdf",
            "pdfa-1",
            "pdfa-2",
            "pdfa-3",
        }:
            msgs.append(
                Error(f'OCR output type "{settings.OCR_OUTPUT_TYPE}" is not valid'),
            )

        if settings.OCR_MODE not in {"force", "skip", "redo", "skip_noarchive"}:
            msgs.append(Error(f'OCR output mode "{settings.OCR_MODE}" is not valid'))

        if settings.OCR_CLEAN not in {"clean", "clean-final", "none"}:
            msgs.append(Error(f'OCR clean mode "{settings.OCR_CLEAN}" is not valid'))
        return msgs

    def _timezone_validate():
        """
        Validates the user provided timezone is a valid timezone
        """
        try:
            import zoneinfo
        except ImportError:  # pragma: nocover
            import backports.zoneinfo as zoneinfo
        msgs = []
        if settings.TIME_ZONE not in zoneinfo.available_timezones():
            msgs.append(
                Error(f'Timezone "{settings.TIME_ZONE}" is not a valid timezone'),
            )
        return msgs

    return _ocrmypdf_settings_check() + _timezone_validate()
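
Because settings_values_check is decorated with @register(), it runs under Django's system check framework, e.g. on `manage.py check` or at server start-up. A minimal sketch of exercising it directly, assuming a configured Django environment (the same pattern the new tests later in this diff use):

from django.test import override_settings
from paperless.checks import settings_values_check

with override_settings(OCR_MODE="makeitso"):  # deliberately invalid
    errors = settings_values_check(None)
    assert 'OCR output mode "makeitso"' in errors[0].msg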

@@ -4,11 +4,13 @@ import math
import multiprocessing
import os
import re
import tempfile
from typing import Final
from typing import Optional
from typing import Set
from urllib.parse import urlparse

from celery.schedules import crontab
from concurrent_log_handler.queue import setup_logging_queues
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
@@ -56,6 +58,13 @@ def __get_float(key: str, default: float) -> float:
    return float(os.getenv(key, default))


def __get_path(key: str, default: str) -> str:
    """
    Return a normalized, absolute path based on the environment variable or a default
    """
    return os.path.abspath(os.path.normpath(os.environ.get(key, default)))
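
A quick illustration of the normalization this helper applies (editor's example; the double-underscore name is module-private, so the equivalent expression is shown, and the path is hypothetical):

import os

raw = "/opt/paperless/../paperless/media/"  # e.g. a sloppy env value
print(os.path.abspath(os.path.normpath(raw)))  # -> "/opt/paperless/media"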


# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")

@@ -66,14 +75,16 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

STATIC_ROOT = os.getenv("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static"))

MEDIA_ROOT = os.getenv("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media"))
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", os.path.join(BASE_DIR, "..", "media"))
ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals")
ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")

DATA_DIR = os.getenv("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))

NLTK_DIR = os.path.join(DATA_DIR, "nltk")

TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")

@@ -83,15 +94,18 @@ MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")

LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))

CONSUMPTION_DIR = os.getenv(
CONSUMPTION_DIR = __get_path(
    "PAPERLESS_CONSUMPTION_DIR",
    os.path.join(BASE_DIR, "..", "consume"),
)

# This will be created if it doesn't exist
SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
SCRATCH_DIR = __get_path(
    "PAPERLESS_SCRATCH_DIR",
    os.path.join(tempfile.gettempdir(), "paperless"),
)

###############################################################################
# Application Definition #
@@ -117,7 +131,7 @@ INSTALLED_APPS = [
    "rest_framework",
    "rest_framework.authtoken",
    "django_filters",
    "django_q",
    "django_celery_results",
] + env_apps

if DEBUG:
@@ -168,6 +182,8 @@ ASGI_APPLICATION = "paperless.asgi.application"
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
WHITENOISE_STATIC_PREFIX = "/static/"

_REDIS_URL = os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")

# TODO: what is this used for?
TEMPLATES = [
    {
@@ -189,7 +205,7 @@ CHANNEL_LAYERS = {
    "default": {
        "BACKEND": "channels_redis.core.RedisChannelLayer",
        "CONFIG": {
            "hosts": [os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")],
            "hosts": [_REDIS_URL],
            "capacity": 2000,  # default 100
            "expiry": 15,  # default 60
        },
@@ -274,7 +290,7 @@ SECRET_KEY = os.getenv(

AUTH_PASSWORD_VALIDATORS = [
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",  # noqa: E501
    },
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
@@ -308,6 +324,7 @@ DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
        "OPTIONS": {},
    },
}

@@ -317,16 +334,31 @@ if os.getenv("PAPERLESS_DBHOST"):
    DATABASES["sqlite"] = DATABASES["default"].copy()

    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "HOST": os.getenv("PAPERLESS_DBHOST"),
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
        "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
        "OPTIONS": {"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer")},
        "OPTIONS": {},
    }
    if os.getenv("PAPERLESS_DBPORT"):
        DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")

    # Leave room for future extensibility
    if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
        engine = "django.db.backends.mysql"
        options = {"read_default_file": "/etc/mysql/my.cnf", "charset": "utf8mb4"}
    else:  # Default to PostgresDB
        engine = "django.db.backends.postgresql_psycopg2"
        options = {"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer")}

    DATABASES["default"]["ENGINE"] = engine
    DATABASES["default"]["OPTIONS"].update(options)

    if os.getenv("PAPERLESS_DB_TIMEOUT") is not None:
        DATABASES["default"]["OPTIONS"].update(
            {"timeout": float(os.getenv("PAPERLESS_DB_TIMEOUT"))},
        )

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

###############################################################################
@@ -425,47 +457,57 @@ LOGGING = {
# Task queue #
###############################################################################

TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)

# Sensible defaults for multitasking:
# use a fair balance between worker processes and threads epr worker so that
# both consuming many documents in parallel and consuming large documents is
# reasonably fast.
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)

CELERY_BROKER_URL = _REDIS_URL
CELERY_TIMEZONE = TIME_ZONE

def default_task_workers() -> int:
    # always leave one core open
    available_cores = max(multiprocessing.cpu_count(), 1)
    try:
        if available_cores < 4:
            return available_cores
        return max(math.floor(math.sqrt(available_cores)), 1)
    except NotImplementedError:
        return 1
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY = TASK_WORKERS
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True

CELERY_SEND_TASK_SENT_EVENT = True

TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", default_task_workers())
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT

PAPERLESS_WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"

# Per django-q docs, timeout must be smaller than retry
# We default retry to 10s more than the timeout
PAPERLESS_WORKER_RETRY: Final[int] = __get_int(
    "PAPERLESS_WORKER_RETRY",
    PAPERLESS_WORKER_TIMEOUT + 10,
)
CELERY_BEAT_SCHEDULE = {
    # Every ten minutes
    "Check all e-mail accounts": {
        "task": "paperless_mail.tasks.process_mail_accounts",
        "schedule": crontab(minute="*/10"),
    },
    # Hourly at 5 minutes past the hour
    "Train the classifier": {
        "task": "documents.tasks.train_classifier",
        "schedule": crontab(minute="5", hour="*/1"),
    },
    # Daily at midnight
    "Optimize the index": {
        "task": "documents.tasks.index_optimize",
        "schedule": crontab(minute=0, hour=0),
    },
    # Weekly, Sunday at 00:30
    "Perform sanity check": {
        "task": "documents.tasks.sanity_check",
        "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
    },
}
CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
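
The four django-q scheduled jobs move over to Celery beat one-to-one. Extending the schedule follows the same shape (editor's sketch; the task path is hypothetical):

from celery.schedules import crontab

CELERY_BEAT_SCHEDULE["Empty the trash"] = {
    "task": "documents.tasks.empty_trash",  # hypothetical task
    "schedule": crontab(minute=15, hour=2),  # daily at 02:15
}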

Q_CLUSTER = {
    "name": "paperless",
    "guard_cycle": 5,
    "catch_up": False,
    "recycle": 1,
    "retry": PAPERLESS_WORKER_RETRY,
    "timeout": PAPERLESS_WORKER_TIMEOUT,
    "workers": TASK_WORKERS,
    "redis": os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
    "log_level": "DEBUG" if DEBUG else "INFO",
# django setting.
CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.redis.RedisCache",
        "LOCATION": _REDIS_URL,
    },
}


@@ -509,7 +551,7 @@ CONSUMER_IGNORE_PATTERNS = list(
    json.loads(
        os.getenv(
            "PAPERLESS_CONSUMER_IGNORE_PATTERNS",
            '[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]',
            '[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]',  # noqa: E501
        ),
    ),
)
@@ -533,11 +575,9 @@ OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")

# OCRmyPDF --output-type options are available.
# TODO: validate this setting.
OCR_OUTPUT_TYPE = os.getenv("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")

# skip. redo, force
# TODO: validate this.
OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")

OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
@@ -590,6 +630,11 @@ POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")

# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)

# Transformations applied before filename parsing
FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
@@ -598,7 +643,8 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")

# If this is enabled, variables in filename format will resolve to empty-string instead of 'none'.
# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
    "PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
@@ -610,16 +656,15 @@ THUMBNAIL_FONT_NAME = os.getenv(
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)

# TODO: this should not have a prefix.
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
    "http://localhost:3000",
)

if PAPERLESS_TIKA_ENABLED:
if TIKA_ENABLED:
    INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")


@@ -632,8 +677,9 @@ def _parse_ignore_dates(
    user provided string(s) into dates

    Args:
        env_ignore (str): The value of the environment variable, comma seperated dates
        date_order (str, optional): The format of the date strings. Defaults to DATE_ORDER.
        env_ignore (str): The value of the environment variable, comma separated dates
        date_order (str, optional): The format of the date strings.
            Defaults to DATE_ORDER.

    Returns:
        Set[datetime.datetime]: The set of parsed date objects
@@ -662,3 +708,40 @@ if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
    ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")

###############################################################################
# Machine Learning #
###############################################################################


def _get_nltk_language_setting(ocr_lang: str) -> Optional[str]:
    """
    Maps an ISO-639-1 language code supported by Tesseract into
    an optional NLTK language name. This is the set of common supported
    languages for all the NLTK data used.

    Assumption: The primary language is first
    """
    ocr_lang = ocr_lang.split("+")[0]
    iso_code_to_nltk = {
        "dan": "danish",
        "nld": "dutch",
        "eng": "english",
        "fin": "finnish",
        "fra": "french",
        "deu": "german",
        "ita": "italian",
        "nor": "norwegian",
        "por": "portuguese",
        "rus": "russian",
        "spa": "spanish",
        "swe": "swedish",
        "tur": "turkish",
    }

    return iso_code_to_nltk.get(ocr_lang, None)


NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")

NLTK_LANGUAGE: Optional[str] = _get_nltk_language_setting(OCR_LANGUAGE)
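
Usage sketch for the mapping above (editor's example): only the primary language, the part before the first "+", is consulted, and codes without matching NLTK data simply disable the processing by returning None.

from paperless.settings import _get_nltk_language_setting

print(_get_nltk_language_setting("deu+eng"))  # -> "german" (primary language wins)
print(_get_nltk_language_setting("chi_sim"))  # -> None (no NLTK data for it)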

@@ -1,12 +1,12 @@
import os
import shutil

from django.test import override_settings
from django.test import TestCase
from documents.tests.utils import DirectoriesMixin
from paperless import binaries_check
from paperless import paths_check
from paperless.checks import binaries_check
from paperless.checks import debug_mode_check
from paperless.checks import paths_check
from paperless.checks import settings_values_check


class TestChecks(DirectoriesMixin, TestCase):
@@ -54,3 +54,89 @@ class TestChecks(DirectoriesMixin, TestCase):
    @override_settings(DEBUG=True)
    def test_debug_enabled(self):
        self.assertEqual(len(debug_mode_check(None)), 1)


class TestSettingsChecks(DirectoriesMixin, TestCase):
    def test_all_valid(self):
        """
        GIVEN:
            - Default settings
        WHEN:
            - Settings are validated
        THEN:
            - No system check errors reported
        """
        msgs = settings_values_check(None)
        self.assertEqual(len(msgs), 0)

    @override_settings(OCR_OUTPUT_TYPE="notapdf")
    def test_invalid_output_type(self):
        """
        GIVEN:
            - Default settings
            - OCR output type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR output type
        """
        msgs = settings_values_check(None)
        self.assertEqual(len(msgs), 1)

        msg = msgs[0]

        self.assertIn('OCR output type "notapdf"', msg.msg)

    @override_settings(OCR_MODE="makeitso")
    def test_invalid_ocr_type(self):
        """
        GIVEN:
            - Default settings
            - OCR type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR type
        """
        msgs = settings_values_check(None)
        self.assertEqual(len(msgs), 1)

        msg = msgs[0]

        self.assertIn('OCR output mode "makeitso"', msg.msg)

    @override_settings(OCR_CLEAN="cleanme")
    def test_invalid_ocr_clean(self):
        """
        GIVEN:
            - Default settings
            - OCR cleaning type is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for OCR cleaning type
        """
        msgs = settings_values_check(None)
        self.assertEqual(len(msgs), 1)

        msg = msgs[0]

        self.assertIn('OCR clean mode "cleanme"', msg.msg)

    @override_settings(TIME_ZONE="TheMoon\\MyCrater")
    def test_invalid_timezone(self):
        """
        GIVEN:
            - Default settings
            - Timezone is invalid
        WHEN:
            - Settings are validated
        THEN:
            - system check error reported for timezone
        """
        msgs = settings_values_check(None)
        self.assertEqual(len(msgs), 1)

        msg = msgs[0]

        self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
|
||||
|
@@ -1,7 +1,9 @@
import datetime
from unittest import mock
from unittest import TestCase

from paperless.settings import _parse_ignore_dates
from paperless.settings import default_threads_per_worker


class TestIgnoreDateParsing(TestCase):
@@ -56,3 +58,27 @@ class TestIgnoreDateParsing(TestCase):
        ]

        self._parse_checker(test_cases)

    def test_workers_threads(self):
        """
        GIVEN:
            - Certain CPU counts
        WHEN:
            - Threads per worker is calculated
        THEN:
            - Threads per worker less than or equal to CPU count
            - At least 1 thread per worker
        """
        default_workers = 1

        for i in range(1, 64):
            with mock.patch(
                "paperless.settings.multiprocessing.cpu_count",
            ) as cpu_count:
                cpu_count.return_value = i

                default_threads = default_threads_per_worker(default_workers)

                self.assertGreaterEqual(default_threads, 1)

                self.assertLessEqual(default_workers * default_threads, i)
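The two assertions above (at least one thread, and workers * threads never exceeding the CPU count) pin down the shape of default_threads_per_worker. A minimal sketch that satisfies both invariants, assuming the real implementation simply divides the CPUs among the configured workers:

import multiprocessing


def default_threads_per_worker(workers: int) -> int:
    # Hypothetical reconstruction: give each worker an equal share of the
    # CPUs, but never fewer than one thread per worker.
    return max(multiprocessing.cpu_count() // workers, 1)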
@@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple

__version__: Final[Tuple[int, int, int]] = (1, 7, 1)
__version__: Final[Tuple[int, int, int]] = (1, 9, 2)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y
__major_minor_version_str__: Final[str] = ".".join(map(str, __version__[:-1]))
@@ -1,24 +1,26 @@
import os
import re
import tempfile
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from imaplib import IMAP4
from typing import Dict

import magic
import pathvalidate
from django.conf import settings
from django.db import DatabaseError
from django_q.tasks import async_task
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from imap_tools import AND
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from imap_tools.mailbox import MailBoxTls
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -29,7 +31,7 @@ class MailError(Exception):


class BaseMailAction:
    def get_criteria(self):
    def get_criteria(self) -> Dict:
        return {}

    def post_consume(self, M, message_uids, parameter):
@@ -67,13 +69,17 @@ class TagMailAction(BaseMailAction):
        self.keyword = parameter

    def get_criteria(self):
        return {"no_keyword": self.keyword}
        return {"no_keyword": self.keyword, "gmail_label": self.keyword}

    def post_consume(self, M: MailBox, message_uids, parameter):
        M.flag(message_uids, [self.keyword], True)
        if re.search(r"gmail\.com$|googlemail\.com$", M._host):
            for uid in message_uids:
                M.client.uid("STORE", uid, "X-GM-LABELS", self.keyword)
        else:
            M.flag(message_uids, [self.keyword], True)
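The branch above distinguishes Gmail from standard IMAP servers: Gmail models tags as labels (the X-GM-LABELS attribute of its X-GM-EXT-1 extension) rather than as IMAP keyword flags, so the action issues a raw UID STORE per message. A standalone sketch of the same command with imaplib; host, credentials and UID are illustrative only:

import imaplib

with imaplib.IMAP4_SSL("imap.gmail.com") as client:  # illustrative host
    client.login("user@example.com", "app-password")  # illustrative credentials
    client.select("INBOX")
    # UID STORE <uid> X-GM-LABELS <label> attaches a Gmail label to the message
    client.uid("STORE", "42", "X-GM-LABELS", "processed")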
def get_rule_action(rule):
def get_rule_action(rule) -> BaseMailAction:
    if rule.action == MailRule.MailAction.FLAG:
        return FlagMailAction()
    elif rule.action == MailRule.MailAction.DELETE:
@@ -103,7 +109,7 @@ def make_criterias(rule):
    return {**criterias, **get_rule_action(rule).get_criteria()}


def get_mailbox(server, port, security):
def get_mailbox(server, port, security) -> MailBox:
    if security == MailAccount.ImapSecurity.NONE:
        mailbox = MailBoxUnencrypted(server, port)
    elif security == MailAccount.ImapSecurity.STARTTLS:
@@ -162,7 +168,7 @@ class MailAccountHandler(LoggingMixin):
            "Unknown correspondent selector",
        )  # pragma: nocover

    def handle_mail_account(self, account):
    def handle_mail_account(self, account: MailAccount):

        self.renew_logging_group()
@@ -176,33 +182,29 @@ class MailAccountHandler(LoggingMixin):
            account.imap_security,
        ) as M:

            supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities
            supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities

            self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
            self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")

            try:

                M.login(account.username, account.password)

            except UnicodeEncodeError:
                self.log("debug", "Falling back to AUTH=PLAIN")
                try:
                    # rfc2595 section 6 - PLAIN SASL mechanism
                    client: IMAP4 = M.client
                    encoded = (
                        b"\0"
                        + account.username.encode("utf8")
                        + b"\0"
                        + account.password.encode("utf8")
                    )
                    # Assumption is the server supports AUTH=PLAIN capability
                    # Could check the list with client.capability(), but then what?
                    # We're failing anyway then
                    client.authenticate("PLAIN", lambda x: encoded)

                    # Need to transition out of AUTH state to SELECTED
                    M.folder.set("INBOX")
                except Exception:
                try:
                    M.login_utf8(account.username, account.password)
                except Exception as err:
                    self.log(
                        "error",
                        "Unable to authenticate with mail server using AUTH=PLAIN",
                    )
                    raise MailError(f"Error while authenticating account {account}")
                    raise MailError(
                        f"Error while authenticating account {account}",
                    ) from err
            except Exception as e:
                self.log(
                    "error",
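The fallback above builds the SASL PLAIN initial response from RFC 2595 section 6 (now RFC 4616): an empty authorization identity, then the username and the password, each preceded by a NUL byte. imaplib base64-encodes whatever the authobject callable returns before putting it on the wire, so no manual base64 is needed. A small self-contained check with made-up credentials:

username = "fritz"  # illustrative
password = "pässwörd"  # non-ASCII is exactly why the plain LOGIN failed

initial_response = b"\0" + username.encode("utf8") + b"\0" + password.encode("utf8")
assert initial_response == b"\x00fritz\x00p\xc3\xa4ssw\xc3\xb6rd"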
@@ -221,7 +223,11 @@ class MailAccountHandler(LoggingMixin):

                for rule in account.rules.order_by("order"):
                    try:
                        total_processed_files += self.handle_mail_rule(M, rule)
                        total_processed_files += self.handle_mail_rule(
                            M,
                            rule,
                            supports_gmail_labels,
                        )
                    except Exception as e:
                        self.log(
                            "error",
@@ -239,13 +245,18 @@ class MailAccountHandler(LoggingMixin):

        return total_processed_files

    def handle_mail_rule(self, M: MailBox, rule: MailRule):
    def handle_mail_rule(
        self,
        M: MailBox,
        rule: MailRule,
        supports_gmail_labels: bool = False,
    ):

        self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")

        try:
            M.folder.set(rule.folder)
        except MailboxFolderSelectError:
        except MailboxFolderSelectError as err:

            self.log(
                "error",
@@ -264,23 +275,38 @@ class MailAccountHandler(LoggingMixin):
            raise MailError(
                f"Rule {rule}: Folder {rule.folder} "
                f"does not exist in account {rule.account}",
            )
            ) from err

        criterias = make_criterias(rule)

        # Deal with the Gmail label extension
        if "gmail_label" in criterias:

            gmail_label = criterias["gmail_label"]
            del criterias["gmail_label"]

            if not supports_gmail_labels:
                criterias_imap = AND(**criterias)
            else:
                criterias_imap = AND(NOT(gmail_label=gmail_label), **criterias)
        else:
            criterias_imap = AND(**criterias)

        self.log(
            "debug",
            f"Rule {rule}: Searching folder with criteria " f"{str(AND(**criterias))}",
            f"Rule {rule}: Searching folder with criteria " f"{str(criterias_imap)}",
        )

        try:
            messages = M.fetch(
                criteria=AND(**criterias),
                criteria=criterias_imap,
                mark_seen=False,
                charset=rule.account.character_set,
            )
        except Exception:
            raise MailError(f"Rule {rule}: Error while fetching folder {rule.folder}")
        except Exception as err:
            raise MailError(
                f"Rule {rule}: Error while fetching folder {rule.folder}",
            ) from err

        post_consume_messages = []
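The branching above only adds NOT(gmail_label=...) when the server advertised X-GM-EXT-1, because other IMAP servers would reject the non-standard search key. Roughly how the imap_tools criteria combine for a Gmail tag rule; the rendered string is an approximation, exact spacing may differ:

from imap_tools import AND, NOT

criteria = AND(NOT(gmail_label="processed"), no_keyword="processed")
# Renders to something like: (NOT (X-GM-LABELS "processed") UNKEYWORD processed)
print(str(criteria))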
@@ -320,7 +346,7 @@ class MailAccountHandler(LoggingMixin):
        except Exception as e:
            raise MailError(
                f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
            )
            ) from e

        return total_processed_files

@@ -382,8 +408,7 @@ class MailAccountHandler(LoggingMixin):
            f"{message.subject} from {message.from_}",
        )

        async_task(
            "documents.tasks.consume_file",
        consume_file.delay(
            path=temp_filename,
            override_filename=pathvalidate.sanitize_filename(
                message.subject + ".eml",
@@ -447,8 +472,7 @@ class MailAccountHandler(LoggingMixin):
            f"{message.subject} from {message.from_}",
        )

        async_task(
            "documents.tasks.consume_file",
        consume_file.delay(
            path=temp_filename,
            override_filename=pathvalidate.sanitize_filename(
                att.filename,
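Both hunks above swap django-q's string-based async_task dispatch for a direct Celery call. With Celery, .delay() is shorthand for .apply_async() with the same arguments, so the two lines below are equivalent (the path is illustrative):

consume_file.delay(path="/tmp/message.eml")  # style used in this diff
consume_file.apply_async(kwargs={"path": "/tmp/message.eml"})  # equivalent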
@@ -2,28 +2,12 @@

from django.db import migrations
from django.db.migrations import RunPython
from django_q.models import Schedule
from django_q.tasks import schedule


def add_schedules(apps, schema_editor):
    schedule(
        "paperless_mail.tasks.process_mail_accounts",
        name="Check all e-mail accounts",
        schedule_type=Schedule.MINUTES,
        minutes=10,
    )


def remove_schedules(apps, schema_editor):
    Schedule.objects.filter(func="paperless_mail.tasks.process_mail_accounts").delete()


class Migration(migrations.Migration):

    dependencies = [
        ("paperless_mail", "0001_initial"),
        ("django_q", "0013_task_attempt_count"),
    ]

    operations = [RunPython(add_schedules, remove_schedules)]
    operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]
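The bootstrap migration becomes a pair of no-ops because periodic dispatch no longer lives in django-q. A hypothetical Celery beat equivalent of the removed schedule; the task path comes from this diff, the 10-minute cadence mirrors the old entry, everything else is an assumption:

CELERY_BEAT_SCHEDULE = {
    "check-all-mail-accounts": {
        "task": "paperless_mail.tasks.process_mail_accounts",
        "schedule": 600.0,  # seconds, i.e. every 10 minutes
    },
}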
@@ -1,13 +1,14 @@
import logging

from celery import shared_task
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount


logger = logging.getLogger("paperless.mail.tasks")


@shared_task
def process_mail_accounts():
    total_new_documents = 0
    for account in MailAccount.objects.all():
@@ -20,11 +21,3 @@ def process_mail_accounts():
        return f"Added {total_new_documents} document(s)."
    else:
        return "No new documents were added."


def process_mail_account(name):
    try:
        account = MailAccount.objects.get(name=name)
        MailAccountHandler().handle_mail_account(account)
    except MailAccount.DoesNotExist:
        logger.error(f"Unknown mail account: {name}")
src/paperless_mail/tests/test_live_mail.py (new file)
@@ -0,0 +1,70 @@
import os

import pytest
from django.test import TestCase
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule


# Only run if the environment is set up
# and the environment is not empty (forks, I think)
@pytest.mark.skipif(
    "PAPERLESS_MAIL_TEST_HOST" not in os.environ
    or not len(os.environ["PAPERLESS_MAIL_TEST_HOST"]),
    reason="Live server testing not enabled",
)
class TestMailLiveServer(TestCase):
    def setUp(self) -> None:

        self.mail_account_handler = MailAccountHandler()
        self.account = MailAccount.objects.create(
            name="test",
            imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
            username=os.environ["PAPERLESS_MAIL_TEST_USER"],
            password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
            imap_port=993,
        )

        return super().setUp()

    def tearDown(self) -> None:
        self.account.delete()
        return super().tearDown()

    def test_process_non_gmail_server_flag(self):

        try:
            rule1 = MailRule.objects.create(
                name="testrule",
                account=self.account,
                action=MailRule.MailAction.FLAG,
            )

            self.mail_account_handler.handle_mail_account(self.account)

            rule1.delete()

        except MailError as e:
            self.fail(f"Failure: {e}")
        except Exception as e:
            pass

    def test_process_non_gmail_server_tag(self):

        try:

            rule2 = MailRule.objects.create(
                name="testrule",
                account=self.account,
                action=MailRule.MailAction.TAG,
            )

            self.mail_account_handler.handle_mail_account(self.account)

            rule2.delete()

        except MailError as e:
            self.fail(f"Failure: {e}")
        except Exception as e:
            pass
@@ -20,6 +20,7 @@ from imap_tools import MailboxFolderSelectError
from imap_tools import MailboxLoginError
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import NOT
from paperless_mail import tasks
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
@@ -46,31 +47,66 @@ class BogusFolderManager:


class BogusClient:
    def authenticate(self, mechanism, authobject):
        # authobject must be a callable object
        auth_bytes = authobject(None)
        if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
            raise MailboxLoginError("BAD", "OK")
    def __init__(self, messages):
        self.messages: List[MailMessage] = messages
        self.capabilities: List[str] = []


class BogusMailBox(ContextManager):
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def authenticate(self, mechanism, authobject):
        # authobject must be a callable object
        auth_bytes = authobject(None)
        if auth_bytes != b"\x00admin\x00w57\xc3\xa4\xc3\xb6\xc3\xbcw4b6huwb6nhu":
            raise MailboxLoginError("BAD", "OK")

    def uid(self, command, *args):
        if command == "STORE":
            for message in self.messages:
                if message.uid == args[0]:
                    flag = args[2]
                    if flag == "processed":
                        message._raw_flag_data.append(b"+FLAGS (processed)")
                        MailMessage.flags.fget.cache_clear()


class BogusMailBox(ContextManager):

    # Common values so tests don't need to remember an accepted login
    USERNAME: str = "admin"
    ASCII_PASSWORD: str = "secret"
    # Note the non-ascii characters here
    UTF_PASSWORD: str = "w57äöüw4b6huwb6nhu"

    def __init__(self):
        self.messages: List[MailMessage] = []
        self.messages_spam: List[MailMessage] = []
        self.folder = BogusFolderManager()
        self.client = BogusClient()
        self.client = BogusClient(self.messages)
        self._host = ""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def updateClient(self):
        self.client = BogusClient(self.messages)

    def login(self, username, password):
        # This will raise a UnicodeEncodeError if the password is not ASCII only
        password.encode("ascii")
        # Otherwise, check for correct values
        if username != "admin" or password not in {"secret"}:
        if username != self.USERNAME or password != self.ASCII_PASSWORD:
            raise MailboxLoginError("BAD", "OK")

    def login_utf8(self, username, password):
        # Expected to only be called with the UTF-8 password
        if username != self.USERNAME or password != self.UTF_PASSWORD:
            raise MailboxLoginError("BAD", "OK")

    def fetch(self, criteria, mark_seen, charset=""):
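The password.encode("ascii") line above is what lets this stub reproduce the production failure mode: encoding a non-ASCII password raises UnicodeEncodeError, which is the exact exception handle_mail_account catches before falling back to login_utf8. For illustration:

"secret".encode("ascii")  # fine -> normal LOGIN proceeds
try:
    "w57äöüw4b6huwb6nhu".encode("ascii")
except UnicodeEncodeError:
    pass  # handle_mail_account falls back to login_utf8 here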
@@ -100,6 +136,9 @@ class BogusMailBox(ContextManager):
            tag = criteria[criteria.index("UNKEYWORD") + 1].strip("'")
            msg = filter(lambda m: "processed" not in m.flags, msg)

        if "(X-GM-LABELS" in criteria:  # ['NOT', '(X-GM-LABELS', '"processed"']
            msg = filter(lambda m: "processed" not in m.flags, msg)

        return list(msg)

    def delete(self, uid_list):
@@ -209,7 +248,7 @@ class TestMail(DirectoriesMixin, TestCase):
        m.return_value = self.bogus_mailbox
        self.addCleanup(patcher.stop)

        patcher = mock.patch("paperless_mail.mail.async_task")
        patcher = mock.patch("paperless_mail.mail.consume_file.delay")
        self.async_task = patcher.start()
        self.addCleanup(patcher.stop)
@@ -247,6 +286,7 @@ class TestMail(DirectoriesMixin, TestCase):
                seen=False,
            ),
        )
        self.bogus_mailbox.updateClient()

    def test_get_correspondent(self):
        message = namedtuple("MailMessage", [])
@@ -607,6 +647,33 @@ class TestMail(DirectoriesMixin, TestCase):
        self.assertEqual(len(self.bogus_mailbox.fetch("UNKEYWORD processed", False)), 0)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)

    def test_handle_mail_account_tag_gmail(self):
        self.bogus_mailbox._host = "imap.gmail.com"
        self.bogus_mailbox.client.capabilities = ["X-GM-EXT-1"]

        account = MailAccount.objects.create(
            name="test",
            imap_server="",
            username="admin",
            password="secret",
        )

        _ = MailRule.objects.create(
            name="testrule",
            account=account,
            action=MailRule.MailAction.TAG,
            action_parameter="processed",
        )

        self.assertEqual(len(self.bogus_mailbox.messages), 3)
        self.assertEqual(self.async_task.call_count, 0)
        criteria = NOT(gmail_label="processed")
        self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 2)
        self.mail_account_handler.handle_mail_account(account)
        self.assertEqual(self.async_task.call_count, 2)
        self.assertEqual(len(self.bogus_mailbox.fetch(criteria, False)), 0)
        self.assertEqual(len(self.bogus_mailbox.messages), 3)

    def test_error_login(self):
        account = MailAccount.objects.create(
            name="test",
@@ -878,9 +945,9 @@ class TestMail(DirectoriesMixin, TestCase):
        account = MailAccount.objects.create(
            name="test",
            imap_server="",
            username="admin",
            username=BogusMailBox.USERNAME,
            # Note the non-ascii characters here
            password="w57äöüw4b6huwb6nhu",
            password=BogusMailBox.UTF_PASSWORD,
        )

        _ = MailRule.objects.create(
@@ -910,7 +977,7 @@ class TestMail(DirectoriesMixin, TestCase):
        account = MailAccount.objects.create(
            name="test",
            imap_server="",
            username="admin",
            username=BogusMailBox.USERNAME,
            # Note the non-ascii characters here
            # Passes the check in login, not in authenticate
            password="réception",
@@ -965,20 +1032,3 @@ class TestTasks(TestCase):
        m.side_effect = lambda account: 0
        result = tasks.process_mail_accounts()
        self.assertIn("No new", result)

    @mock.patch("paperless_mail.tasks.MailAccountHandler.handle_mail_account")
    def test_single_accounts(self, m):
        MailAccount.objects.create(
            name="A",
            imap_server="A",
            username="A",
            password="A",
        )

        tasks.process_mail_account("A")

        m.assert_called_once()
        m.reset_mock()

        tasks.process_mail_account("B")
        m.assert_not_called()
@@ -249,16 +249,22 @@ class RasterisedDocumentParser(DocumentParser):

        if mime_type == "application/pdf":
            text_original = self.extract_text(None, document_path)
            original_has_text = text_original and len(text_original) > 50
            original_has_text = text_original is not None and len(text_original) > 50
        else:
            text_original = None
            original_has_text = False

        # If the original has text, and the user doesn't want an archive,
        # we're done here
        if settings.OCR_MODE == "skip_noarchive" and original_has_text:
            self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
            self.text = text_original
            return

        # Either no text was in the original or there should be an archive
        # file created, so OCR the file and create an archive with any
        # text located via OCR

        import ocrmypdf
        from ocrmypdf import InputFileError, EncryptedPdfError
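The early return above encodes a small decision: a PDF counts as "has text" only when extraction produced more than 50 characters, and only then, with OCR_MODE=skip_noarchive, is OCRmyPDF bypassed entirely. A condensed restatement, for illustration only (threshold and mode name taken from this hunk; the function name is hypothetical):

from typing import Optional


def should_skip_ocr(mime_type: str, text: Optional[str], ocr_mode: str) -> bool:
    # A PDF "has text" only when extraction yielded more than 50 characters.
    has_text = mime_type == "application/pdf" and text is not None and len(text) > 50
    return ocr_mode == "skip_noarchive" and has_text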
@@ -277,6 +283,7 @@ class RasterisedDocumentParser(DocumentParser):
            ocrmypdf.ocr(**args)

            self.archive_path = archive_path

            self.text = self.extract_text(sidecar_file, archive_path)

            if not self.text:
@@ -323,11 +330,11 @@ class RasterisedDocumentParser(DocumentParser):

            except Exception as e:
                # If this fails, we have a serious issue at hand.
                raise ParseError(f"{e.__class__.__name__}: {str(e)}")
                raise ParseError(f"{e.__class__.__name__}: {str(e)}") from e

        except Exception as e:
            # Anything else is probably serious.
            raise ParseError(f"{e.__class__.__name__}: {str(e)}")
            raise ParseError(f"{e.__class__.__name__}: {str(e)}") from e

        # As a last resort, if we still don't have any text for any reason,
        # try to extract the text from the original document.
@@ -341,6 +341,17 @@ class TestParser(DirectoriesMixin, TestCase):

    @override_settings(OCR_PAGES=2, OCR_MODE="redo")
    def test_multi_page_analog_pages_redo(self):
        """
        GIVEN:
            - File with text contained in images but no text layer
            - OCR of only pages 1 and 2 requested
            - OCR mode set to redo
        WHEN:
            - Document is parsed
        THEN:
            - Text of pages 1 and 2 extracted
            - An archive file is created
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
@@ -352,6 +363,17 @@ class TestParser(DirectoriesMixin, TestCase):

    @override_settings(OCR_PAGES=1, OCR_MODE="force")
    def test_multi_page_analog_pages_force(self):
        """
        GIVEN:
            - File with text contained in images but no text layer
            - OCR of only page 1 requested
            - OCR mode set to force
        WHEN:
            - Document is parsed
        THEN:
            - Only text of page 1 is extracted
            - An archive file is created
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
@@ -364,6 +386,16 @@ class TestParser(DirectoriesMixin, TestCase):

    @override_settings(OCR_MODE="skip_noarchive")
    def test_skip_noarchive_withtext(self):
        """
        GIVEN:
            - File with existing text layer
            - OCR mode set to skip_noarchive
        WHEN:
            - Document is parsed
        THEN:
            - Text from images is extracted
            - No archive file is created
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
@@ -377,24 +409,47 @@ class TestParser(DirectoriesMixin, TestCase):

    @override_settings(OCR_MODE="skip_noarchive")
    def test_skip_noarchive_notext(self):
        """
        GIVEN:
            - File with text contained in images but no text layer
            - OCR mode set to skip_noarchive
        WHEN:
            - Document is parsed
        THEN:
            - Text from images is extracted
            - An archive file is created with the OCRd text
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
            "application/pdf",
        )
        self.assertTrue(os.path.isfile(parser.archive_path))

        self.assertContainsStrings(
            parser.get_text().lower(),
            ["page 1", "page 2", "page 3"],
        )

        self.assertIsNotNone(parser.archive_path)

    @override_settings(OCR_MODE="skip")
    def test_multi_page_mixed(self):
        """
        GIVEN:
            - File with some text contained in images and some in text layer
            - OCR mode set to skip
        WHEN:
            - Document is parsed
        THEN:
            - Text from images is extracted
            - An archive file is created with the OCRd text and the original text
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
            "application/pdf",
        )
        self.assertIsNotNone(parser.archive_path)
        self.assertTrue(os.path.isfile(parser.archive_path))
        self.assertContainsStrings(
            parser.get_text().lower(),
@@ -408,6 +463,16 @@ class TestParser(DirectoriesMixin, TestCase):

    @override_settings(OCR_MODE="skip_noarchive")
    def test_multi_page_mixed_no_archive(self):
        """
        GIVEN:
            - File with some text contained in images and some in text layer
            - OCR mode set to skip_noarchive
        WHEN:
            - Document is parsed
        THEN:
            - Text from images is extracted
            - No archive file is created as original file contains text
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(
            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
@@ -11,5 +11,6 @@ def text_consumer_declaration(sender, **kwargs):
        "mime_types": {
            "text/plain": ".txt",
            "text/csv": ".csv",
            "application/csv": ".csv",
        },
    }
@@ -9,6 +9,6 @@ class PaperlessTikaConfig(AppConfig):
    def ready(self):
        from documents.signals import document_consumer_declaration

        if settings.PAPERLESS_TIKA_ENABLED:
        if settings.TIKA_ENABLED:
            document_consumer_declaration.connect(tika_consumer_declaration)
        AppConfig.ready(self)
@@ -1,4 +1,5 @@
import os
from pathlib import Path

import dateutil.parser
import requests
@@ -27,7 +28,12 @@ class TikaDocumentParser(DocumentParser):
        )

    def extract_metadata(self, document_path, mime_type):
        tika_server = settings.PAPERLESS_TIKA_ENDPOINT
        tika_server = settings.TIKA_ENDPOINT

        # tika does not support a PathLike, only strings
        # ensure this is a string
        document_path = str(document_path)

        try:
            parsed = parser.from_file(document_path, tika_server)
        except Exception as e:
@@ -47,9 +53,13 @@ class TikaDocumentParser(DocumentParser):
            for key in parsed["metadata"]
        ]

    def parse(self, document_path, mime_type, file_name=None):
    def parse(self, document_path: Path, mime_type, file_name=None):
        self.log("info", f"Sending {document_path} to Tika server")
        tika_server = settings.PAPERLESS_TIKA_ENDPOINT
        tika_server = settings.TIKA_ENDPOINT

        # tika does not support a PathLike, only strings
        # ensure this is a string
        document_path = str(document_path)

        try:
            parsed = parser.from_file(document_path, tika_server)
@@ -57,7 +67,7 @@ class TikaDocumentParser(DocumentParser):
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
                f"{tika_server}: {err}",
            )
            ) from err

        self.text = parsed["content"].strip()

@@ -73,7 +83,7 @@ class TikaDocumentParser(DocumentParser):

    def convert_to_pdf(self, document_path, file_name):
        pdf_path = os.path.join(self.tempdir, "convert.pdf")
        gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
        gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
        url = gotenberg_server + "/forms/libreoffice/convert"

        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
@@ -90,7 +100,9 @@ class TikaDocumentParser(DocumentParser):
            response = requests.post(url, files=files, headers=headers)
            response.raise_for_status()  # ensure we notice bad responses
        except Exception as err:
            raise ParseError(f"Error while converting document to PDF: {err}")
            raise ParseError(
                f"Error while converting document to PDF: {err}",
            ) from err

        with open(pdf_path, "wb") as file:
            file.write(response.content)
src/paperless_tika/tests/samples/sample.docx (new binary file, not shown)
src/paperless_tika/tests/samples/sample.odt (new binary file, not shown)
src/paperless_tika/tests/test_live_tika.py (new file)
@@ -0,0 +1,78 @@
import datetime
import os
from pathlib import Path
from typing import Final

import pytest
from django.test import TestCase
from paperless_tika.parsers import TikaDocumentParser


@pytest.mark.skipif("TIKA_LIVE" not in os.environ, reason="No tika server")
class TestTikaParserAgainstServer(TestCase):
    """
    This test case tests the Tika parsing against a live tika server,
    if the environment contains the correct value indicating such a server
    is available.
    """

    SAMPLE_DIR: Final[Path] = (Path(__file__).parent / Path("samples")).resolve()

    def setUp(self) -> None:
        self.parser = TikaDocumentParser(logging_group=None)

    def tearDown(self) -> None:
        self.parser.cleanup()

    def test_basic_parse_odt(self):
        """
        GIVEN:
            - An input ODT format document
        WHEN:
            - The document is parsed
        THEN:
            - Document content is correct
            - Document date is correct
        """
        test_file = self.SAMPLE_DIR / Path("sample.odt")

        self.parser.parse(test_file, "application/vnd.oasis.opendocument.text")

        self.assertEqual(
            self.parser.text,
            "This is an ODT test document, created September 14, 2022",
        )
        self.assertIsNotNone(self.parser.archive_path)
        with open(self.parser.archive_path, "rb") as f:
            # PDFs begin with the bytes %PDF-x.y
            self.assertTrue(b"PDF-" in f.read()[:10])

        # TODO: Unsure what can set the Creation-Date field in a document, enable when possible
        # self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))

    def test_basic_parse_docx(self):
        """
        GIVEN:
            - An input DOCX format document
        WHEN:
            - The document is parsed
        THEN:
            - Document content is correct
            - Document date is correct
        """
        test_file = self.SAMPLE_DIR / Path("sample.docx")

        self.parser.parse(
            test_file,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )

        self.assertEqual(
            self.parser.text,
            "This is an DOCX test document, also made September 14, 2022",
        )
        self.assertIsNotNone(self.parser.archive_path)
        with open(self.parser.archive_path, "rb") as f:
            self.assertTrue(b"PDF-" in f.read()[:10])

        # self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))
@@ -1,5 +1,5 @@
[flake8]
extend-exclude = */migrations/*, paperless/settings.py, */tests/*
extend-exclude = */migrations/*, */tests/*
# E203 - https://www.flake8rules.com/rules/E203.html
# W503 - https://www.flake8rules.com/rules/W503.html
ignore = E203,W503