mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-01 01:46:16 +00:00
Merge branch 'dev' into feature-ocrmypdf
This commit is contained in:
@@ -10,10 +10,11 @@ from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
|
||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
from .file_handling import generate_filename, create_source_path_directory
|
||||
from .file_handling import create_source_path_directory
|
||||
from .loggers import LoggingMixin
|
||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
||||
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
|
||||
from .parsers import ParseError, get_parser_class_for_mime_type, \
|
||||
get_supported_file_extensions, parse_date
|
||||
from .signals import (
|
||||
document_consumption_finished,
|
||||
document_consumption_started
|
||||
@@ -40,6 +41,21 @@ class Consumer(LoggingMixin):
|
||||
raise ConsumerError("Cannot consume {}: It is not a file".format(
|
||||
self.path))
|
||||
|
||||
def pre_check_file_extension(self):
    """Reject the file early if its extension is not a supported one.

    The extension of ``self.filename`` is looked up in the collection
    returned by ``get_supported_file_extensions()``.

    Raises:
        ConsumerError: if the filename has no extension, or if the
            extension is not among the supported ones.
    """
    extensions = get_supported_file_extensions()
    _, ext = os.path.splitext(self.filename)

    if not ext:
        raise ConsumerError(
            f"Not consuming {self.filename}: File type unknown."
        )

    # os.path.splitext() preserves case, so "scan.PDF" yields ".PDF".
    # Accept the extension as written or lower-cased, so that upper-case
    # file names are not rejected when the supported extensions are
    # listed in lower case.
    if ext not in extensions and ext.lower() not in extensions:
        raise ConsumerError(
            f"Not consuming {self.filename}: File extension {ext} does "
            f"not map to any known file type ({str(extensions)})"
        )
|
||||
|
||||
def pre_check_duplicate(self):
|
||||
with open(self.path, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
@@ -82,6 +98,7 @@ class Consumer(LoggingMixin):
|
||||
# Make sure that preconditions for consuming the file are met.
|
||||
|
||||
self.pre_check_file_exists()
|
||||
self.pre_check_file_extension()
|
||||
self.pre_check_directories()
|
||||
self.pre_check_duplicate()
|
||||
|
||||
|
Reference in New Issue
Block a user