Merge branch 'dev' into feature-permissions

This commit is contained in:
shamoon 2023-02-14 11:32:37 -08:00
commit 32754defef
31 changed files with 327 additions and 90 deletions

View File

@ -161,7 +161,7 @@ jobs:
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
-
name: Upload coverage to Codecov
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
if: ${{ matrix.python-version == env.DEFAULT_PYTHON_VERSION }}
uses: codecov/codecov-action@v3
with:
# not required for public repos, but intermittently fails otherwise

View File

@ -59,7 +59,7 @@ services:
- gotenberg
- tika
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s

View File

@ -53,7 +53,7 @@ services:
- db
- broker
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000"]
interval: 30s

View File

@ -53,7 +53,7 @@ services:
- db
- broker
ports:
- 8010:8000
- "8010:8000"
healthcheck:
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
interval: 30s

View File

@ -57,7 +57,7 @@ services:
- gotenberg
- tika
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
interval: 30s

View File

@ -51,7 +51,7 @@ services:
- db
- broker
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
interval: 30s

View File

@ -46,7 +46,7 @@ services:
- gotenberg
- tika
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
interval: 30s

View File

@ -37,7 +37,7 @@ services:
depends_on:
- broker
ports:
- 8000:8000
- "8000:8000"
healthcheck:
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
interval: 30s

View File

@ -3,5 +3,10 @@
echo "Checking if we should start flower..."
if [[ -n "${PAPERLESS_ENABLE_FLOWER}" ]]; then
celery --app paperless flower
# Small delay to allow celery to be up first
echo "Starting flower in 5s"
sleep 5
celery --app paperless flower --conf=/usr/src/paperless/src/paperless/flowerconfig.py
else
echo "Not starting flower"
fi

View File

@ -346,7 +346,7 @@ read -r -a OCR_LANGUAGES_ARRAY <<< "${_split_langs}"
fi
} > docker-compose.env
sed -i "s/- 8000:8000/- $PORT:8000/g" docker-compose.yml
sed -i "s/- \"8000:8000\"/- \"$PORT:8000\"/g" docker-compose.yml
sed -i "s#- \./consume:/usr/src/paperless/consume#- $CONSUME_FOLDER:/usr/src/paperless/consume#g" docker-compose.yml

View File

@ -18,7 +18,7 @@
(blur)="onBlur()">
<ng-template ng-label-tmp let-item="item">
<span class="tag-wrap tag-wrap-delete" (click)="removeTag(item.id)">
<span class="tag-wrap tag-wrap-delete" (mousedown)="removeTag($event, item.id)">
<svg width="1.2em" height="1em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
<use xlink:href="assets/bootstrap-icons.svg#x"/>
</svg>

View File

@ -65,7 +65,7 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
private _lastSearchTerm: string
getTag(id) {
getTag(id: number) {
if (this.tags) {
return this.tags.find((tag) => tag.id == id)
} else {
@ -73,8 +73,12 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
}
}
removeTag(id) {
removeTag(event: PointerEvent, id: number) {
if (this.disabled) return
// prevent opening dropdown
event.stopImmediatePropagation()
let index = this.value.indexOf(id)
if (index > -1) {
let oldValue = this.value

View File

@ -63,7 +63,7 @@
<div class="row">
<div class="col mb-4">
<div class="col-md-6 col-xl-4 mb-4">
<form [formGroup]='documentForm' (ngSubmit)="save()">

View File

@ -22,6 +22,15 @@
--page-margin: 1px 0 20px;
}
::ng-deep .ng-select-taggable {
max-width: calc(100% - 46px); // fudge factor for ng-select button width
}
.btn-group .dropdown-toggle-split {
border-top-right-radius: inherit;
border-bottom-right-radius: inherit;
}
.password-prompt {
position: absolute;
top: 30%;

View File

@ -10,7 +10,7 @@
</div>
</div>
<ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
<ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
</div>
<table class="table table-striped align-middle border shadow-sm">
@ -72,5 +72,5 @@
<div class="d-flex">
<div i18n *ngIf="collectionSize > 0">{collectionSize, plural, =1 {One {{typeName}}} other {{{collectionSize || 0}} total {{typeNamePlural}}}}</div>
<ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
<ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
</div>

View File

@ -325,11 +325,10 @@ def save_to_dir(
Optionally rename the file.
"""
if os.path.isfile(filepath) and os.path.isdir(target_dir):
dst = shutil.copy(filepath, target_dir)
logging.debug(f"saved {str(filepath)} to {str(dst)}")
if newname:
dst_new = os.path.join(target_dir, newname)
logger.debug(f"moving {str(dst)} to {str(dst_new)}")
os.rename(dst, dst_new)
dest = target_dir
if newname is not None:
dest = os.path.join(dest, newname)
shutil.copy(filepath, dest)
logging.debug(f"saved {str(filepath)} to {str(dest)}")
else:
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")

View File

@ -346,6 +346,7 @@ class Consumer(LoggingMixin):
mime_type,
)
if not parser_class:
tempdir.cleanup()
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
# Notify all listeners that we're going to do some work.
@ -404,6 +405,7 @@ class Consumer(LoggingMixin):
except ParseError as e:
document_parser.cleanup()
tempdir.cleanup()
self._fail(
str(e),
f"Error while consuming document {self.filename}: {e}",

View File

@ -779,11 +779,17 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
document_type="document_type",
created="created",
created_year="created_year",
created_year_short="created_year_short",
created_month="created_month",
created_month_name="created_month_name",
created_month_name_short="created_month_name_short",
created_day="created_day",
added="added",
added_year="added_year",
added_year_short="added_year_short",
added_month="added_month",
added_month_name="added_month_name",
added_month_name_short="added_month_name_short",
added_day="added_day",
asn="asn",
tags="tags",

View File

@ -130,6 +130,18 @@ def consume_file(
)
if document_list:
# If the file is an upload, it's in the scratch directory
# Move it to consume directory to be picked up
# Otherwise, use the current parent to keep possible tags
# from subdirectories
try:
# is_relative_to would be nicer, but new in 3.9
_ = path.relative_to(settings.SCRATCH_DIR)
save_to_dir = settings.CONSUMPTION_DIR
except ValueError:
save_to_dir = path.parent
for n, document in enumerate(document_list):
# save to consumption dir
# rename it to the original filename with number prefix
@ -138,23 +150,18 @@ def consume_file(
else:
newname = None
# If the file is an upload, it's in the scratch directory
# Move it to consume directory to be picked up
# Otherwise, use the current parent to keep possible tags
# from subdirectories
try:
# is_relative_to would be nicer, but new in 3.9
_ = path.relative_to(settings.SCRATCH_DIR)
save_to_dir = settings.CONSUMPTION_DIR
except ValueError:
save_to_dir = path.parent
barcodes.save_to_dir(
document,
newname=newname,
target_dir=save_to_dir,
)
# Split file has been copied safely, remove it
os.remove(document)
# And clean up the directory as well, now it's empty
shutil.rmtree(os.path.dirname(document_list[0]))
# Delete the PDF file which was split
os.remove(doc_barcode_info.pdf_path)

View File

@ -125,28 +125,28 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
response = self.client.get("/api/documents/", format="json")
self.assertEqual(response.status_code, 200)
results_full = response.data["results"]
self.assertTrue("content" in results_full[0])
self.assertTrue("id" in results_full[0])
self.assertIn("content", results_full[0])
self.assertIn("id", results_full[0])
response = self.client.get("/api/documents/?fields=id", format="json")
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertFalse("content" in results[0])
self.assertTrue("id" in results[0])
self.assertIn("id", results[0])
self.assertEqual(len(results[0]), 1)
response = self.client.get("/api/documents/?fields=content", format="json")
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertTrue("content" in results[0])
self.assertIn("content", results[0])
self.assertFalse("id" in results[0])
self.assertEqual(len(results[0]), 1)
response = self.client.get("/api/documents/?fields=id,content", format="json")
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertTrue("content" in results[0])
self.assertTrue("id" in results[0])
self.assertIn("content", results[0])
self.assertIn("id", results[0])
self.assertEqual(len(results[0]), 2)
response = self.client.get(
@ -156,7 +156,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertFalse("content" in results[0])
self.assertTrue("id" in results[0])
self.assertIn("id", results[0])
self.assertEqual(len(results[0]), 1)
response = self.client.get("/api/documents/?fields=", format="json")
@ -3291,8 +3291,32 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 400)
self.assertEqual(StoragePath.objects.count(), 1)
def test_api_storage_path_placeholders(self):
    """
    GIVEN:
        - A storage path definition whose path template is built from
          placeholders
    WHEN:
        - The definition is POSTed to the storage path endpoint as JSON
    THEN:
        - The API responds with HTTP 201
        - The number of stored storage paths is now 2
    """
    payload = {
        "name": "Storage path with placeholders",
        "path": "{title}/{correspondent}/{document_type}/{created}/{created_year}/{created_year_short}/{created_month}/{created_month_name}/{created_month_name_short}/{created_day}/{added}/{added_year}/{added_year_short}/{added_month}/{added_month_name}/{added_month_name_short}/{added_day}/{asn}/{tags}/{tag_list}/",
    }

    response = self.client.post(
        self.ENDPOINT,
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, 201)
    self.assertEqual(StoragePath.objects.count(), 2)
class TestTasks(APITestCase):
class TestTasks(DirectoriesMixin, APITestCase):
ENDPOINT = "/api/tasks/"
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"

View File

@ -847,13 +847,11 @@ class PreConsumeTestCase(TestCase):
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], "path-to-file")
self.assertDictContainsSubset(
{
"DOCUMENT_SOURCE_PATH": c.original_path,
"DOCUMENT_WORKING_PATH": c.path,
},
environment,
)
subset = {
"DOCUMENT_SOURCE_PATH": c.original_path,
"DOCUMENT_WORKING_PATH": c.path,
}
self.assertDictEqual(environment, {**environment, **subset})
@mock.patch("documents.consumer.Consumer.log")
def test_script_with_output(self, mocked_log):
@ -983,16 +981,15 @@ class PostConsumeTestCase(TestCase):
self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"])
self.assertDictContainsSubset(
{
"DOCUMENT_ID": str(doc.pk),
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
"DOCUMENT_CORRESPONDENT": "my_bank",
"DOCUMENT_TAGS": "a,b",
},
environment,
)
subset = {
"DOCUMENT_ID": str(doc.pk),
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
"DOCUMENT_CORRESPONDENT": "my_bank",
"DOCUMENT_TAGS": "a,b",
}
self.assertDictEqual(environment, {**environment, **subset})
def test_script_exit_non_zero(self):
"""

View File

@ -25,7 +25,7 @@ class TestImporter(TestCase):
cmd.manifest = [{"model": "documents.document"}]
with self.assertRaises(CommandError) as cm:
cmd._check_manifest()
self.assertTrue("The manifest file contains a record" in str(cm.exception))
self.assertIn("The manifest file contains a record", str(cm.exception))
cmd.manifest = [
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
@ -33,6 +33,7 @@ class TestImporter(TestCase):
# self.assertRaises(CommandError, cmd._check_manifest)
with self.assertRaises(CommandError) as cm:
cmd._check_manifest()
self.assertTrue(
'The manifest file refers to "noexist.pdf"' in str(cm.exception),
self.assertIn(
'The manifest file refers to "noexist.pdf"',
str(cm.exception),
)

View File

@ -1,6 +1,8 @@
from tempfile import TemporaryDirectory
from unittest import mock
from django.apps import apps
from django.test import override_settings
from django.test import TestCase
from documents.parsers import get_default_file_extension
from documents.parsers import get_parser_class_for_mime_type
@ -8,6 +10,7 @@ from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser
from paperless_tika.parsers import TikaDocumentParser
class TestParserDiscovery(TestCase):
@ -124,14 +127,43 @@ class TestParserDiscovery(TestCase):
class TestParserAvailability(TestCase):
def test_file_extensions(self):
def test_tesseract_parser(self):
"""
GIVEN:
- Various mime types
WHEN:
- The parser class is instantiated
THEN:
- The Tesseract based parser is returned
"""
supported_mimes_and_exts = [
("application/pdf", ".pdf"),
("image/png", ".png"),
("image/jpeg", ".jpg"),
("image/tiff", ".tif"),
("image/webp", ".webp"),
]
supported_exts = get_supported_file_extensions()
for mime_type, ext in supported_mimes_and_exts:
self.assertIn(ext, supported_exts)
self.assertEqual(get_default_file_extension(mime_type), ext)
self.assertIsInstance(
get_parser_class_for_mime_type(mime_type)(logging_group=None),
RasterisedDocumentParser,
)
def test_text_parser(self):
"""
GIVEN:
- Various mime types of a text form
WHEN:
- The parser class is instantiated
THEN:
- The text based parser is returned
"""
supported_mimes_and_exts = [
("text/plain", ".txt"),
("text/csv", ".csv"),
]
@ -141,23 +173,55 @@ class TestParserAvailability(TestCase):
for mime_type, ext in supported_mimes_and_exts:
self.assertIn(ext, supported_exts)
self.assertEqual(get_default_file_extension(mime_type), ext)
self.assertIsInstance(
get_parser_class_for_mime_type(mime_type)(logging_group=None),
TextDocumentParser,
)
def test_tika_parser(self):
"""
GIVEN:
- Various mime types of an office document form
WHEN:
- The parser class is instantiated
THEN:
- The Tika/Gotenberg based parser is returned
"""
supported_mimes_and_exts = [
("application/vnd.oasis.opendocument.text", ".odt"),
("text/rtf", ".rtf"),
("application/msword", ".doc"),
(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".docx",
),
]
# Force the app ready to notice the settings override
with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
app = apps.get_app_config("paperless_tika")
app.ready()
supported_exts = get_supported_file_extensions()
# Every mime type must map to its extension and resolve to the Tika parser
for mime_type, ext in supported_mimes_and_exts:
self.assertIn(ext, supported_exts)
self.assertEqual(get_default_file_extension(mime_type), ext)
self.assertIsInstance(
get_parser_class_for_mime_type(mime_type)(logging_group=None),
TikaDocumentParser,
)
def test_no_parser_for_mime(self):
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
def test_default_extension(self):
# Test no parser declared still returns an extension
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
# Test invalid mimetype returns no extension
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
self.assertIsInstance(
get_parser_class_for_mime_type("application/pdf")(logging_group=None),
RasterisedDocumentParser,
)
self.assertIsInstance(
get_parser_class_for_mime_type("text/plain")(logging_group=None),
TextDocumentParser,
)
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
def test_file_extension_support(self):
self.assertTrue(is_file_ext_supported(".pdf"))
self.assertFalse(is_file_ext_supported(".hsdfh"))
self.assertFalse(is_file_ext_supported(""))

View File

@ -109,6 +109,16 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
def _parse_beat_schedule() -> Dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
@ -117,6 +127,11 @@ def _parse_beat_schedule() -> Dict:
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0
* 60.0,
},
},
{
"name": "Train the classifier",
@ -124,6 +139,11 @@ def _parse_beat_schedule() -> Dict:
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0
* 60.0,
},
},
{
"name": "Optimize the index",
@ -131,6 +151,12 @@ def _parse_beat_schedule() -> Dict:
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0
* 60.0
* 60.0,
},
},
{
"name": "Perform sanity check",
@ -138,6 +164,12 @@ def _parse_beat_schedule() -> Dict:
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0)
* 60.0
* 60.0,
},
},
]
for task in tasks:
@ -151,9 +183,11 @@ def _parse_beat_schedule() -> Dict:
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
@ -564,22 +598,21 @@ LOGGING = {
# Task queue #
###############################################################################
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
# https://docs.celeryq.dev/en/stable/userguide/configuration.html
CELERY_BROKER_URL = _CELERY_REDIS_URL
CELERY_TIMEZONE = TIME_ZONE
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY = TASK_WORKERS
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
CELERY_TASK_SEND_SENT_EVENT = True
CELERY_SEND_TASK_SENT_EVENT = True
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
@ -611,7 +644,7 @@ def default_threads_per_worker(task_workers) -> int:
THREADS_PER_WORKER = os.getenv(
"PAPERLESS_THREADS_PER_WORKER",
default_threads_per_worker(TASK_WORKERS),
default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
)
###############################################################################

View File

@ -149,6 +149,11 @@ class TestRedisSocketConversion(TestCase):
class TestCeleryScheduleParsing(TestCase):
MAIL_EXPIRE_TIME = 9.0 * 60.0
CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
def test_schedule_configuration_default(self):
"""
GIVEN:
@ -165,18 +170,22 @@ class TestCeleryScheduleParsing(TestCase):
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
},
schedule,
@ -203,18 +212,22 @@ class TestCeleryScheduleParsing(TestCase):
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/50", day_of_week="mon"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
},
schedule,
@ -238,14 +251,17 @@ class TestCeleryScheduleParsing(TestCase):
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
},
schedule,

View File

@ -14,15 +14,14 @@ TEST_CHANNEL_LAYERS = {
}
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
class TestWebSockets(TestCase):
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
async def test_no_auth(self):
communicator = WebsocketCommunicator(application, "/ws/status/")
connected, subprotocol = await communicator.connect()
self.assertFalse(connected)
await communicator.disconnect()
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
async def test_auth(self, _authenticated):
_authenticated.return_value = True
@ -33,7 +32,6 @@ class TestWebSockets(TestCase):
await communicator.disconnect()
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
async def test_receive(self, _authenticated):
_authenticated.return_value = True

View File

@ -24,7 +24,7 @@ class StandardPagination(PageNumberPagination):
class FaviconView(View):
def get(self, request, *args, **kwargs):
def get(self, request, *args, **kwargs): # pragma: nocover
favicon = os.path.join(
os.path.dirname(__file__),
"static",

View File

@ -2,12 +2,13 @@ from django.contrib.auth.models import User
from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from rest_framework.test import APITestCase
class TestAPIMailAccounts(APITestCase):
class TestAPIMailAccounts(DirectoriesMixin, APITestCase):
ENDPOINT = "/api/mail_accounts/"
def setUp(self):
@ -165,7 +166,7 @@ class TestAPIMailAccounts(APITestCase):
self.assertEqual(returned_account2.password, "123xyz")
class TestAPIMailRules(APITestCase):
class TestAPIMailRules(DirectoriesMixin, APITestCase):
ENDPOINT = "/api/mail_rules/"
def setUp(self):

View File

@ -161,7 +161,7 @@ class RasterisedDocumentParser(DocumentParser):
except Exception:
# TODO catch all for various issues with PDFminer.six.
# If PDFminer fails, fall back to OCR.
# If pdftotext fails, fall back to OCR.
self.log(
"warning",
"Error while getting text from PDF document with " "pdfminer.six",

View File

@ -364,7 +364,7 @@ class TestParser(DirectoriesMixin, TestCase):
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
self.assertFalse("page 3" in parser.get_text().lower())
self.assertNotIn("page 3", parser.get_text().lower())
@override_settings(OCR_PAGES=1, OCR_MODE="force")
def test_multi_page_analog_pages_force(self):
@ -386,8 +386,8 @@ class TestParser(DirectoriesMixin, TestCase):
)
self.assertTrue(os.path.isfile(parser.archive_path))
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
self.assertFalse("page 2" in parser.get_text().lower())
self.assertFalse("page 3" in parser.get_text().lower())
self.assertNotIn("page 2", parser.get_text().lower())
self.assertNotIn("page 3", parser.get_text().lower())
@override_settings(OCR_MODE="skip_noarchive")
def test_skip_noarchive_withtext(self):
@ -660,6 +660,15 @@ class TestParser(DirectoriesMixin, TestCase):
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("deskew", params)
with override_settings(OCR_MAX_IMAGE_PIXELS=1_000_001.0):
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertIn("max_image_mpixels", params)
self.assertAlmostEqual(params["max_image_mpixels"], 1, places=4)
with override_settings(OCR_MAX_IMAGE_PIXELS=-1_000_001.0):
params = parser.construct_ocrmypdf_parameters("", "", "", "")
self.assertNotIn("max_image_mpixels", params)
def test_rtl_language_detection(self):
"""
GIVEN:

View File

@ -3,7 +3,9 @@ import os
from pathlib import Path
from unittest import mock
from django.test import override_settings
from django.test import TestCase
from documents.parsers import ParseError
from paperless_tika.parsers import TikaDocumentParser
from requests import Response
@ -54,3 +56,63 @@ class TestTikaParser(TestCase):
self.assertTrue("Creation-Date" in [m["key"] for m in metadata])
self.assertTrue("Some-key" in [m["key"] for m in metadata])
@mock.patch("paperless_tika.parsers.parser.from_file")
@mock.patch("paperless_tika.parsers.requests.post")
def test_convert_failure(self, post, from_file):
    """
    GIVEN:
        - A document that has to be converted to PDF
    WHEN:
        - The mocked conversion request answers with HTTP status 500
    THEN:
        - convert_to_pdf raises ParseError
    """
    from_file.return_value = {
        "content": "the content",
        "metadata": {"Creation-Date": "2020-11-21"},
    }

    # Canned server-side failure handed back by the patched requests.post
    failed_response = Response()
    failed_response._content = b"PDF document"
    failed_response.status_code = 500
    post.return_value = failed_response

    # An empty placeholder file is enough; the HTTP call is mocked
    document_path = os.path.join(self.parser.tempdir, "input.odt")
    Path(document_path).touch()

    self.assertRaises(
        ParseError,
        self.parser.convert_to_pdf,
        document_path,
        None,
    )
@mock.patch("paperless_tika.parsers.requests.post")
def test_request_pdf_a_format(self, post: mock.Mock):
    """
    GIVEN:
        - A document that has to be converted to PDF
    WHEN:
        - A specific PDF/A flavour is selected through OCR_OUTPUT_TYPE
    THEN:
        - The data sent with the conversion request carries the matching
          "pdfFormat" string
    """
    document_path = os.path.join(self.parser.tempdir, "input.odt")
    Path(document_path).touch()

    # Successful canned answer returned by the patched requests.post
    ok_response = Response()
    ok_response._content = b"PDF document"
    ok_response.status_code = 200
    post.return_value = ok_response

    # (OCR_OUTPUT_TYPE value, pdfFormat expected in the request data)
    expected_formats = (
        ("pdfa", "PDF/A-2b"),
        ("pdfa-2", "PDF/A-2b"),
        ("pdfa-1", "PDF/A-1a"),
        ("pdfa-3", "PDF/A-3b"),
    )

    for setting, expected_key in expected_formats:
        with override_settings(OCR_OUTPUT_TYPE=setting):
            self.parser.convert_to_pdf(document_path, None)

            post.assert_called_once()
            request_kwargs = post.call_args[1]
            self.assertEqual(request_kwargs["data"]["pdfFormat"], expected_key)

            # Forget the recorded call so the next setting starts from zero
            post.reset_mock()