mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'dev' into feature-permissions
This commit is contained in:
commit
32754defef
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@ -161,7 +161,7 @@ jobs:
|
|||||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
|
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
|
||||||
-
|
-
|
||||||
name: Upload coverage to Codecov
|
name: Upload coverage to Codecov
|
||||||
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
|
if: ${{ matrix.python-version == env.DEFAULT_PYTHON_VERSION }}
|
||||||
uses: codecov/codecov-action@v3
|
uses: codecov/codecov-action@v3
|
||||||
with:
|
with:
|
||||||
# not required for public repos, but intermittently fails otherwise
|
# not required for public repos, but intermittently fails otherwise
|
||||||
|
@ -59,7 +59,7 @@ services:
|
|||||||
- gotenberg
|
- gotenberg
|
||||||
- tika
|
- tika
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -53,7 +53,7 @@ services:
|
|||||||
- db
|
- db
|
||||||
- broker
|
- broker
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
test: ["CMD", "curl", "-f", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -53,7 +53,7 @@ services:
|
|||||||
- db
|
- db
|
||||||
- broker
|
- broker
|
||||||
ports:
|
ports:
|
||||||
- 8010:8000
|
- "8010:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -57,7 +57,7 @@ services:
|
|||||||
- gotenberg
|
- gotenberg
|
||||||
- tika
|
- tika
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -51,7 +51,7 @@ services:
|
|||||||
- db
|
- db
|
||||||
- broker
|
- broker
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -46,7 +46,7 @@ services:
|
|||||||
- gotenberg
|
- gotenberg
|
||||||
- tika
|
- tika
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -37,7 +37,7 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- broker
|
- broker
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- "8000:8000"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
|
@ -3,5 +3,10 @@
|
|||||||
echo "Checking if we should start flower..."
|
echo "Checking if we should start flower..."
|
||||||
|
|
||||||
if [[ -n "${PAPERLESS_ENABLE_FLOWER}" ]]; then
|
if [[ -n "${PAPERLESS_ENABLE_FLOWER}" ]]; then
|
||||||
celery --app paperless flower
|
# Small delay to allow celery to be up first
|
||||||
|
echo "Starting flower in 5s"
|
||||||
|
sleep 5
|
||||||
|
celery --app paperless flower --conf=/usr/src/paperless/src/paperless/flowerconfig.py
|
||||||
|
else
|
||||||
|
echo "Not starting flower"
|
||||||
fi
|
fi
|
||||||
|
@ -346,7 +346,7 @@ read -r -a OCR_LANGUAGES_ARRAY <<< "${_split_langs}"
|
|||||||
fi
|
fi
|
||||||
} > docker-compose.env
|
} > docker-compose.env
|
||||||
|
|
||||||
sed -i "s/- 8000:8000/- $PORT:8000/g" docker-compose.yml
|
sed -i "s/- \"8000:8000\"/- \"$PORT:8000\"/g" docker-compose.yml
|
||||||
|
|
||||||
sed -i "s#- \./consume:/usr/src/paperless/consume#- $CONSUME_FOLDER:/usr/src/paperless/consume#g" docker-compose.yml
|
sed -i "s#- \./consume:/usr/src/paperless/consume#- $CONSUME_FOLDER:/usr/src/paperless/consume#g" docker-compose.yml
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
(blur)="onBlur()">
|
(blur)="onBlur()">
|
||||||
|
|
||||||
<ng-template ng-label-tmp let-item="item">
|
<ng-template ng-label-tmp let-item="item">
|
||||||
<span class="tag-wrap tag-wrap-delete" (click)="removeTag(item.id)">
|
<span class="tag-wrap tag-wrap-delete" (mousedown)="removeTag($event, item.id)">
|
||||||
<svg width="1.2em" height="1em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
<svg width="1.2em" height="1em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||||
<use xlink:href="assets/bootstrap-icons.svg#x"/>
|
<use xlink:href="assets/bootstrap-icons.svg#x"/>
|
||||||
</svg>
|
</svg>
|
||||||
|
@ -65,7 +65,7 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
|
|||||||
|
|
||||||
private _lastSearchTerm: string
|
private _lastSearchTerm: string
|
||||||
|
|
||||||
getTag(id) {
|
getTag(id: number) {
|
||||||
if (this.tags) {
|
if (this.tags) {
|
||||||
return this.tags.find((tag) => tag.id == id)
|
return this.tags.find((tag) => tag.id == id)
|
||||||
} else {
|
} else {
|
||||||
@ -73,8 +73,12 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
removeTag(id) {
|
removeTag(event: PointerEvent, id: number) {
|
||||||
if (this.disabled) return
|
if (this.disabled) return
|
||||||
|
|
||||||
|
// prevent opening dropdown
|
||||||
|
event.stopImmediatePropagation()
|
||||||
|
|
||||||
let index = this.value.indexOf(id)
|
let index = this.value.indexOf(id)
|
||||||
if (index > -1) {
|
if (index > -1) {
|
||||||
let oldValue = this.value
|
let oldValue = this.value
|
||||||
|
@ -63,7 +63,7 @@
|
|||||||
|
|
||||||
|
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col mb-4">
|
<div class="col-md-6 col-xl-4 mb-4">
|
||||||
|
|
||||||
<form [formGroup]='documentForm' (ngSubmit)="save()">
|
<form [formGroup]='documentForm' (ngSubmit)="save()">
|
||||||
|
|
||||||
|
@ -22,6 +22,15 @@
|
|||||||
--page-margin: 1px 0 20px;
|
--page-margin: 1px 0 20px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
::ng-deep .ng-select-taggable {
|
||||||
|
max-width: calc(100% - 46px); // fudge factor for ng-select button width
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-group .dropdown-toggle-split {
|
||||||
|
border-top-right-radius: inherit;
|
||||||
|
border-bottom-right-radius: inherit;
|
||||||
|
}
|
||||||
|
|
||||||
.password-prompt {
|
.password-prompt {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
top: 30%;
|
top: 30%;
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
|
<ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<table class="table table-striped align-middle border shadow-sm">
|
<table class="table table-striped align-middle border shadow-sm">
|
||||||
@ -72,5 +72,5 @@
|
|||||||
|
|
||||||
<div class="d-flex">
|
<div class="d-flex">
|
||||||
<div i18n *ngIf="collectionSize > 0">{collectionSize, plural, =1 {One {{typeName}}} other {{{collectionSize || 0}} total {{typeNamePlural}}}}</div>
|
<div i18n *ngIf="collectionSize > 0">{collectionSize, plural, =1 {One {{typeName}}} other {{{collectionSize || 0}} total {{typeNamePlural}}}}</div>
|
||||||
<ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
|
<ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
|
||||||
</div>
|
</div>
|
||||||
|
@ -325,11 +325,10 @@ def save_to_dir(
|
|||||||
Optionally rename the file.
|
Optionally rename the file.
|
||||||
"""
|
"""
|
||||||
if os.path.isfile(filepath) and os.path.isdir(target_dir):
|
if os.path.isfile(filepath) and os.path.isdir(target_dir):
|
||||||
dst = shutil.copy(filepath, target_dir)
|
dest = target_dir
|
||||||
logging.debug(f"saved {str(filepath)} to {str(dst)}")
|
if newname is not None:
|
||||||
if newname:
|
dest = os.path.join(dest, newname)
|
||||||
dst_new = os.path.join(target_dir, newname)
|
shutil.copy(filepath, dest)
|
||||||
logger.debug(f"moving {str(dst)} to {str(dst_new)}")
|
logging.debug(f"saved {str(filepath)} to {str(dest)}")
|
||||||
os.rename(dst, dst_new)
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
|
||||||
|
@ -346,6 +346,7 @@ class Consumer(LoggingMixin):
|
|||||||
mime_type,
|
mime_type,
|
||||||
)
|
)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
|
tempdir.cleanup()
|
||||||
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
|
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
|
||||||
|
|
||||||
# Notify all listeners that we're going to do some work.
|
# Notify all listeners that we're going to do some work.
|
||||||
@ -404,6 +405,7 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
str(e),
|
str(e),
|
||||||
f"Error while consuming document {self.filename}: {e}",
|
f"Error while consuming document {self.filename}: {e}",
|
||||||
|
@ -779,11 +779,17 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
|
|||||||
document_type="document_type",
|
document_type="document_type",
|
||||||
created="created",
|
created="created",
|
||||||
created_year="created_year",
|
created_year="created_year",
|
||||||
|
created_year_short="created_year_short",
|
||||||
created_month="created_month",
|
created_month="created_month",
|
||||||
|
created_month_name="created_month_name",
|
||||||
|
created_month_name_short="created_month_name_short",
|
||||||
created_day="created_day",
|
created_day="created_day",
|
||||||
added="added",
|
added="added",
|
||||||
added_year="added_year",
|
added_year="added_year",
|
||||||
|
added_year_short="added_year_short",
|
||||||
added_month="added_month",
|
added_month="added_month",
|
||||||
|
added_month_name="added_month_name",
|
||||||
|
added_month_name_short="added_month_name_short",
|
||||||
added_day="added_day",
|
added_day="added_day",
|
||||||
asn="asn",
|
asn="asn",
|
||||||
tags="tags",
|
tags="tags",
|
||||||
|
@ -130,6 +130,18 @@ def consume_file(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if document_list:
|
if document_list:
|
||||||
|
|
||||||
|
# If the file is an upload, it's in the scratch directory
|
||||||
|
# Move it to consume directory to be picked up
|
||||||
|
# Otherwise, use the current parent to keep possible tags
|
||||||
|
# from subdirectories
|
||||||
|
try:
|
||||||
|
# is_relative_to would be nicer, but new in 3.9
|
||||||
|
_ = path.relative_to(settings.SCRATCH_DIR)
|
||||||
|
save_to_dir = settings.CONSUMPTION_DIR
|
||||||
|
except ValueError:
|
||||||
|
save_to_dir = path.parent
|
||||||
|
|
||||||
for n, document in enumerate(document_list):
|
for n, document in enumerate(document_list):
|
||||||
# save to consumption dir
|
# save to consumption dir
|
||||||
# rename it to the original filename with number prefix
|
# rename it to the original filename with number prefix
|
||||||
@ -138,23 +150,18 @@ def consume_file(
|
|||||||
else:
|
else:
|
||||||
newname = None
|
newname = None
|
||||||
|
|
||||||
# If the file is an upload, it's in the scratch directory
|
|
||||||
# Move it to consume directory to be picked up
|
|
||||||
# Otherwise, use the current parent to keep possible tags
|
|
||||||
# from subdirectories
|
|
||||||
try:
|
|
||||||
# is_relative_to would be nicer, but new in 3.9
|
|
||||||
_ = path.relative_to(settings.SCRATCH_DIR)
|
|
||||||
save_to_dir = settings.CONSUMPTION_DIR
|
|
||||||
except ValueError:
|
|
||||||
save_to_dir = path.parent
|
|
||||||
|
|
||||||
barcodes.save_to_dir(
|
barcodes.save_to_dir(
|
||||||
document,
|
document,
|
||||||
newname=newname,
|
newname=newname,
|
||||||
target_dir=save_to_dir,
|
target_dir=save_to_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Split file has been copied safely, remove it
|
||||||
|
os.remove(document)
|
||||||
|
|
||||||
|
# And clean up the directory as well, now it's empty
|
||||||
|
shutil.rmtree(os.path.dirname(document_list[0]))
|
||||||
|
|
||||||
# Delete the PDF file which was split
|
# Delete the PDF file which was split
|
||||||
os.remove(doc_barcode_info.pdf_path)
|
os.remove(doc_barcode_info.pdf_path)
|
||||||
|
|
||||||
|
@ -125,28 +125,28 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
|||||||
response = self.client.get("/api/documents/", format="json")
|
response = self.client.get("/api/documents/", format="json")
|
||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
results_full = response.data["results"]
|
results_full = response.data["results"]
|
||||||
self.assertTrue("content" in results_full[0])
|
self.assertIn("content", results_full[0])
|
||||||
self.assertTrue("id" in results_full[0])
|
self.assertIn("id", results_full[0])
|
||||||
|
|
||||||
response = self.client.get("/api/documents/?fields=id", format="json")
|
response = self.client.get("/api/documents/?fields=id", format="json")
|
||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertFalse("content" in results[0])
|
self.assertFalse("content" in results[0])
|
||||||
self.assertTrue("id" in results[0])
|
self.assertIn("id", results[0])
|
||||||
self.assertEqual(len(results[0]), 1)
|
self.assertEqual(len(results[0]), 1)
|
||||||
|
|
||||||
response = self.client.get("/api/documents/?fields=content", format="json")
|
response = self.client.get("/api/documents/?fields=content", format="json")
|
||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertTrue("content" in results[0])
|
self.assertIn("content", results[0])
|
||||||
self.assertFalse("id" in results[0])
|
self.assertFalse("id" in results[0])
|
||||||
self.assertEqual(len(results[0]), 1)
|
self.assertEqual(len(results[0]), 1)
|
||||||
|
|
||||||
response = self.client.get("/api/documents/?fields=id,content", format="json")
|
response = self.client.get("/api/documents/?fields=id,content", format="json")
|
||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertTrue("content" in results[0])
|
self.assertIn("content", results[0])
|
||||||
self.assertTrue("id" in results[0])
|
self.assertIn("id", results[0])
|
||||||
self.assertEqual(len(results[0]), 2)
|
self.assertEqual(len(results[0]), 2)
|
||||||
|
|
||||||
response = self.client.get(
|
response = self.client.get(
|
||||||
@ -156,7 +156,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertFalse("content" in results[0])
|
self.assertFalse("content" in results[0])
|
||||||
self.assertTrue("id" in results[0])
|
self.assertIn("id", results[0])
|
||||||
self.assertEqual(len(results[0]), 1)
|
self.assertEqual(len(results[0]), 1)
|
||||||
|
|
||||||
response = self.client.get("/api/documents/?fields=", format="json")
|
response = self.client.get("/api/documents/?fields=", format="json")
|
||||||
@ -3291,8 +3291,32 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(response.status_code, 400)
|
self.assertEqual(response.status_code, 400)
|
||||||
self.assertEqual(StoragePath.objects.count(), 1)
|
self.assertEqual(StoragePath.objects.count(), 1)
|
||||||
|
|
||||||
|
def test_api_storage_path_placeholders(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- API request to create a storage path with placeholders
|
||||||
|
- Storage path is valid
|
||||||
|
WHEN:
|
||||||
|
- API is called
|
||||||
|
THEN:
|
||||||
|
- Correct HTTP response
|
||||||
|
- New storage path is created
|
||||||
|
"""
|
||||||
|
response = self.client.post(
|
||||||
|
self.ENDPOINT,
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"name": "Storage path with placeholders",
|
||||||
|
"path": "{title}/{correspondent}/{document_type}/{created}/{created_year}/{created_year_short}/{created_month}/{created_month_name}/{created_month_name_short}/{created_day}/{added}/{added_year}/{added_year_short}/{added_month}/{added_month_name}/{added_month_name_short}/{added_day}/{asn}/{tags}/{tag_list}/",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, 201)
|
||||||
|
self.assertEqual(StoragePath.objects.count(), 2)
|
||||||
|
|
||||||
class TestTasks(APITestCase):
|
|
||||||
|
class TestTasks(DirectoriesMixin, APITestCase):
|
||||||
ENDPOINT = "/api/tasks/"
|
ENDPOINT = "/api/tasks/"
|
||||||
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
|
ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
|
||||||
|
|
||||||
|
@ -847,13 +847,11 @@ class PreConsumeTestCase(TestCase):
|
|||||||
self.assertEqual(command[0], script.name)
|
self.assertEqual(command[0], script.name)
|
||||||
self.assertEqual(command[1], "path-to-file")
|
self.assertEqual(command[1], "path-to-file")
|
||||||
|
|
||||||
self.assertDictContainsSubset(
|
subset = {
|
||||||
{
|
"DOCUMENT_SOURCE_PATH": c.original_path,
|
||||||
"DOCUMENT_SOURCE_PATH": c.original_path,
|
"DOCUMENT_WORKING_PATH": c.path,
|
||||||
"DOCUMENT_WORKING_PATH": c.path,
|
}
|
||||||
},
|
self.assertDictEqual(environment, {**environment, **subset})
|
||||||
environment,
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.log")
|
@mock.patch("documents.consumer.Consumer.log")
|
||||||
def test_script_with_output(self, mocked_log):
|
def test_script_with_output(self, mocked_log):
|
||||||
@ -983,16 +981,15 @@ class PostConsumeTestCase(TestCase):
|
|||||||
self.assertEqual(command[7], "my_bank")
|
self.assertEqual(command[7], "my_bank")
|
||||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||||
|
|
||||||
self.assertDictContainsSubset(
|
subset = {
|
||||||
{
|
"DOCUMENT_ID": str(doc.pk),
|
||||||
"DOCUMENT_ID": str(doc.pk),
|
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
||||||
"DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
|
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
||||||
"DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
|
"DOCUMENT_CORRESPONDENT": "my_bank",
|
||||||
"DOCUMENT_CORRESPONDENT": "my_bank",
|
"DOCUMENT_TAGS": "a,b",
|
||||||
"DOCUMENT_TAGS": "a,b",
|
}
|
||||||
},
|
|
||||||
environment,
|
self.assertDictEqual(environment, {**environment, **subset})
|
||||||
)
|
|
||||||
|
|
||||||
def test_script_exit_non_zero(self):
|
def test_script_exit_non_zero(self):
|
||||||
"""
|
"""
|
||||||
|
@ -25,7 +25,7 @@ class TestImporter(TestCase):
|
|||||||
cmd.manifest = [{"model": "documents.document"}]
|
cmd.manifest = [{"model": "documents.document"}]
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
cmd._check_manifest()
|
cmd._check_manifest()
|
||||||
self.assertTrue("The manifest file contains a record" in str(cm.exception))
|
self.assertIn("The manifest file contains a record", str(cm.exception))
|
||||||
|
|
||||||
cmd.manifest = [
|
cmd.manifest = [
|
||||||
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
|
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
|
||||||
@ -33,6 +33,7 @@ class TestImporter(TestCase):
|
|||||||
# self.assertRaises(CommandError, cmd._check_manifest)
|
# self.assertRaises(CommandError, cmd._check_manifest)
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
cmd._check_manifest()
|
cmd._check_manifest()
|
||||||
self.assertTrue(
|
self.assertIn(
|
||||||
'The manifest file refers to "noexist.pdf"' in str(cm.exception),
|
'The manifest file refers to "noexist.pdf"',
|
||||||
|
str(cm.exception),
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.apps import apps
|
||||||
|
from django.test import override_settings
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from documents.parsers import get_default_file_extension
|
from documents.parsers import get_default_file_extension
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
@ -8,6 +10,7 @@ from documents.parsers import get_supported_file_extensions
|
|||||||
from documents.parsers import is_file_ext_supported
|
from documents.parsers import is_file_ext_supported
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
from paperless_text.parsers import TextDocumentParser
|
from paperless_text.parsers import TextDocumentParser
|
||||||
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
class TestParserDiscovery(TestCase):
|
class TestParserDiscovery(TestCase):
|
||||||
@ -124,14 +127,43 @@ class TestParserDiscovery(TestCase):
|
|||||||
|
|
||||||
|
|
||||||
class TestParserAvailability(TestCase):
|
class TestParserAvailability(TestCase):
|
||||||
def test_file_extensions(self):
|
def test_tesseract_parser(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Various mime types
|
||||||
|
WHEN:
|
||||||
|
- The parser class is instantiated
|
||||||
|
THEN:
|
||||||
|
- The Tesseract based parser is returned
|
||||||
|
"""
|
||||||
supported_mimes_and_exts = [
|
supported_mimes_and_exts = [
|
||||||
("application/pdf", ".pdf"),
|
("application/pdf", ".pdf"),
|
||||||
("image/png", ".png"),
|
("image/png", ".png"),
|
||||||
("image/jpeg", ".jpg"),
|
("image/jpeg", ".jpg"),
|
||||||
("image/tiff", ".tif"),
|
("image/tiff", ".tif"),
|
||||||
("image/webp", ".webp"),
|
("image/webp", ".webp"),
|
||||||
|
]
|
||||||
|
|
||||||
|
supported_exts = get_supported_file_extensions()
|
||||||
|
|
||||||
|
for mime_type, ext in supported_mimes_and_exts:
|
||||||
|
self.assertIn(ext, supported_exts)
|
||||||
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
|
self.assertIsInstance(
|
||||||
|
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||||
|
RasterisedDocumentParser,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_text_parser(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Various mime types of a text form
|
||||||
|
WHEN:
|
||||||
|
- The parser class is instantiated
|
||||||
|
THEN:
|
||||||
|
- The text based parser is returned
|
||||||
|
"""
|
||||||
|
supported_mimes_and_exts = [
|
||||||
("text/plain", ".txt"),
|
("text/plain", ".txt"),
|
||||||
("text/csv", ".csv"),
|
("text/csv", ".csv"),
|
||||||
]
|
]
|
||||||
@ -141,23 +173,55 @@ class TestParserAvailability(TestCase):
|
|||||||
for mime_type, ext in supported_mimes_and_exts:
|
for mime_type, ext in supported_mimes_and_exts:
|
||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
|
self.assertIsInstance(
|
||||||
|
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||||
|
TextDocumentParser,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_tika_parser(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Various mime types of an office document form
|
||||||
|
WHEN:
|
||||||
|
- The parser class is instantiated
|
||||||
|
THEN:
|
||||||
|
- The Tika/Gotenberg based parser is returned
|
||||||
|
"""
|
||||||
|
supported_mimes_and_exts = [
|
||||||
|
("application/vnd.oasis.opendocument.text", ".odt"),
|
||||||
|
("text/rtf", ".rtf"),
|
||||||
|
("application/msword", ".doc"),
|
||||||
|
(
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
".docx",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Force the app ready to notice the settings override
|
||||||
|
with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
|
||||||
|
app = apps.get_app_config("paperless_tika")
|
||||||
|
app.ready()
|
||||||
|
supported_exts = get_supported_file_extensions()
|
||||||
|
|
||||||
|
for mime_type, ext in supported_mimes_and_exts:
|
||||||
|
self.assertIn(ext, supported_exts)
|
||||||
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
|
self.assertIsInstance(
|
||||||
|
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
||||||
|
TikaDocumentParser,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_no_parser_for_mime(self):
|
||||||
|
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
||||||
|
|
||||||
|
def test_default_extension(self):
|
||||||
# Test no parser declared still returns an extension
|
# Test no parser declared still returns an extension
|
||||||
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
|
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
|
||||||
|
|
||||||
# Test invalid mimetype returns no extension
|
# Test invalid mimetype returns no extension
|
||||||
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
|
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
|
||||||
|
|
||||||
self.assertIsInstance(
|
def test_file_extension_support(self):
|
||||||
get_parser_class_for_mime_type("application/pdf")(logging_group=None),
|
|
||||||
RasterisedDocumentParser,
|
|
||||||
)
|
|
||||||
self.assertIsInstance(
|
|
||||||
get_parser_class_for_mime_type("text/plain")(logging_group=None),
|
|
||||||
TextDocumentParser,
|
|
||||||
)
|
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
|
||||||
|
|
||||||
self.assertTrue(is_file_ext_supported(".pdf"))
|
self.assertTrue(is_file_ext_supported(".pdf"))
|
||||||
self.assertFalse(is_file_ext_supported(".hsdfh"))
|
self.assertFalse(is_file_ext_supported(".hsdfh"))
|
||||||
self.assertFalse(is_file_ext_supported(""))
|
self.assertFalse(is_file_ext_supported(""))
|
||||||
|
@ -109,6 +109,16 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
|
|||||||
|
|
||||||
|
|
||||||
def _parse_beat_schedule() -> Dict:
|
def _parse_beat_schedule() -> Dict:
|
||||||
|
"""
|
||||||
|
Configures the scheduled tasks, according to default or
|
||||||
|
environment variables. Task expiration is configured so the task will
|
||||||
|
expire (and not run), shortly before the default frequency will put another
|
||||||
|
of the same task into the queue
|
||||||
|
|
||||||
|
|
||||||
|
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
|
||||||
|
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
|
||||||
|
"""
|
||||||
schedule = {}
|
schedule = {}
|
||||||
tasks = [
|
tasks = [
|
||||||
{
|
{
|
||||||
@ -117,6 +127,11 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# Default every ten minutes
|
# Default every ten minutes
|
||||||
"env_default": "*/10 * * * *",
|
"env_default": "*/10 * * * *",
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
|
"options": {
|
||||||
|
# 1 minute before default schedule sends again
|
||||||
|
"expires": 9.0
|
||||||
|
* 60.0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Train the classifier",
|
"name": "Train the classifier",
|
||||||
@ -124,6 +139,11 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# Default hourly at 5 minutes past the hour
|
# Default hourly at 5 minutes past the hour
|
||||||
"env_default": "5 */1 * * *",
|
"env_default": "5 */1 * * *",
|
||||||
"task": "documents.tasks.train_classifier",
|
"task": "documents.tasks.train_classifier",
|
||||||
|
"options": {
|
||||||
|
# 1 minute before default schedule sends again
|
||||||
|
"expires": 59.0
|
||||||
|
* 60.0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Optimize the index",
|
"name": "Optimize the index",
|
||||||
@ -131,6 +151,12 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# Default daily at midnight
|
# Default daily at midnight
|
||||||
"env_default": "0 0 * * *",
|
"env_default": "0 0 * * *",
|
||||||
"task": "documents.tasks.index_optimize",
|
"task": "documents.tasks.index_optimize",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": 23.0
|
||||||
|
* 60.0
|
||||||
|
* 60.0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Perform sanity check",
|
"name": "Perform sanity check",
|
||||||
@ -138,6 +164,12 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# Default Sunday at 00:30
|
# Default Sunday at 00:30
|
||||||
"env_default": "30 0 * * sun",
|
"env_default": "30 0 * * sun",
|
||||||
"task": "documents.tasks.sanity_check",
|
"task": "documents.tasks.sanity_check",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": ((7.0 * 24.0) - 1.0)
|
||||||
|
* 60.0
|
||||||
|
* 60.0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
@ -151,9 +183,11 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# - five time-and-date fields
|
# - five time-and-date fields
|
||||||
# - separated by at least one blank
|
# - separated by at least one blank
|
||||||
minute, hour, day_month, month, day_week = value.split(" ")
|
minute, hour, day_month, month, day_week = value.split(" ")
|
||||||
|
|
||||||
schedule[task["name"]] = {
|
schedule[task["name"]] = {
|
||||||
"task": task["task"],
|
"task": task["task"],
|
||||||
"schedule": crontab(minute, hour, day_week, day_month, month),
|
"schedule": crontab(minute, hour, day_week, day_month, month),
|
||||||
|
"options": task["options"],
|
||||||
}
|
}
|
||||||
|
|
||||||
return schedule
|
return schedule
|
||||||
@ -564,22 +598,21 @@ LOGGING = {
|
|||||||
# Task queue #
|
# Task queue #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html
|
||||||
|
|
||||||
WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
|
||||||
|
|
||||||
CELERY_BROKER_URL = _CELERY_REDIS_URL
|
CELERY_BROKER_URL = _CELERY_REDIS_URL
|
||||||
CELERY_TIMEZONE = TIME_ZONE
|
CELERY_TIMEZONE = TIME_ZONE
|
||||||
|
|
||||||
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
||||||
CELERY_WORKER_CONCURRENCY = TASK_WORKERS
|
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
||||||
|
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
|
||||||
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
||||||
CELERY_WORKER_SEND_TASK_EVENTS = True
|
CELERY_WORKER_SEND_TASK_EVENTS = True
|
||||||
|
CELERY_TASK_SEND_SENT_EVENT = True
|
||||||
CELERY_SEND_TASK_SENT_EVENT = True
|
CELERY_SEND_TASK_SENT_EVENT = True
|
||||||
|
|
||||||
CELERY_TASK_TRACK_STARTED = True
|
CELERY_TASK_TRACK_STARTED = True
|
||||||
CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT
|
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
||||||
|
|
||||||
CELERY_RESULT_EXTENDED = True
|
CELERY_RESULT_EXTENDED = True
|
||||||
CELERY_RESULT_BACKEND = "django-db"
|
CELERY_RESULT_BACKEND = "django-db"
|
||||||
@ -611,7 +644,7 @@ def default_threads_per_worker(task_workers) -> int:
|
|||||||
|
|
||||||
THREADS_PER_WORKER = os.getenv(
|
THREADS_PER_WORKER = os.getenv(
|
||||||
"PAPERLESS_THREADS_PER_WORKER",
|
"PAPERLESS_THREADS_PER_WORKER",
|
||||||
default_threads_per_worker(TASK_WORKERS),
|
default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
|
||||||
)
|
)
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -149,6 +149,11 @@ class TestRedisSocketConversion(TestCase):
|
|||||||
|
|
||||||
|
|
||||||
class TestCeleryScheduleParsing(TestCase):
|
class TestCeleryScheduleParsing(TestCase):
|
||||||
|
MAIL_EXPIRE_TIME = 9.0 * 60.0
|
||||||
|
CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
|
||||||
|
INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
|
||||||
|
SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
|
||||||
|
|
||||||
def test_schedule_configuration_default(self):
|
def test_schedule_configuration_default(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -165,18 +170,22 @@ class TestCeleryScheduleParsing(TestCase):
|
|||||||
"Check all e-mail accounts": {
|
"Check all e-mail accounts": {
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
"schedule": crontab(minute="*/10"),
|
"schedule": crontab(minute="*/10"),
|
||||||
|
"options": {"expires": self.MAIL_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Train the classifier": {
|
"Train the classifier": {
|
||||||
"task": "documents.tasks.train_classifier",
|
"task": "documents.tasks.train_classifier",
|
||||||
"schedule": crontab(minute="5", hour="*/1"),
|
"schedule": crontab(minute="5", hour="*/1"),
|
||||||
|
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Optimize the index": {
|
"Optimize the index": {
|
||||||
"task": "documents.tasks.index_optimize",
|
"task": "documents.tasks.index_optimize",
|
||||||
"schedule": crontab(minute=0, hour=0),
|
"schedule": crontab(minute=0, hour=0),
|
||||||
|
"options": {"expires": self.INDEX_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Perform sanity check": {
|
"Perform sanity check": {
|
||||||
"task": "documents.tasks.sanity_check",
|
"task": "documents.tasks.sanity_check",
|
||||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||||
|
"options": {"expires": self.SANITY_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
schedule,
|
schedule,
|
||||||
@ -203,18 +212,22 @@ class TestCeleryScheduleParsing(TestCase):
|
|||||||
"Check all e-mail accounts": {
|
"Check all e-mail accounts": {
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
"schedule": crontab(minute="*/50", day_of_week="mon"),
|
"schedule": crontab(minute="*/50", day_of_week="mon"),
|
||||||
|
"options": {"expires": self.MAIL_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Train the classifier": {
|
"Train the classifier": {
|
||||||
"task": "documents.tasks.train_classifier",
|
"task": "documents.tasks.train_classifier",
|
||||||
"schedule": crontab(minute="5", hour="*/1"),
|
"schedule": crontab(minute="5", hour="*/1"),
|
||||||
|
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Optimize the index": {
|
"Optimize the index": {
|
||||||
"task": "documents.tasks.index_optimize",
|
"task": "documents.tasks.index_optimize",
|
||||||
"schedule": crontab(minute=0, hour=0),
|
"schedule": crontab(minute=0, hour=0),
|
||||||
|
"options": {"expires": self.INDEX_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Perform sanity check": {
|
"Perform sanity check": {
|
||||||
"task": "documents.tasks.sanity_check",
|
"task": "documents.tasks.sanity_check",
|
||||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||||
|
"options": {"expires": self.SANITY_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
schedule,
|
schedule,
|
||||||
@ -238,14 +251,17 @@ class TestCeleryScheduleParsing(TestCase):
|
|||||||
"Check all e-mail accounts": {
|
"Check all e-mail accounts": {
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
"schedule": crontab(minute="*/10"),
|
"schedule": crontab(minute="*/10"),
|
||||||
|
"options": {"expires": self.MAIL_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Train the classifier": {
|
"Train the classifier": {
|
||||||
"task": "documents.tasks.train_classifier",
|
"task": "documents.tasks.train_classifier",
|
||||||
"schedule": crontab(minute="5", hour="*/1"),
|
"schedule": crontab(minute="5", hour="*/1"),
|
||||||
|
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
"Perform sanity check": {
|
"Perform sanity check": {
|
||||||
"task": "documents.tasks.sanity_check",
|
"task": "documents.tasks.sanity_check",
|
||||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||||
|
"options": {"expires": self.SANITY_EXPIRE_TIME},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
schedule,
|
schedule,
|
||||||
|
@ -14,15 +14,14 @@ TEST_CHANNEL_LAYERS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
|
||||||
class TestWebSockets(TestCase):
|
class TestWebSockets(TestCase):
|
||||||
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
|
|
||||||
async def test_no_auth(self):
|
async def test_no_auth(self):
|
||||||
communicator = WebsocketCommunicator(application, "/ws/status/")
|
communicator = WebsocketCommunicator(application, "/ws/status/")
|
||||||
connected, subprotocol = await communicator.connect()
|
connected, subprotocol = await communicator.connect()
|
||||||
self.assertFalse(connected)
|
self.assertFalse(connected)
|
||||||
await communicator.disconnect()
|
await communicator.disconnect()
|
||||||
|
|
||||||
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
|
|
||||||
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
|
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
|
||||||
async def test_auth(self, _authenticated):
|
async def test_auth(self, _authenticated):
|
||||||
_authenticated.return_value = True
|
_authenticated.return_value = True
|
||||||
@ -33,7 +32,6 @@ class TestWebSockets(TestCase):
|
|||||||
|
|
||||||
await communicator.disconnect()
|
await communicator.disconnect()
|
||||||
|
|
||||||
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
|
|
||||||
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
|
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
|
||||||
async def test_receive(self, _authenticated):
|
async def test_receive(self, _authenticated):
|
||||||
_authenticated.return_value = True
|
_authenticated.return_value = True
|
||||||
|
@ -24,7 +24,7 @@ class StandardPagination(PageNumberPagination):
|
|||||||
|
|
||||||
|
|
||||||
class FaviconView(View):
|
class FaviconView(View):
|
||||||
def get(self, request, *args, **kwargs):
|
def get(self, request, *args, **kwargs): # pragma: nocover
|
||||||
favicon = os.path.join(
|
favicon = os.path.join(
|
||||||
os.path.dirname(__file__),
|
os.path.dirname(__file__),
|
||||||
"static",
|
"static",
|
||||||
|
@ -2,12 +2,13 @@ from django.contrib.auth.models import User
|
|||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from paperless_mail.models import MailAccount
|
from paperless_mail.models import MailAccount
|
||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
|
|
||||||
class TestAPIMailAccounts(APITestCase):
|
class TestAPIMailAccounts(DirectoriesMixin, APITestCase):
|
||||||
ENDPOINT = "/api/mail_accounts/"
|
ENDPOINT = "/api/mail_accounts/"
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@ -165,7 +166,7 @@ class TestAPIMailAccounts(APITestCase):
|
|||||||
self.assertEqual(returned_account2.password, "123xyz")
|
self.assertEqual(returned_account2.password, "123xyz")
|
||||||
|
|
||||||
|
|
||||||
class TestAPIMailRules(APITestCase):
|
class TestAPIMailRules(DirectoriesMixin, APITestCase):
|
||||||
ENDPOINT = "/api/mail_rules/"
|
ENDPOINT = "/api/mail_rules/"
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -161,7 +161,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# TODO catch all for various issues with PDFminer.six.
|
# TODO catch all for various issues with PDFminer.six.
|
||||||
# If PDFminer fails, fall back to OCR.
|
# If pdftotext fails, fall back to OCR.
|
||||||
self.log(
|
self.log(
|
||||||
"warning",
|
"warning",
|
||||||
"Error while getting text from PDF document with " "pdfminer.six",
|
"Error while getting text from PDF document with " "pdfminer.six",
|
||||||
|
@ -364,7 +364,7 @@ class TestParser(DirectoriesMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||||
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
|
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
|
||||||
self.assertFalse("page 3" in parser.get_text().lower())
|
self.assertNotIn("page 3", parser.get_text().lower())
|
||||||
|
|
||||||
@override_settings(OCR_PAGES=1, OCR_MODE="force")
|
@override_settings(OCR_PAGES=1, OCR_MODE="force")
|
||||||
def test_multi_page_analog_pages_force(self):
|
def test_multi_page_analog_pages_force(self):
|
||||||
@ -386,8 +386,8 @@ class TestParser(DirectoriesMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||||
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
|
self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
|
||||||
self.assertFalse("page 2" in parser.get_text().lower())
|
self.assertNotIn("page 2", parser.get_text().lower())
|
||||||
self.assertFalse("page 3" in parser.get_text().lower())
|
self.assertNotIn("page 3", parser.get_text().lower())
|
||||||
|
|
||||||
@override_settings(OCR_MODE="skip_noarchive")
|
@override_settings(OCR_MODE="skip_noarchive")
|
||||||
def test_skip_noarchive_withtext(self):
|
def test_skip_noarchive_withtext(self):
|
||||||
@ -660,6 +660,15 @@ class TestParser(DirectoriesMixin, TestCase):
|
|||||||
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||||
self.assertNotIn("deskew", params)
|
self.assertNotIn("deskew", params)
|
||||||
|
|
||||||
|
with override_settings(OCR_MAX_IMAGE_PIXELS=1_000_001.0):
|
||||||
|
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||||
|
self.assertIn("max_image_mpixels", params)
|
||||||
|
self.assertAlmostEqual(params["max_image_mpixels"], 1, places=4)
|
||||||
|
|
||||||
|
with override_settings(OCR_MAX_IMAGE_PIXELS=-1_000_001.0):
|
||||||
|
params = parser.construct_ocrmypdf_parameters("", "", "", "")
|
||||||
|
self.assertNotIn("max_image_mpixels", params)
|
||||||
|
|
||||||
def test_rtl_language_detection(self):
|
def test_rtl_language_detection(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
@ -3,7 +3,9 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.test import override_settings
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
from documents.parsers import ParseError
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
from requests import Response
|
from requests import Response
|
||||||
|
|
||||||
@ -54,3 +56,63 @@ class TestTikaParser(TestCase):
|
|||||||
|
|
||||||
self.assertTrue("Creation-Date" in [m["key"] for m in metadata])
|
self.assertTrue("Creation-Date" in [m["key"] for m in metadata])
|
||||||
self.assertTrue("Some-key" in [m["key"] for m in metadata])
|
self.assertTrue("Some-key" in [m["key"] for m in metadata])
|
||||||
|
|
||||||
|
@mock.patch("paperless_tika.parsers.parser.from_file")
|
||||||
|
@mock.patch("paperless_tika.parsers.requests.post")
|
||||||
|
def test_convert_failure(self, post, from_file):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document needs to be converted to PDF
|
||||||
|
WHEN:
|
||||||
|
- Gotenberg server returns an error
|
||||||
|
THEN:
|
||||||
|
- Parse error is raised
|
||||||
|
"""
|
||||||
|
from_file.return_value = {
|
||||||
|
"content": "the content",
|
||||||
|
"metadata": {"Creation-Date": "2020-11-21"},
|
||||||
|
}
|
||||||
|
response = Response()
|
||||||
|
response._content = b"PDF document"
|
||||||
|
response.status_code = 500
|
||||||
|
post.return_value = response
|
||||||
|
|
||||||
|
file = os.path.join(self.parser.tempdir, "input.odt")
|
||||||
|
Path(file).touch()
|
||||||
|
|
||||||
|
with self.assertRaises(ParseError):
|
||||||
|
self.parser.convert_to_pdf(file, None)
|
||||||
|
|
||||||
|
@mock.patch("paperless_tika.parsers.requests.post")
|
||||||
|
def test_request_pdf_a_format(self, post: mock.Mock):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document needs to be converted to PDF
|
||||||
|
WHEN:
|
||||||
|
- Specific PDF/A format requested
|
||||||
|
THEN:
|
||||||
|
- Request to Gotenberg contains the expected PDF/A format string
|
||||||
|
"""
|
||||||
|
file = os.path.join(self.parser.tempdir, "input.odt")
|
||||||
|
Path(file).touch()
|
||||||
|
|
||||||
|
response = Response()
|
||||||
|
response._content = b"PDF document"
|
||||||
|
response.status_code = 200
|
||||||
|
post.return_value = response
|
||||||
|
|
||||||
|
for setting, expected_key in [
|
||||||
|
("pdfa", "PDF/A-2b"),
|
||||||
|
("pdfa-2", "PDF/A-2b"),
|
||||||
|
("pdfa-1", "PDF/A-1a"),
|
||||||
|
("pdfa-3", "PDF/A-3b"),
|
||||||
|
]:
|
||||||
|
with override_settings(OCR_OUTPUT_TYPE=setting):
|
||||||
|
self.parser.convert_to_pdf(file, None)
|
||||||
|
|
||||||
|
post.assert_called_once()
|
||||||
|
_, kwargs = post.call_args
|
||||||
|
|
||||||
|
self.assertEqual(kwargs["data"]["pdfFormat"], expected_key)
|
||||||
|
|
||||||
|
post.reset_mock()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user