Mirror of https://github.com/paperless-ngx/paperless-ngx.git
	Merge branch 'dev' into feature-permissions

Changed file: .github/workflows/ci.yml (vendored), 2 lines changed

@@ -161,7 +161,7 @@ jobs:
           pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
       -
         name: Upload coverage to Codecov
-        if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }}
+        if: ${{ matrix.python-version == env.DEFAULT_PYTHON_VERSION }}
         uses: codecov/codecov-action@v3
         with:
           # not required for public repos, but intermittently fails otherwise

@@ -59,7 +59,7 @@ services:
       - gotenberg
       - tika
     ports:
-      - 8000:8000
+      - "8000:8000"
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000"]
       interval: 30s

@@ -53,7 +53,7 @@ services:
       - db
       - broker
     ports:
-      - 8000:8000
+      - "8000:8000"
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000"]
       interval: 30s

@@ -53,7 +53,7 @@ services:
      - db
      - broker
    ports:
-      - 8010:8000
+      - "8010:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s

@@ -57,7 +57,7 @@ services:
      - gotenberg
      - tika
    ports:
-      - 8000:8000
+      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s

@@ -51,7 +51,7 @@ services:
      - db
      - broker
    ports:
-      - 8000:8000
+      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s

@@ -46,7 +46,7 @@ services:
      - gotenberg
      - tika
    ports:
-      - 8000:8000
+      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s

@@ -37,7 +37,7 @@ services:
    depends_on:
      - broker
    ports:
-      - 8000:8000
+      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s

@@ -3,5 +3,10 @@
 echo "Checking if we should start flower..."
 
 if [[ -n  "${PAPERLESS_ENABLE_FLOWER}" ]]; then
-	celery --app paperless flower
+	# Small delay to allow celery to be up first
+	echo "Starting flower in 5s"
+	sleep 5
+	celery --app paperless flower --conf=/usr/src/paperless/src/paperless/flowerconfig.py
+else
+	echo "Not starting flower"
 fi

@@ -346,7 +346,7 @@ read -r -a OCR_LANGUAGES_ARRAY <<< "${_split_langs}"
 	fi
 } > docker-compose.env
 
-sed -i "s/- 8000:8000/- $PORT:8000/g" docker-compose.yml
+sed -i "s/- \"8000:8000\"/- \"$PORT:8000\"/g" docker-compose.yml
 
 sed -i "s#- \./consume:/usr/src/paperless/consume#- $CONSUME_FOLDER:/usr/src/paperless/consume#g" docker-compose.yml
 

@@ -18,7 +18,7 @@
       (blur)="onBlur()">
 
       <ng-template ng-label-tmp let-item="item">
-        <span class="tag-wrap tag-wrap-delete" (click)="removeTag(item.id)">
+        <span class="tag-wrap tag-wrap-delete" (mousedown)="removeTag($event, item.id)">
           <svg width="1.2em" height="1em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
             <use xlink:href="assets/bootstrap-icons.svg#x"/>
           </svg>

@@ -65,7 +65,7 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
 
   private _lastSearchTerm: string
 
-  getTag(id) {
+  getTag(id: number) {
     if (this.tags) {
       return this.tags.find((tag) => tag.id == id)
     } else {
@@ -73,8 +73,12 @@ export class TagsComponent implements OnInit, ControlValueAccessor {
     }
   }
 
-  removeTag(id) {
+  removeTag(event: PointerEvent, id: number) {
     if (this.disabled) return
 
+    // prevent opening dropdown
+    event.stopImmediatePropagation()
+
     let index = this.value.indexOf(id)
     if (index > -1) {
       let oldValue = this.value

@@ -63,7 +63,7 @@
 
 
 <div class="row">
-    <div class="col mb-4">
+    <div class="col-md-6 col-xl-4 mb-4">
 
         <form [formGroup]='documentForm' (ngSubmit)="save()">
 

@@ -22,6 +22,15 @@
   --page-margin: 1px 0 20px;
 }
 
+::ng-deep .ng-select-taggable {
+  max-width: calc(100% - 46px); // fudge factor for ng-select button width
+}
+
+.btn-group .dropdown-toggle-split {
+  border-top-right-radius: inherit;
+  border-bottom-right-radius: inherit;
+}
+
 .password-prompt {
   position: absolute;
   top: 30%;

@@ -10,7 +10,7 @@
     </div>
   </div>
 
-  <ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
+  <ngb-pagination class="col-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
 </div>
 
 <table class="table table-striped align-middle border shadow-sm">
@@ -72,5 +72,5 @@
 
 <div class="d-flex">
   <div i18n *ngIf="collectionSize > 0">{collectionSize, plural, =1 {One {{typeName}}} other {{{collectionSize || 0}} total {{typeNamePlural}}}}</div>
-  <ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
+  <ngb-pagination *ngIf="collectionSize > 20" class="ms-auto" [pageSize]="25" [collectionSize]="collectionSize" [(page)]="page" [maxSize]="5" (pageChange)="reloadData()" aria-label="Default pagination"></ngb-pagination>
 </div>

@@ -325,11 +325,10 @@ def save_to_dir(
     Optionally rename the file.
     """
     if os.path.isfile(filepath) and os.path.isdir(target_dir):
-        dst = shutil.copy(filepath, target_dir)
-        logging.debug(f"saved {str(filepath)} to {str(dst)}")
-        if newname:
-            dst_new = os.path.join(target_dir, newname)
-            logger.debug(f"moving {str(dst)} to {str(dst_new)}")
-            os.rename(dst, dst_new)
+        dest = target_dir
+        if newname is not None:
+            dest = os.path.join(dest, newname)
+        shutil.copy(filepath, dest)
+        logging.debug(f"saved {str(filepath)} to {str(dest)}")
     else:
         logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")

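A side note on the simplification: shutil.copy() accepts either a directory (the source basename is kept) or a full destination path, so composing the final name up front removes the copy-then-rename step. A minimal standalone sketch with throwaway paths, not repo code:

import shutil
import tempfile
from pathlib import Path

src_dir = Path(tempfile.mkdtemp())
dst_dir = Path(tempfile.mkdtemp())
src = src_dir / "scan.pdf"
src.write_bytes(b"%PDF-1.4")

shutil.copy(src, dst_dir)                 # copied as dst_dir/scan.pdf
shutil.copy(src, dst_dir / "0_scan.pdf")  # copied under a new name, no os.rename needed

print(sorted(p.name for p in dst_dir.iterdir()))  # ['0_scan.pdf', 'scan.pdf']
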
@@ -346,6 +346,7 @@ class Consumer(LoggingMixin):
             mime_type,
         )
         if not parser_class:
+            tempdir.cleanup()
             self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
 
         # Notify all listeners that we're going to do some work.
@@ -404,6 +405,7 @@ class Consumer(LoggingMixin):
 
         except ParseError as e:
             document_parser.cleanup()
+            tempdir.cleanup()
             self._fail(
                 str(e),
                 f"Error while consuming document {self.filename}: {e}",

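For context: a tempfile.TemporaryDirectory created without a with-block is only removed when its finalizer runs, so a long-lived worker should release it explicitly on every early-exit path, which is what the added cleanup() calls do. A standalone sketch with illustrative names, not the consumer's actual code:

import tempfile

def consume(data: bytes) -> str:
    tempdir = tempfile.TemporaryDirectory(prefix="paperless-")
    if not data:
        # Failure path: free the scratch space before bailing out,
        # mirroring the cleanup() calls added above.
        tempdir.cleanup()
        raise ValueError("unsupported input")
    result = f"would parse inside {tempdir.name}"
    tempdir.cleanup()
    return result
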
@@ -779,11 +779,17 @@ class StoragePathSerializer(MatchingModelSerializer, OwnedObjectSerializer):
                 document_type="document_type",
                 created="created",
                 created_year="created_year",
+                created_year_short="created_year_short",
                 created_month="created_month",
+                created_month_name="created_month_name",
+                created_month_name_short="created_month_name_short",
                 created_day="created_day",
                 added="added",
                 added_year="added_year",
+                added_year_short="added_year_short",
                 added_month="added_month",
+                added_month_name="added_month_name",
+                added_month_name_short="added_month_name_short",
                 added_day="added_day",
                 asn="asn",
                 tags="tags",

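A plausible reading of this mapping (an assumption, not confirmed by the hunk itself): each key doubles as a dummy value so the serializer can run str.format() over a submitted path and reject unknown placeholders. A minimal sketch of that validation idea, with an illustrative error message:

SUPPORTED = {
    "title": "title",
    "created_year_short": "created_year_short",
    "added_month_name": "added_month_name",
    # ... one dummy entry per placeholder in the diff above ...
}

def validate_path(path: str) -> str:
    try:
        path.format(**SUPPORTED)  # KeyError on any placeholder not in SUPPORTED
    except KeyError as e:
        raise ValueError(f"Unknown placeholder {e}") from e
    return path

print(validate_path("{title}/{created_year_short}"))  # passes
# validate_path("{nope}")  # would raise ValueError: Unknown placeholder 'nope'
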
@@ -130,13 +130,6 @@ def consume_file(
                 )
 
                 if document_list:
-                    for n, document in enumerate(document_list):
-                        # save to consumption dir
-                        # rename it to the original filename  with number prefix
-                        if override_filename:
-                            newname = f"{str(n)}_" + override_filename
-                        else:
-                            newname = None
 
                     # If the file is an upload, it's in the scratch directory
                     # Move it to consume directory to be picked up
@@ -149,12 +142,26 @@ def consume_file(
                     except ValueError:
                         save_to_dir = path.parent
 
+                    for n, document in enumerate(document_list):
+                        # save to consumption dir
+                        # rename it to the original filename  with number prefix
+                        if override_filename:
+                            newname = f"{str(n)}_" + override_filename
+                        else:
+                            newname = None
+
                         barcodes.save_to_dir(
                             document,
                             newname=newname,
                             target_dir=save_to_dir,
                         )
+
+                        # Split file has been copied safely, remove it
+                        os.remove(document)
+
+                    # And clean up the directory as well, now it's empty
+                    shutil.rmtree(os.path.dirname(document_list[0]))
+
                     # Delete the PDF file which was split
                     os.remove(doc_barcode_info.pdf_path)
 

@@ -125,28 +125,28 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         response = self.client.get("/api/documents/", format="json")
         self.assertEqual(response.status_code, 200)
         results_full = response.data["results"]
-        self.assertTrue("content" in results_full[0])
-        self.assertTrue("id" in results_full[0])
+        self.assertIn("content", results_full[0])
+        self.assertIn("id", results_full[0])
 
         response = self.client.get("/api/documents/?fields=id", format="json")
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
         self.assertFalse("content" in results[0])
-        self.assertTrue("id" in results[0])
+        self.assertIn("id", results[0])
         self.assertEqual(len(results[0]), 1)
 
         response = self.client.get("/api/documents/?fields=content", format="json")
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
-        self.assertTrue("content" in results[0])
+        self.assertIn("content", results[0])
         self.assertFalse("id" in results[0])
         self.assertEqual(len(results[0]), 1)
 
         response = self.client.get("/api/documents/?fields=id,content", format="json")
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
-        self.assertTrue("content" in results[0])
-        self.assertTrue("id" in results[0])
+        self.assertIn("content", results[0])
+        self.assertIn("id", results[0])
         self.assertEqual(len(results[0]), 2)
 
         response = self.client.get(
@@ -156,7 +156,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
         self.assertFalse("content" in results[0])
-        self.assertTrue("id" in results[0])
+        self.assertIn("id", results[0])
         self.assertEqual(len(results[0]), 1)
 
         response = self.client.get("/api/documents/?fields=", format="json")

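The motivation for these swaps: on failure, assertIn reports both operands, while assertTrue only reports "False is not true". A runnable sketch; both tests fail on purpose to show the difference in messages:

import unittest

class Demo(unittest.TestCase):
    def test_with_assert_true(self):
        self.assertTrue("content" in {"id": 1})  # fails with: False is not true

    def test_with_assert_in(self):
        self.assertIn("content", {"id": 1})  # fails with: 'content' not found in {'id': 1}

if __name__ == "__main__":
    unittest.main()
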
@@ -3291,8 +3291,32 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
         self.assertEqual(response.status_code, 400)
         self.assertEqual(StoragePath.objects.count(), 1)
 
+    def test_api_storage_path_placeholders(self):
+        """
+        GIVEN:
+            - API request to create a storage path with placeholders
+            - Storage path is valid
+        WHEN:
+            - API is called
+        THEN:
+            - Correct HTTP response
+            - New storage path is created
+        """
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "name": "Storage path with placeholders",
+                    "path": "{title}/{correspondent}/{document_type}/{created}/{created_year}/{created_year_short}/{created_month}/{created_month_name}/{created_month_name_short}/{created_day}/{added}/{added_year}/{added_year_short}/{added_month}/{added_month_name}/{added_month_name_short}/{added_day}/{asn}/{tags}/{tag_list}/",
+                },
+            ),
+            content_type="application/json",
+        )
+        self.assertEqual(response.status_code, 201)
+        self.assertEqual(StoragePath.objects.count(), 2)
+
 
-class TestTasks(APITestCase):
+class TestTasks(DirectoriesMixin, APITestCase):
     ENDPOINT = "/api/tasks/"
     ENDPOINT_ACKNOWLEDGE = "/api/acknowledge_tasks/"
 

@@ -847,13 +847,11 @@ class PreConsumeTestCase(TestCase):
                 self.assertEqual(command[0], script.name)
                 self.assertEqual(command[1], "path-to-file")
 
-                self.assertDictContainsSubset(
-                    {
-                        "DOCUMENT_SOURCE_PATH": c.original_path,
-                        "DOCUMENT_WORKING_PATH": c.path,
-                    },
-                    environment,
-                )
+                subset = {
+                    "DOCUMENT_SOURCE_PATH": c.original_path,
+                    "DOCUMENT_WORKING_PATH": c.path,
+                }
+                self.assertDictEqual(environment, {**environment, **subset})
 
     @mock.patch("documents.consumer.Consumer.log")
     def test_script_with_output(self, mocked_log):
@@ -983,16 +981,15 @@ class PostConsumeTestCase(TestCase):
                 self.assertEqual(command[7], "my_bank")
                 self.assertCountEqual(command[8].split(","), ["a", "b"])
 
-                self.assertDictContainsSubset(
-                    {
-                        "DOCUMENT_ID": str(doc.pk),
-                        "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
-                        "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
-                        "DOCUMENT_CORRESPONDENT": "my_bank",
-                        "DOCUMENT_TAGS": "a,b",
-                    },
-                    environment,
-                )
+                subset = {
+                    "DOCUMENT_ID": str(doc.pk),
+                    "DOCUMENT_DOWNLOAD_URL": f"/api/documents/{doc.pk}/download/",
+                    "DOCUMENT_THUMBNAIL_URL": f"/api/documents/{doc.pk}/thumb/",
+                    "DOCUMENT_CORRESPONDENT": "my_bank",
+                    "DOCUMENT_TAGS": "a,b",
+                }
+                self.assertDictEqual(environment, {**environment, **subset})
 
     def test_script_exit_non_zero(self):
         """

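Why the replacement idiom works: merging the expected subset into the actual dict is a no-op exactly when every key/value pair of the subset is already present, so assertDictEqual(environment, {**environment, **subset}) reproduces the semantics of the deprecated assertDictContainsSubset. A standalone check:

environment = {"DOCUMENT_ID": "1", "DOCUMENT_TAGS": "a,b", "PATH": "/usr/bin"}
subset = {"DOCUMENT_ID": "1", "DOCUMENT_TAGS": "a,b"}

assert environment == {**environment, **subset}             # subset present: equal
assert environment != {**environment, "DOCUMENT_ID": "2"}   # mismatch: unequal
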
@@ -25,7 +25,7 @@ class TestImporter(TestCase):
         cmd.manifest = [{"model": "documents.document"}]
         with self.assertRaises(CommandError) as cm:
             cmd._check_manifest()
-        self.assertTrue("The manifest file contains a record" in str(cm.exception))
+        self.assertIn("The manifest file contains a record", str(cm.exception))
 
         cmd.manifest = [
             {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
@@ -33,6 +33,7 @@ class TestImporter(TestCase):
         # self.assertRaises(CommandError, cmd._check_manifest)
         with self.assertRaises(CommandError) as cm:
             cmd._check_manifest()
-        self.assertTrue(
-            'The manifest file refers to "noexist.pdf"' in str(cm.exception),
-        )
+        self.assertIn(
+            'The manifest file refers to "noexist.pdf"',
+            str(cm.exception),
+        )

@@ -1,6 +1,8 @@
 from tempfile import TemporaryDirectory
 from unittest import mock
 
+from django.apps import apps
+from django.test import override_settings
 from django.test import TestCase
 from documents.parsers import get_default_file_extension
 from documents.parsers import get_parser_class_for_mime_type
@@ -8,6 +10,7 @@ from documents.parsers import get_supported_file_extensions
 from documents.parsers import is_file_ext_supported
 from paperless_tesseract.parsers import RasterisedDocumentParser
 from paperless_text.parsers import TextDocumentParser
+from paperless_tika.parsers import TikaDocumentParser
 
 
 class TestParserDiscovery(TestCase):

@@ -124,14 +127,43 @@ class TestParserDiscovery(TestCase):
 
 
 class TestParserAvailability(TestCase):
-    def test_file_extensions(self):
+    def test_tesseract_parser(self):
+        """
+        GIVEN:
+            - Various mime types
+        WHEN:
+            - The parser class is instantiated
+        THEN:
+            - The Tesseract based parser is return
+        """
         supported_mimes_and_exts = [
             ("application/pdf", ".pdf"),
             ("image/png", ".png"),
             ("image/jpeg", ".jpg"),
             ("image/tiff", ".tif"),
             ("image/webp", ".webp"),
+        ]
+
+        supported_exts = get_supported_file_extensions()
+
+        for mime_type, ext in supported_mimes_and_exts:
+            self.assertIn(ext, supported_exts)
+            self.assertEqual(get_default_file_extension(mime_type), ext)
+            self.assertIsInstance(
+                get_parser_class_for_mime_type(mime_type)(logging_group=None),
+                RasterisedDocumentParser,
+            )
+
+    def test_text_parser(self):
+        """
+        GIVEN:
+            - Various mime types of a text form
+        WHEN:
+            - The parser class is instantiated
+        THEN:
+            - The text based parser is return
+        """
+        supported_mimes_and_exts = [
             ("text/plain", ".txt"),
             ("text/csv", ".csv"),
         ]
@@ -141,23 +173,55 @@ class TestParserAvailability(TestCase):
         for mime_type, ext in supported_mimes_and_exts:
             self.assertIn(ext, supported_exts)
             self.assertEqual(get_default_file_extension(mime_type), ext)
+            self.assertIsInstance(
+                get_parser_class_for_mime_type(mime_type)(logging_group=None),
+                TextDocumentParser,
+            )
+
+    def test_tika_parser(self):
+        """
+        GIVEN:
+            - Various mime types of a office document form
+        WHEN:
+            - The parser class is instantiated
+        THEN:
+            - The Tika/Gotenberg based parser is return
+        """
+        supported_mimes_and_exts = [
+            ("application/vnd.oasis.opendocument.text", ".odt"),
+            ("text/rtf", ".rtf"),
+            ("application/msword", ".doc"),
+            (
+                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                ".docx",
+            ),
+        ]
+
+        # Force the app ready to notice the settings override
+        with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
+            app = apps.get_app_config("paperless_tika")
+            app.ready()
+            supported_exts = get_supported_file_extensions()
+
+        for mime_type, ext in supported_mimes_and_exts:
+            self.assertIn(ext, supported_exts)
+            self.assertEqual(get_default_file_extension(mime_type), ext)
+            self.assertIsInstance(
+                get_parser_class_for_mime_type(mime_type)(logging_group=None),
+                TikaDocumentParser,
+            )
+
+    def test_no_parser_for_mime(self):
+        self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
+
+    def test_default_extension(self):
         # Test no parser declared still returns a an extension
         self.assertEqual(get_default_file_extension("application/zip"), ".zip")
 
         # Test invalid mimetype returns no extension
         self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
 
-        self.assertIsInstance(
-            get_parser_class_for_mime_type("application/pdf")(logging_group=None),
-            RasterisedDocumentParser,
-        )
-        self.assertIsInstance(
-            get_parser_class_for_mime_type("text/plain")(logging_group=None),
-            TextDocumentParser,
-        )
-        self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
-
+    def test_file_extension_support(self):
         self.assertTrue(is_file_ext_supported(".pdf"))
         self.assertFalse(is_file_ext_supported(".hsdfh"))
         self.assertFalse(is_file_ext_supported(""))

@@ -109,6 +109,16 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
 
 
 def _parse_beat_schedule() -> Dict:
+    """
+    Configures the scheduled tasks, according to default or
+    environment variables.  Task expiration is configured so the task will
+    expire (and not run), shortly before the default frequency will put another
+    of the same task into the queue
+
+
+    https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
+    https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
+    """
     schedule = {}
     tasks = [
         {
@@ -117,6 +127,11 @@ def _parse_beat_schedule() -> Dict:
             # Default every ten minutes
             "env_default": "*/10 * * * *",
             "task": "paperless_mail.tasks.process_mail_accounts",
+            "options": {
+                # 1 minute before default schedule sends again
+                "expires": 9.0
+                * 60.0,
+            },
         },
         {
             "name": "Train the classifier",
@@ -124,6 +139,11 @@ def _parse_beat_schedule() -> Dict:
             # Default hourly at 5 minutes past the hour
             "env_default": "5 */1 * * *",
             "task": "documents.tasks.train_classifier",
+            "options": {
+                # 1 minute before default schedule sends again
+                "expires": 59.0
+                * 60.0,
+            },
         },
         {
             "name": "Optimize the index",
@@ -131,6 +151,12 @@ def _parse_beat_schedule() -> Dict:
             # Default daily at midnight
             "env_default": "0 0 * * *",
             "task": "documents.tasks.index_optimize",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": 23.0
+                * 60.0
+                * 60.0,
+            },
         },
         {
             "name": "Perform sanity check",
@@ -138,6 +164,12 @@ def _parse_beat_schedule() -> Dict:
             # Default Sunday at 00:30
             "env_default": "30 0 * * sun",
             "task": "documents.tasks.sanity_check",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": ((7.0 * 24.0) - 1.0)
+                * 60.0
+                * 60.0,
+            },
         },
     ]
     for task in tasks:
@@ -151,9 +183,11 @@ def _parse_beat_schedule() -> Dict:
         #   - five time-and-date fields
         #   - separated by at least one blank
         minute, hour, day_month, month, day_week = value.split(" ")
+
         schedule[task["name"]] = {
             "task": task["task"],
             "schedule": crontab(minute, hour, day_week, day_month, month),
+            "options": task["options"],
         }
 
     return schedule
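
The expiry arithmetic above, spelled out: each task expires slightly before its default cron cadence would enqueue the next run, so stale queued tasks are dropped instead of piling up behind a busy worker. A quick standalone check of the values, in seconds:

MINUTE = 60.0
HOUR = 60.0 * MINUTE

expires = {
    "Check all e-mail accounts": 9.0 * MINUTE,             # default: every 10 minutes
    "Train the classifier": 59.0 * MINUTE,                 # default: hourly
    "Optimize the index": 23.0 * HOUR,                     # default: daily
    "Perform sanity check": ((7.0 * 24.0) - 1.0) * HOUR,   # default: weekly
}

for name, seconds in expires.items():
    print(f"{name}: {seconds:.0f} s ({seconds / HOUR:.2f} h)")
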
@@ -564,22 +598,21 @@ LOGGING = {
 # Task queue                                                                  #
 ###############################################################################
 
-TASK_WORKERS = __get_int("PAPERLESS_TASK_WORKERS", 1)
-
-WORKER_TIMEOUT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
+# https://docs.celeryq.dev/en/stable/userguide/configuration.html
 
 CELERY_BROKER_URL = _CELERY_REDIS_URL
 CELERY_TIMEZONE = TIME_ZONE
 
 CELERY_WORKER_HIJACK_ROOT_LOGGER = False
-CELERY_WORKER_CONCURRENCY = TASK_WORKERS
+CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
+TASK_WORKERS = CELERY_WORKER_CONCURRENCY
 CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
 CELERY_WORKER_SEND_TASK_EVENTS = True
+CELERY_TASK_SEND_SENT_EVENT = True
 CELERY_SEND_TASK_SENT_EVENT = True
 
 CELERY_TASK_TRACK_STARTED = True
-CELERY_TASK_TIME_LIMIT = WORKER_TIMEOUT
+CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
 
 CELERY_RESULT_EXTENDED = True
 CELERY_RESULT_BACKEND = "django-db"
@@ -611,7 +644,7 @@ def default_threads_per_worker(task_workers) -> int:
 
 THREADS_PER_WORKER = os.getenv(
     "PAPERLESS_THREADS_PER_WORKER",
     default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
 )
 
-    default_threads_per_worker(TASK_WORKERS),
+    default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
###############################################################################

@@ -149,6 +149,11 @@ class TestRedisSocketConversion(TestCase):
 
 
 class TestCeleryScheduleParsing(TestCase):
+    MAIL_EXPIRE_TIME = 9.0 * 60.0
+    CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
+    INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
+    SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
+
     def test_schedule_configuration_default(self):
         """
         GIVEN:
@@ -165,18 +170,22 @@ class TestCeleryScheduleParsing(TestCase):
                 "Check all e-mail accounts": {
                     "task": "paperless_mail.tasks.process_mail_accounts",
                     "schedule": crontab(minute="*/10"),
+                    "options": {"expires": self.MAIL_EXPIRE_TIME},
                 },
                 "Train the classifier": {
                     "task": "documents.tasks.train_classifier",
                     "schedule": crontab(minute="5", hour="*/1"),
+                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
                 },
                 "Optimize the index": {
                     "task": "documents.tasks.index_optimize",
                     "schedule": crontab(minute=0, hour=0),
+                    "options": {"expires": self.INDEX_EXPIRE_TIME},
                 },
                 "Perform sanity check": {
                     "task": "documents.tasks.sanity_check",
                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
+                    "options": {"expires": self.SANITY_EXPIRE_TIME},
                 },
             },
             schedule,
@@ -203,18 +212,22 @@ class TestCeleryScheduleParsing(TestCase):
                 "Check all e-mail accounts": {
                     "task": "paperless_mail.tasks.process_mail_accounts",
                     "schedule": crontab(minute="*/50", day_of_week="mon"),
+                    "options": {"expires": self.MAIL_EXPIRE_TIME},
                 },
                 "Train the classifier": {
                     "task": "documents.tasks.train_classifier",
                     "schedule": crontab(minute="5", hour="*/1"),
+                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
                 },
                 "Optimize the index": {
                     "task": "documents.tasks.index_optimize",
                     "schedule": crontab(minute=0, hour=0),
+                    "options": {"expires": self.INDEX_EXPIRE_TIME},
                 },
                 "Perform sanity check": {
                     "task": "documents.tasks.sanity_check",
                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
+                    "options": {"expires": self.SANITY_EXPIRE_TIME},
                 },
             },
             schedule,
@@ -238,14 +251,17 @@ class TestCeleryScheduleParsing(TestCase):
                 "Check all e-mail accounts": {
                     "task": "paperless_mail.tasks.process_mail_accounts",
                     "schedule": crontab(minute="*/10"),
+                    "options": {"expires": self.MAIL_EXPIRE_TIME},
                 },
                 "Train the classifier": {
                     "task": "documents.tasks.train_classifier",
                     "schedule": crontab(minute="5", hour="*/1"),
+                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
                 },
                 "Perform sanity check": {
                     "task": "documents.tasks.sanity_check",
                     "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
+                    "options": {"expires": self.SANITY_EXPIRE_TIME},
                 },
             },
             schedule,

@@ -14,15 +14,14 @@ TEST_CHANNEL_LAYERS = {
 }
 
 
+@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
 class TestWebSockets(TestCase):
-    @override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
     async def test_no_auth(self):
         communicator = WebsocketCommunicator(application, "/ws/status/")
         connected, subprotocol = await communicator.connect()
         self.assertFalse(connected)
         await communicator.disconnect()
 
-    @override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
     @mock.patch("paperless.consumers.StatusConsumer._authenticated")
     async def test_auth(self, _authenticated):
         _authenticated.return_value = True
@@ -33,7 +32,6 @@ class TestWebSockets(TestCase):
 
         await communicator.disconnect()
 
-    @override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
     @mock.patch("paperless.consumers.StatusConsumer._authenticated")
     async def test_receive(self, _authenticated):
         _authenticated.return_value = True

@@ -24,7 +24,7 @@ class StandardPagination(PageNumberPagination):
 
 
 class FaviconView(View):
-    def get(self, request, *args, **kwargs):
+    def get(self, request, *args, **kwargs):  # pragma: nocover
         favicon = os.path.join(
             os.path.dirname(__file__),
             "static",

@@ -2,12 +2,13 @@ from django.contrib.auth.models import User
 from documents.models import Correspondent
 from documents.models import DocumentType
 from documents.models import Tag
+from documents.tests.utils import DirectoriesMixin
 from paperless_mail.models import MailAccount
 from paperless_mail.models import MailRule
 from rest_framework.test import APITestCase
 
 
-class TestAPIMailAccounts(APITestCase):
+class TestAPIMailAccounts(DirectoriesMixin, APITestCase):
     ENDPOINT = "/api/mail_accounts/"
 
     def setUp(self):
@@ -165,7 +166,7 @@ class TestAPIMailAccounts(APITestCase):
         self.assertEqual(returned_account2.password, "123xyz")
 
 
-class TestAPIMailRules(APITestCase):
+class TestAPIMailRules(DirectoriesMixin, APITestCase):
     ENDPOINT = "/api/mail_rules/"
 
     def setUp(self):

@@ -161,7 +161,7 @@ class RasterisedDocumentParser(DocumentParser):
 
         except Exception:
             # TODO catch all for various issues with PDFminer.six.
-            #  If PDFminer fails, fall back to OCR.
+            #  If pdftotext fails, fall back to OCR.
             self.log(
                 "warning",
                 "Error while getting text from PDF document with " "pdfminer.six",

@@ -364,7 +364,7 @@ class TestParser(DirectoriesMixin, TestCase):
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
-        self.assertFalse("page 3" in parser.get_text().lower())
+        self.assertNotIn("page 3", parser.get_text().lower())
 
     @override_settings(OCR_PAGES=1, OCR_MODE="force")
     def test_multi_page_analog_pages_force(self):
@@ -386,8 +386,8 @@ class TestParser(DirectoriesMixin, TestCase):
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
-        self.assertFalse("page 2" in parser.get_text().lower())
-        self.assertFalse("page 3" in parser.get_text().lower())
+        self.assertNotIn("page 2", parser.get_text().lower())
+        self.assertNotIn("page 3", parser.get_text().lower())
 
     @override_settings(OCR_MODE="skip_noarchive")
     def test_skip_noarchive_withtext(self):
@@ -660,6 +660,15 @@ class TestParser(DirectoriesMixin, TestCase):
             params = parser.construct_ocrmypdf_parameters("", "", "", "")
             self.assertNotIn("deskew", params)
 
+        with override_settings(OCR_MAX_IMAGE_PIXELS=1_000_001.0):
+            params = parser.construct_ocrmypdf_parameters("", "", "", "")
+            self.assertIn("max_image_mpixels", params)
+            self.assertAlmostEqual(params["max_image_mpixels"], 1, places=4)
+
+        with override_settings(OCR_MAX_IMAGE_PIXELS=-1_000_001.0):
+            params = parser.construct_ocrmypdf_parameters("", "", "", "")
+            self.assertNotIn("max_image_mpixels", params)
+
     def test_rtl_language_detection(self):
         """
         GIVEN:

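Context for the 1_000_001.0 figure (hedged): ocrmypdf's max_image_mpixels option takes megapixels, while PAPERLESS_OCR_MAX_IMAGE_PIXELS is presumably given in raw pixels, so the parser would divide by one million, with negative values disabling the limit entirely. A quick check of the assertion above:

ocr_max_image_pixels = 1_000_001.0
max_image_mpixels = ocr_max_image_pixels / 1_000_000  # 1.000001 megapixels
assert abs(max_image_mpixels - 1) < 1e-4              # what assertAlmostEqual(places=4) checks
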
@@ -3,7 +3,9 @@ import os
 from pathlib import Path
 from unittest import mock
 
+from django.test import override_settings
 from django.test import TestCase
+from documents.parsers import ParseError
 from paperless_tika.parsers import TikaDocumentParser
 from requests import Response
 
@@ -54,3 +56,63 @@ class TestTikaParser(TestCase):
 
         self.assertTrue("Creation-Date" in [m["key"] for m in metadata])
         self.assertTrue("Some-key" in [m["key"] for m in metadata])
+
+    @mock.patch("paperless_tika.parsers.parser.from_file")
+    @mock.patch("paperless_tika.parsers.requests.post")
+    def test_convert_failure(self, post, from_file):
+        """
+        GIVEN:
+            - Document needs to be converted to PDF
+        WHEN:
+            - Gotenberg server returns an error
+        THEN:
+            - Parse error is raised
+        """
+        from_file.return_value = {
+            "content": "the content",
+            "metadata": {"Creation-Date": "2020-11-21"},
+        }
+        response = Response()
+        response._content = b"PDF document"
+        response.status_code = 500
+        post.return_value = response
+
+        file = os.path.join(self.parser.tempdir, "input.odt")
+        Path(file).touch()
+
+        with self.assertRaises(ParseError):
+            self.parser.convert_to_pdf(file, None)
+
+    @mock.patch("paperless_tika.parsers.requests.post")
+    def test_request_pdf_a_format(self, post: mock.Mock):
+        """
+        GIVEN:
+            - Document needs to be converted to PDF
+        WHEN:
+            - Specific PDF/A format requested
+        THEN:
+            - Request to Gotenberg contains the expected PDF/A format string
+        """
+        file = os.path.join(self.parser.tempdir, "input.odt")
+        Path(file).touch()
+
+        response = Response()
+        response._content = b"PDF document"
+        response.status_code = 200
+        post.return_value = response
+
+        for setting, expected_key in [
+            ("pdfa", "PDF/A-2b"),
+            ("pdfa-2", "PDF/A-2b"),
+            ("pdfa-1", "PDF/A-1a"),
+            ("pdfa-3", "PDF/A-3b"),
+        ]:
+            with override_settings(OCR_OUTPUT_TYPE=setting):
+                self.parser.convert_to_pdf(file, None)
+
+                post.assert_called_once()
+                _, kwargs = post.call_args
+
+                self.assertEqual(kwargs["data"]["pdfFormat"], expected_key)
+
+                post.reset_mock()