From cedacc64e610d3ae05adb2e4131fe6a9380fa24a Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Sun, 16 Feb 2025 10:58:05 -0800 Subject: [PATCH] Creates url_validator to allow only http or https schemes for the webhook url --- src/documents/serialisers.py | 3 +- src/documents/tests/test_api_workflows.py | 57 ++++++++++++----------- src/documents/validators.py | 39 ++++++++++++++-- 3 files changed, 68 insertions(+), 31 deletions(-) diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index b62e8dd48..84894bff1 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -58,6 +58,7 @@ from documents.permissions import set_permissions_for_object from documents.templating.filepath import validate_filepath_template_and_render from documents.templating.utils import convert_format_str_to_template_format from documents.validators import uri_validator +from documents.validators import url_validator logger = logging.getLogger("paperless.serializers") @@ -1950,7 +1951,7 @@ class WorkflowActionWebhookSerializer(serializers.ModelSerializer): id = serializers.IntegerField(allow_null=True, required=False) def validate_url(self, url): - uri_validator(url) + url_validator(url) return url class Meta: diff --git a/src/documents/tests/test_api_workflows.py b/src/documents/tests/test_api_workflows.py index 40ecaca86..4aa3a81a6 100644 --- a/src/documents/tests/test_api_workflows.py +++ b/src/documents/tests/test_api_workflows.py @@ -598,30 +598,35 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase): THEN: - Correct HTTP response """ - response = self.client.post( - self.ENDPOINT, - json.dumps( - { - "name": "Workflow 2", - "order": 1, - "triggers": [ - { - "type": WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, - "sources": [DocumentSource.ApiUpload], - "filter_filename": "*", - }, - ], - "actions": [ - { - "type": WorkflowAction.WorkflowActionType.WEBHOOK, - "webhook": { - "url": "https://examplewithouttld:3000/path", - "include_document": False, + + for url, expected_resp_code in [ + ("https://examplewithouttld:3000/path", status.HTTP_201_CREATED), + ("file:///etc/passwd/path", status.HTTP_400_BAD_REQUEST), + ]: + response = self.client.post( + self.ENDPOINT, + json.dumps( + { + "name": "Workflow 2", + "order": 1, + "triggers": [ + { + "type": WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, + "sources": [DocumentSource.ApiUpload], + "filter_filename": "*", }, - }, - ], - }, - ), - content_type="application/json", - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) + ], + "actions": [ + { + "type": WorkflowAction.WorkflowActionType.WEBHOOK, + "webhook": { + "url": url, + "include_document": False, + }, + }, + ], + }, + ), + content_type="application/json", + ) + self.assertEqual(response.status_code, expected_resp_code) diff --git a/src/documents/validators.py b/src/documents/validators.py index 0ebf15697..bec7252bf 100644 --- a/src/documents/validators.py +++ b/src/documents/validators.py @@ -4,11 +4,18 @@ from django.core.exceptions import ValidationError from django.utils.translation import gettext_lazy as _ -def uri_validator(value) -> None: +def uri_validator(value: str, allowed_schemes: set[str] | None = None) -> None: """ - Raises a ValidationError if the given value does not parse as an - URI looking thing, which we're defining as a scheme and either network - location or path value + Validates that the given value parses as a URI with required components + and optionally restricts to specific schemes. + + Args: + value: The URI string to validate + allowed_schemes: Optional set/list of allowed schemes (e.g. {'http', 'https'}). + If None, all schemes are allowed. + + Raises: + ValidationError: If the URI is invalid or uses a disallowed scheme """ try: parts = urlparse(value) @@ -22,8 +29,32 @@ def uri_validator(value) -> None: _(f"Unable to parse URI {value}, missing net location or path"), params={"value": value}, ) + + if allowed_schemes and parts.scheme not in allowed_schemes: + raise ValidationError( + _( + f"URI scheme '{parts.scheme}' is not allowed. Allowed schemes: {', '.join(allowed_schemes)}", + ), + params={"value": value, "scheme": parts.scheme}, + ) + + except ValidationError: + raise except Exception as e: raise ValidationError( _(f"Unable to parse URI {value}"), params={"value": value}, ) from e + + +def url_validator(value) -> None: + """ + Validates that the given value is a valid HTTP or HTTPS URL. + + Args: + value: The URL string to validate + + Raises: + ValidationError: If the URL is invalid or not using http/https scheme + """ + uri_validator(value, allowed_schemes={"http", "https"})