From 6730896894f3a6ee4a4f1a6c16c1e2e1936f9cf3 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Mon, 11 Aug 2025 10:15:30 -0700 Subject: [PATCH] Enhancement: support webhook restrictions (#10555) --- docs/configuration.md | 24 ++++ docs/usage.md | 4 + src/documents/signals/handlers.py | 62 ++++++++-- src/documents/tests/test_workflows.py | 168 ++++++++++++++++++++++++++ src/paperless/settings.py | 22 ++++ 5 files changed, 269 insertions(+), 11 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index e77c65e04..fdc0097dd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1282,6 +1282,30 @@ within your documents. Defaults to false. +## Workflow webhooks + +#### [`PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=`](#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES) {#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES} + +: A comma-separated list of allowed schemes for webhooks. This setting +controls which URL schemes are permitted for webhook URLs. + + Defaults to `http,https`. + +#### [`PAPERLESS_WEBHOOKS_ALLOWED_PORTS=`](#PAPERLESS_WEBHOOKS_ALLOWED_PORTS) {#PAPERLESS_WEBHOOKS_ALLOWED_PORTS} + +: A comma-separated list of allowed ports for webhooks. This setting +controls which ports are permitted for webhook URLs. For example, if you +set this to `80,443`, webhooks will only be sent to URLs that use these +ports. + + Defaults to empty list, which allows all ports. + +#### [`PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=`](#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS) {#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS} + +: If set to false, webhooks cannot be sent to internal URLs (e.g., localhost). + + Defaults to true, which allows internal requests. + ### Polling {#polling} #### [`PAPERLESS_CONSUMER_POLLING=`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING} diff --git a/docs/usage.md b/docs/usage.md index 6f332396d..73d3336ce 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -499,6 +499,10 @@ The following workflow action types are available: - Encoding for the request body, either JSON or form data - The request headers as key-value pairs +For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant +[configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows, +you may want to adjust these settings to prevent abuse. + #### Workflow placeholders Some workflow text can include placeholders but the available options differ depending on the type of diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 76df8d4e4..505bfeeea 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -1,9 +1,12 @@ from __future__ import annotations +import ipaddress import logging import shutil +import socket from pathlib import Path from typing import TYPE_CHECKING +from urllib.parse import urlparse import httpx from celery import shared_task @@ -660,6 +663,28 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar ) +def _is_public_ip(ip: str) -> bool: + try: + obj = ipaddress.ip_address(ip) + return not ( + obj.is_private + or obj.is_loopback + or obj.is_link_local + or obj.is_multicast + or obj.is_unspecified + ) + except ValueError: # pragma: no cover + return False + + +def _resolve_first_ip(host: str) -> str | None: + try: + info = socket.getaddrinfo(host, None) + return info[0][4][0] if info else None + except Exception: # pragma: no cover + return None + + @shared_task( retry_backoff=True, autoretry_for=(httpx.HTTPStatusError,), @@ -674,11 +699,35 @@ def send_webhook( *, as_json: bool = False, ): + p = urlparse(url) + if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname: + logger.warning("Webhook blocked: invalid scheme/hostname") + raise ValueError("Invalid URL scheme or hostname.") + + port = p.port or (443 if p.scheme == "https" else 80) + if ( + len(settings.WEBHOOKS_ALLOWED_PORTS) > 0 + and port not in settings.WEBHOOKS_ALLOWED_PORTS + ): + logger.warning("Webhook blocked: port not permitted") + raise ValueError("Destination port not permitted.") + + ip = _resolve_first_ip(p.hostname) + if not ip or ( + not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS + ): + logger.warning("Webhook blocked: destination not allowed") + raise ValueError("Destination host is not allowed.") + try: post_args = { "url": url, - "headers": headers, - "files": files, + "headers": { + k: v for k, v in (headers or {}).items() if k.lower() != "host" + }, + "files": files or None, + "timeout": 5.0, + "follow_redirects": False, } if as_json: post_args["json"] = data @@ -699,15 +748,6 @@ def send_webhook( ) raise e - logger.info( - f"Webhook sent to {url}", - ) - except Exception as e: - logger.error( - f"Failed attempt sending webhook to {url}: {e}", - ) - raise e - def run_workflows( trigger_type: WorkflowTrigger.WorkflowTriggerType, diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 5aada761c..08bcc1f78 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -1,8 +1,10 @@ import shutil +import socket from datetime import timedelta from typing import TYPE_CHECKING from unittest import mock +import pytest from django.contrib.auth.models import Group from django.contrib.auth.models import User from django.test import override_settings @@ -10,6 +12,7 @@ from django.utils import timezone from guardian.shortcuts import assign_perm from guardian.shortcuts import get_groups_with_perms from guardian.shortcuts import get_users_with_perms +from httpx import HTTPError from httpx import HTTPStatusError from pytest_httpx import HTTPXMock from rest_framework.test import APITestCase @@ -2825,6 +2828,8 @@ class TestWorkflows( content="Test message", headers={}, files=None, + follow_redirects=False, + timeout=5, ) expected_str = "Webhook sent to http://paperless-ngx.com" @@ -2842,6 +2847,8 @@ class TestWorkflows( data={"message": "Test message"}, headers={}, files=None, + follow_redirects=False, + timeout=5, ) @mock.patch("httpx.post") @@ -2962,3 +2969,164 @@ class TestWebhookSend: as_json=True, ) assert httpx_mock.get_request().headers["Content-Type"] == "application/json" + + +@pytest.fixture +def resolve_to(monkeypatch): + """ + Force DNS resolution to a specific IP for any hostname. + """ + + def _set(ip: str): + def fake_getaddrinfo(host, *_args, **_kwargs): + return [(socket.AF_INET, None, None, "", (ip, 0))] + + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo) + + return _set + + +class TestWebhookSecurity: + def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock): + """ + GIVEN: + - Invalid URL schemes or hostnames + WHEN: + - send_webhook is called with such URLs + THEN: + - ValueError is raised + """ + with pytest.raises(ValueError): + send_webhook( + "ftp://example.com", + data="", + headers={}, + files=None, + as_json=False, + ) + + with pytest.raises(ValueError): + send_webhook( + "http:///nohost", + data="", + headers={}, + files=None, + as_json=False, + ) + + @override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443]) + def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock): + """ + GIVEN: + - URL with a disallowed port + WHEN: + - send_webhook is called with such URL + THEN: + - ValueError is raised + """ + with pytest.raises(ValueError): + send_webhook( + "http://paperless-ngx.com:8080", + data="", + headers={}, + files=None, + as_json=False, + ) + + assert httpx_mock.get_request() is None + + @override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False) + def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to): + """ + GIVEN: + - URL with a private, loopback, or link-local IP address + - WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False + WHEN: + - send_webhook is called with such URL + THEN: + - ValueError is raised + """ + resolve_to("127.0.0.1") + with pytest.raises(ValueError): + send_webhook( + "http://paperless-ngx.com", + data="", + headers={}, + files=None, + as_json=False, + ) + + def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to): + """ + GIVEN: + - URL with a public IP address + WHEN: + - send_webhook is called with such URL + THEN: + - Request is sent successfully + """ + resolve_to("52.207.186.75") + httpx_mock.add_response(content=b"ok") + + send_webhook( + url="http://paperless-ngx.com", + data="hi", + headers={}, + files=None, + as_json=False, + ) + + req = httpx_mock.get_request() + assert req.url.host == "paperless-ngx.com" + + def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to): + """ + GIVEN: + - A URL that redirects + WHEN: + - send_webhook is called with follow_redirects=False + THEN: + - Request is made to the original URL and does not follow the redirect + """ + resolve_to("52.207.186.75") + # Return a redirect and ensure we don't follow it (only one request recorded) + httpx_mock.add_response( + status_code=302, + headers={"location": "http://internal-service.local"}, + content=b"", + ) + + with pytest.raises(HTTPError): + send_webhook( + "http://paperless-ngx.com", + data="", + headers={}, + files=None, + as_json=False, + ) + + assert len(httpx_mock.get_requests()) == 1 + + def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to): + """ + GIVEN: + - A URL with a user-supplied Host header + WHEN: + - send_webhook is called with a malicious Host header + THEN: + - The Host header is stripped and replaced with the resolved hostname + """ + resolve_to("52.207.186.75") + httpx_mock.add_response(content=b"ok") + + send_webhook( + url="http://paperless-ngx.com", + data="ok", + headers={"Host": "evil.test"}, + files=None, + as_json=False, + ) + + req = httpx_mock.get_request() + assert req.headers["Host"] == "paperless-ngx.com" + assert "evil.test" not in req.headers.get("Host", "") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 834376ec6..63b6add5c 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -1421,3 +1421,25 @@ OUTLOOK_OAUTH_ENABLED = bool( and OUTLOOK_OAUTH_CLIENT_ID and OUTLOOK_OAUTH_CLIENT_SECRET, ) + +############################################################################### +# Webhooks +############################################################################### +WEBHOOKS_ALLOWED_SCHEMES = set( + s.lower() + for s in __get_list( + "PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES", + ["http", "https"], + ) +) +WEBHOOKS_ALLOWED_PORTS = set( + int(p) + for p in __get_list( + "PAPERLESS_WEBHOOKS_ALLOWED_PORTS", + [], + ) +) +WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean( + "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS", + "true", +)