Enhancement: support webhook restrictions (#10555)

This commit is contained in:
shamoon
2025-08-11 10:15:30 -07:00
committed by GitHub
parent 1bee1495cf
commit 6730896894
5 changed files with 269 additions and 11 deletions

View File

@@ -1282,6 +1282,30 @@ within your documents.
Defaults to false.
## Workflow webhooks
#### [`PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES) {#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES}
: A comma-separated list of allowed schemes for webhooks. This setting
controls which URL schemes are permitted for webhook URLs.
Defaults to `http,https`.
#### [`PAPERLESS_WEBHOOKS_ALLOWED_PORTS=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_PORTS) {#PAPERLESS_WEBHOOKS_ALLOWED_PORTS}
: A comma-separated list of allowed ports for webhooks. This setting
controls which ports are permitted for webhook URLs. For example, if you
set this to `80,443`, webhooks will only be sent to URLs that use these
ports.
Defaults to empty list, which allows all ports.
#### [`PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=<bool>`](#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS) {#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS}
: If set to false, webhooks cannot be sent to internal URLs (e.g., localhost).
Defaults to true, which allows internal requests.
### Polling {#polling}
#### [`PAPERLESS_CONSUMER_POLLING=<num>`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING}

View File

@@ -499,6 +499,10 @@ The following workflow action types are available:
- Encoding for the request body, either JSON or form data
- The request headers as key-value pairs
For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant
[configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows,
you may want to adjust these settings to prevent abuse.
#### Workflow placeholders
Some workflow text can include placeholders but the available options differ depending on the type of

View File

@@ -1,9 +1,12 @@
from __future__ import annotations
import ipaddress
import logging
import shutil
import socket
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx
from celery import shared_task
@@ -660,6 +663,28 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
)
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task(
retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,),
@@ -674,11 +699,35 @@ def send_webhook(
*,
as_json: bool = False,
):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try:
post_args = {
"url": url,
"headers": headers,
"files": files,
"headers": {
k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
}
if as_json:
post_args["json"] = data
@@ -699,15 +748,6 @@ def send_webhook(
)
raise e
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,

View File

@@ -1,8 +1,10 @@
import shutil
import socket
from datetime import timedelta
from typing import TYPE_CHECKING
from unittest import mock
import pytest
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.test import override_settings
@@ -10,6 +12,7 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms
from httpx import HTTPError
from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock
from rest_framework.test import APITestCase
@@ -2825,6 +2828,8 @@ class TestWorkflows(
content="Test message",
headers={},
files=None,
follow_redirects=False,
timeout=5,
)
expected_str = "Webhook sent to http://paperless-ngx.com"
@@ -2842,6 +2847,8 @@ class TestWorkflows(
data={"message": "Test message"},
headers={},
files=None,
follow_redirects=False,
timeout=5,
)
@mock.patch("httpx.post")
@@ -2962,3 +2969,164 @@ class TestWebhookSend:
as_json=True,
)
assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
@pytest.fixture
def resolve_to(monkeypatch):
"""
Force DNS resolution to a specific IP for any hostname.
"""
def _set(ip: str):
def fake_getaddrinfo(host, *_args, **_kwargs):
return [(socket.AF_INET, None, None, "", (ip, 0))]
monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo)
return _set
class TestWebhookSecurity:
def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- Invalid URL schemes or hostnames
WHEN:
- send_webhook is called with such URLs
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"ftp://example.com",
data="",
headers={},
files=None,
as_json=False,
)
with pytest.raises(ValueError):
send_webhook(
"http:///nohost",
data="",
headers={},
files=None,
as_json=False,
)
@override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- URL with a disallowed port
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com:8080",
data="",
headers={},
files=None,
as_json=False,
)
assert httpx_mock.get_request() is None
@override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a private, loopback, or link-local IP address
- WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
resolve_to("127.0.0.1")
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a public IP address
WHEN:
- send_webhook is called with such URL
THEN:
- Request is sent successfully
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="hi",
headers={},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.url.host == "paperless-ngx.com"
def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL that redirects
WHEN:
- send_webhook is called with follow_redirects=False
THEN:
- Request is made to the original URL and does not follow the redirect
"""
resolve_to("52.207.186.75")
# Return a redirect and ensure we don't follow it (only one request recorded)
httpx_mock.add_response(
status_code=302,
headers={"location": "http://internal-service.local"},
content=b"",
)
with pytest.raises(HTTPError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
assert len(httpx_mock.get_requests()) == 1
def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL with a user-supplied Host header
WHEN:
- send_webhook is called with a malicious Host header
THEN:
- The Host header is stripped and replaced with the resolved hostname
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="ok",
headers={"Host": "evil.test"},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.headers["Host"] == "paperless-ngx.com"
assert "evil.test" not in req.headers.get("Host", "")

View File

@@ -1421,3 +1421,25 @@ OUTLOOK_OAUTH_ENABLED = bool(
and OUTLOOK_OAUTH_CLIENT_ID
and OUTLOOK_OAUTH_CLIENT_SECRET,
)
###############################################################################
# Webhooks
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
s.lower()
for s in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)