Enhancement: support webhook restrictions (#10555)

This commit is contained in:
shamoon
2025-08-11 10:15:30 -07:00
committed by GitHub
parent 1bee1495cf
commit 6730896894
5 changed files with 269 additions and 11 deletions

View File

@@ -1282,6 +1282,30 @@ within your documents.
Defaults to false.
### Workflow webhooks
#### [`PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES) {#PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES}
: A comma-separated list of allowed schemes for webhooks. This setting
controls which URL schemes are permitted for webhook URLs.
Defaults to `http,https`.
#### [`PAPERLESS_WEBHOOKS_ALLOWED_PORTS=<str>`](#PAPERLESS_WEBHOOKS_ALLOWED_PORTS) {#PAPERLESS_WEBHOOKS_ALLOWED_PORTS}
: A comma-separated list of allowed ports for webhooks. This setting
controls which ports are permitted for webhook URLs. For example, if you
set this to `80,443`, webhooks will only be sent to URLs that use these
ports.
Defaults to an empty list, which allows all ports.
#### [`PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS=<bool>`](#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS) {#PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS}
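: If set to false, webhooks cannot be sent to internal addresses, i.e. hosts that resolve to a loopback (e.g., localhost), private, link-local, multicast, or unspecified IP address.
Defaults to true, which allows internal requests.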
### Polling {#polling}
#### [`PAPERLESS_CONSUMER_POLLING=<num>`](#PAPERLESS_CONSUMER_POLLING) {#PAPERLESS_CONSUMER_POLLING}

View File

@@ -499,6 +499,10 @@ The following workflow action types are available:
- Encoding for the request body, either JSON or form data
- The request headers as key-value pairs
For security reasons, webhooks can be restricted to specific URL schemes and ports, and can be prevented from connecting to internal
addresses (e.g., localhost or private networks). See the relevant [configuration settings](configuration.md#workflow-webhooks) to change
this behavior. If you allow non-admin users to create workflows, you may want to adjust these settings to prevent abuse.
#### Workflow placeholders
Some workflow text can include placeholders but the available options differ depending on the type of

View File

@@ -1,9 +1,12 @@
from __future__ import annotations from __future__ import annotations
import ipaddress
import logging import logging
import shutil import shutil
import socket
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from urllib.parse import urlparse
import httpx import httpx
from celery import shared_task from celery import shared_task
@@ -660,6 +663,28 @@ def run_workflows_updated(sender, document: Document, logging_group=None, **kwar
) )
def _is_public_ip(ip: str) -> bool:
try:
obj = ipaddress.ip_address(ip)
return not (
obj.is_private
or obj.is_loopback
or obj.is_link_local
or obj.is_multicast
or obj.is_unspecified
)
except ValueError: # pragma: no cover
return False
def _resolve_first_ip(host: str) -> str | None:
try:
info = socket.getaddrinfo(host, None)
return info[0][4][0] if info else None
except Exception: # pragma: no cover
return None
@shared_task( @shared_task(
retry_backoff=True, retry_backoff=True,
autoretry_for=(httpx.HTTPStatusError,), autoretry_for=(httpx.HTTPStatusError,),
@@ -674,11 +699,35 @@ def send_webhook(
*, *,
as_json: bool = False, as_json: bool = False,
): ):
p = urlparse(url)
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
logger.warning("Webhook blocked: invalid scheme/hostname")
raise ValueError("Invalid URL scheme or hostname.")
port = p.port or (443 if p.scheme == "https" else 80)
if (
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
and port not in settings.WEBHOOKS_ALLOWED_PORTS
):
logger.warning("Webhook blocked: port not permitted")
raise ValueError("Destination port not permitted.")
ip = _resolve_first_ip(p.hostname)
if not ip or (
not _is_public_ip(ip) and not settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS
):
logger.warning("Webhook blocked: destination not allowed")
raise ValueError("Destination host is not allowed.")
try: try:
post_args = { post_args = {
"url": url, "url": url,
"headers": headers, "headers": {
"files": files, k: v for k, v in (headers or {}).items() if k.lower() != "host"
},
"files": files or None,
"timeout": 5.0,
"follow_redirects": False,
} }
if as_json: if as_json:
post_args["json"] = data post_args["json"] = data
@@ -699,15 +748,6 @@ def send_webhook(
) )
raise e raise e
logger.info(
f"Webhook sent to {url}",
)
except Exception as e:
logger.error(
f"Failed attempt sending webhook to {url}: {e}",
)
raise e
def run_workflows( def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType, trigger_type: WorkflowTrigger.WorkflowTriggerType,

View File

@@ -1,8 +1,10 @@
import shutil import shutil
import socket
from datetime import timedelta from datetime import timedelta
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from unittest import mock from unittest import mock
import pytest
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import override_settings from django.test import override_settings
@@ -10,6 +12,7 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_groups_with_perms from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms from guardian.shortcuts import get_users_with_perms
from httpx import HTTPError
from httpx import HTTPStatusError from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock from pytest_httpx import HTTPXMock
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
@@ -2825,6 +2828,8 @@ class TestWorkflows(
content="Test message", content="Test message",
headers={}, headers={},
files=None, files=None,
follow_redirects=False,
timeout=5,
) )
expected_str = "Webhook sent to http://paperless-ngx.com" expected_str = "Webhook sent to http://paperless-ngx.com"
@@ -2842,6 +2847,8 @@ class TestWorkflows(
data={"message": "Test message"}, data={"message": "Test message"},
headers={}, headers={},
files=None, files=None,
follow_redirects=False,
timeout=5,
) )
@mock.patch("httpx.post") @mock.patch("httpx.post")
@@ -2962,3 +2969,164 @@ class TestWebhookSend:
as_json=True, as_json=True,
) )
assert httpx_mock.get_request().headers["Content-Type"] == "application/json" assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
@pytest.fixture
def resolve_to(monkeypatch):
    """
    Provide a callable that pins DNS resolution to a single IP.

    Calling the returned function with an IP patches ``socket.getaddrinfo``
    so that every hostname lookup yields exactly that address.
    """

    def _pin_resolution(ip: str):
        def _stub_getaddrinfo(host, *_args, **_kwargs):
            # Same 5-tuple shape as a real getaddrinfo() entry.
            sockaddr = (ip, 0)
            return [(socket.AF_INET, None, None, "", sockaddr)]

        monkeypatch.setattr(socket, "getaddrinfo", _stub_getaddrinfo)

    return _pin_resolution
class TestWebhookSecurity:
    """
    Tests for the destination restrictions applied by send_webhook:
    allowed schemes, allowed ports, internal-address blocking, redirect
    handling and Host header stripping.
    """

    def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
        """
        GIVEN:
            - Invalid URL schemes or hostnames
        WHEN:
            - send_webhook is called with such URLs
        THEN:
            - ValueError is raised
            - No request is sent
        """
        with pytest.raises(ValueError):
            send_webhook(
                "ftp://example.com",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        with pytest.raises(ValueError):
            send_webhook(
                "http:///nohost",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        assert httpx_mock.get_request() is None

    @override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
    def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
        """
        GIVEN:
            - URL with a disallowed port
        WHEN:
            - send_webhook is called with such URL
        THEN:
            - ValueError is raised
            - No request is sent
        """
        with pytest.raises(ValueError):
            send_webhook(
                "http://paperless-ngx.com:8080",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        assert httpx_mock.get_request() is None

    @pytest.mark.parametrize(
        "internal_ip",
        [
            pytest.param("127.0.0.1", id="loopback"),
            pytest.param("10.0.0.1", id="private"),
            pytest.param("169.254.169.254", id="link-local"),
        ],
    )
    @override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
    def test_blocks_private_loopback_linklocal(
        self,
        httpx_mock: HTTPXMock,
        resolve_to,
        internal_ip: str,
    ):
        """
        GIVEN:
            - URL whose host resolves to a private, loopback, or link-local IP address
            - WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
        WHEN:
            - send_webhook is called with such URL
        THEN:
            - ValueError is raised
            - No request is sent
        """
        resolve_to(internal_ip)
        with pytest.raises(ValueError):
            send_webhook(
                "http://paperless-ngx.com",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        assert httpx_mock.get_request() is None

    def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - URL whose host resolves to a public IP address
        WHEN:
            - send_webhook is called with such URL
        THEN:
            - Request is sent successfully
        """
        resolve_to("52.207.186.75")
        httpx_mock.add_response(content=b"ok")

        send_webhook(
            url="http://paperless-ngx.com",
            data="hi",
            headers={},
            files=None,
            as_json=False,
        )

        req = httpx_mock.get_request()
        assert req.url.host == "paperless-ngx.com"

    def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - A URL that responds with a redirect
        WHEN:
            - send_webhook is called with that URL
        THEN:
            - An HTTPError is raised
            - Only the original request is made; the redirect is not followed
        """
        resolve_to("52.207.186.75")
        # Return a redirect and ensure we don't follow it (only one request recorded)
        httpx_mock.add_response(
            status_code=302,
            headers={"location": "http://internal-service.local"},
            content=b"",
        )

        with pytest.raises(HTTPError):
            send_webhook(
                "http://paperless-ngx.com",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        assert len(httpx_mock.get_requests()) == 1

    def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - A URL and a user-supplied Host header
        WHEN:
            - send_webhook is called with a malicious Host header
        THEN:
            - The supplied Host header is dropped and the request carries the
              hostname from the URL instead
        """
        resolve_to("52.207.186.75")
        httpx_mock.add_response(content=b"ok")

        send_webhook(
            url="http://paperless-ngx.com",
            data="ok",
            headers={"Host": "evil.test"},
            files=None,
            as_json=False,
        )

        req = httpx_mock.get_request()
        assert req.headers["Host"] == "paperless-ngx.com"
        assert "evil.test" not in req.headers.get("Host", "")

View File

@@ -1421,3 +1421,25 @@ OUTLOOK_OAUTH_ENABLED = bool(
and OUTLOOK_OAUTH_CLIENT_ID and OUTLOOK_OAUTH_CLIENT_ID
and OUTLOOK_OAUTH_CLIENT_SECRET, and OUTLOOK_OAUTH_CLIENT_SECRET,
) )
###############################################################################
# Webhooks
###############################################################################
# URL schemes a webhook may use; stored lower-cased because the sender
# lower-cases the URL scheme before checking membership.
WEBHOOKS_ALLOWED_SCHEMES = {
    scheme.lower()
    for scheme in __get_list("PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES", ["http", "https"])
}

# Destination ports a webhook may use; an empty set means no port restriction.
WEBHOOKS_ALLOWED_PORTS = {
    int(port) for port in __get_list("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", [])
}

# Whether webhooks may target hosts that resolve to non-public addresses.
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
    "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
    "true",
)