Fix: Expanded SVG validation whitelist and additional checks (#11590)

2026-01-30 23:08:59 -06:00 · 2025-12-12 12:04:04 -08:00
parent a1026f03db
commit d9a596d67a
4 changed files with 516 additions and 86 deletions
--- a/src/documents/tests/samples/malicious.svg
+++ b/src/documents/tests/samples/malicious.svg
@@ -1,4 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
-  <text x="10" y="20">Hello</text>
-  <script>alert('XSS')</script>
-</svg>
--- a/src/documents/tests/test_api_app_config.py
+++ b/src/documents/tests/test_api_app_config.py
@@ -1,4 +1,5 @@
 import json
+from io import BytesIO
 from pathlib import Path

 from django.contrib.auth.models import User
@@ -199,17 +200,337 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
        THEN:
            - SVG is rejected with 400
        """
-        path = Path(__file__).parent / "samples" / "malicious.svg"
-        with path.open("rb") as f:
+        malicious_svg = b"""<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
+                            <text x="10" y="20">Hello</text>
+                            <script>alert('XSS')</script>
+                            </svg>
+                        """
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "malicious_script.svg"
+
        response = self.client.patch(
            f"{self.ENDPOINT}1/",
-                {"app_logo": f},
+            {"app_logo": svg_file},
            format="multipart",
        )

+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed svg tag", str(response.data).lower())
+
+    def test_api_rejects_malicious_svg_with_style_javascript(self):
+        """
+        GIVEN:
+            - An SVG logo containing javascript: in style attribute
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <rect width="100" height="100" style="background: url(javascript:alert('XSS'));" fill="red"/>
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "malicious_style.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn(
+            "disallowed pattern in style attribute",
+            str(response.data).lower(),
+        )
+        self.assertIn("style", str(response.data).lower())
+
+    def test_api_rejects_svg_with_style_expression(self):
+        """
+        GIVEN:
+            - An SVG logo containing CSS expression() in style
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <rect width="100" height="100" style="width: expression(alert('XSS'));" fill="blue"/>
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "expression_style.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn("disallowed", str(response.data).lower())

+    def test_api_rejects_svg_with_style_import(self):
+        """
+        GIVEN:
+            - An SVG logo containing @import in style
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <rect width="100" height="100" style="@import url('http://evil.com/malicious.css');" fill="green"/>
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "import_style.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed", str(response.data).lower())
+
+    def test_api_accepts_valid_svg_with_safe_style(self):
+        """
+        GIVEN:
+            - A valid SVG logo with safe style attributes
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is accepted with 200
+        """
+
+        safe_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <rect width="100" height="100" style="fill: #ff6b6b; stroke: #333; stroke-width: 2;"/>
+        <circle cx="50" cy="50" r="30" style="fill: white; opacity: 0.8;"/>
+    </svg>"""
+
+        svg_file = BytesIO(safe_svg)
+        svg_file.name = "safe_logo.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+    def test_api_rejects_svg_with_disallowed_attribute(self):
+        """
+        GIVEN:
+            - An SVG with a disallowed attribute (onclick)
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <rect width="100" height="100" fill="red" onclick="alert('XSS')"/>
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "onclick_attribute.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed", str(response.data).lower())
+        self.assertIn("attribute", str(response.data).lower())
+
+    def test_api_rejects_svg_with_disallowed_tag(self):
+        """
+        GIVEN:
+            - An SVG with a disallowed tag (script)
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <script>alert('XSS')</script>
+        <rect width="100" height="100" fill="blue"/>
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "script_tag.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed", str(response.data).lower())
+        self.assertIn("tag", str(response.data).lower())
+
+    def test_api_rejects_svg_with_javascript_href(self):
+        """
+        GIVEN:
+            - An SVG with javascript: in href attribute
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <defs>
+            <rect id="a" width="10" height="10" />
+        </defs>
+        <use href="javascript:alert('XSS')" />
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "javascript_href.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed", str(response.data).lower())
+        self.assertIn("javascript", str(response.data).lower())
+
+    def test_api_rejects_svg_with_javascript_xlink_href(self):
+        """
+        GIVEN:
+            - An SVG with javascript: in xlink:href attribute
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 100 100">
+        <use xlink:href="javascript:alert('XSS')" />
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "javascript_xlink_href.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("disallowed", str(response.data).lower())
+        self.assertIn("javascript", str(response.data).lower())
+
+    def test_api_rejects_svg_with_data_text_html_href(self):
+        """
+        GIVEN:
+            - An SVG with data:text/html in href attribute
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+        """
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+            <defs>
+                <rect id="r" width="100" height="100" fill="purple"/>
+            </defs>
+            <use href="javascript:alert(1)" />
+        </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "data_html_href.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        # This will now catch "Disallowed URI scheme"
+        self.assertIn("disallowed", str(response.data).lower())
+
+    def test_api_rejects_svg_with_unknown_namespace_attribute(self):
+        """
+        GIVEN:
+            - An SVG with an attribute in an unknown/custom namespace
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400
+            - Error message identifies the namespaced attribute as disallowed
+        """
+
+        # Define a custom namespace "my:hack" and try to use it
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg"
+         xmlns:hack="http://example.com/hack"
+         viewBox="0 0 100 100">
+        <rect width="100" height="100" hack:fill="red" />
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "unknown_namespace.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+        # The error message should show the full Clark notation (curly braces)
+        # because the validator's 'else' block kept the raw lxml name.
+        error_msg = str(response.data).lower()
+        self.assertIn("disallowed svg attribute", error_msg)
+        self.assertIn("{http://example.com/hack}fill", error_msg)
+
+    def test_api_rejects_svg_with_external_http_href(self) -> None:
+        """
+        GIVEN:
+            - An SVG with an external URI (http://) in a safe tag's href attribute.
+        WHEN:
+            - Uploaded via PATCH to app config
+        THEN:
+            - SVG is rejected with 400 because http:// is not a safe_prefix.
+        """
+        from io import BytesIO
+
+        # http:// is not in dangerous_schemes, but it is not in safe_prefixes.
+        malicious_svg = b"""<?xml version="1.0" encoding="UTF-8"?>
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+        <use href="http://evil.com/logo.svg" />
+    </svg>"""
+
+        svg_file = BytesIO(malicious_svg)
+        svg_file.name = "external_http_href.svg"
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            {"app_logo": svg_file},
+            format="multipart",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+        # Check for the error message raised by the safe_prefixes check
+        self.assertIn("uri scheme not allowed", str(response.data).lower())
+
    def test_create_not_allowed(self):
        """
        GIVEN:
--- a/src/paperless/serialisers.py
+++ b/src/paperless/serialisers.py
@@ -10,6 +10,7 @@ from django.contrib.auth.models import Group
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.contrib.auth.password_validation import validate_password
+from django.core.files.uploadedfile import UploadedFile
 from rest_framework import serializers
 from rest_framework.authtoken.serializers import AuthTokenSerializer

@@ -221,7 +222,7 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
            instance.app_logo.delete()
        return super().update(instance, validated_data)

-    def validate_app_logo(self, file):
+    def validate_app_logo(self, file: UploadedFile):
        if file and magic.from_buffer(file.read(2048), mime=True) == "image/svg+xml":
            reject_dangerous_svg(file)
        return file
--- a/src/paperless/validators.py
+++ b/src/paperless/validators.py
@@ -1,86 +1,159 @@
 from django.core.exceptions import ValidationError
+from django.core.files.uploadedfile import UploadedFile
 from lxml import etree

 ALLOWED_SVG_TAGS: set[str] = {
-    "svg",
-    "g",
-    "path",
-    "rect",
-    "circle",
-    "ellipse",
-    "line",
-    "polyline",
-    "polygon",
-    "text",
-    "tspan",
-    "defs",
-    "lineargradient",
-    "radialgradient",
-    "stop",
-    "clippath",
-    "use",
-    "title",
-    "desc",
-    "style",
+    # Basic shapes
+    "svg",  # Root SVG element
+    "g",  # Group elements together
+    "path",  # Draw complex shapes with commands
+    "rect",  # Rectangle
+    "circle",  # Circle
+    "ellipse",  # Ellipse/oval
+    "line",  # Straight line
+    "polyline",  # Connected lines (open path)
+    "polygon",  # Connected lines (closed path)
+    # Text
+    "text",  # Text container
+    "tspan",  # Text span within text
+    "textpath",  # Text along a path
+    # Definitions and reusable content
+    "defs",  # Container for reusable elements
+    "symbol",  # Reusable graphic template
+    "use",  # Reference/instantiate reusable elements
+    "marker",  # Arrowheads and path markers
+    "pattern",  # Repeating pattern fills
+    "mask",  # Masking effects
+    # Gradients
+    "lineargradient",  # Linear gradient fill
+    "radialgradient",  # Radial gradient fill
+    "stop",  # Gradient color stop
+    # Clipping
+    "clippath",  # Clipping path definition
+    # Metadata
+    "title",  # Accessible title
+    "desc",  # Accessible description
+    "metadata",  # Document metadata
 }

 ALLOWED_SVG_ATTRIBUTES: set[str] = {
-    "id",
-    "class",
-    "style",
-    "d",
-    "fill",
-    "fill-opacity",
-    "fill-rule",
-    "stroke",
-    "stroke-width",
-    "stroke-linecap",
-    "stroke-linejoin",
-    "stroke-miterlimit",
-    "stroke-dasharray",
-    "stroke-dashoffset",
-    "stroke-opacity",
-    "transform",
-    "x",
-    "y",
-    "cx",
-    "cy",
-    "r",
-    "rx",
-    "ry",
-    "width",
-    "height",
-    "x1",
-    "y1",
-    "x2",
-    "y2",
-    "gradienttransform",
-    "gradientunits",
-    "offset",
-    "stop-color",
-    "stop-opacity",
-    "clip-path",
-    "viewbox",
-    "preserveaspectratio",
-    "href",
-    "xlink:href",
-    "font-family",
-    "font-size",
-    "font-weight",
-    "text-anchor",
-    "xmlns",
-    "xmlns:xlink",
-    "version",
+    # Core attributes
+    "id",  # Unique identifier
+    "class",  # CSS class names
+    "style",  # Inline CSS styles (validate content separately!)
+    # Positioning and sizing
+    "x",  # X coordinate
+    "y",  # Y coordinate
+    "cx",  # Center X coordinate (circle/ellipse)
+    "cy",  # Center Y coordinate (circle/ellipse)
+    "r",  # Radius (circle)
+    "rx",  # X radius (ellipse, rounded corners)
+    "ry",  # Y radius (ellipse, rounded corners)
+    "width",  # Width
+    "height",  # Height
+    "x1",  # Start X (line, gradient)
+    "y1",  # Start Y (line, gradient)
+    "x2",  # End X (line, gradient)
+    "y2",  # End Y (line, gradient)
+    "dx",  # X offset (text)
+    "dy",  # Y offset (text)
+    "points",  # Point list for polyline/polygon
+    # Path data
+    "d",  # Path commands and coordinates
+    # Fill properties
+    "fill",  # Fill color or none
+    "fill-opacity",  # Fill transparency
+    "fill-rule",  # Fill algorithm (nonzero/evenodd)
+    # Stroke properties
+    "stroke",  # Stroke color or none
+    "stroke-width",  # Stroke thickness
+    "stroke-opacity",  # Stroke transparency
+    "stroke-linecap",  # Line ending style (butt/round/square)
+    "stroke-linejoin",  # Corner style (miter/round/bevel)
+    "stroke-miterlimit",  # Miter join limit
+    "stroke-dasharray",  # Dash pattern
+    "stroke-dashoffset",  # Dash pattern offset
+    # Transforms and positioning
+    "transform",  # Transformations (translate/rotate/scale)
+    "viewbox",  # Coordinate system and viewport
+    "preserveaspectratio",  # Scaling behavior
+    # Opacity
+    "opacity",  # Overall element opacity
+    # Gradient attributes
+    "gradienttransform",  # Transform applied to gradient
+    "gradientunits",  # Gradient coordinate system
+    "offset",  # Position of gradient stop
+    "stop-color",  # Color at gradient stop
+    "stop-opacity",  # Opacity at gradient stop
+    # Clipping and masking
+    "clip-path",  # Reference to clipping path
+    "mask",  # Reference to mask
+    # Markers
+    "marker-start",  # Marker at path start
+    "marker-mid",  # Marker at path vertices
+    "marker-end",  # Marker at path end
+    # Text attributes
+    "font-family",  # Font name
+    "font-size",  # Font size
+    "font-weight",  # Font weight (normal/bold)
+    "font-style",  # Font style (normal/italic)
+    "text-anchor",  # Text alignment (start/middle/end)
+    "text-decoration",  # Text decoration (underline/etc)
+    "letter-spacing",  # Space between letters
+    # Links and references
+    "href",  # Link or reference (validate for javascript:!)
+    "xlink:href",  # Legacy link reference (validate for javascript:!)
+    "xlink:title",  # Accessible title for links
+    # Pattern attributes
+    "patternunits",  # Pattern coordinate system
+    "patterntransform",  # Transform applied to pattern
+    "patterncontentunits",  # Pattern content coordinate system
+    # Mask attributes
+    "maskunits",  # Mask coordinate system
+    "maskcontentunits",  # Mask content coordinate system
+    # SVG namespace declarations
+    "xmlns",  # XML namespace (usually http://www.w3.org/2000/svg)
+    "xmlns:xlink",  # XLink namespace
+    "version",  # SVG version
    "type",
 }

+# Dangerous patterns in style attributes that can execute code
+DANGEROUS_STYLE_PATTERNS: set[str] = {
+    "javascript:",  # javascript: URLs in url() functions
+    "data:text/html",  # HTML data URIs can contain scripts
+    "expression(",  # IE's CSS expressions (legacy but dangerous)
+    "import",  # CSS @import can load external resources
+    "@import",  # CSS @import directive
+    "-moz-binding:",  # Firefox XBL bindings (can execute code)
+    "behaviour:",  # IE behavior property
+    "vbscript:",  # VBScript URLs
+}

-def reject_dangerous_svg(file):
+XLINK_NS: set[str] = {
+    "http://www.w3.org/1999/xlink",
+    "https://www.w3.org/1999/xlink",
+}
+
+# Dangerous URI schemes
+DANGEROUS_SCHEMES: set[str] = {
+    "javascript:",
+    "data:text/html",
+    "vbscript:",
+    "file:",
+    "data:application/",  # Can contain scripts
+}
+
+SAFE_PREFIXES: set[str] = {"#", "/", "./", "../", "data:image/"}
+
+
+def reject_dangerous_svg(file: UploadedFile) -> None:
    """
    Rejects SVG files that contain dangerous tags or attributes.
    Raises ValidationError if unsafe content is found.
    See GHSA-6p53-hqqw-8j62
    """
+
    try:
        parser = etree.XMLParser(resolve_entities=False)
        file.seek(0)
@@ -90,17 +163,56 @@ def reject_dangerous_svg(file):
        raise ValidationError("Invalid SVG file.")

    for element in root.iter():
-        tag = etree.QName(element.tag).localname.lower()
+        tag: str = etree.QName(element.tag).localname.lower()
        if tag not in ALLOWED_SVG_TAGS:
            raise ValidationError(f"Disallowed SVG tag: <{tag}>")

+        attr_name: str
+        attr_value: str
        for attr_name, attr_value in element.attrib.items():
-            attr_name_lower = attr_name.lower()
+            # lxml expands namespaces to {url}name. We must convert the standard
+            # XLink namespace back to 'xlink:' so it matches our allowlist.
+            if attr_name.startswith("{"):
+                qname = etree.QName(attr_name)
+                if qname.namespace in XLINK_NS:
+                    attr_name_check = f"xlink:{qname.localname}"
+                else:
+                    # Unknown namespace: keep raw name (will fail allowlist)
+                    attr_name_check = attr_name
+            else:
+                attr_name_check = attr_name
+
+            attr_name_lower = attr_name_check.lower().strip()
+
            if attr_name_lower not in ALLOWED_SVG_ATTRIBUTES:
                raise ValidationError(f"Disallowed SVG attribute: {attr_name}")

-            if attr_name_lower in {
-                "href",
-                "xlink:href",
-            } and attr_value.strip().lower().startswith("javascript:"):
-                raise ValidationError(f"Disallowed javascript: URI in {attr_name}")
+            if attr_name_lower == "style":
+                style_lower: str = attr_value.lower()
+                # Check if any dangerous pattern is a substring of the style
+                for pattern in DANGEROUS_STYLE_PATTERNS:
+                    if pattern in style_lower:
+                        raise ValidationError(
+                            f"Disallowed pattern in style attribute: {pattern}",
+                        )
+
+            # Validate URI attributes (href, xlink:href)
+            if attr_name_lower in {"href", "xlink:href"}:
+                value_stripped: str = attr_value.strip().lower()
+
+                # Check if value starts with any dangerous scheme
+                for scheme in DANGEROUS_SCHEMES:
+                    if value_stripped.startswith(scheme):
+                        raise ValidationError(
+                            f"Disallowed URI scheme in {attr_name}: {scheme}",
+                        )
+
+                # Allow safe schemes for logos: #anchor, relative paths, data:image/*
+                # No external resources (http/https) needed for logos
+
+                if value_stripped and not any(
+                    value_stripped.startswith(prefix) for prefix in SAFE_PREFIXES
+                ):
+                    raise ValidationError(
+                        f"URI scheme not allowed in {attr_name}: must be #anchor, relative path, or data:image/*",
+                    )