From d9a596d67ab3db3982b99c8fa1f7d669b0ced46e Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 12 Dec 2025 12:04:04 -0800 Subject: [PATCH] Fix: Expanded SVG validation whitelist and additional checks (#11590) --- src/documents/tests/samples/malicious.svg | 4 - src/documents/tests/test_api_app_config.py | 335 ++++++++++++++++++++- src/paperless/serialisers.py | 3 +- src/paperless/validators.py | 260 +++++++++++----- 4 files changed, 516 insertions(+), 86 deletions(-) delete mode 100644 src/documents/tests/samples/malicious.svg diff --git a/src/documents/tests/samples/malicious.svg b/src/documents/tests/samples/malicious.svg deleted file mode 100644 index 11fb65821..000000000 --- a/src/documents/tests/samples/malicious.svg +++ /dev/null @@ -1,4 +0,0 @@ - - Hello - - diff --git a/src/documents/tests/test_api_app_config.py b/src/documents/tests/test_api_app_config.py index 750aeddbf..974f0d205 100644 --- a/src/documents/tests/test_api_app_config.py +++ b/src/documents/tests/test_api_app_config.py @@ -1,4 +1,5 @@ import json +from io import BytesIO from pathlib import Path from django.contrib.auth.models import User @@ -199,17 +200,337 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase): THEN: - SVG is rejected with 400 """ - path = Path(__file__).parent / "samples" / "malicious.svg" - with path.open("rb") as f: - response = self.client.patch( - f"{self.ENDPOINT}1/", - {"app_logo": f}, - format="multipart", - ) + malicious_svg = b""" + Hello + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "malicious_script.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed svg tag", str(response.data).lower()) + + def test_api_rejects_malicious_svg_with_style_javascript(self): + """ + GIVEN: + - An SVG logo containing javascript: in style attribute + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "malicious_style.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn( + "disallowed pattern in style attribute", + str(response.data).lower(), + ) + self.assertIn("style", str(response.data).lower()) + + def test_api_rejects_svg_with_style_expression(self): + """ + GIVEN: + - An SVG logo containing CSS expression() in style + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "expression_style.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertIn("disallowed", str(response.data).lower()) + def test_api_rejects_svg_with_style_import(self): + """ + GIVEN: + - An SVG logo containing @import in style + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "import_style.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed", str(response.data).lower()) + + def test_api_accepts_valid_svg_with_safe_style(self): + """ + GIVEN: + - A valid SVG logo with safe style attributes + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is accepted with 200 + """ + + safe_svg = b""" + + + + """ + + svg_file = BytesIO(safe_svg) + svg_file.name = "safe_logo.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + def test_api_rejects_svg_with_disallowed_attribute(self): + """ + GIVEN: + - An SVG with a disallowed attribute (onclick) + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "onclick_attribute.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed", str(response.data).lower()) + self.assertIn("attribute", str(response.data).lower()) + + def test_api_rejects_svg_with_disallowed_tag(self): + """ + GIVEN: + - An SVG with a disallowed tag (script) + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + + malicious_svg = b""" + + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "script_tag.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed", str(response.data).lower()) + self.assertIn("tag", str(response.data).lower()) + + def test_api_rejects_svg_with_javascript_href(self): + """ + GIVEN: + - An SVG with javascript: in href attribute + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + malicious_svg = b""" + + + + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "javascript_href.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed", str(response.data).lower()) + self.assertIn("javascript", str(response.data).lower()) + + def test_api_rejects_svg_with_javascript_xlink_href(self): + """ + GIVEN: + - An SVG with javascript: in xlink:href attribute + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "javascript_xlink_href.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertIn("disallowed", str(response.data).lower()) + self.assertIn("javascript", str(response.data).lower()) + + def test_api_rejects_svg_with_data_text_html_href(self): + """ + GIVEN: + - An SVG with data:text/html in href attribute + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + """ + malicious_svg = b""" + + + + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "data_html_href.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + # This will now catch "Disallowed URI scheme" + self.assertIn("disallowed", str(response.data).lower()) + + def test_api_rejects_svg_with_unknown_namespace_attribute(self): + """ + GIVEN: + - An SVG with an attribute in an unknown/custom namespace + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 + - Error message identifies the namespaced attribute as disallowed + """ + + # Define a custom namespace "my:hack" and try to use it + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "unknown_namespace.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + # The error message should show the full Clark notation (curly braces) + # because the validator's 'else' block kept the raw lxml name. + error_msg = str(response.data).lower() + self.assertIn("disallowed svg attribute", error_msg) + self.assertIn("{http://example.com/hack}fill", error_msg) + + def test_api_rejects_svg_with_external_http_href(self) -> None: + """ + GIVEN: + - An SVG with an external URI (http://) in a safe tag's href attribute. + WHEN: + - Uploaded via PATCH to app config + THEN: + - SVG is rejected with 400 because http:// is not a safe_prefix. + """ + from io import BytesIO + + # http:// is not in dangerous_schemes, but it is not in safe_prefixes. + malicious_svg = b""" + + + """ + + svg_file = BytesIO(malicious_svg) + svg_file.name = "external_http_href.svg" + + response = self.client.patch( + f"{self.ENDPOINT}1/", + {"app_logo": svg_file}, + format="multipart", + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + # Check for the error message raised by the safe_prefixes check + self.assertIn("uri scheme not allowed", str(response.data).lower()) + def test_create_not_allowed(self): """ GIVEN: diff --git a/src/paperless/serialisers.py b/src/paperless/serialisers.py index 97b84fd14..256f40680 100644 --- a/src/paperless/serialisers.py +++ b/src/paperless/serialisers.py @@ -10,6 +10,7 @@ from django.contrib.auth.models import Group from django.contrib.auth.models import Permission from django.contrib.auth.models import User from django.contrib.auth.password_validation import validate_password +from django.core.files.uploadedfile import UploadedFile from rest_framework import serializers from rest_framework.authtoken.serializers import AuthTokenSerializer @@ -221,7 +222,7 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer): instance.app_logo.delete() return super().update(instance, validated_data) - def validate_app_logo(self, file): + def validate_app_logo(self, file: UploadedFile): if file and magic.from_buffer(file.read(2048), mime=True) == "image/svg+xml": reject_dangerous_svg(file) return file diff --git a/src/paperless/validators.py b/src/paperless/validators.py index 36e153881..ab5ae6f47 100644 --- a/src/paperless/validators.py +++ b/src/paperless/validators.py @@ -1,86 +1,159 @@ from django.core.exceptions import ValidationError +from django.core.files.uploadedfile import UploadedFile from lxml import etree ALLOWED_SVG_TAGS: set[str] = { - "svg", - "g", - "path", - "rect", - "circle", - "ellipse", - "line", - "polyline", - "polygon", - "text", - "tspan", - "defs", - "lineargradient", - "radialgradient", - "stop", - "clippath", - "use", - "title", - "desc", - "style", + # Basic shapes + "svg", # Root SVG element + "g", # Group elements together + "path", # Draw complex shapes with commands + "rect", # Rectangle + "circle", # Circle + "ellipse", # Ellipse/oval + "line", # Straight line + "polyline", # Connected lines (open path) + "polygon", # Connected lines (closed path) + # Text + "text", # Text container + "tspan", # Text span within text + "textpath", # Text along a path + # Definitions and reusable content + "defs", # Container for reusable elements + "symbol", # Reusable graphic template + "use", # Reference/instantiate reusable elements + "marker", # Arrowheads and path markers + "pattern", # Repeating pattern fills + "mask", # Masking effects + # Gradients + "lineargradient", # Linear gradient fill + "radialgradient", # Radial gradient fill + "stop", # Gradient color stop + # Clipping + "clippath", # Clipping path definition + # Metadata + "title", # Accessible title + "desc", # Accessible description + "metadata", # Document metadata } ALLOWED_SVG_ATTRIBUTES: set[str] = { - "id", - "class", - "style", - "d", - "fill", - "fill-opacity", - "fill-rule", - "stroke", - "stroke-width", - "stroke-linecap", - "stroke-linejoin", - "stroke-miterlimit", - "stroke-dasharray", - "stroke-dashoffset", - "stroke-opacity", - "transform", - "x", - "y", - "cx", - "cy", - "r", - "rx", - "ry", - "width", - "height", - "x1", - "y1", - "x2", - "y2", - "gradienttransform", - "gradientunits", - "offset", - "stop-color", - "stop-opacity", - "clip-path", - "viewbox", - "preserveaspectratio", - "href", - "xlink:href", - "font-family", - "font-size", - "font-weight", - "text-anchor", - "xmlns", - "xmlns:xlink", - "version", + # Core attributes + "id", # Unique identifier + "class", # CSS class names + "style", # Inline CSS styles (validate content separately!) + # Positioning and sizing + "x", # X coordinate + "y", # Y coordinate + "cx", # Center X coordinate (circle/ellipse) + "cy", # Center Y coordinate (circle/ellipse) + "r", # Radius (circle) + "rx", # X radius (ellipse, rounded corners) + "ry", # Y radius (ellipse, rounded corners) + "width", # Width + "height", # Height + "x1", # Start X (line, gradient) + "y1", # Start Y (line, gradient) + "x2", # End X (line, gradient) + "y2", # End Y (line, gradient) + "dx", # X offset (text) + "dy", # Y offset (text) + "points", # Point list for polyline/polygon + # Path data + "d", # Path commands and coordinates + # Fill properties + "fill", # Fill color or none + "fill-opacity", # Fill transparency + "fill-rule", # Fill algorithm (nonzero/evenodd) + # Stroke properties + "stroke", # Stroke color or none + "stroke-width", # Stroke thickness + "stroke-opacity", # Stroke transparency + "stroke-linecap", # Line ending style (butt/round/square) + "stroke-linejoin", # Corner style (miter/round/bevel) + "stroke-miterlimit", # Miter join limit + "stroke-dasharray", # Dash pattern + "stroke-dashoffset", # Dash pattern offset + # Transforms and positioning + "transform", # Transformations (translate/rotate/scale) + "viewbox", # Coordinate system and viewport + "preserveaspectratio", # Scaling behavior + # Opacity + "opacity", # Overall element opacity + # Gradient attributes + "gradienttransform", # Transform applied to gradient + "gradientunits", # Gradient coordinate system + "offset", # Position of gradient stop + "stop-color", # Color at gradient stop + "stop-opacity", # Opacity at gradient stop + # Clipping and masking + "clip-path", # Reference to clipping path + "mask", # Reference to mask + # Markers + "marker-start", # Marker at path start + "marker-mid", # Marker at path vertices + "marker-end", # Marker at path end + # Text attributes + "font-family", # Font name + "font-size", # Font size + "font-weight", # Font weight (normal/bold) + "font-style", # Font style (normal/italic) + "text-anchor", # Text alignment (start/middle/end) + "text-decoration", # Text decoration (underline/etc) + "letter-spacing", # Space between letters + # Links and references + "href", # Link or reference (validate for javascript:!) + "xlink:href", # Legacy link reference (validate for javascript:!) + "xlink:title", # Accessible title for links + # Pattern attributes + "patternunits", # Pattern coordinate system + "patterntransform", # Transform applied to pattern + "patterncontentunits", # Pattern content coordinate system + # Mask attributes + "maskunits", # Mask coordinate system + "maskcontentunits", # Mask content coordinate system + # SVG namespace declarations + "xmlns", # XML namespace (usually http://www.w3.org/2000/svg) + "xmlns:xlink", # XLink namespace + "version", # SVG version "type", } +# Dangerous patterns in style attributes that can execute code +DANGEROUS_STYLE_PATTERNS: set[str] = { + "javascript:", # javascript: URLs in url() functions + "data:text/html", # HTML data URIs can contain scripts + "expression(", # IE's CSS expressions (legacy but dangerous) + "import", # CSS @import can load external resources + "@import", # CSS @import directive + "-moz-binding:", # Firefox XBL bindings (can execute code) + "behaviour:", # IE behavior property + "vbscript:", # VBScript URLs +} -def reject_dangerous_svg(file): +XLINK_NS: set[str] = { + "http://www.w3.org/1999/xlink", + "https://www.w3.org/1999/xlink", +} + +# Dangerous URI schemes +DANGEROUS_SCHEMES: set[str] = { + "javascript:", + "data:text/html", + "vbscript:", + "file:", + "data:application/", # Can contain scripts +} + +SAFE_PREFIXES: set[str] = {"#", "/", "./", "../", "data:image/"} + + +def reject_dangerous_svg(file: UploadedFile) -> None: """ Rejects SVG files that contain dangerous tags or attributes. Raises ValidationError if unsafe content is found. See GHSA-6p53-hqqw-8j62 """ + try: parser = etree.XMLParser(resolve_entities=False) file.seek(0) @@ -90,17 +163,56 @@ def reject_dangerous_svg(file): raise ValidationError("Invalid SVG file.") for element in root.iter(): - tag = etree.QName(element.tag).localname.lower() + tag: str = etree.QName(element.tag).localname.lower() if tag not in ALLOWED_SVG_TAGS: raise ValidationError(f"Disallowed SVG tag: <{tag}>") + attr_name: str + attr_value: str for attr_name, attr_value in element.attrib.items(): - attr_name_lower = attr_name.lower() + # lxml expands namespaces to {url}name. We must convert the standard + # XLink namespace back to 'xlink:' so it matches our allowlist. + if attr_name.startswith("{"): + qname = etree.QName(attr_name) + if qname.namespace in XLINK_NS: + attr_name_check = f"xlink:{qname.localname}" + else: + # Unknown namespace: keep raw name (will fail allowlist) + attr_name_check = attr_name + else: + attr_name_check = attr_name + + attr_name_lower = attr_name_check.lower().strip() + if attr_name_lower not in ALLOWED_SVG_ATTRIBUTES: raise ValidationError(f"Disallowed SVG attribute: {attr_name}") - if attr_name_lower in { - "href", - "xlink:href", - } and attr_value.strip().lower().startswith("javascript:"): - raise ValidationError(f"Disallowed javascript: URI in {attr_name}") + if attr_name_lower == "style": + style_lower: str = attr_value.lower() + # Check if any dangerous pattern is a substring of the style + for pattern in DANGEROUS_STYLE_PATTERNS: + if pattern in style_lower: + raise ValidationError( + f"Disallowed pattern in style attribute: {pattern}", + ) + + # Validate URI attributes (href, xlink:href) + if attr_name_lower in {"href", "xlink:href"}: + value_stripped: str = attr_value.strip().lower() + + # Check if value starts with any dangerous scheme + for scheme in DANGEROUS_SCHEMES: + if value_stripped.startswith(scheme): + raise ValidationError( + f"Disallowed URI scheme in {attr_name}: {scheme}", + ) + + # Allow safe schemes for logos: #anchor, relative paths, data:image/* + # No external resources (http/https) needed for logos + + if value_stripped and not any( + value_stripped.startswith(prefix) for prefix in SAFE_PREFIXES + ): + raise ValidationError( + f"URI scheme not allowed in {attr_name}: must be #anchor, relative path, or data:image/*", + )