mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Enhancement: improve validation of custom field values (#5166)
* Support all URI schemes * Reworks custom field value validation to check and return a 400 error code in more cases and support more URL looking items, not just some basic schemes * Fixes a spelling error in the message --------- Co-authored-by: Trenton H <797416+stumpylog@users.noreply.github.com>
This commit is contained in:
		@@ -2,6 +2,7 @@ import datetime
 | 
			
		||||
import math
 | 
			
		||||
import re
 | 
			
		||||
import zoneinfo
 | 
			
		||||
from decimal import Decimal
 | 
			
		||||
 | 
			
		||||
import magic
 | 
			
		||||
from celery import states
 | 
			
		||||
@@ -9,7 +10,9 @@ from django.conf import settings
 | 
			
		||||
from django.contrib.auth.models import Group
 | 
			
		||||
from django.contrib.auth.models import User
 | 
			
		||||
from django.contrib.contenttypes.models import ContentType
 | 
			
		||||
from django.core.validators import URLValidator
 | 
			
		||||
from django.core.validators import DecimalValidator
 | 
			
		||||
from django.core.validators import MaxLengthValidator
 | 
			
		||||
from django.core.validators import integer_validator
 | 
			
		||||
from django.utils.crypto import get_random_string
 | 
			
		||||
from django.utils.text import slugify
 | 
			
		||||
from django.utils.translation import gettext as _
 | 
			
		||||
@@ -41,6 +44,7 @@ from documents.models import UiSettings
 | 
			
		||||
from documents.parsers import is_mime_type_supported
 | 
			
		||||
from documents.permissions import get_groups_with_only_permission
 | 
			
		||||
from documents.permissions import set_permissions_for_object
 | 
			
		||||
from documents.validators import uri_validator
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# https://www.django-rest-framework.org/api-guide/serializers/#example
 | 
			
		||||
@@ -489,17 +493,29 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
 | 
			
		||||
 | 
			
		||||
    def validate(self, data):
 | 
			
		||||
        """
 | 
			
		||||
        For some reason, URLField validation is not run against the value
 | 
			
		||||
        automatically.  Force it to run against the value
 | 
			
		||||
        Probably because we're kind of doing it odd, validation from the model
 | 
			
		||||
        doesn't run against the field "value", so we have to re-create it here.
 | 
			
		||||
 | 
			
		||||
        Don't like it, but it is better than returning an HTTP 500 when the database
 | 
			
		||||
        hates the value
 | 
			
		||||
        """
 | 
			
		||||
        data = super().validate(data)
 | 
			
		||||
        field: CustomField = data["field"]
 | 
			
		||||
        if (
 | 
			
		||||
            field.data_type == CustomField.FieldDataType.URL
 | 
			
		||||
            and data["value"] is not None
 | 
			
		||||
            and len(data["value"]) > 0
 | 
			
		||||
        ):
 | 
			
		||||
            URLValidator()(data["value"])
 | 
			
		||||
        if "value" in data and data["value"] is not None:
 | 
			
		||||
            if (
 | 
			
		||||
                field.data_type == CustomField.FieldDataType.URL
 | 
			
		||||
                and len(data["value"]) > 0
 | 
			
		||||
            ):
 | 
			
		||||
                uri_validator(data["value"])
 | 
			
		||||
            elif field.data_type == CustomField.FieldDataType.INT:
 | 
			
		||||
                integer_validator(data["value"])
 | 
			
		||||
            elif field.data_type == CustomField.FieldDataType.MONETARY:
 | 
			
		||||
                DecimalValidator(max_digits=12, decimal_places=2)(
 | 
			
		||||
                    Decimal(str(data["value"])),
 | 
			
		||||
                )
 | 
			
		||||
            elif field.data_type == CustomField.FieldDataType.STRING:
 | 
			
		||||
                MaxLengthValidator(limit_value=128)(data["value"])
 | 
			
		||||
 | 
			
		||||
        return data
 | 
			
		||||
 | 
			
		||||
    def reflect_doclinks(
 | 
			
		||||
 
 | 
			
		||||
@@ -333,19 +333,17 @@ class TestCustomField(DirectoriesMixin, APITestCase):
 | 
			
		||||
            },
 | 
			
		||||
            format="json",
 | 
			
		||||
        )
 | 
			
		||||
        from pprint import pprint
 | 
			
		||||
 | 
			
		||||
        pprint(resp.json())
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
        self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
        self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
 | 
			
		||||
    def test_custom_field_value_validation(self):
 | 
			
		||||
    def test_custom_field_value_url_validation(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - Document & custom field exist
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - API request to set a field value
 | 
			
		||||
            - API request to set a field value to something which is or is not a link
 | 
			
		||||
        THEN:
 | 
			
		||||
            - HTTP 400 is returned
 | 
			
		||||
            - No field instance is created or attached to the document
 | 
			
		||||
@@ -360,31 +358,62 @@ class TestCustomField(DirectoriesMixin, APITestCase):
 | 
			
		||||
            name="Test Custom Field URL",
 | 
			
		||||
            data_type=CustomField.FieldDataType.URL,
 | 
			
		||||
        )
 | 
			
		||||
        custom_field_int = CustomField.objects.create(
 | 
			
		||||
            name="Test Custom Field INT",
 | 
			
		||||
            data_type=CustomField.FieldDataType.INT,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        for value in ["not a url", "file:"]:
 | 
			
		||||
            with self.subTest(f"Test value {value}"):
 | 
			
		||||
                resp = self.client.patch(
 | 
			
		||||
                    f"/api/documents/{doc.id}/",
 | 
			
		||||
                    data={
 | 
			
		||||
                        "custom_fields": [
 | 
			
		||||
                            {
 | 
			
		||||
                                "field": custom_field_url.id,
 | 
			
		||||
                                "value": value,
 | 
			
		||||
                            },
 | 
			
		||||
                        ],
 | 
			
		||||
                    },
 | 
			
		||||
                    format="json",
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
                self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
                self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
                self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
        resp = self.client.patch(
 | 
			
		||||
            f"/api/documents/{doc.id}/",
 | 
			
		||||
            data={
 | 
			
		||||
                "custom_fields": [
 | 
			
		||||
                    {
 | 
			
		||||
                        "field": custom_field_url.id,
 | 
			
		||||
                        "value": "not a url",
 | 
			
		||||
                        "value": "tel:+1-816-555-1212",
 | 
			
		||||
                    },
 | 
			
		||||
                ],
 | 
			
		||||
            },
 | 
			
		||||
            format="json",
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
        self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
        self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_200_OK)
 | 
			
		||||
 | 
			
		||||
        self.assertRaises(
 | 
			
		||||
            Exception,
 | 
			
		||||
            self.client.patch,
 | 
			
		||||
    def test_custom_field_value_integer_validation(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - Document & custom field exist
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - API request to set a field value to something not an integer
 | 
			
		||||
        THEN:
 | 
			
		||||
            - HTTP 400 is returned
 | 
			
		||||
            - No field instance is created or attached to the document
 | 
			
		||||
        """
 | 
			
		||||
        doc = Document.objects.create(
 | 
			
		||||
            title="WOW",
 | 
			
		||||
            content="the content",
 | 
			
		||||
            checksum="123",
 | 
			
		||||
            mime_type="application/pdf",
 | 
			
		||||
        )
 | 
			
		||||
        custom_field_int = CustomField.objects.create(
 | 
			
		||||
            name="Test Custom Field INT",
 | 
			
		||||
            data_type=CustomField.FieldDataType.INT,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        resp = self.client.patch(
 | 
			
		||||
            f"/api/documents/{doc.id}/",
 | 
			
		||||
            data={
 | 
			
		||||
                "custom_fields": [
 | 
			
		||||
@@ -397,6 +426,81 @@ class TestCustomField(DirectoriesMixin, APITestCase):
 | 
			
		||||
            format="json",
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
        self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
        self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
 | 
			
		||||
    def test_custom_field_value_monetary_validation(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - Document & custom field exist
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - API request to set a field value to something not a valid monetary decimal
 | 
			
		||||
        THEN:
 | 
			
		||||
            - HTTP 400 is returned
 | 
			
		||||
            - No field instance is created or attached to the document
 | 
			
		||||
        """
 | 
			
		||||
        doc = Document.objects.create(
 | 
			
		||||
            title="WOW",
 | 
			
		||||
            content="the content",
 | 
			
		||||
            checksum="123",
 | 
			
		||||
            mime_type="application/pdf",
 | 
			
		||||
        )
 | 
			
		||||
        custom_field_money = CustomField.objects.create(
 | 
			
		||||
            name="Test Custom Field MONETARY",
 | 
			
		||||
            data_type=CustomField.FieldDataType.MONETARY,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        resp = self.client.patch(
 | 
			
		||||
            f"/api/documents/{doc.id}/",
 | 
			
		||||
            data={
 | 
			
		||||
                "custom_fields": [
 | 
			
		||||
                    {
 | 
			
		||||
                        "field": custom_field_money.id,
 | 
			
		||||
                        # Too many places past decimal
 | 
			
		||||
                        "value": 12.123,
 | 
			
		||||
                    },
 | 
			
		||||
                ],
 | 
			
		||||
            },
 | 
			
		||||
            format="json",
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
        self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
        self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
 | 
			
		||||
    def test_custom_field_value_short_text_validation(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - Document & custom field exist
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - API request to set a field value to a too long string
 | 
			
		||||
        THEN:
 | 
			
		||||
            - HTTP 400 is returned
 | 
			
		||||
            - No field instance is created or attached to the document
 | 
			
		||||
        """
 | 
			
		||||
        doc = Document.objects.create(
 | 
			
		||||
            title="WOW",
 | 
			
		||||
            content="the content",
 | 
			
		||||
            checksum="123",
 | 
			
		||||
            mime_type="application/pdf",
 | 
			
		||||
        )
 | 
			
		||||
        custom_field_string = CustomField.objects.create(
 | 
			
		||||
            name="Test Custom Field STRING",
 | 
			
		||||
            data_type=CustomField.FieldDataType.STRING,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        resp = self.client.patch(
 | 
			
		||||
            f"/api/documents/{doc.id}/",
 | 
			
		||||
            data={
 | 
			
		||||
                "custom_fields": [
 | 
			
		||||
                    {"field": custom_field_string.id, "value": "a" * 129},
 | 
			
		||||
                ],
 | 
			
		||||
            },
 | 
			
		||||
            format="json",
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
 | 
			
		||||
        self.assertEqual(CustomFieldInstance.objects.count(), 0)
 | 
			
		||||
        self.assertEqual(len(doc.custom_fields.all()), 0)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										29
									
								
								src/documents/validators.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/documents/validators.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,29 @@
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
 | 
			
		||||
from django.core.exceptions import ValidationError
 | 
			
		||||
from django.utils.translation import gettext_lazy as _
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def uri_validator(value) -> None:
 | 
			
		||||
    """
 | 
			
		||||
    Raises a ValidationError if the given value does not parse as an
 | 
			
		||||
    URI looking thing, which we're defining as a scheme and either network
 | 
			
		||||
    location or path value
 | 
			
		||||
    """
 | 
			
		||||
    try:
 | 
			
		||||
        parts = urlparse(value)
 | 
			
		||||
        if not parts.scheme:
 | 
			
		||||
            raise ValidationError(
 | 
			
		||||
                _(f"Unable to parse URI {value}, missing scheme"),
 | 
			
		||||
                params={"value": value},
 | 
			
		||||
            )
 | 
			
		||||
        elif not parts.netloc and not parts.path:
 | 
			
		||||
            raise ValidationError(
 | 
			
		||||
                _(f"Unable to parse URI {value}, missing net location or path"),
 | 
			
		||||
                params={"value": value},
 | 
			
		||||
            )
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        raise ValidationError(
 | 
			
		||||
            _(f"Unable to parse URI {value}"),
 | 
			
		||||
            params={"value": value},
 | 
			
		||||
        ) from e
 | 
			
		||||
		Reference in New Issue
	
	Block a user