diff --git a/packages/overture-schema-addresses-theme/tests/address_baseline_schema.json b/packages/overture-schema-addresses-theme/tests/address_baseline_schema.json index 46d07f176..1ae5a6ab6 100644 --- a/packages/overture-schema-addresses-theme/tests/address_baseline_schema.json +++ b/packages/overture-schema-addresses-theme/tests/address_baseline_schema.json @@ -7,7 +7,7 @@ "value": { "description": "String with no leading/trailing whitespace", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" } @@ -45,7 +45,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, @@ -160,21 +160,21 @@ "number": { "description": "The house number.\n\nThis field does not necessarily contain an integer or even a number. Values such as\n\"74B\", \"189 1/2\", and \"208.5\", where the non-integer or non-number part is part of\nthe house number, not a unit number, are in common use.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Number", "type": "string" }, "postal_city": { "description": "The postal authority designated city name, if applicable.\n\nIn some countries or regions, a mailing address may need to specify a different city\nname than the city that actually contains the address coordinates. This optional\nfield can be used to specify the alternate city name to use.\n\nFor example:\n\n- The postal city for the US address *716 East County Road, Winchester, Indiana*\n is Ridgeville.\n- The postal city for the Slovenian address *Tomaj 71, 6221 Tomaj, Slovenia* is\n Dutovlje.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Postal City", "type": "string" }, "postcode": { "description": "The postal code.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Postcode", "type": "string" }, @@ -191,7 +191,7 @@ "street": { "description": "The street name.\n\nThe street name can include a type (*e.g.*, \"Street\" or \"St\", \"Boulevard\" or \"Blvd\",\n*etc.*) and a directional (*e.g.*, \"NW\" or \"Northwest\", \"S\" or \"Sud\"). Both type and\ndirectional, if present, may be either a prefix or a suffix to the primary name.\nThey may either be fully spelled-out or abbreviated.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Street", "type": "string" }, @@ -208,7 +208,7 @@ "unit": { "description": "The secondary address unit designator.\n\nIn the case where the primary street address is divided into secondary units, which\nmay be apartments, floors, or even buildings if the primary street address is a\ncampus, this field names the specific secondary unit being addressed.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Unit", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/bathymetry_baseline_schema.json b/packages/overture-schema-base-theme/tests/bathymetry_baseline_schema.json index 8f319f90f..a36763e92 100644 --- a/packages/overture-schema-base-theme/tests/bathymetry_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/bathymetry_baseline_schema.json @@ -66,7 +66,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/infrastructure_baseline_schema.json b/packages/overture-schema-base-theme/tests/infrastructure_baseline_schema.json index 7427b2a68..625a8fd17 100644 --- a/packages/overture-schema-base-theme/tests/infrastructure_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/infrastructure_baseline_schema.json @@ -229,7 +229,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -265,7 +265,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -278,7 +278,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -375,7 +375,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/land_baseline_schema.json b/packages/overture-schema-base-theme/tests/land_baseline_schema.json index deded69f6..b6df0a401 100644 --- a/packages/overture-schema-base-theme/tests/land_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/land_baseline_schema.json @@ -102,7 +102,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -138,7 +138,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -151,7 +151,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -248,7 +248,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/land_cover_baseline_schema.json b/packages/overture-schema-base-theme/tests/land_cover_baseline_schema.json index ae2d03838..79dea27fe 100644 --- a/packages/overture-schema-base-theme/tests/land_cover_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/land_cover_baseline_schema.json @@ -83,7 +83,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/land_use_baseline_schema.json b/packages/overture-schema-base-theme/tests/land_use_baseline_schema.json index 2984bf3af..e1d461ece 100644 --- a/packages/overture-schema-base-theme/tests/land_use_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/land_use_baseline_schema.json @@ -180,7 +180,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -216,7 +216,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -229,7 +229,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -326,7 +326,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-base-theme/tests/water_baseline_schema.json b/packages/overture-schema-base-theme/tests/water_baseline_schema.json index 09c4472f4..b99811f95 100644 --- a/packages/overture-schema-base-theme/tests/water_baseline_schema.json +++ b/packages/overture-schema-base-theme/tests/water_baseline_schema.json @@ -33,7 +33,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -69,7 +69,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -82,7 +82,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -179,7 +179,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-buildings-theme/tests/building_baseline_schema.json b/packages/overture-schema-buildings-theme/tests/building_baseline_schema.json index e3c151808..ec4b34d32 100644 --- a/packages/overture-schema-buildings-theme/tests/building_baseline_schema.json +++ b/packages/overture-schema-buildings-theme/tests/building_baseline_schema.json @@ -165,7 +165,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -201,7 +201,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -214,7 +214,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -362,7 +362,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-buildings-theme/tests/building_part_baseline_schema.json b/packages/overture-schema-buildings-theme/tests/building_part_baseline_schema.json index cdb525c5d..0d22d8df2 100644 --- a/packages/overture-schema-buildings-theme/tests/building_part_baseline_schema.json +++ b/packages/overture-schema-buildings-theme/tests/building_part_baseline_schema.json @@ -51,7 +51,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -87,7 +87,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -100,7 +100,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -248,7 +248,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-core/tests/test_models.py b/packages/overture-schema-core/tests/test_models.py index c674ab5e1..c36510bea 100644 --- a/packages/overture-schema-core/tests/test_models.py +++ b/packages/overture-schema-core/tests/test_models.py @@ -58,7 +58,7 @@ def test_feature_json_schema() -> None: "property": {"type": "string"}, "dataset": {"type": "string"}, "license": { - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string", }, "record_id": {"type": "string"}, diff --git a/packages/overture-schema-divisions-theme/tests/division_area_baseline_schema.json b/packages/overture-schema-divisions-theme/tests/division_area_baseline_schema.json index f3e8c2abd..b6eec6130 100644 --- a/packages/overture-schema-divisions-theme/tests/division_area_baseline_schema.json +++ b/packages/overture-schema-divisions-theme/tests/division_area_baseline_schema.json @@ -42,7 +42,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -78,7 +78,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -91,7 +91,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -207,7 +207,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-divisions-theme/tests/division_baseline_schema.json b/packages/overture-schema-divisions-theme/tests/division_baseline_schema.json index a2ee027b5..a34c36294 100644 --- a/packages/overture-schema-divisions-theme/tests/division_baseline_schema.json +++ b/packages/overture-schema-divisions-theme/tests/division_baseline_schema.json @@ -84,7 +84,7 @@ "name": { "description": "Primary name of the division", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Name", "type": "string" }, @@ -133,7 +133,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -169,7 +169,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -182,7 +182,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -310,7 +310,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, @@ -589,7 +589,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, diff --git a/packages/overture-schema-divisions-theme/tests/division_boundary_baseline_schema.json b/packages/overture-schema-divisions-theme/tests/division_boundary_baseline_schema.json index 336d2d484..6df53596a 100644 --- a/packages/overture-schema-divisions-theme/tests/division_boundary_baseline_schema.json +++ b/packages/overture-schema-divisions-theme/tests/division_boundary_baseline_schema.json @@ -96,7 +96,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-places-theme/tests/place_baseline_schema.json b/packages/overture-schema-places-theme/tests/place_baseline_schema.json index b4d37759b..6f862f4ed 100644 --- a/packages/overture-schema-places-theme/tests/place_baseline_schema.json +++ b/packages/overture-schema-places-theme/tests/place_baseline_schema.json @@ -117,7 +117,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -153,7 +153,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -166,7 +166,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -273,7 +273,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py index 8c2d90415..559095a95 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py @@ -3,7 +3,7 @@ """ import re -from typing import Any +from typing import Any, NoReturn from pydantic import ( GetCoreSchemaHandler, @@ -23,6 +23,23 @@ class StringConstraint(FieldConstraint): """Base class for string-based constraints.""" + def _raise_validation_error( + self, value: str, info: ValidationInfo, message: str + ) -> NoReturn: + context = info.context or {} + loc = context.get("loc_prefix", ()) + ("value",) + raise ValidationError.from_exception_data( + title=self.__class__.__name__, + line_errors=[ + InitErrorDetails( + type="value_error", + loc=loc, + input=value, + ctx={"error": message}, + ) + ], + ) + def __get_pydantic_core_schema__( self, source: type[Any], handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: @@ -38,26 +55,45 @@ def validate_string(value: str, info: ValidationInfo) -> str: class PatternConstraint(StringConstraint): - """Generic pattern-based string constraint.""" + """Generic pattern-based string constraint. + + Parameters + ---------- + pattern : str + Regular expression to match against. + error_message : str + Error message template. Use ``{value}`` to interpolate the failing + value (the only available placeholder). + flags : int + Regex flags passed to ``re.compile``. + description : str or None + JSON Schema ``description`` annotation. + min_length : int or None + JSON Schema ``minLength`` annotation. + max_length : int or None + JSON Schema ``maxLength`` annotation. + """ - def __init__(self, pattern: str, error_message: str, flags: int = 0): + def __init__( + self, + pattern: str, + error_message: str, + flags: int = 0, + *, + description: str | None = None, + min_length: int | None = None, + max_length: int | None = None, + ): self.pattern = re.compile(pattern, flags) self.error_message = error_message + self.description = description + self.min_length = min_length + self.max_length = max_length def validate(self, value: str, info: ValidationInfo) -> None: if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={"error": self.error_message.format(value=value)}, - ) - ], + self._raise_validation_error( + value, info, self.error_message.format(value=value) ) def __get_pydantic_json_schema__( @@ -65,6 +101,12 @@ def __get_pydantic_json_schema__( ) -> dict[str, Any]: json_schema = handler(core_schema) json_schema["pattern"] = self.pattern.pattern + if self.description is not None: + json_schema["description"] = self.description + if self.min_length is not None: + json_schema["minLength"] = self.min_length + if self.max_length is not None: + json_schema["maxLength"] = self.max_length return json_schema @@ -73,72 +115,28 @@ def __get_pydantic_json_schema__( ######################################################################## -class CountryCodeAlpha2Constraint(StringConstraint): +class CountryCodeAlpha2Constraint(PatternConstraint): """Allows only ISO 3166-1 alpha-2 country codes.""" def __init__(self) -> None: - self.pattern = re.compile(r"^[A-Z]{2}$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"Invalid ISO 3166-1 alpha-2 country code: {value}" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["minLength"] = 2 - json_schema["maxLength"] = 2 - json_schema["description"] = "ISO 3166-1 alpha-2 country code" - return json_schema + super().__init__( + pattern=r"^[A-Z]{2}$", + error_message="Invalid ISO 3166-1 alpha-2 country code: {value}", + description="ISO 3166-1 alpha-2 country code", + min_length=2, + max_length=2, + ) -class HexColorConstraint(StringConstraint): +class HexColorConstraint(PatternConstraint): """Allows only hexadecimal color codes (e.g., #FF0000 or #FFF).""" def __init__(self) -> None: - self.pattern = re.compile(r"^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"Invalid hexadecimal color format: {value}. Must be in format #RGB or #RRGGBB (e.g., #FFF or #FF0000)" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = "Hexadecimal color code in format #RGB or #RRGGBB" - return json_schema + super().__init__( + pattern=r"^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$", + error_message="Invalid hexadecimal color format: {value}. Must be in format #RGB or #RRGGBB (e.g., #FFF or #FF0000)", + description="Hexadecimal color code in format #RGB or #RRGGBB", + ) class JsonPointerConstraint(StringConstraint): @@ -150,20 +148,10 @@ def validate(self, value: str, info: ValidationInfo) -> None: return if not value.startswith("/"): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"JSON Pointer must start with '/' or be empty string: {value}" - }, - ) - ], + self._raise_validation_error( + value, + info, + f"JSON Pointer must start with '/' or be empty string: {value}", ) def __get_pydantic_json_schema__( @@ -174,7 +162,7 @@ def __get_pydantic_json_schema__( return json_schema -class LanguageTagConstraint(StringConstraint): +class LanguageTagConstraint(PatternConstraint): """ Allows only `BCP-47`_ language tags. @@ -188,109 +176,36 @@ class LanguageTagConstraint(StringConstraint): """ def __init__(self) -> None: - # In understanding the regular expression, remark that '(:?' indicates a non-capturing - # group, and that all the top-level or non-nested groups represent top-level components of - # `langtag` referenced in the syntax section of https://www.rfc-editor.org/rfc/bcp/bcp47.txt. - # In particular, the top-level groups in left-to-right order represent: - # - # 1. language - # 2. ["-" script] - # 3. ["-" region] - # 4. *("-" variant) - # 5. *("-" extension) - self.pattern = re.compile( - r"^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$" + # Top-level groups in the pattern (left-to-right) correspond to BCP-47 langtag components: + # 1. language, 2. ["-" script], 3. ["-" region], 4. *("-" variant), 5. *("-" extension) + # See: https://www.rfc-editor.org/rfc/bcp/bcp47.txt + super().__init__( + pattern=r"^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$", + error_message="Invalid IETF BCP-47 language tag: {value}", + description="IETF BCP-47 language tag", ) - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={"error": f"Invalid IETF BCP-47 language tag: {value}"}, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = "IETF BCP-47 language tag" - return json_schema - -class NoWhitespaceConstraint(StringConstraint): +class NoWhitespaceConstraint(PatternConstraint): """Allows only strings that contain no whitespace characters.""" def __init__(self) -> None: - self.pattern = re.compile(r"^\S+$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"String cannot contain whitespace characters: '{value}'" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = "String without whitespace characters" - return json_schema + super().__init__( + pattern=r"^\S+$", + error_message="String cannot contain whitespace characters: '{value}'", + description="String without whitespace characters", + ) -class SnakeCaseConstraint(StringConstraint): - """Allows only strings that look like snake case identifiers, *e.g.* `"foo_bar"`.""" +class SnakeCaseConstraint(PatternConstraint): + """Allows only strings that look like snake case identifiers, *e.g.* ``"foo_bar"``.""" def __init__(self) -> None: - self.pattern = re.compile(r"^[a-z0-9]+(_[a-z0-9]+)*$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"Invalid category format: {value}. Must be snake_case (lowercase letters, numbers, underscores)" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = "Category in snake_case format" - return json_schema + super().__init__( + pattern=r"^[a-z0-9]+(_[a-z0-9]+)*$", + error_message="Invalid category format: {value}. Must be snake_case (lowercase letters, numbers, underscores)", + description="Category in snake_case format", + ) class StrippedConstraint(StringConstraint): @@ -298,127 +213,51 @@ class StrippedConstraint(StringConstraint): def validate(self, value: str, info: ValidationInfo) -> None: if value != value.strip(): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"String cannot have leading or trailing whitespace: {repr(value)}" - }, - ) - ], + self._raise_validation_error( + value, + info, + f"String cannot have leading or trailing whitespace: {repr(value)}", ) def __get_pydantic_json_schema__( self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> dict[str, Any]: json_schema = handler(core_schema) - json_schema["pattern"] = r"^(\S.*)?\S$" + json_schema["pattern"] = r"^(\S(.*\S)?)?$" json_schema["description"] = "String with no leading/trailing whitespace" return json_schema -class PhoneNumberConstraint(StringConstraint): +class PhoneNumberConstraint(PatternConstraint): """Constraint for international phone numbers.""" def __init__(self) -> None: - self.pattern = re.compile(r"^\+\d{1,3}[\s\-\(\)0-9]+$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"Invalid phone number format: {value}. Must start with + and country code" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = ( - "International phone number (+ followed by country code and number)" + super().__init__( + pattern=r"^\+\d{1,3}[\s\-\(\)0-9]+$", + error_message="Invalid phone number format: {value}. Must start with + and country code", + description="International phone number (+ followed by country code and number)", ) - return json_schema -class RegionCodeConstraint(StringConstraint): +class RegionCodeConstraint(PatternConstraint): """ISO 3166-2 principal subdivision code constraint.""" def __init__(self) -> None: - self.pattern = re.compile(r"^[A-Z]{2}-[A-Z0-9]{1,3}$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={"error": f"Invalid ISO 3166-2 subdivision code: {value}"}, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["minLength"] = 4 - json_schema["maxLength"] = 6 - json_schema["description"] = "ISO 3166-2 subdivision code" - return json_schema + super().__init__( + pattern=r"^[A-Z]{2}-[A-Z0-9]{1,3}$", + error_message="Invalid ISO 3166-2 subdivision code: {value}", + description="ISO 3166-2 subdivision code", + min_length=4, + max_length=6, + ) -class WikidataIdConstraint(StringConstraint): +class WikidataIdConstraint(PatternConstraint): """Constraint for Wikidata identifiers (Q followed by digits).""" def __init__(self) -> None: - self.pattern = re.compile(r"^Q\d+$") - - def validate(self, value: str, info: ValidationInfo) -> None: - if not self.pattern.match(value): - context = info.context or {} - loc = context.get("loc_prefix", ()) + ("value",) - raise ValidationError.from_exception_data( - title=self.__class__.__name__, - line_errors=[ - InitErrorDetails( - type="value_error", - loc=loc, - input=value, - ctx={ - "error": f"Invalid Wikidata identifier: {value}. Must be Q followed by digits (e.g., Q123)" - }, - ) - ], - ) - - def __get_pydantic_json_schema__( - self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler - ) -> dict[str, Any]: - json_schema = handler(core_schema) - json_schema["pattern"] = self.pattern.pattern - json_schema["description"] = "Wikidata identifier (Q followed by digits)" - return json_schema + super().__init__( + pattern=r"^Q\d+$", + error_message="Invalid Wikidata identifier: {value}. Must be Q followed by digits (e.g., Q123)", + description="Wikidata identifier (Q followed by digits)", + ) diff --git a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py index 6047e896c..6a6a21c72 100644 --- a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py +++ b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py @@ -17,6 +17,66 @@ WikidataIdConstraint, ) +PATTERN_CONSTRAINT_CASES = [ + ( + LanguageTagConstraint, + ["en", "en-US", "en-GB", "zh-CN", "fr-CA", "es-MX"], + ["invalid-tag-format", "123", "en_US", "toolongcode"], + "Invalid IETF BCP-47 language tag", + ), + ( + CountryCodeAlpha2Constraint, + ["US", "GB", "CA", "FR", "DE", "JP", "CN", "BR"], + ["USA", "123", "invalid", "gb", "us"], + "Invalid ISO 3166-1 alpha-2 country code", + ), + ( + RegionCodeConstraint, + ["US-CA", "GB-ENG", "CA-ON", "FR-75", "DE-BY"], + ["US", "123-45", "invalid-region", "us-ca"], + "Invalid ISO 3166-2 subdivision code", + ), + ( + WikidataIdConstraint, + ["Q1", "Q123", "Q999999", "Q1234567890"], + ["q123", "P123", "Q", "123", "Q12abc"], + "Invalid Wikidata identifier", + ), + ( + PhoneNumberConstraint, + ["+1-555-123-4567", "+44-20-7946-0958", "+33-1-42-86-83-26", "+81-3-1234-5678"], + ["555-123-4567", "1-555-123-4567", "not-a-phone"], + "Invalid phone number format", + ), + ( + HexColorConstraint, + ["#FFFFFF", "#000000", "#FF0000", "#ffffff", "#FFF", "#fff", "#ABC", "#123"], + ["FFFFFF", "#FF", "#FFFFFFF", "#GGGGGG", "red", "#", "#FFFF"], + "Invalid hexadecimal color format", + ), + ( + NoWhitespaceConstraint, + ["hello", "identifier123", "snake_case_id", "kebab-case-id", "camelCaseId"], + [ + "hello world", + "id with spaces", + "tab\tcharacter", + "new\nline", + "carriage\rreturn", + ], + "cannot contain whitespace", + ), + ( + SnakeCaseConstraint, + ["restaurant", "gas_station", "shopping_mall", "coffee_shop", "bank_atm"], + ["Restaurant", "gas-station", "shopping mall", "category!"], + "Invalid category format", + ), +] + + +PATTERN_CONSTRAINT_IDS = [cls.__name__ for cls, *_ in PATTERN_CONSTRAINT_CASES] + class TestStringConstraints: """Test all string-based constraints.""" @@ -28,7 +88,6 @@ def test_pattern_constraint_valid(self) -> None: class TestModel(BaseModel): code: Annotated[str, constraint] - # Valid values model = TestModel(code="US") assert model.code == "US" @@ -42,7 +101,6 @@ def test_pattern_constraint_invalid(self) -> None: class TestModel(BaseModel): code: Annotated[str, constraint] - # Invalid values with pytest.raises(ValidationError) as exc_info: TestModel(code="usa") assert "Must be 2 uppercase letters" in str(exc_info.value) @@ -50,85 +108,46 @@ class TestModel(BaseModel): with pytest.raises(ValidationError): TestModel(code="123") - def test_language_tag_constraint_valid(self) -> None: - """Test LanguageTagConstraint with valid language tags.""" - - class TestModel(BaseModel): - language: Annotated[str, LanguageTagConstraint()] - - # Valid language tags - valid_tags = ["en", "en-US", "en-GB", "zh-CN", "fr-CA", "es-MX"] - - for tag in valid_tags: - model = TestModel(language=tag) - assert model.language == tag - - def test_language_tag_constraint_invalid(self) -> None: - """Test LanguageTagConstraint with invalid language tags.""" - - class TestModel(BaseModel): - language: Annotated[str, LanguageTagConstraint()] - - invalid_tags = ["invalid-tag-format", "123", "en_US", "toolongcode"] - - for tag in invalid_tags: - with pytest.raises(ValidationError) as exc_info: - TestModel(language=tag) - assert "Invalid IETF BCP-47 language tag" in str(exc_info.value) - - def test_country_code_constraint_valid(self) -> None: - """Test CountryCodeAlpha2Constraint with valid ISO 3166-1 alpha-2 codes.""" - - class TestModel(BaseModel): - country: Annotated[str, CountryCodeAlpha2Constraint()] - - valid_codes = ["US", "GB", "CA", "FR", "DE", "JP", "CN", "BR"] - - for code in valid_codes: - model = TestModel(country=code) - assert model.country == code - - def test_country_code_constraint_invalid(self) -> None: - """Test CountryCodeAlpha2Constraint with invalid country codes.""" - - class TestModel(BaseModel): - country: Annotated[str, CountryCodeAlpha2Constraint()] - - invalid_codes = ["USA", "123", "invalid", "gb", "us"] - - for code in invalid_codes: - with pytest.raises(ValidationError) as exc_info: - TestModel(country=code) - assert "Invalid ISO 3166-1 alpha-2 country code" in str(exc_info.value) - - def test_region_code_constraint_valid(self) -> None: - """Test RegionCodeConstraint with valid ISO 3166-2 codes.""" - + @pytest.mark.parametrize( + "constraint_cls,valid,invalid,error_substr", + PATTERN_CONSTRAINT_CASES, + ids=PATTERN_CONSTRAINT_IDS, + ) + def test_subclass_valid( + self, + constraint_cls: type, + valid: list[str], + invalid: list[str], + error_substr: str, + ) -> None: class TestModel(BaseModel): - region: Annotated[str, RegionCodeConstraint()] - - valid_codes = ["US-CA", "GB-ENG", "CA-ON", "FR-75", "DE-BY"] - - for code in valid_codes: - model = TestModel(region=code) - assert model.region == code - - def test_region_code_constraint_invalid(self) -> None: - """Test RegionCodeConstraint with invalid region codes.""" - + value: Annotated[str, constraint_cls()] + + for v in valid: + model = TestModel(value=v) + assert model.value == v + + @pytest.mark.parametrize( + "constraint_cls,valid,invalid,error_substr", + PATTERN_CONSTRAINT_CASES, + ids=PATTERN_CONSTRAINT_IDS, + ) + def test_subclass_invalid( + self, + constraint_cls: type, + valid: list[str], + invalid: list[str], + error_substr: str, + ) -> None: class TestModel(BaseModel): - region: Annotated[str, RegionCodeConstraint()] + value: Annotated[str, constraint_cls()] - invalid_codes = ["US", "123-45", "invalid-region", "us-ca"] - - for code in invalid_codes: + for v in invalid: with pytest.raises(ValidationError) as exc_info: - TestModel(region=code) - assert "Invalid ISO 3166-2 subdivision code" in str(exc_info.value) + TestModel(value=v) + assert error_substr in str(exc_info.value) def test_json_pointer_constraint_valid(self) -> None: - """Test JsonPointerConstraint with valid JSON pointers.""" - class TestModel(BaseModel): pointer: Annotated[str, JsonPointerConstraint()] @@ -138,8 +157,8 @@ class TestModel(BaseModel): "/foo/bar", "/0", "/foo/0/bar", - "/~0", # Represents ~ - "/~1", # Represents / + "/~0", + "/~1", ] for ptr in valid_pointers: @@ -147,211 +166,31 @@ class TestModel(BaseModel): assert model.pointer == ptr def test_json_pointer_constraint_invalid(self) -> None: - """Test JsonPointerConstraint with invalid JSON pointers.""" - class TestModel(BaseModel): pointer: Annotated[str, JsonPointerConstraint()] - invalid_pointers = [ - "foo", # Must start with / - "foo/bar", # Must start with / - ] - - for ptr in invalid_pointers: + for ptr in ["foo", "foo/bar"]: with pytest.raises(ValidationError) as exc_info: TestModel(pointer=ptr) assert "JSON Pointer must start" in str(exc_info.value) def test_whitespace_constraint_valid(self) -> None: - """Test WhitespaceConstraint with valid strings (no leading/trailing - whitespace).""" - class TestModel(BaseModel): text: Annotated[str, StrippedConstraint()] - valid_strings = [ - "hello", - "hello world", - "text with internal spaces", - "", # Empty string is valid - ] - - for text in valid_strings: + for text in ["hello", "hello world", "text with internal spaces", ""]: model = TestModel(text=text) assert model.text == text def test_whitespace_constraint_invalid(self) -> None: - """Test WhitespaceConstraint with invalid strings (leading/trailing - whitespace).""" - class TestModel(BaseModel): text: Annotated[str, StrippedConstraint()] - invalid_strings = [ - " hello", # Leading space - "hello ", # Trailing space - "\thello", # Leading tab - "hello\n", # Trailing newline - " hello world ", # Both leading and trailing - ] - - for text in invalid_strings: + for text in [" hello", "hello ", "\thello", "hello\n", " hello world "]: with pytest.raises(ValidationError) as exc_info: TestModel(text=text) assert "cannot have leading or trailing whitespace" in str(exc_info.value) - def test_wikidata_constraint_valid(self) -> None: - """Test WikidataConstraint with valid Wikidata identifiers.""" - - class TestModel(BaseModel): - wikidata_id: Annotated[str, WikidataIdConstraint()] - - valid_ids = ["Q1", "Q123", "Q999999", "Q1234567890"] - - for wid in valid_ids: - model = TestModel(wikidata_id=wid) - assert model.wikidata_id == wid - - def test_wikidata_constraint_invalid(self) -> None: - """Test WikidataConstraint with invalid Wikidata identifiers.""" - - class TestModel(BaseModel): - wikidata_id: Annotated[str, WikidataIdConstraint()] - - invalid_ids = [ - "q123", # Lowercase q - "P123", # Property instead of item - "Q", # Missing number - "123", # Missing Q prefix - "Q12abc", # Non-numeric suffix - ] - - for wid in invalid_ids: - with pytest.raises(ValidationError) as exc_info: - TestModel(wikidata_id=wid) - assert "Invalid Wikidata identifier" in str(exc_info.value) - - def test_phone_number_constraint_valid(self) -> None: - """Test PhoneNumberConstraint with valid international phone numbers.""" - - class TestModel(BaseModel): - phone: Annotated[str, PhoneNumberConstraint()] - - valid_phones = [ - "+1-555-123-4567", - "+44-20-7946-0958", - "+33-1-42-86-83-26", - "+81-3-1234-5678", - "+86-10-8888-8888", - ] - - for phone in valid_phones: - model = TestModel(phone=phone) - assert model.phone == phone - - def test_phone_number_constraint_invalid(self) -> None: - """Test PhoneNumberConstraint with invalid phone numbers.""" - - class TestModel(BaseModel): - phone: Annotated[str, PhoneNumberConstraint()] - - invalid_phones = [ - "555-123-4567", # Missing country code - "1-555-123-4567", # Missing + - "not-a-phone", # Not a phone number - ] - - for phone in invalid_phones: - with pytest.raises(ValidationError) as exc_info: - TestModel(phone=phone) - assert "Invalid phone number format" in str(exc_info.value) - - def test_hex_color_constraint_valid(self) -> None: - """Test HexColorConstraint with valid hex colors.""" - - class TestModel(BaseModel): - color: Annotated[str, HexColorConstraint()] - - valid_colors = [ - "#FFFFFF", - "#000000", - "#FF0000", - "#00FF00", - "#0000FF", - "#ABCDEF", - "#123456", - "#ffffff", # lowercase - "#abcdef", # lowercase - "#FFF", # 3-character uppercase - "#fff", # 3-character lowercase - "#ABC", # 3-character mixed case - "#123", # 3-character numbers - ] - - for color in valid_colors: - model = TestModel(color=color) - assert model.color == color - - def test_hex_color_constraint_invalid(self) -> None: - """Test HexColorConstraint with invalid hex colors.""" - - class TestModel(BaseModel): - color: Annotated[str, HexColorConstraint()] - - invalid_colors = [ - "FFFFFF", # Missing # - "#FF", # Too short (2 chars) - "#FFFFFFF", # Too long (7 chars) - "#GGGGGG", # Invalid hex characters - "red", # Not hex - "#", # Just hash - "#FFFF", # Invalid length (4 chars) - ] - - for color in invalid_colors: - with pytest.raises(ValidationError) as exc_info: - TestModel(color=color) - # Just check that validation fails - message may vary - assert len(exc_info.value.errors()) > 0 - - def test_no_whitespace_constraint_valid(self) -> None: - """Test NoWhitespaceConstraint with valid strings (no whitespace).""" - - class TestModel(BaseModel): - identifier: Annotated[str, NoWhitespaceConstraint()] - - valid_identifiers = [ - "hello", - "identifier123", - "snake_case_id", - "kebab-case-id", - "camelCaseId", - ] - - for ident in valid_identifiers: - model = TestModel(identifier=ident) - assert model.identifier == ident - - def test_no_whitespace_constraint_invalid(self) -> None: - """Test NoWhitespaceConstraint with invalid strings (containing whitespace).""" - - class TestModel(BaseModel): - identifier: Annotated[str, NoWhitespaceConstraint()] - - invalid_identifiers = [ - "hello world", - "id with spaces", - "tab\tcharacter", - "new\nline", - "carriage\rreturn", - ] - - for ident in invalid_identifiers: - with pytest.raises(ValidationError) as exc_info: - TestModel(identifier=ident) - # Just check that validation fails - message may vary - assert len(exc_info.value.errors()) > 0 - class TestJsonSchemaGeneration: """Test JSON schema generation for all constraints.""" @@ -375,6 +214,23 @@ class TestModel(BaseModel): # Check descriptions assert "IETF BCP-47 language tag" in props["language"].get("description", "") + def test_stripped_constraint_json_schema_pattern(self) -> None: + """StrippedConstraint's JSON schema pattern accepts empty string + and rejects leading/trailing whitespace.""" + import re + + class TestModel(BaseModel): + text: Annotated[str, StrippedConstraint()] + + schema = TestModel.model_json_schema() + pattern = re.compile(schema["properties"]["text"]["pattern"]) + + assert pattern.match("") is not None + assert pattern.match("a") is not None + assert pattern.match("a b c") is not None + assert pattern.match(" leading") is None + assert pattern.match("trailing ") is None + class TestErrorHandling: """Test error handling and validation context.""" @@ -424,38 +280,58 @@ class TestModel(BaseModel): error = exc_info.value assert error.error_count() >= 1 - def test_snake_case_constraint_valid(self) -> None: - """Test CategoryPatternConstraint with valid snake_case patterns.""" + +class TestPatternConstraintHierarchy: + """Test that pattern-based constraints extend PatternConstraint.""" + + @pytest.mark.parametrize( + "constraint_cls", + [ + CountryCodeAlpha2Constraint, + HexColorConstraint, + LanguageTagConstraint, + NoWhitespaceConstraint, + SnakeCaseConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + WikidataIdConstraint, + ], + ) + def test_pattern_constraints_are_pattern_constraint_instances( + self, constraint_cls: type + ) -> None: + assert isinstance(constraint_cls(), PatternConstraint) + + def test_pattern_constraint_with_description_kwargs(self) -> None: + """Bare PatternConstraint with description/length kwargs emits correct JSON schema.""" + constraint = PatternConstraint( + r"^[A-Z]{2}$", + "Must be 2 uppercase letters", + description="Two letter code", + min_length=2, + max_length=2, + ) class TestModel(BaseModel): - category: Annotated[str, SnakeCaseConstraint()] - - valid_categories = [ - "restaurant", - "gas_station", - "shopping_mall", - "coffee_shop", - "bank_atm", - ] + code: Annotated[str, constraint] - for cat in valid_categories: - model = TestModel(category=cat) - assert model.category == cat + schema = TestModel.model_json_schema() + props = schema["properties"]["code"] + assert props["pattern"] == "^[A-Z]{2}$" + assert props["description"] == "Two letter code" + assert props["minLength"] == 2 + assert props["maxLength"] == 2 - def test_snake_case_constraint_invalid(self) -> None: - """Test CategoryPatternConstraint with invalid category patterns.""" + def test_pattern_constraint_without_optional_kwargs(self) -> None: + """Bare PatternConstraint without optional kwargs omits them from JSON schema.""" + constraint = PatternConstraint(r"^[A-Z]+$", "Must be uppercase") class TestModel(BaseModel): - category: Annotated[str, SnakeCaseConstraint()] - - invalid_categories = [ - "Restaurant", # Capital letter - "gas-station", # Hyphen instead of underscore - "shopping mall", # Space instead of underscore - "category!", # Special character - ] + code: Annotated[str, constraint] - for cat in invalid_categories: - with pytest.raises(ValidationError) as exc_info: - TestModel(category=cat) - assert "Invalid category format" in str(exc_info.value) + schema = TestModel.model_json_schema() + props = schema["properties"]["code"] + assert props["pattern"] == "^[A-Z]+$" + assert "description" not in props + assert "minLength" not in props + assert "maxLength" not in props diff --git a/packages/overture-schema-transportation-theme/tests/connector_baseline_schema.json b/packages/overture-schema-transportation-theme/tests/connector_baseline_schema.json index b633d1219..a51faf3f0 100644 --- a/packages/overture-schema-transportation-theme/tests/connector_baseline_schema.json +++ b/packages/overture-schema-transportation-theme/tests/connector_baseline_schema.json @@ -30,7 +30,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" }, diff --git a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json index b863d3b3a..cf016f2f9 100644 --- a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json +++ b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json @@ -84,7 +84,7 @@ "value": { "description": "Names the object that is reached", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" } @@ -281,7 +281,7 @@ "value": { "description": "The actual name value.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Value", "type": "string" }, @@ -317,7 +317,7 @@ "patternProperties": { "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$": { "description": "String with no leading/trailing whitespace", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "type": "string" } }, @@ -330,7 +330,7 @@ "primary": { "description": "The most commonly used name.", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Primary", "type": "string" }, @@ -1040,28 +1040,28 @@ "name": { "description": "Full name of the route", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Name", "type": "string" }, "network": { "description": "Name of the highway system this route belongs to", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Network", "type": "string" }, "ref": { "description": "Code or number used to reference the route", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Ref", "type": "string" }, "symbol": { "description": "URL or description of route signage", "minLength": 1, - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "Symbol", "type": "string" }, @@ -1140,7 +1140,7 @@ }, "license": { "description": "Source data license name.\n\nThis should be a valid SPDX license identifier when available.\n\nIf omitted, contact the data provider for more license information.", - "pattern": "^(\\S.*)?\\S$", + "pattern": "^(\\S(.*\\S)?)?$", "title": "License", "type": "string" },