From 6a13b1dde72733518e677c431d8161fb118e7926 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Fri, 6 Mar 2026 16:57:26 -0800 Subject: [PATCH] feat(wassirman): add validation IR generation target New `--format wassirman` option for `overture-codegen generate` that emits YAML validation IR from Pydantic schema models. The pipeline walks expanded FeatureSpec trees and emits one rule per field constraint: not_null, numeric bounds (gt/gte/lt/lte/between), length, enum/literal membership, geometry type, pattern, and uniqueness. Model-level constraints (require_any_of, radio_group, require_if, forbid_if) produce multi-column or conditional rules. list_columns tracks array nesting for element-level checks. Parent optionality propagates as `when: not_null` guards. Structural fields (theme, type, bbox, ext_*) are skipped. With --output-dir, writes one YAML file per feature type. Without it, emits a single envelope to stdout. Golden snapshot tests cover all 16 discovered feature types, verified against the reference validator output. 
--- .../src/overture/schema/codegen/cli.py | 25 +- .../codegen/extraction/type_registry.py | 22 + .../schema/codegen/wassirman/__init__.py | 0 .../overture/schema/codegen/wassirman/ir.py | 91 ++ .../schema/codegen/wassirman/pipeline.py | 66 + .../schema/codegen/wassirman/walker.py | 526 ++++++++ .../tests/golden/wassirman/address.yaml | 163 +++ .../tests/golden/wassirman/bathymetry.yaml | 112 ++ .../tests/golden/wassirman/building.yaml | 412 ++++++ .../tests/golden/wassirman/buildingpart.yaml | 316 +++++ .../tests/golden/wassirman/connector.yaml | 81 ++ .../tests/golden/wassirman/division.yaml | 577 ++++++++ .../tests/golden/wassirman/divisionarea.yaml | 323 +++++ .../golden/wassirman/divisionboundary.yaml | 268 ++++ .../golden/wassirman/infrastructure.yaml | 442 ++++++ .../tests/golden/wassirman/land.yaml | 307 +++++ .../tests/golden/wassirman/landcover.yaml | 122 ++ .../tests/golden/wassirman/landuse.yaml | 393 ++++++ .../tests/golden/wassirman/place.yaml | 484 +++++++ .../tests/golden/wassirman/segment.yaml | 1198 +++++++++++++++++ .../tests/golden/wassirman/sources.yaml | 103 ++ .../tests/golden/wassirman/water.yaml | 265 ++++ .../tests/test_golden_wassirman.py | 71 + .../tests/test_type_registry.py | 21 + .../tests/test_wassirman_ir.py | 94 ++ .../tests/test_wassirman_pipeline.py | 38 + .../tests/test_wassirman_walker.py | 146 ++ 27 files changed, 6664 insertions(+), 2 deletions(-) create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/__init__.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/ir.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/pipeline.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/walker.py create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/address.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/bathymetry.yaml create 
mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/building.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/buildingpart.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/connector.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/division.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/divisionarea.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/divisionboundary.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/infrastructure.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/land.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/landcover.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/landuse.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/place.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/segment.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/sources.yaml create mode 100644 packages/overture-schema-codegen/tests/golden/wassirman/water.yaml create mode 100644 packages/overture-schema-codegen/tests/test_golden_wassirman.py create mode 100644 packages/overture-schema-codegen/tests/test_wassirman_ir.py create mode 100644 packages/overture-schema-codegen/tests/test_wassirman_pipeline.py create mode 100644 packages/overture-schema-codegen/tests/test_wassirman_walker.py diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py index 0a24c7348..cb1d21b6f 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -22,12 +22,14 @@ entry_point_module, ) from .markdown.pipeline import 
generate_markdown_pages +from .wassirman.ir import ValidationIR +from .wassirman.pipeline import generate_validation_ir log = logging.getLogger(__name__) __all__ = ["cli"] -_OUTPUT_FORMATS = ("markdown",) +_OUTPUT_FORMATS = ("markdown", "wassirman") _FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" @@ -120,7 +122,26 @@ def generate( ) ) - _generate_markdown(feature_specs, schema_root, output_dir) + if output_format == "markdown": + _generate_markdown(feature_specs, schema_root, output_dir) + elif output_format == "wassirman": + _generate_wassirman(feature_specs, output_dir) + + +def _generate_wassirman( + feature_specs: list[FeatureSpec], + output_dir: Path | None, +) -> None: + """Generate validation IR as YAML.""" + ir = generate_validation_ir(feature_specs) + if output_dir: + for dataset in ir.datasets: + file_path = output_dir / f"{dataset.name}.yaml" + file_path.parent.mkdir(parents=True, exist_ok=True) + single_ir = ValidationIR(datasets=[dataset]) + file_path.write_text(single_ir.to_yaml(), encoding="utf-8") + else: + click.echo(ir.to_yaml()) def _generate_markdown( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py index 505657866..fe5e9b950 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -9,6 +9,7 @@ "PRIMITIVE_TYPES", "get_type_mapping", "is_semantic_newtype", + "is_storage_primitive_source", "resolve_type_name", ] @@ -82,6 +83,27 @@ def get_type_mapping(type_name: str) -> TypeMapping | None: return PRIMITIVE_TYPES.get(type_name) +def is_storage_primitive_source(source_name: str | None) -> bool: + """Whether a ConstraintSource name refers to a registered storage primitive. 
+ + Used by validation renderers to filter out storage-level constraints + (e.g., int32 range) in favor of domain-level constraints. + + Parameters + ---------- + source_name + The NewType or primitive name to check, or None. + + Returns + ------- + bool + True if source_name is a key in PRIMITIVE_TYPES. + """ + if source_name is None: + return False + return source_name in PRIMITIVE_TYPES + + def resolve_type_name(type_info: TypeInfo, target: str) -> str: """Resolve a TypeInfo to the base type string for a given target. diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/ir.py new file mode 100644 index 000000000..8ddd49d8d --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/ir.py @@ -0,0 +1,91 @@ +"""Validation IR data types for YAML serialization.""" + +from __future__ import annotations + +from dataclasses import dataclass + +import yaml + +__all__ = ["ConditionIR", "DatasetIR", "RuleIR", "ValidationIR"] + + +@dataclass(frozen=True, slots=True) +class ConditionIR: + """Guard predicate for conditional rules.""" + + column: str + check: str + value: object | None = None + + def to_dict(self) -> dict[str, object]: + """Serialize to dict, omitting None fields.""" + d: dict[str, object] = {"column": self.column, "check": self.check} + if self.value is not None: + d["value"] = self.value + return d + + +@dataclass(frozen=True, slots=True) +class RuleIR: + """Single validation rule.""" + + name: str + check: str + severity: str + column: str | None = None + columns: list[str] | None = None + value: object | None = None + list_columns: list[str] | None = None + when: ConditionIR | None = None + + 
def to_dict(self) -> dict[str, object]: + """Serialize to dict, omitting None fields.""" + d: dict[str, object] = {"name": self.name} + if self.column is not None: + d["column"] = self.column + if self.columns is not None: + d["columns"] = self.columns + d["check"] = self.check + if self.value is not None: + d["value"] = self.value + if self.list_columns is not None: + d["list_columns"] = self.list_columns + if self.when is not None: + d["when"] = self.when.to_dict() + d["severity"] = self.severity + return d + + +@dataclass(frozen=True, slots=True) +class DatasetIR: + """Validation rules for one feature type.""" + + name: str + source_model: str + id_column: str + rules: list[RuleIR] + + def to_dict(self) -> dict[str, object]: + """Serialize to dict.""" + return { + "name": self.name, + "source_model": self.source_model, + "id_column": self.id_column, + "rules": [r.to_dict() for r in self.rules], + } + + +@dataclass(frozen=True, slots=True) +class ValidationIR: + """Full validation IR envelope.""" + + datasets: list[DatasetIR] + version: str = "1" + + def to_yaml(self) -> str: + """Serialize to YAML string.""" + data = { + "version": self.version, + "datasets": [ds.to_dict() for ds in self.datasets], + } + return yaml.dump(data, default_flow_style=False, sort_keys=False) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/pipeline.py new file mode 100644 index 000000000..8faa3b044 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/pipeline.py @@ -0,0 +1,66 @@ +"""Validation IR generation pipeline.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from ..extraction.model_extraction import expand_model_tree +from ..extraction.specs import FeatureSpec, ModelSpec +from ..extraction.type_analyzer import TypeKind +from .ir import DatasetIR, ValidationIR +from .walker import 
walk_feature + +__all__ = ["generate_validation_ir"] + + +def _dataset_name(spec: FeatureSpec) -> str: + """Derive dataset name from the model's type Literal field.""" + for field_spec in spec.fields: + if field_spec.name == "type" and field_spec.type_info.kind == TypeKind.LITERAL: + vals = field_spec.type_info.literal_values + if vals and len(vals) == 1: + return str(vals[0]) + return spec.name.lower() + + +def _source_model_fqn(spec: FeatureSpec) -> str: + """Fully qualified name of the source model.""" + src = spec.source_type + if src is None: + return spec.name + return f"{src.__module__}.{src.__qualname__}" + + +def generate_validation_ir( + feature_specs: Sequence[FeatureSpec], +) -> ValidationIR: + """Generate validation IR from feature specs. + + Parameters + ---------- + feature_specs + Extracted feature specs to convert to validation IR. + + Returns + ------- + ValidationIR + Full validation IR with one dataset per feature spec. + """ + cache: dict[type, ModelSpec] = {} + for spec in feature_specs: + expand_model_tree(spec, cache) + + datasets: list[DatasetIR] = [] + for spec in feature_specs: + name = _dataset_name(spec) + rules = walk_feature(spec, name) + datasets.append( + DatasetIR( + name=name, + source_model=_source_model_fqn(spec), + id_column="id", + rules=rules, + ) + ) + + return ValidationIR(datasets=datasets) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/walker.py b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/walker.py new file mode 100644 index 000000000..121bc9908 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/wassirman/walker.py @@ -0,0 +1,526 @@ +"""Recursive tree walker that emits validation rules from FeatureSpec trees.""" + +from __future__ import annotations + +from enum import Enum +from typing import cast + +from annotated_types import Ge, Gt, Le, Lt, MaxLen, MinLen + +from overture.schema.core.scoping.lr import 
LinearReferenceRangeConstraint + +from ..extraction.field_constraints import constraint_pattern +from ..extraction.specs import FeatureSpec, FieldSpec +from ..extraction.type_analyzer import ConstraintSource, TypeInfo, TypeKind +from ..extraction.type_registry import is_storage_primitive_source +from .ir import ConditionIR, RuleIR + +__all__ = ["walk_feature"] + +# Fields that carry no domain semantics and produce no validation rules. +_STRUCTURAL_FIELDS = frozenset({"theme", "type", "bbox"}) +_STRUCTURAL_PREFIX = "ext_" + + +def walk_feature(spec: FeatureSpec, dataset_name: str) -> list[RuleIR]: + """Walk an expanded FeatureSpec and emit validation rules. + + Parameters + ---------- + spec + An expanded FeatureSpec (fields populated via expand_model_tree). + dataset_name + The dataset prefix used in rule names (e.g. ``"place"``). + + Returns + ------- + list[RuleIR] + Flat list of validation rules in field-declaration order. + """ + rules: list[RuleIR] = [] + _walk_fields( + spec.fields, + dataset_name, + prefix="", + list_columns=[], + parent_guard=None, + rules=rules, + ) + _emit_model_constraints(spec, dataset_name, rules) + return rules + + +def _is_structural(field_name: str) -> bool: + return field_name in _STRUCTURAL_FIELDS or field_name.startswith(_STRUCTURAL_PREFIX) + + +def _walk_fields( + fields: list[FieldSpec], + dataset_name: str, + *, + prefix: str, + list_columns: list[str], + parent_guard: str | None, + rules: list[RuleIR], +) -> None: + for field_spec in fields: + if _is_structural(field_spec.name): + continue + column = f"{prefix}{field_spec.name}" if prefix else field_spec.name + ti = field_spec.type_info + _emit_field_rules( + field_spec, ti, dataset_name, column, list_columns, parent_guard, rules + ) + + if field_spec.model is not None and not field_spec.starts_cycle: + child_guard = ( + column if ti.is_optional or not field_spec.is_required else parent_guard + ) + child_list_columns = list(list_columns) + if ti.is_list: + 
child_list_columns.append(column) + _walk_fields( + field_spec.model.fields, + dataset_name, + prefix=f"{column}.", + list_columns=child_list_columns, + parent_guard=child_guard, + rules=rules, + ) + + +def _make_rule( + dataset_name: str, + column: str, + check: str, + suffix: str, + *, + value: object | None = None, + list_columns: list[str] | None = None, + when: ConditionIR | None = None, +) -> RuleIR: + return RuleIR( + name=f"{dataset_name}.{column}.{suffix}", + column=column, + check=check, + severity="error", + value=value, + list_columns=list_columns if list_columns else None, + when=when, + ) + + +def _domain_constraints(ti: TypeInfo) -> list[ConstraintSource]: + return [ + cs for cs in ti.constraints if not is_storage_primitive_source(cs.source_name) + ] + + +def _emit_field_rules( + field_spec: FieldSpec, + ti: TypeInfo, + dataset_name: str, + column: str, + list_columns: list[str], + parent_guard: str | None, + rules: list[RuleIR], +) -> None: + lc_container = list_columns if list_columns else None + if ti.is_list: + lc_element: list[str] | None = list(list_columns) + [column] + else: + lc_element = list(list_columns) if list_columns else None + + if field_spec.is_required: + when = ( + ConditionIR(column=parent_guard, check="not_null") if parent_guard else None + ) + rules.append( + _make_rule( + dataset_name, + column, + "not_null", + "not_null", + list_columns=lc_container, + when=when, + ) + ) + + constraints = _domain_constraints(ti) + _emit_numeric_bounds(dataset_name, column, constraints, lc_element, rules) + _emit_length_constraints( + dataset_name, column, ti, constraints, lc_container, lc_element, rules + ) + _emit_enum_rules(dataset_name, column, ti, lc_element, rules) + _emit_literal_rules(dataset_name, column, ti, lc_element, rules) + _emit_geometry_rules(dataset_name, column, constraints, lc_container, rules) + _emit_pattern_rules(dataset_name, column, constraints, lc_element, rules) + _emit_unique_rules(dataset_name, column, constraints, 
lc_container, rules) + + +def _has_range_constraint(constraints: list[ConstraintSource]) -> bool: + """Whether any constraint signals a paired-range field (e.g. LinearReferenceRange). + + These fields carry Ge/Le from their inner NewType but the range is + validated structurally, not as independent bounds. + """ + return any( + isinstance(cs.constraint, LinearReferenceRangeConstraint) for cs in constraints + ) + + +def _emit_numeric_bounds( + dataset_name: str, + column: str, + constraints: list[ConstraintSource], + lc_element: list[str] | None, + rules: list[RuleIR], +) -> None: + if _has_range_constraint(constraints): + return + ge_val = gt_val = le_val = lt_val = None + for cs in constraints: + c = cs.constraint + if isinstance(c, Ge): + ge_val = c.ge + elif isinstance(c, Gt): + gt_val = c.gt + elif isinstance(c, Le): + le_val = c.le + elif isinstance(c, Lt): + lt_val = c.lt + + if ge_val is not None and le_val is not None and gt_val is None and lt_val is None: + rules.append( + _make_rule( + dataset_name, + column, + "between", + "range", + value=[ge_val, le_val], + list_columns=lc_element, + ) + ) + return + + if ge_val is not None: + rules.append( + _make_rule( + dataset_name, + column, + "gte", + "gte", + value=ge_val, + list_columns=lc_element, + ) + ) + if gt_val is not None: + rules.append( + _make_rule( + dataset_name, + column, + "gt", + "positive", + value=gt_val, + list_columns=lc_element, + ) + ) + if le_val is not None: + rules.append( + _make_rule( + dataset_name, + column, + "lte", + "lte", + value=le_val, + list_columns=lc_element, + ) + ) + if lt_val is not None: + rules.append( + _make_rule( + dataset_name, column, "lt", "lt", value=lt_val, list_columns=lc_element + ) + ) + + +def _emit_length_constraints( + dataset_name: str, + column: str, + ti: TypeInfo, + constraints: list[ConstraintSource], + lc_container: list[str] | None, + lc_element: list[str] | None, + rules: list[RuleIR], +) -> None: + for cs in constraints: + c = cs.constraint + if 
isinstance(c, MinLen): + if ti.is_list: + rules.append( + _make_rule( + dataset_name, + column, + "min_list_length", + "min_list_length", + value=c.min_length, + list_columns=lc_container, + ) + ) + else: + rules.append( + _make_rule( + dataset_name, + column, + "min_length", + "min_length", + value=c.min_length, + list_columns=lc_element, + ) + ) + elif isinstance(c, MaxLen): + if ti.is_list: + rules.append( + _make_rule( + dataset_name, + column, + "max_list_length", + "max_list_length", + value=c.max_length, + list_columns=lc_container, + ) + ) + else: + rules.append( + _make_rule( + dataset_name, + column, + "max_length", + "max_length", + value=c.max_length, + list_columns=lc_element, + ) + ) + + +def _emit_enum_rules( + dataset_name: str, + column: str, + ti: TypeInfo, + lc_element: list[str] | None, + rules: list[RuleIR], +) -> None: + if ti.kind == TypeKind.ENUM and ti.source_type is not None: + enum_class = cast("type[Enum]", ti.source_type) + members = sorted(m.value for m in enum_class) + rules.append( + _make_rule( + dataset_name, + column, + "in", + "valid", + value=members, + list_columns=lc_element, + ) + ) + + +def _emit_literal_rules( + dataset_name: str, + column: str, + ti: TypeInfo, + lc_element: list[str] | None, + rules: list[RuleIR], +) -> None: + if ti.kind != TypeKind.LITERAL or not ti.literal_values: + return + if len(ti.literal_values) == 1: + rules.append( + _make_rule( + dataset_name, + column, + "eq", + "eq", + value=ti.literal_values[0], + list_columns=lc_element, + ) + ) + else: + rules.append( + _make_rule( + dataset_name, + column, + "in", + "valid", + value=sorted(str(v) for v in ti.literal_values), + list_columns=lc_element, + ) + ) + + +def _emit_geometry_rules( + dataset_name: str, + column: str, + constraints: list[ConstraintSource], + lc_container: list[str] | None, + rules: list[RuleIR], +) -> None: + from overture.schema.system.primitive import GeometryTypeConstraint # noqa: PLC0415 + + for cs in constraints: + c = 
cs.constraint + if isinstance(c, GeometryTypeConstraint): + values = [ + "".join(p.title() for p in gt.value.split("_")) + for gt in c.allowed_types + ] + rules.append( + _make_rule( + dataset_name, + column, + "geometry_type", + "type", + value=values, + list_columns=lc_container, + ) + ) + break + + +def _emit_pattern_rules( + dataset_name: str, + column: str, + constraints: list[ConstraintSource], + lc_element: list[str] | None, + rules: list[RuleIR], +) -> None: + from overture.schema.system.field_constraint.string import ( # noqa: PLC0415 + StrippedConstraint, + ) + + for cs in constraints: + if isinstance(cs.constraint, StrippedConstraint): + rules.append( + _make_rule( + dataset_name, + column, + "pattern", + "pattern", + value=r"^(\S.*)?\S$", + list_columns=lc_element, + ) + ) + return + pattern = constraint_pattern(cs.constraint) + if pattern: + rules.append( + _make_rule( + dataset_name, + column, + "pattern", + "pattern", + value=pattern, + list_columns=lc_element, + ) + ) + return + + +def _emit_unique_rules( + dataset_name: str, + column: str, + constraints: list[ConstraintSource], + lc_container: list[str] | None, + rules: list[RuleIR], +) -> None: + from overture.schema.system.field_constraint import ( # noqa: PLC0415 + UniqueItemsConstraint, + ) + + for cs in constraints: + if isinstance(cs.constraint, UniqueItemsConstraint): + rules.append( + _make_rule( + dataset_name, column, "unique", "unique", list_columns=lc_container + ) + ) + break + + +def _emit_model_constraints( + spec: FeatureSpec, + dataset_name: str, + rules: list[RuleIR], +) -> None: + from overture.schema.system.model_constraint import ( # noqa: PLC0415 + ForbidIfConstraint, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, + ) + + for mc in spec.constraints: + if isinstance(mc, RequireAnyOfConstraint): + rules.append( + RuleIR( + name=f"{dataset_name}.any_of", + check="any_of", + severity="error", + columns=list(mc.field_names), + ) + ) + elif isinstance(mc, 
RadioGroupConstraint): + rules.append( + RuleIR( + name=f"{dataset_name}.exactly_one_of", + check="exactly_one_of", + severity="error", + columns=list(mc.field_names), + ) + ) + elif isinstance(mc, RequireIfConstraint): + cond = _convert_condition(mc.condition) + for field_name in mc.field_names: + rules.append( + RuleIR( + name=f"{dataset_name}.{field_name}.required_when", + column=field_name, + check="not_null", + severity="error", + when=cond, + ) + ) + elif isinstance(mc, ForbidIfConstraint): + cond = _convert_condition(mc.condition) + for field_name in mc.field_names: + rules.append( + RuleIR( + name=f"{dataset_name}.{field_name}.forbidden_when", + column=field_name, + check="is_null", + severity="error", + when=cond, + ) + ) + + +def _unwrap_enum_value(value: object) -> object: + """Extract the raw value from an enum member, or return as-is.""" + if isinstance(value, Enum): + return value.value + return value + + +def _convert_condition(condition: object) -> ConditionIR: + from overture.schema.system.model_constraint import ( # noqa: PLC0415 + FieldEqCondition, + Not, + ) + + if isinstance(condition, Not) and isinstance(condition.inner, FieldEqCondition): + value = _unwrap_enum_value(condition.inner.value) + return ConditionIR(column=condition.inner.field_name, check="neq", value=value) + if isinstance(condition, FieldEqCondition): + value = _unwrap_enum_value(condition.value) + return ConditionIR(column=condition.field_name, check="eq", value=value) + raise ValueError(f"Unsupported condition type: {type(condition)}") diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/address.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/address.yaml new file mode 100644 index 000000000..cc5d7b7b3 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/address.yaml @@ -0,0 +1,163 @@ +version: '1' +datasets: +- name: address + source_model: overture.schema.addresses.address.Address + id_column: id + rules: + - name: 
address.id.not_null + column: id + check: not_null + severity: error + - name: address.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: address.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: address.geometry.not_null + column: geometry + check: not_null + severity: error + - name: address.geometry.type + column: geometry + check: geometry_type + value: + - Point + severity: error + - name: address.version.not_null + column: version + check: not_null + severity: error + - name: address.version.gte + column: version + check: gte + value: 0 + severity: error + - name: address.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: address.sources.unique + column: sources + check: unique + severity: error + - name: address.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: address.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: address.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: address.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: address.address_levels.min_list_length + column: address_levels + check: min_list_length + value: 1 + severity: error + - name: address.address_levels.max_list_length + column: address_levels + check: max_list_length + value: 5 + severity: error + - name: address.address_levels.value.min_length + column: address_levels.value + check: min_length + value: 1 + list_columns: &id002 + - address_levels + severity: error + - name: address.address_levels.value.pattern + column: 
address_levels.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: address.country.not_null + column: country + check: not_null + severity: error + - name: address.country.pattern + column: country + check: pattern + value: ^[A-Z]{2}$ + severity: error + - name: address.number.min_length + column: number + check: min_length + value: 1 + severity: error + - name: address.number.pattern + column: number + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: address.postal_city.min_length + column: postal_city + check: min_length + value: 1 + severity: error + - name: address.postal_city.pattern + column: postal_city + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: address.postcode.min_length + column: postcode + check: min_length + value: 1 + severity: error + - name: address.postcode.pattern + column: postcode + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: address.street.min_length + column: street + check: min_length + value: 1 + severity: error + - name: address.street.pattern + column: street + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: address.unit.min_length + column: unit + check: min_length + value: 1 + severity: error + - name: address.unit.pattern + column: unit + check: pattern + value: ^(\S.*)?\S$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/bathymetry.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/bathymetry.yaml new file mode 100644 index 000000000..ee28ab2ff --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/bathymetry.yaml @@ -0,0 +1,112 @@ +version: '1' +datasets: +- name: bathymetry + source_model: overture.schema.base.bathymetry.Bathymetry + id_column: id + rules: + - name: bathymetry.id.not_null + column: id + check: not_null + severity: error + - name: bathymetry.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: 
bathymetry.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: bathymetry.geometry.not_null + column: geometry + check: not_null + severity: error + - name: bathymetry.geometry.type + column: geometry + check: geometry_type + value: + - MultiPolygon + - Polygon + severity: error + - name: bathymetry.version.not_null + column: version + check: not_null + severity: error + - name: bathymetry.version.gte + column: version + check: gte + value: 0 + severity: error + - name: bathymetry.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: bathymetry.sources.unique + column: sources + check: unique + severity: error + - name: bathymetry.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: bathymetry.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: bathymetry.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: bathymetry.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: bathymetry.depth.not_null + column: depth + check: not_null + severity: error + - name: bathymetry.depth.gte + column: depth + check: gte + value: 0 + severity: error + - name: bathymetry.cartography.prominence.range + column: cartography.prominence + check: between + value: + - 1 + - 100 + severity: error + - name: bathymetry.cartography.min_zoom.range + column: cartography.min_zoom + check: between + value: + - 0 + - 23 + severity: error + - name: bathymetry.cartography.max_zoom.range + column: cartography.max_zoom + check: between + value: + - 0 + - 23 + severity: error diff --git 
a/packages/overture-schema-codegen/tests/golden/wassirman/building.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/building.yaml new file mode 100644 index 000000000..2ad347181 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/building.yaml @@ -0,0 +1,412 @@ +version: '1' +datasets: +- name: building + source_model: overture.schema.buildings.building.Building + id_column: id + rules: + - name: building.id.not_null + column: id + check: not_null + severity: error + - name: building.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: building.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: building.geometry.not_null + column: geometry + check: not_null + severity: error + - name: building.geometry.type + column: geometry + check: geometry_type + value: + - MultiPolygon + - Polygon + severity: error + - name: building.version.not_null + column: version + check: not_null + severity: error + - name: building.version.gte + column: version + check: gte + value: 0 + severity: error + - name: building.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: building.sources.unique + column: sources + check: unique + severity: error + - name: building.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: building.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: building.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: building.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: 
building.subtype.valid + column: subtype + check: in + value: + - agricultural + - civic + - commercial + - education + - entertainment + - industrial + - medical + - military + - outbuilding + - religious + - residential + - service + - transportation + severity: error + - name: building.class.valid + column: class + check: in + value: + - agricultural + - allotment_house + - apartments + - barn + - beach_hut + - boathouse + - bridge_structure + - bungalow + - bunker + - cabin + - carport + - cathedral + - chapel + - church + - civic + - college + - commercial + - cowshed + - detached + - digester + - dormitory + - dwelling_house + - factory + - farm + - farm_auxiliary + - fire_station + - garage + - garages + - ger + - glasshouse + - government + - grandstand + - greenhouse + - guardhouse + - hangar + - hospital + - hotel + - house + - houseboat + - hut + - industrial + - kindergarten + - kiosk + - library + - manufacture + - military + - monastery + - mosque + - office + - outbuilding + - parking + - pavilion + - post_office + - presbytery + - public + - religious + - residential + - retail + - roof + - school + - semi + - semidetached_house + - service + - shed + - shrine + - silo + - slurry_tank + - sports_centre + - sports_hall + - stable + - stadium + - static_caravan + - stilt_house + - storage_tank + - sty + - supermarket + - synagogue + - temple + - terrace + - toilets + - train_station + - transformer_tower + - transportation + - trullo + - university + - warehouse + - wayside_shrine + severity: error + - name: building.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: building.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: building.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: building.names.rules.value.not_null + column: names.rules.value + 
check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: building.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: building.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: building.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: building.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: building.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: building.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: building.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: building.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: building.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: 
*id004 + severity: error + - name: building.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: building.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: building.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: building.height.positive + column: height + check: gt + value: 0 + severity: error + - name: building.num_floors.positive + column: num_floors + check: gt + value: 0 + severity: error + - name: building.num_floors_underground.positive + column: num_floors_underground + check: gt + value: 0 + severity: error + - name: building.min_floor.positive + column: min_floor + check: gt + value: 0 + severity: error + - name: building.facade_color.pattern + column: facade_color + check: pattern + value: ^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$ + severity: error + - name: building.facade_material.valid + column: facade_material + check: in + value: + - brick + - cement_block + - clay + - concrete + - glass + - metal + - plaster + - plastic + - stone + - timber_framing + - wood + severity: error + - name: building.roof_material.valid + column: roof_material + check: in + value: + - concrete + - copper + - eternit + - glass + - grass + - gravel + - metal + - plastic + - roof_tiles + - slate + - solar_panels + - tar_paper + - thatch + - wood + severity: error + - name: building.roof_shape.valid + column: roof_shape + check: in + value: + - dome + - flat + - gabled + - gambrel + - half_hipped + - hipped + - mansard + - onion + - pyramidal + - round + - saltbox + - sawtooth + - skillion + - spherical + severity: error + - name: building.roof_direction.gte + column: roof_direction + check: gte + value: 0 + 
severity: error + - name: building.roof_direction.lt + column: roof_direction + check: lt + value: 360 + severity: error + - name: building.roof_orientation.valid + column: roof_orientation + check: in + value: + - across + - along + severity: error + - name: building.roof_color.pattern + column: roof_color + check: pattern + value: ^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/buildingpart.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/buildingpart.yaml new file mode 100644 index 000000000..f7801c599 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/buildingpart.yaml @@ -0,0 +1,316 @@ +version: '1' +datasets: +- name: building_part + source_model: overture.schema.buildings.building_part.BuildingPart + id_column: id + rules: + - name: building_part.id.not_null + column: id + check: not_null + severity: error + - name: building_part.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: building_part.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: building_part.geometry.not_null + column: geometry + check: not_null + severity: error + - name: building_part.geometry.type + column: geometry + check: geometry_type + value: + - MultiPolygon + - Polygon + severity: error + - name: building_part.version.not_null + column: version + check: not_null + severity: error + - name: building_part.version.gte + column: version + check: gte + value: 0 + severity: error + - name: building_part.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: building_part.sources.unique + column: sources + check: unique + severity: error + - name: building_part.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: 
building_part.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: building_part.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: building_part.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: building_part.building_id.not_null + column: building_id + check: not_null + severity: error + - name: building_part.building_id.min_length + column: building_id + check: min_length + value: 1 + severity: error + - name: building_part.building_id.pattern + column: building_id + check: pattern + value: ^\S+$ + severity: error + - name: building_part.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: building_part.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: building_part.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: building_part.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: building_part.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: building_part.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: building_part.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: building_part.names.rules.variant.valid + 
column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: building_part.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: building_part.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: building_part.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: building_part.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: building_part.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: building_part.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: building_part.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: building_part.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: building_part.height.positive + column: 
height + check: gt + value: 0 + severity: error + - name: building_part.num_floors.positive + column: num_floors + check: gt + value: 0 + severity: error + - name: building_part.num_floors_underground.positive + column: num_floors_underground + check: gt + value: 0 + severity: error + - name: building_part.min_floor.positive + column: min_floor + check: gt + value: 0 + severity: error + - name: building_part.facade_color.pattern + column: facade_color + check: pattern + value: ^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$ + severity: error + - name: building_part.facade_material.valid + column: facade_material + check: in + value: + - brick + - cement_block + - clay + - concrete + - glass + - metal + - plaster + - plastic + - stone + - timber_framing + - wood + severity: error + - name: building_part.roof_material.valid + column: roof_material + check: in + value: + - concrete + - copper + - eternit + - glass + - grass + - gravel + - metal + - plastic + - roof_tiles + - slate + - solar_panels + - tar_paper + - thatch + - wood + severity: error + - name: building_part.roof_shape.valid + column: roof_shape + check: in + value: + - dome + - flat + - gabled + - gambrel + - half_hipped + - hipped + - mansard + - onion + - pyramidal + - round + - saltbox + - sawtooth + - skillion + - spherical + severity: error + - name: building_part.roof_direction.gte + column: roof_direction + check: gte + value: 0 + severity: error + - name: building_part.roof_direction.lt + column: roof_direction + check: lt + value: 360 + severity: error + - name: building_part.roof_orientation.valid + column: roof_orientation + check: in + value: + - across + - along + severity: error + - name: building_part.roof_color.pattern + column: roof_color + check: pattern + value: ^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/connector.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/connector.yaml new file mode 100644 index 
000000000..f6d510a75 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/connector.yaml @@ -0,0 +1,81 @@ +version: '1' +datasets: +- name: connector + source_model: overture.schema.transportation.connector.models.Connector + id_column: id + rules: + - name: connector.id.not_null + column: id + check: not_null + severity: error + - name: connector.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: connector.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: connector.geometry.not_null + column: geometry + check: not_null + severity: error + - name: connector.geometry.type + column: geometry + check: geometry_type + value: + - Point + severity: error + - name: connector.version.not_null + column: version + check: not_null + severity: error + - name: connector.version.gte + column: version + check: gte + value: 0 + severity: error + - name: connector.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: connector.sources.unique + column: sources + check: unique + severity: error + - name: connector.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: connector.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: connector.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: connector.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/division.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/division.yaml new file mode 100644 index 
000000000..2d5722340 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/division.yaml @@ -0,0 +1,577 @@ +version: '1' +datasets: +- name: division + source_model: overture.schema.divisions.division.models.Division + id_column: id + rules: + - name: division.cartography.prominence.range + column: cartography.prominence + check: between + value: + - 1 + - 100 + severity: error + - name: division.cartography.min_zoom.range + column: cartography.min_zoom + check: between + value: + - 0 + - 23 + severity: error + - name: division.cartography.max_zoom.range + column: cartography.max_zoom + check: between + value: + - 0 + - 23 + severity: error + - name: division.names.not_null + column: names + check: not_null + severity: error + - name: division.names.primary.not_null + column: names.primary + check: not_null + severity: error + - name: division.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: division.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: division.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id002 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: division.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id001 + - names.rules + severity: error + - name: division.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id001 + severity: error + - name: division.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id002 + when: + column: names.rules + check: not_null + severity: error + - name: division.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: 
division.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: division.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: division.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: division.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id003 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: division.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id003 + severity: error + - name: division.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: division.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id003 + severity: error + - name: division.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: division.id.not_null + column: id + check: not_null + severity: error + - name: division.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: division.id.pattern + column: id + check: pattern + value: ^\S+$ 
+ severity: error + - name: division.geometry.not_null + column: geometry + check: not_null + severity: error + - name: division.geometry.type + column: geometry + check: geometry_type + value: + - Point + severity: error + - name: division.version.not_null + column: version + check: not_null + severity: error + - name: division.version.gte + column: version + check: gte + value: 0 + severity: error + - name: division.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: division.sources.unique + column: sources + check: unique + severity: error + - name: division.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id004 + - sources + when: + column: sources + check: not_null + severity: error + - name: division.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id004 + when: + column: sources + check: not_null + severity: error + - name: division.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: division.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: division.subtype.not_null + column: subtype + check: not_null + severity: error + - name: division.subtype.valid + column: subtype + check: in + value: + - borough + - country + - county + - dependency + - localadmin + - locality + - macrocounty + - macrohood + - macroregion + - microhood + - neighborhood + - region + severity: error + - name: division.country.not_null + column: country + check: not_null + severity: error + - name: division.country.pattern + column: country + check: pattern + value: ^[A-Z]{2}$ + severity: error + - name: division.hierarchies.not_null + column: hierarchies + check: not_null + severity: error + - name: division.hierarchies.min_list_length + column: hierarchies + check: 
min_list_length + value: 1 + severity: error + - name: division.hierarchies.min_list_length + column: hierarchies + check: min_list_length + value: 1 + severity: error + - name: division.hierarchies.unique + column: hierarchies + check: unique + severity: error + - name: division.hierarchies.division_id.not_null + column: hierarchies.division_id + check: not_null + list_columns: &id006 + - hierarchies + severity: error + - name: division.hierarchies.division_id.min_length + column: hierarchies.division_id + check: min_length + value: 1 + list_columns: &id005 + - hierarchies + severity: error + - name: division.hierarchies.division_id.min_length + column: hierarchies.division_id + check: min_length + value: 1 + list_columns: *id005 + severity: error + - name: division.hierarchies.division_id.pattern + column: hierarchies.division_id + check: pattern + value: ^\S+$ + list_columns: *id005 + severity: error + - name: division.hierarchies.subtype.not_null + column: hierarchies.subtype + check: not_null + list_columns: *id006 + severity: error + - name: division.hierarchies.subtype.valid + column: hierarchies.subtype + check: in + value: + - borough + - country + - county + - dependency + - localadmin + - locality + - macrocounty + - macrohood + - macroregion + - microhood + - neighborhood + - region + list_columns: + - hierarchies + severity: error + - name: division.hierarchies.name.not_null + column: hierarchies.name + check: not_null + list_columns: *id006 + severity: error + - name: division.hierarchies.name.min_length + column: hierarchies.name + check: min_length + value: 1 + list_columns: &id007 + - hierarchies + severity: error + - name: division.hierarchies.name.pattern + column: hierarchies.name + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id007 + severity: error + - name: division.parent_division_id.min_length + column: parent_division_id + check: min_length + value: 1 + severity: error + - name: division.parent_division_id.min_length + column: 
parent_division_id + check: min_length + value: 1 + severity: error + - name: division.parent_division_id.pattern + column: parent_division_id + check: pattern + value: ^\S+$ + severity: error + - name: division.admin_level.range + column: admin_level + check: between + value: + - 0 + - 16 + severity: error + - name: division.class.valid + column: class + check: in + value: + - city + - hamlet + - megacity + - town + - village + severity: error + - name: division.region.pattern + column: region + check: pattern + value: ^[A-Z]{2}-[A-Z0-9]{1,3}$ + severity: error + - name: division.perspectives.mode.not_null + column: perspectives.mode + check: not_null + when: + column: perspectives + check: not_null + severity: error + - name: division.perspectives.mode.valid + column: perspectives.mode + check: in + value: + - accepted_by + - disputed_by + severity: error + - name: division.perspectives.countries.not_null + column: perspectives.countries + check: not_null + when: + column: perspectives + check: not_null + severity: error + - name: division.perspectives.countries.min_list_length + column: perspectives.countries + check: min_list_length + value: 1 + severity: error + - name: division.perspectives.countries.pattern + column: perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - perspectives.countries + severity: error + - name: division.perspectives.countries.unique + column: perspectives.countries + check: unique + severity: error + - name: division.norms.driving_side.valid + column: norms.driving_side + check: in + value: + - left + - right + severity: error + - name: division.population.gte + column: population + check: gte + value: 0 + severity: error + - name: division.capital_division_ids.min_list_length + column: capital_division_ids + check: min_list_length + value: 1 + severity: error + - name: division.capital_division_ids.min_list_length + column: capital_division_ids + check: min_list_length + value: 1 + severity: error + - 
name: division.capital_division_ids.pattern + column: capital_division_ids + check: pattern + value: ^\S+$ + list_columns: + - capital_division_ids + severity: error + - name: division.capital_division_ids.unique + column: capital_division_ids + check: unique + severity: error + - name: division.capital_of_divisions.min_list_length + column: capital_of_divisions + check: min_list_length + value: 1 + severity: error + - name: division.capital_of_divisions.unique + column: capital_of_divisions + check: unique + severity: error + - name: division.capital_of_divisions.division_id.not_null + column: capital_of_divisions.division_id + check: not_null + list_columns: &id009 + - capital_of_divisions + when: + column: capital_of_divisions + check: not_null + severity: error + - name: division.capital_of_divisions.division_id.min_length + column: capital_of_divisions.division_id + check: min_length + value: 1 + list_columns: &id008 + - capital_of_divisions + severity: error + - name: division.capital_of_divisions.division_id.min_length + column: capital_of_divisions.division_id + check: min_length + value: 1 + list_columns: *id008 + severity: error + - name: division.capital_of_divisions.division_id.pattern + column: capital_of_divisions.division_id + check: pattern + value: ^\S+$ + list_columns: *id008 + severity: error + - name: division.capital_of_divisions.subtype.not_null + column: capital_of_divisions.subtype + check: not_null + list_columns: *id009 + when: + column: capital_of_divisions + check: not_null + severity: error + - name: division.capital_of_divisions.subtype.valid + column: capital_of_divisions.subtype + check: in + value: + - borough + - country + - county + - dependency + - localadmin + - locality + - macrocounty + - macrohood + - macroregion + - microhood + - neighborhood + - region + list_columns: + - capital_of_divisions + severity: error + - name: division.wikidata.pattern + column: wikidata + check: pattern + value: ^Q\d+$ + severity: error + - name: 
division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: county + severity: error + - name: division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macrocounty + severity: error + - name: division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: region + severity: error + - name: division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macroregion + severity: error + - name: division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: dependency + severity: error + - name: division.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: country + severity: error + - name: division.parent_division_id.required_when + column: parent_division_id + check: not_null + when: + column: subtype + check: neq + value: country + severity: error + - name: division.parent_division_id.forbidden_when + column: parent_division_id + check: is_null + when: + column: subtype + check: eq + value: country + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/divisionarea.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/divisionarea.yaml new file mode 100644 index 000000000..c7c6a34f5 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/divisionarea.yaml @@ -0,0 +1,323 @@ +version: '1' +datasets: +- name: division_area + source_model: overture.schema.divisions.division_area.models.DivisionArea + id_column: id + rules: + - name: division_area.names.not_null + column: names + check: not_null + severity: error + - name: division_area.names.primary.not_null + column: names.primary + check: not_null + severity: error + - name: 
division_area.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: division_area.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: division_area.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id002 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: division_area.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id001 + - names.rules + severity: error + - name: division_area.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id001 + severity: error + - name: division_area.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id002 + when: + column: names.rules + check: not_null + severity: error + - name: division_area.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: division_area.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: division_area.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: division_area.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: 
division_area.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id003 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: division_area.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id003 + severity: error + - name: division_area.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: division_area.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id003 + severity: error + - name: division_area.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: division_area.id.not_null + column: id + check: not_null + severity: error + - name: division_area.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: division_area.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: division_area.geometry.not_null + column: geometry + check: not_null + severity: error + - name: division_area.geometry.type + column: geometry + check: geometry_type + value: + - MultiPolygon + - Polygon + severity: error + - name: division_area.version.not_null + column: version + check: not_null + severity: error + - name: division_area.version.gte + column: version + check: gte + value: 0 + severity: error + - name: division_area.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: division_area.sources.unique + column: sources + check: unique + severity: error + - name: division_area.sources.property.not_null + column: sources.property + check: 
not_null + list_columns: &id004 + - sources + when: + column: sources + check: not_null + severity: error + - name: division_area.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id004 + when: + column: sources + check: not_null + severity: error + - name: division_area.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: division_area.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: division_area.subtype.not_null + column: subtype + check: not_null + severity: error + - name: division_area.subtype.valid + column: subtype + check: in + value: + - borough + - country + - county + - dependency + - localadmin + - locality + - macrocounty + - macrohood + - macroregion + - microhood + - neighborhood + - region + severity: error + - name: division_area.class.not_null + column: class + check: not_null + severity: error + - name: division_area.class.valid + column: class + check: in + value: + - land + - maritime + severity: error + - name: division_area.division_id.not_null + column: division_id + check: not_null + severity: error + - name: division_area.division_id.min_length + column: division_id + check: min_length + value: 1 + severity: error + - name: division_area.division_id.pattern + column: division_id + check: pattern + value: ^\S+$ + severity: error + - name: division_area.country.not_null + column: country + check: not_null + severity: error + - name: division_area.country.pattern + column: country + check: pattern + value: ^[A-Z]{2}$ + severity: error + - name: division_area.region.pattern + column: region + check: pattern + value: ^[A-Z]{2}-[A-Z0-9]{1,3}$ + severity: error + - name: division_area.admin_level.range + column: admin_level + check: between + value: + - 0 + - 16 + severity: error + - name: division_area.exactly_one_of + 
columns: + - is_land + - is_territorial + check: exactly_one_of + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: county + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macrocounty + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: region + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macroregion + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: dependency + severity: error + - name: division_area.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: country + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/divisionboundary.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/divisionboundary.yaml new file mode 100644 index 000000000..cf3ae98f7 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/divisionboundary.yaml @@ -0,0 +1,268 @@ +version: '1' +datasets: +- name: division_boundary + source_model: overture.schema.divisions.division_boundary.models.DivisionBoundary + id_column: id + rules: + - name: division_boundary.id.not_null + column: id + check: not_null + severity: error + - name: division_boundary.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: division_boundary.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: division_boundary.geometry.not_null + column: geometry + check: not_null + severity: error + - name: 
division_boundary.geometry.type + column: geometry + check: geometry_type + value: + - LineString + - MultiLineString + severity: error + - name: division_boundary.version.not_null + column: version + check: not_null + severity: error + - name: division_boundary.version.gte + column: version + check: gte + value: 0 + severity: error + - name: division_boundary.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: division_boundary.sources.unique + column: sources + check: unique + severity: error + - name: division_boundary.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: division_boundary.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: division_boundary.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: division_boundary.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: division_boundary.subtype.not_null + column: subtype + check: not_null + severity: error + - name: division_boundary.subtype.valid + column: subtype + check: in + value: + - borough + - country + - county + - dependency + - localadmin + - locality + - macrocounty + - macrohood + - macroregion + - microhood + - neighborhood + - region + severity: error + - name: division_boundary.class.not_null + column: class + check: not_null + severity: error + - name: division_boundary.class.valid + column: class + check: in + value: + - land + - maritime + severity: error + - name: division_boundary.division_ids.not_null + column: division_ids + check: not_null + severity: error + - name: 
division_boundary.division_ids.min_list_length + column: division_ids + check: min_list_length + value: 1 + severity: error + - name: division_boundary.division_ids.min_list_length + column: division_ids + check: min_list_length + value: 2 + severity: error + - name: division_boundary.division_ids.max_list_length + column: division_ids + check: max_list_length + value: 2 + severity: error + - name: division_boundary.division_ids.pattern + column: division_ids + check: pattern + value: ^\S+$ + list_columns: + - division_ids + severity: error + - name: division_boundary.division_ids.unique + column: division_ids + check: unique + severity: error + - name: division_boundary.country.pattern + column: country + check: pattern + value: ^[A-Z]{2}$ + severity: error + - name: division_boundary.region.pattern + column: region + check: pattern + value: ^[A-Z]{2}-[A-Z0-9]{1,3}$ + severity: error + - name: division_boundary.admin_level.range + column: admin_level + check: between + value: + - 0 + - 16 + severity: error + - name: division_boundary.perspectives.mode.not_null + column: perspectives.mode + check: not_null + when: + column: perspectives + check: not_null + severity: error + - name: division_boundary.perspectives.mode.valid + column: perspectives.mode + check: in + value: + - accepted_by + - disputed_by + severity: error + - name: division_boundary.perspectives.countries.not_null + column: perspectives.countries + check: not_null + when: + column: perspectives + check: not_null + severity: error + - name: division_boundary.perspectives.countries.min_list_length + column: perspectives.countries + check: min_list_length + value: 1 + severity: error + - name: division_boundary.perspectives.countries.pattern + column: perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - perspectives.countries + severity: error + - name: division_boundary.perspectives.countries.unique + column: perspectives.countries + check: unique + severity: error + - name: 
division_boundary.exactly_one_of + columns: + - is_land + - is_territorial + check: exactly_one_of + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: county + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macrocounty + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: region + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: macroregion + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: dependency + severity: error + - name: division_boundary.admin_level.required_when + column: admin_level + check: not_null + when: + column: subtype + check: eq + value: country + severity: error + - name: division_boundary.country.required_when + column: country + check: not_null + when: + column: subtype + check: neq + value: country + severity: error + - name: division_boundary.country.forbidden_when + column: country + check: is_null + when: + column: subtype + check: eq + value: country + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/infrastructure.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/infrastructure.yaml new file mode 100644 index 000000000..49f61674f --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/infrastructure.yaml @@ -0,0 +1,442 @@ +version: '1' +datasets: +- name: infrastructure + source_model: overture.schema.base.infrastructure.Infrastructure + id_column: id + rules: + - name: infrastructure.id.not_null + column: id + check: not_null + severity: 
error + - name: infrastructure.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: infrastructure.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: infrastructure.geometry.not_null + column: geometry + check: not_null + severity: error + - name: infrastructure.geometry.type + column: geometry + check: geometry_type + value: + - LineString + - MultiPolygon + - Point + - Polygon + severity: error + - name: infrastructure.version.not_null + column: version + check: not_null + severity: error + - name: infrastructure.version.gte + column: version + check: gte + value: 0 + severity: error + - name: infrastructure.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: infrastructure.sources.unique + column: sources + check: unique + severity: error + - name: infrastructure.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: infrastructure.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: infrastructure.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: infrastructure.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: infrastructure.class.not_null + column: class + check: not_null + severity: error + - name: infrastructure.class.valid + column: class + check: in + value: + - aerialway_station + - airport + - airport_gate + - airstrip + - apron + - aqueduct + - artwork + - atm + - barrier + - bell_tower + - bench + - bicycle_parking + - bicycle_rental + - block + - boardwalk + - bollard + - border_control + - breakwater + - bridge 
+ - bridge_support + - bump_gate + - bus_route + - bus_station + - bus_stop + - bus_trap + - cable + - cable_barrier + - cable_car + - cable_distribution + - camp_site + - cantilever + - catenary_mast + - cattle_grid + - chain + - chair_lift + - charging_station + - city_wall + - communication_line + - communication_pole + - communication_tower + - connection + - cooling + - covered + - crossing + - cutline + - cycle_barrier + - dam + - defensive + - ditch + - diving + - drag_lift + - drain + - drinking_water + - entrance + - fence + - ferry_terminal + - fire_hydrant + - fountain + - full-height_turnstile + - gasometer + - gate + - generator + - give_way + - gondola + - goods + - guard_rail + - hampshire_gate + - handrail + - hedge + - height_restrictor + - heliostat + - helipad + - heliport + - hose + - information + - insulator + - international_airport + - j-bar + - jersey_barrier + - kerb + - kissing_gate + - launchpad + - lift_gate + - lighting + - lightning_protection + - magic_carpet + - manhole + - milestone + - military_airport + - minaret + - minor_line + - mixed_lift + - mobile_phone_tower + - monitoring + - motorcycle_parking + - motorway_junction + - movable + - municipal_airport + - observation + - parking + - parking_entrance + - parking_space + - pedestrian_crossing + - picnic_table + - pier + - pipeline + - plant + - planter + - platform + - platter + - portal + - post_box + - power_line + - power_pole + - power_tower + - private_airport + - pylon + - quay + - radar + - railway_halt + - railway_station + - recycling + - regional_airport + - reservoir_covered + - retaining_wall + - roller_coaster + - rope_tow + - runway + - sally_port + - seaplane_airport + - sewer + - silo + - siren + - stile + - stop + - stop_position + - stopway + - storage_tank + - street_cabinet + - street_lamp + - substation + - subway_station + - swing_gate + - switch + - t-bar + - taxilane + - taxiway + - terminal + - toilets + - toll_booth + - traffic_signals + - 
transformer + - trestle + - utility_pole + - vending_machine + - viaduct + - viewpoint + - wall + - waste_basket + - waste_disposal + - watchtower + - water_tower + - weir + - zip_line + severity: error + - name: infrastructure.subtype.not_null + column: subtype + check: not_null + severity: error + - name: infrastructure.subtype.valid + column: subtype + check: in + value: + - aerialway + - airport + - barrier + - bridge + - communication + - emergency + - manhole + - pedestrian + - pier + - power + - quay + - recreation + - tower + - transit + - transportation + - utility + - waste_management + - water + severity: error + - name: infrastructure.height.positive + column: height + check: gt + value: 0 + severity: error + - name: infrastructure.surface.valid + column: surface + check: in + value: + - asphalt + - cobblestone + - compacted + - concrete + - concrete_plates + - dirt + - earth + - fine_gravel + - grass + - gravel + - ground + - paved + - paving_stones + - pebblestone + - recreation_grass + - recreation_paved + - recreation_sand + - rubber + - sand + - sett + - tartan + - unpaved + - wood + - woodchips + severity: error + - name: infrastructure.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: infrastructure.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: infrastructure.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: infrastructure.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: infrastructure.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: infrastructure.names.rules.value.pattern + column: names.rules.value 
+ check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: infrastructure.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: infrastructure.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: infrastructure.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: infrastructure.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: infrastructure.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: infrastructure.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: infrastructure.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: infrastructure.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: 
infrastructure.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: infrastructure.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: infrastructure.wikidata.pattern + column: wikidata + check: pattern + value: ^Q\d+$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/land.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/land.yaml new file mode 100644 index 000000000..c021bc18d --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/land.yaml @@ -0,0 +1,307 @@ +version: '1' +datasets: +- name: land + source_model: overture.schema.base.land.Land + id_column: id + rules: + - name: land.id.not_null + column: id + check: not_null + severity: error + - name: land.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: land.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: land.geometry.not_null + column: geometry + check: not_null + severity: error + - name: land.geometry.type + column: geometry + check: geometry_type + value: + - LineString + - MultiPolygon + - Point + - Polygon + severity: error + - name: land.version.not_null + column: version + check: not_null + severity: error + - name: land.version.gte + column: version + check: gte + value: 0 + severity: error + - name: land.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: land.sources.unique + column: sources + check: unique + severity: error + - name: land.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: land.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + 
when: + column: sources + check: not_null + severity: error + - name: land.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: land.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: land.class.valid + column: class + check: in + value: + - archipelago + - bare_rock + - beach + - cave_entrance + - cliff + - desert + - dune + - fell + - forest + - glacier + - grass + - grassland + - heath + - hill + - island + - islet + - land + - meadow + - meteor_crater + - mountain_range + - peak + - peninsula + - plateau + - reef + - ridge + - rock + - saddle + - sand + - scree + - scrub + - shingle + - shrub + - shrubbery + - stone + - tree + - tree_row + - tundra + - valley + - volcanic_caldera_rim + - volcano + - wetland + - wood + severity: error + - name: land.subtype.valid + column: subtype + check: in + value: + - crater + - desert + - forest + - glacier + - grass + - land + - physical + - reef + - rock + - sand + - shrub + - tree + - wetland + severity: error + - name: land.elevation.lte + column: elevation + check: lte + value: 9000 + severity: error + - name: land.surface.valid + column: surface + check: in + value: + - asphalt + - cobblestone + - compacted + - concrete + - concrete_plates + - dirt + - earth + - fine_gravel + - grass + - gravel + - ground + - paved + - paving_stones + - pebblestone + - recreation_grass + - recreation_paved + - recreation_sand + - rubber + - sand + - sett + - tartan + - unpaved + - wood + - woodchips + severity: error + - name: land.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: land.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: land.names.primary.pattern + column: names.primary + check: pattern + 
value: ^(\S.*)?\S$ + severity: error + - name: land.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: land.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: land.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: land.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: land.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: land.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: land.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: land.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: land.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: land.names.rules.perspectives.countries.min_list_length + column: 
names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: land.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: land.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: land.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: land.wikidata.pattern + column: wikidata + check: pattern + value: ^Q\d+$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/landcover.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/landcover.yaml new file mode 100644 index 000000000..b5ba426f4 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/landcover.yaml @@ -0,0 +1,122 @@ +version: '1' +datasets: +- name: land_cover + source_model: overture.schema.base.land_cover.LandCover + id_column: id + rules: + - name: land_cover.id.not_null + column: id + check: not_null + severity: error + - name: land_cover.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: land_cover.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: land_cover.geometry.not_null + column: geometry + check: not_null + severity: error + - name: land_cover.geometry.type + column: geometry + check: geometry_type + value: + - MultiPolygon + - Polygon + severity: error + - name: land_cover.version.not_null + column: version + check: not_null + severity: error + - name: land_cover.version.gte + column: version + check: gte + value: 0 + severity: error + - name: land_cover.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: 
error + - name: land_cover.sources.unique + column: sources + check: unique + severity: error + - name: land_cover.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: land_cover.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: land_cover.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: land_cover.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: land_cover.subtype.not_null + column: subtype + check: not_null + severity: error + - name: land_cover.subtype.valid + column: subtype + check: in + value: + - barren + - crop + - forest + - grass + - mangrove + - moss + - shrub + - snow + - urban + - wetland + severity: error + - name: land_cover.cartography.prominence.range + column: cartography.prominence + check: between + value: + - 1 + - 100 + severity: error + - name: land_cover.cartography.min_zoom.range + column: cartography.min_zoom + check: between + value: + - 0 + - 23 + severity: error + - name: land_cover.cartography.max_zoom.range + column: cartography.max_zoom + check: between + value: + - 0 + - 23 + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/landuse.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/landuse.yaml new file mode 100644 index 000000000..5450b7710 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/landuse.yaml @@ -0,0 +1,393 @@ +version: '1' +datasets: +- name: land_use + source_model: overture.schema.base.land_use.LandUse + id_column: id + rules: + - name: land_use.id.not_null + column: id + check: not_null + severity: error + - name: 
land_use.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: land_use.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: land_use.geometry.not_null + column: geometry + check: not_null + severity: error + - name: land_use.geometry.type + column: geometry + check: geometry_type + value: + - LineString + - MultiPolygon + - Point + - Polygon + severity: error + - name: land_use.version.not_null + column: version + check: not_null + severity: error + - name: land_use.version.gte + column: version + check: gte + value: 0 + severity: error + - name: land_use.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: land_use.sources.unique + column: sources + check: unique + severity: error + - name: land_use.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: land_use.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: land_use.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: land_use.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: land_use.class.not_null + column: class + check: not_null + severity: error + - name: land_use.class.valid + column: class + check: in + value: + - aboriginal_land + - airfield + - allotments + - animal_keeping + - aquaculture + - barracks + - base + - beach_resort + - brownfield + - bunker + - camp_site + - cemetery + - clinic + - college + - commercial + - connection + - construction + - danger_area + - doctors + - dog_park + - downhill + - driving_range + - driving_school + - education + - 
environmental + - fairway + - farmland + - farmyard + - fatbike + - flowerbed + - forest + - garages + - garden + - golf_course + - grass + - grave_yard + - green + - greenfield + - greenhouse_horticulture + - highway + - hike + - hospital + - ice_skate + - industrial + - institutional + - kindergarten + - landfill + - lateral_water_hazard + - logging + - marina + - meadow + - military + - military_hospital + - military_school + - music_school + - national_park + - natural_monument + - nature_reserve + - naval_base + - nordic + - nuclear_explosion_site + - obstacle_course + - orchard + - park + - peat_cutting + - pedestrian + - pitch + - plant_nursery + - playground + - plaza + - protected + - protected_landscape_seascape + - quarry + - railway + - range + - recreation_ground + - religious + - residential + - resort + - retail + - rough + - salt_pond + - school + - schoolyard + - ski_jump + - skitour + - sled + - sleigh + - snow_park + - species_management_area + - stadium + - state_park + - static_caravan + - strict_nature_reserve + - tee + - theme_park + - track + - traffic_island + - training_area + - trench + - university + - village_green + - vineyard + - water_hazard + - water_park + - wilderness_area + - winter_sports + - works + - zoo + severity: error + - name: land_use.subtype.not_null + column: subtype + check: not_null + severity: error + - name: land_use.subtype.valid + column: subtype + check: in + value: + - agriculture + - aquaculture + - campground + - cemetery + - construction + - developed + - education + - entertainment + - golf + - grass + - horticulture + - landfill + - managed + - medical + - military + - park + - pedestrian + - protected + - recreation + - religious + - residential + - resource_extraction + - transportation + - winter_sports + severity: error + - name: land_use.elevation.lte + column: elevation + check: lte + value: 9000 + severity: error + - name: land_use.surface.valid + column: surface + check: in + value: + - asphalt + - 
cobblestone + - compacted + - concrete + - concrete_plates + - dirt + - earth + - fine_gravel + - grass + - gravel + - ground + - paved + - paving_stones + - pebblestone + - recreation_grass + - recreation_paved + - recreation_sand + - rubber + - sand + - sett + - tartan + - unpaved + - wood + - woodchips + severity: error + - name: land_use.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: land_use.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: land_use.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: land_use.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: land_use.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: land_use.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: land_use.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: land_use.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: land_use.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: 
land_use.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: land_use.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: land_use.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: land_use.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: land_use.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: land_use.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: land_use.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: land_use.wikidata.pattern + column: wikidata + check: pattern + value: ^Q\d+$ + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/place.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/place.yaml new file mode 100644 index 000000000..f6f274336 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/place.yaml @@ -0,0 +1,484 @@ +version: '1' +datasets: +- name: place + source_model: overture.schema.places.place.Place + id_column: id + rules: + - name: place.id.not_null + column: id + check: not_null + severity: error + 
- name: place.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: place.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: place.geometry.not_null + column: geometry + check: not_null + severity: error + - name: place.geometry.type + column: geometry + check: geometry_type + value: + - Point + severity: error + - name: place.version.not_null + column: version + check: not_null + severity: error + - name: place.version.gte + column: version + check: gte + value: 0 + severity: error + - name: place.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: place.sources.unique + column: sources + check: unique + severity: error + - name: place.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: place.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: place.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: place.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: place.operating_status.not_null + column: operating_status + check: not_null + severity: error + - name: place.operating_status.valid + column: operating_status + check: in + value: + - open + - permanently_closed + - temporarily_closed + severity: error + - name: place.categories.primary.not_null + column: categories.primary + check: not_null + when: + column: categories + check: not_null + severity: error + - name: place.categories.primary.pattern + column: categories.primary + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + severity: error + - name: 
place.categories.alternate.pattern + column: categories.alternate + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + list_columns: + - categories.alternate + severity: error + - name: place.categories.alternate.unique + column: categories.alternate + check: unique + severity: error + - name: place.basic_category.pattern + column: basic_category + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + severity: error + - name: place.taxonomy.primary.not_null + column: taxonomy.primary + check: not_null + when: + column: taxonomy + check: not_null + severity: error + - name: place.taxonomy.primary.pattern + column: taxonomy.primary + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + severity: error + - name: place.taxonomy.hierarchy.not_null + column: taxonomy.hierarchy + check: not_null + when: + column: taxonomy + check: not_null + severity: error + - name: place.taxonomy.hierarchy.min_list_length + column: taxonomy.hierarchy + check: min_list_length + value: 1 + severity: error + - name: place.taxonomy.hierarchy.pattern + column: taxonomy.hierarchy + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + list_columns: + - taxonomy.hierarchy + severity: error + - name: place.taxonomy.hierarchy.unique + column: taxonomy.hierarchy + check: unique + severity: error + - name: place.taxonomy.alternates.min_list_length + column: taxonomy.alternates + check: min_list_length + value: 1 + severity: error + - name: place.taxonomy.alternates.pattern + column: taxonomy.alternates + check: pattern + value: ^[a-z0-9]+(_[a-z0-9]+)*$ + list_columns: + - taxonomy.alternates + severity: error + - name: place.taxonomy.alternates.unique + column: taxonomy.alternates + check: unique + severity: error + - name: place.confidence.range + column: confidence + check: between + value: + - 0.0 + - 1.0 + severity: error + - name: place.websites.min_list_length + column: websites + check: min_list_length + value: 1 + severity: error + - name: place.websites.unique + column: websites + check: unique 
+ severity: error + - name: place.socials.min_list_length + column: socials + check: min_list_length + value: 1 + severity: error + - name: place.socials.unique + column: socials + check: unique + severity: error + - name: place.emails.min_list_length + column: emails + check: min_list_length + value: 1 + severity: error + - name: place.emails.unique + column: emails + check: unique + severity: error + - name: place.phones.min_list_length + column: phones + check: min_list_length + value: 1 + severity: error + - name: place.phones.pattern + column: phones + check: pattern + value: ^\+\d{1,3}[\s\-\(\)0-9]+$ + list_columns: + - phones + severity: error + - name: place.phones.unique + column: phones + check: unique + severity: error + - name: place.brand.names.primary.not_null + column: brand.names.primary + check: not_null + when: + column: brand.names + check: not_null + severity: error + - name: place.brand.names.primary.min_length + column: brand.names.primary + check: min_length + value: 1 + severity: error + - name: place.brand.names.primary.pattern + column: brand.names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: place.brand.names.rules.value.not_null + column: brand.names.rules.value + check: not_null + list_columns: &id003 + - brand.names.rules + when: + column: brand.names.rules + check: not_null + severity: error + - name: place.brand.names.rules.value.min_length + column: brand.names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - brand.names.rules + severity: error + - name: place.brand.names.rules.value.pattern + column: brand.names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: place.brand.names.rules.variant.not_null + column: brand.names.rules.variant + check: not_null + list_columns: *id003 + when: + column: brand.names.rules + check: not_null + severity: error + - name: place.brand.names.rules.variant.valid + column: brand.names.rules.variant + 
check: in + value: + - alternate + - common + - official + - short + list_columns: + - brand.names.rules + severity: error + - name: place.brand.names.rules.language.pattern + column: brand.names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - brand.names.rules + severity: error + - name: place.brand.names.rules.perspectives.mode.not_null + column: brand.names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - brand.names.rules + when: + column: brand.names.rules.perspectives + check: not_null + severity: error + - name: place.brand.names.rules.perspectives.mode.valid + column: brand.names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - brand.names.rules + severity: error + - name: place.brand.names.rules.perspectives.countries.not_null + column: brand.names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: brand.names.rules.perspectives + check: not_null + severity: error + - name: place.brand.names.rules.perspectives.countries.min_list_length + column: brand.names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: place.brand.names.rules.perspectives.countries.pattern + column: brand.names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - brand.names.rules + - brand.names.rules.perspectives.countries + severity: error + - name: place.brand.names.rules.perspectives.countries.unique + column: brand.names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: place.brand.names.rules.side.valid + column: brand.names.rules.side + check: in + value: + - left + - right + list_columns: + - brand.names.rules + severity: 
error + - name: place.brand.wikidata.pattern + column: brand.wikidata + check: pattern + value: ^Q\d+$ + severity: error + - name: place.addresses.min_list_length + column: addresses + check: min_list_length + value: 1 + severity: error + - name: place.addresses.region.pattern + column: addresses.region + check: pattern + value: ^[A-Z]{2}-[A-Z0-9]{1,3}$ + list_columns: + - addresses + severity: error + - name: place.addresses.country.pattern + column: addresses.country + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - addresses + severity: error + - name: place.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: place.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: place.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: place.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id006 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: place.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id005 + - names.rules + severity: error + - name: place.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id005 + severity: error + - name: place.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id006 + when: + column: names.rules + check: not_null + severity: error + - name: place.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: place.names.rules.language.pattern + column: names.rules.language + check: pattern + value: 
^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: place.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id007 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: place.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: place.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id007 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: place.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id007 + severity: error + - name: place.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: place.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id007 + severity: error + - name: place.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/segment.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/segment.yaml new file mode 100644 index 000000000..7f4314d73 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/segment.yaml @@ -0,0 +1,1198 @@ +version: '1' +datasets: +- name: segment + 
source_model: Segment + id_column: id + rules: + - name: segment.id.not_null + column: id + check: not_null + severity: error + - name: segment.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: segment.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: segment.geometry.not_null + column: geometry + check: not_null + severity: error + - name: segment.geometry.type + column: geometry + check: geometry_type + value: + - LineString + severity: error + - name: segment.version.not_null + column: version + check: not_null + severity: error + - name: segment.version.gte + column: version + check: gte + value: 0 + severity: error + - name: segment.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: segment.sources.unique + column: sources + check: unique + severity: error + - name: segment.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: segment.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: segment.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: segment.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: segment.subtype.not_null + column: subtype + check: not_null + severity: error + - name: segment.subtype.valid + column: subtype + check: in + value: + - rail + - road + - water + severity: error + - name: segment.access_restrictions.min_list_length + column: access_restrictions + check: min_list_length + value: 1 + severity: error + - name: segment.access_restrictions.unique + column: access_restrictions + check: 
unique + severity: error + - name: segment.access_restrictions.access_type.not_null + column: access_restrictions.access_type + check: not_null + list_columns: + - access_restrictions + when: + column: access_restrictions + check: not_null + severity: error + - name: segment.access_restrictions.access_type.valid + column: access_restrictions.access_type + check: in + value: + - allowed + - denied + - designated + list_columns: + - access_restrictions + severity: error + - name: segment.access_restrictions.when.heading.valid + column: access_restrictions.when.heading + check: in + value: + - backward + - forward + list_columns: + - access_restrictions + severity: error + - name: segment.access_restrictions.when.mode.min_list_length + column: access_restrictions.when.mode + check: min_list_length + value: 1 + list_columns: &id002 + - access_restrictions + severity: error + - name: segment.access_restrictions.when.mode.valid + column: access_restrictions.when.mode + check: in + value: + - bicycle + - bus + - car + - emergency + - foot + - hgv + - hov + - motor_vehicle + - motorcycle + - truck + - vehicle + list_columns: + - access_restrictions + - access_restrictions.when.mode + severity: error + - name: segment.access_restrictions.when.mode.unique + column: access_restrictions.when.mode + check: unique + list_columns: *id002 + severity: error + - name: segment.access_restrictions.when.using.min_list_length + column: access_restrictions.when.using + check: min_list_length + value: 1 + list_columns: *id002 + severity: error + - name: segment.access_restrictions.when.using.valid + column: access_restrictions.when.using + check: in + value: + - as_customer + - at_destination + - for_forestry + - to_deliver + - to_farm + list_columns: + - access_restrictions + - access_restrictions.when.using + severity: error + - name: segment.access_restrictions.when.using.unique + column: access_restrictions.when.using + check: unique + list_columns: *id002 + severity: error + - name: 
segment.access_restrictions.when.recognized.min_list_length + column: access_restrictions.when.recognized + check: min_list_length + value: 1 + list_columns: *id002 + severity: error + - name: segment.access_restrictions.when.recognized.valid + column: access_restrictions.when.recognized + check: in + value: + - as_disabled + - as_employee + - as_permitted + - as_private + - as_student + list_columns: + - access_restrictions + - access_restrictions.when.recognized + severity: error + - name: segment.access_restrictions.when.recognized.unique + column: access_restrictions.when.recognized + check: unique + list_columns: *id002 + severity: error + - name: segment.access_restrictions.when.vehicle.min_list_length + column: access_restrictions.when.vehicle + check: min_list_length + value: 1 + list_columns: *id002 + severity: error + - name: segment.access_restrictions.when.vehicle.unique + column: access_restrictions.when.vehicle + check: unique + list_columns: *id002 + severity: error + - name: segment.connectors.min_list_length + column: connectors + check: min_list_length + value: 2 + severity: error + - name: segment.connectors.unique + column: connectors + check: unique + severity: error + - name: segment.connectors.connector_id.not_null + column: connectors.connector_id + check: not_null + list_columns: + - connectors + when: + column: connectors + check: not_null + severity: error + - name: segment.connectors.connector_id.min_length + column: connectors.connector_id + check: min_length + value: 1 + list_columns: &id003 + - connectors + severity: error + - name: segment.connectors.connector_id.pattern + column: connectors.connector_id + check: pattern + value: ^\S+$ + list_columns: *id003 + severity: error + - name: segment.connectors.at.range + column: connectors.at + check: between + value: + - 0.0 + - 1.0 + list_columns: + - connectors + severity: error + - name: segment.level_rules.value.not_null + column: level_rules.value + check: not_null + list_columns: + 
- level_rules + when: + column: level_rules + check: not_null + severity: error + - name: segment.routes.name.min_length + column: routes.name + check: min_length + value: 1 + list_columns: &id004 + - routes + severity: error + - name: segment.routes.name.pattern + column: routes.name + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id004 + severity: error + - name: segment.routes.network.min_length + column: routes.network + check: min_length + value: 1 + list_columns: &id005 + - routes + severity: error + - name: segment.routes.network.pattern + column: routes.network + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id005 + severity: error + - name: segment.routes.ref.min_length + column: routes.ref + check: min_length + value: 1 + list_columns: &id006 + - routes + severity: error + - name: segment.routes.ref.pattern + column: routes.ref + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id006 + severity: error + - name: segment.routes.symbol.min_length + column: routes.symbol + check: min_length + value: 1 + list_columns: &id007 + - routes + severity: error + - name: segment.routes.symbol.pattern + column: routes.symbol + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id007 + severity: error + - name: segment.routes.wikidata.pattern + column: routes.wikidata + check: pattern + value: ^Q\d+$ + list_columns: + - routes + severity: error + - name: segment.subclass_rules.value.not_null + column: subclass_rules.value + check: not_null + list_columns: + - subclass_rules + when: + column: subclass_rules + check: not_null + severity: error + - name: segment.subclass_rules.value.valid + column: subclass_rules.value + check: in + value: + - alley + - crosswalk + - cycle_crossing + - driveway + - link + - parking_aisle + - sidewalk + list_columns: + - subclass_rules + severity: error + - name: segment.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: 
segment.names.primary.min_length + column: names.primary + check: min_length + value: 1 + severity: error + - name: segment.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: segment.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id009 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: segment.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id008 + - names.rules + severity: error + - name: segment.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id008 + severity: error + - name: segment.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id009 + when: + column: names.rules + check: not_null + severity: error + - name: segment.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: segment.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: segment.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id010 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: segment.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: segment.names.rules.perspectives.countries.not_null + column: 
names.rules.perspectives.countries + check: not_null + list_columns: *id010 + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: segment.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id010 + severity: error + - name: segment.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: segment.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id010 + severity: error + - name: segment.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: segment.class.not_null + column: class + check: not_null + severity: error + - name: segment.class.valid + column: class + check: in + value: + - bridleway + - cycleway + - footway + - living_street + - motorway + - path + - pedestrian + - primary + - residential + - secondary + - service + - steps + - tertiary + - track + - trunk + - unclassified + - unknown + severity: error + - name: segment.destinations.from_connector_id.not_null + column: destinations.from_connector_id + check: not_null + list_columns: &id012 + - destinations + when: + column: destinations + check: not_null + severity: error + - name: segment.destinations.from_connector_id.min_length + column: destinations.from_connector_id + check: min_length + value: 1 + list_columns: &id011 + - destinations + severity: error + - name: segment.destinations.from_connector_id.pattern + column: destinations.from_connector_id + check: pattern + value: ^\S+$ + list_columns: *id011 + severity: error + - name: segment.destinations.to_connector_id.not_null + column: destinations.to_connector_id + check: not_null + 
list_columns: *id012 + when: + column: destinations + check: not_null + severity: error + - name: segment.destinations.to_connector_id.min_length + column: destinations.to_connector_id + check: min_length + value: 1 + list_columns: &id013 + - destinations + severity: error + - name: segment.destinations.to_connector_id.pattern + column: destinations.to_connector_id + check: pattern + value: ^\S+$ + list_columns: *id013 + severity: error + - name: segment.destinations.to_segment_id.not_null + column: destinations.to_segment_id + check: not_null + list_columns: *id012 + when: + column: destinations + check: not_null + severity: error + - name: segment.destinations.to_segment_id.min_length + column: destinations.to_segment_id + check: min_length + value: 1 + list_columns: &id014 + - destinations + severity: error + - name: segment.destinations.to_segment_id.pattern + column: destinations.to_segment_id + check: pattern + value: ^\S+$ + list_columns: *id014 + severity: error + - name: segment.destinations.final_heading.not_null + column: destinations.final_heading + check: not_null + list_columns: *id012 + when: + column: destinations + check: not_null + severity: error + - name: segment.destinations.final_heading.valid + column: destinations.final_heading + check: in + value: + - backward + - forward + list_columns: + - destinations + severity: error + - name: segment.destinations.labels.min_list_length + column: destinations.labels + check: min_list_length + value: 1 + list_columns: *id012 + severity: error + - name: segment.destinations.labels.unique + column: destinations.labels + check: unique + list_columns: *id012 + severity: error + - name: segment.destinations.labels.value.not_null + column: destinations.labels.value + check: not_null + list_columns: + - destinations + - destinations.labels + when: + column: destinations.labels + check: not_null + severity: error + - name: segment.destinations.labels.value.min_length + column: destinations.labels.value + check: 
min_length + value: 1 + list_columns: &id015 + - destinations + - destinations.labels + severity: error + - name: segment.destinations.labels.value.pattern + column: destinations.labels.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id015 + severity: error + - name: segment.destinations.symbols.valid + column: destinations.symbols + check: in + value: + - airport + - bus + - camp_site + - center + - ferry + - food + - fuel + - fuel_diesel + - hospital + - industrial + - info + - interchange + - lodging + - motorroad + - motorway + - parking + - rest_area + - restrooms + - train_station + - viewpoint + list_columns: + - destinations + - destinations.symbols + severity: error + - name: segment.destinations.symbols.unique + column: destinations.symbols + check: unique + list_columns: *id012 + severity: error + - name: segment.destinations.when.heading.not_null + column: destinations.when.heading + check: not_null + list_columns: + - destinations + when: + column: destinations.when + check: not_null + severity: error + - name: segment.destinations.when.heading.valid + column: destinations.when.heading + check: in + value: + - backward + - forward + list_columns: + - destinations + severity: error + - name: segment.prohibited_transitions.sequence.not_null + column: prohibited_transitions.sequence + check: not_null + list_columns: &id016 + - prohibited_transitions + when: + column: prohibited_transitions + check: not_null + severity: error + - name: segment.prohibited_transitions.sequence.min_list_length + column: prohibited_transitions.sequence + check: min_list_length + value: 1 + list_columns: *id016 + severity: error + - name: segment.prohibited_transitions.sequence.unique + column: prohibited_transitions.sequence + check: unique + list_columns: *id016 + severity: error + - name: segment.prohibited_transitions.sequence.connector_id.not_null + column: prohibited_transitions.sequence.connector_id + check: not_null + list_columns: &id018 + - 
prohibited_transitions + - prohibited_transitions.sequence + when: + column: prohibited_transitions + check: not_null + severity: error + - name: segment.prohibited_transitions.sequence.connector_id.min_length + column: prohibited_transitions.sequence.connector_id + check: min_length + value: 1 + list_columns: &id017 + - prohibited_transitions + - prohibited_transitions.sequence + severity: error + - name: segment.prohibited_transitions.sequence.connector_id.pattern + column: prohibited_transitions.sequence.connector_id + check: pattern + value: ^\S+$ + list_columns: *id017 + severity: error + - name: segment.prohibited_transitions.sequence.segment_id.not_null + column: prohibited_transitions.sequence.segment_id + check: not_null + list_columns: *id018 + when: + column: prohibited_transitions + check: not_null + severity: error + - name: segment.prohibited_transitions.sequence.segment_id.min_length + column: prohibited_transitions.sequence.segment_id + check: min_length + value: 1 + list_columns: &id019 + - prohibited_transitions + - prohibited_transitions.sequence + severity: error + - name: segment.prohibited_transitions.sequence.segment_id.pattern + column: prohibited_transitions.sequence.segment_id + check: pattern + value: ^\S+$ + list_columns: *id019 + severity: error + - name: segment.prohibited_transitions.final_heading.not_null + column: prohibited_transitions.final_heading + check: not_null + list_columns: *id016 + when: + column: prohibited_transitions + check: not_null + severity: error + - name: segment.prohibited_transitions.final_heading.valid + column: prohibited_transitions.final_heading + check: in + value: + - backward + - forward + list_columns: + - prohibited_transitions + severity: error + - name: segment.prohibited_transitions.when.heading.valid + column: prohibited_transitions.when.heading + check: in + value: + - backward + - forward + list_columns: + - prohibited_transitions + severity: error + - name: 
segment.prohibited_transitions.when.mode.min_list_length + column: prohibited_transitions.when.mode + check: min_list_length + value: 1 + list_columns: &id020 + - prohibited_transitions + severity: error + - name: segment.prohibited_transitions.when.mode.valid + column: prohibited_transitions.when.mode + check: in + value: + - bicycle + - bus + - car + - emergency + - foot + - hgv + - hov + - motor_vehicle + - motorcycle + - truck + - vehicle + list_columns: + - prohibited_transitions + - prohibited_transitions.when.mode + severity: error + - name: segment.prohibited_transitions.when.mode.unique + column: prohibited_transitions.when.mode + check: unique + list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.using.min_list_length + column: prohibited_transitions.when.using + check: min_list_length + value: 1 + list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.using.valid + column: prohibited_transitions.when.using + check: in + value: + - as_customer + - at_destination + - for_forestry + - to_deliver + - to_farm + list_columns: + - prohibited_transitions + - prohibited_transitions.when.using + severity: error + - name: segment.prohibited_transitions.when.using.unique + column: prohibited_transitions.when.using + check: unique + list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.recognized.min_list_length + column: prohibited_transitions.when.recognized + check: min_list_length + value: 1 + list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.recognized.valid + column: prohibited_transitions.when.recognized + check: in + value: + - as_disabled + - as_employee + - as_permitted + - as_private + - as_student + list_columns: + - prohibited_transitions + - prohibited_transitions.when.recognized + severity: error + - name: segment.prohibited_transitions.when.recognized.unique + column: prohibited_transitions.when.recognized + check: unique + 
list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.vehicle.min_list_length + column: prohibited_transitions.when.vehicle + check: min_list_length + value: 1 + list_columns: *id020 + severity: error + - name: segment.prohibited_transitions.when.vehicle.unique + column: prohibited_transitions.when.vehicle + check: unique + list_columns: *id020 + severity: error + - name: segment.road_flags.min_list_length + column: road_flags + check: min_list_length + value: 1 + severity: error + - name: segment.road_flags.unique + column: road_flags + check: unique + severity: error + - name: segment.road_flags.values.not_null + column: road_flags.values + check: not_null + list_columns: &id021 + - road_flags + when: + column: road_flags + check: not_null + severity: error + - name: segment.road_flags.values.min_list_length + column: road_flags.values + check: min_list_length + value: 1 + list_columns: *id021 + severity: error + - name: segment.road_flags.values.valid + column: road_flags.values + check: in + value: + - is_abandoned + - is_bridge + - is_covered + - is_indoor + - is_link + - is_tunnel + - is_under_construction + list_columns: + - road_flags + - road_flags.values + severity: error + - name: segment.road_flags.values.unique + column: road_flags.values + check: unique + list_columns: *id021 + severity: error + - name: segment.road_surface.min_list_length + column: road_surface + check: min_list_length + value: 1 + severity: error + - name: segment.road_surface.unique + column: road_surface + check: unique + severity: error + - name: segment.road_surface.value.not_null + column: road_surface.value + check: not_null + list_columns: + - road_surface + when: + column: road_surface + check: not_null + severity: error + - name: segment.road_surface.value.valid + column: road_surface.value + check: in + value: + - dirt + - gravel + - metal + - paved + - paving_stones + - unknown + - unpaved + list_columns: + - road_surface + severity: error + 
- name: segment.speed_limits.min_list_length + column: speed_limits + check: min_list_length + value: 1 + severity: error + - name: segment.speed_limits.unique + column: speed_limits + check: unique + severity: error + - name: segment.speed_limits.max_speed.value.not_null + column: speed_limits.max_speed.value + check: not_null + list_columns: &id022 + - speed_limits + when: + column: speed_limits.max_speed + check: not_null + severity: error + - name: segment.speed_limits.max_speed.value.range + column: speed_limits.max_speed.value + check: between + value: + - 1 + - 350 + list_columns: + - speed_limits + severity: error + - name: segment.speed_limits.max_speed.unit.not_null + column: speed_limits.max_speed.unit + check: not_null + list_columns: *id022 + when: + column: speed_limits.max_speed + check: not_null + severity: error + - name: segment.speed_limits.max_speed.unit.valid + column: speed_limits.max_speed.unit + check: in + value: + - km/h + - mph + list_columns: + - speed_limits + severity: error + - name: segment.speed_limits.min_speed.value.not_null + column: speed_limits.min_speed.value + check: not_null + list_columns: &id023 + - speed_limits + when: + column: speed_limits.min_speed + check: not_null + severity: error + - name: segment.speed_limits.min_speed.value.range + column: speed_limits.min_speed.value + check: between + value: + - 1 + - 350 + list_columns: + - speed_limits + severity: error + - name: segment.speed_limits.min_speed.unit.not_null + column: speed_limits.min_speed.unit + check: not_null + list_columns: *id023 + when: + column: speed_limits.min_speed + check: not_null + severity: error + - name: segment.speed_limits.min_speed.unit.valid + column: speed_limits.min_speed.unit + check: in + value: + - km/h + - mph + list_columns: + - speed_limits + severity: error + - name: segment.speed_limits.when.heading.valid + column: speed_limits.when.heading + check: in + value: + - backward + - forward + list_columns: + - speed_limits + severity: 
error + - name: segment.speed_limits.when.mode.min_list_length + column: speed_limits.when.mode + check: min_list_length + value: 1 + list_columns: &id024 + - speed_limits + severity: error + - name: segment.speed_limits.when.mode.valid + column: speed_limits.when.mode + check: in + value: + - bicycle + - bus + - car + - emergency + - foot + - hgv + - hov + - motor_vehicle + - motorcycle + - truck + - vehicle + list_columns: + - speed_limits + - speed_limits.when.mode + severity: error + - name: segment.speed_limits.when.mode.unique + column: speed_limits.when.mode + check: unique + list_columns: *id024 + severity: error + - name: segment.speed_limits.when.using.min_list_length + column: speed_limits.when.using + check: min_list_length + value: 1 + list_columns: *id024 + severity: error + - name: segment.speed_limits.when.using.valid + column: speed_limits.when.using + check: in + value: + - as_customer + - at_destination + - for_forestry + - to_deliver + - to_farm + list_columns: + - speed_limits + - speed_limits.when.using + severity: error + - name: segment.speed_limits.when.using.unique + column: speed_limits.when.using + check: unique + list_columns: *id024 + severity: error + - name: segment.speed_limits.when.recognized.min_list_length + column: speed_limits.when.recognized + check: min_list_length + value: 1 + list_columns: *id024 + severity: error + - name: segment.speed_limits.when.recognized.valid + column: speed_limits.when.recognized + check: in + value: + - as_disabled + - as_employee + - as_permitted + - as_private + - as_student + list_columns: + - speed_limits + - speed_limits.when.recognized + severity: error + - name: segment.speed_limits.when.recognized.unique + column: speed_limits.when.recognized + check: unique + list_columns: *id024 + severity: error + - name: segment.speed_limits.when.vehicle.min_list_length + column: speed_limits.when.vehicle + check: min_list_length + value: 1 + list_columns: *id024 + severity: error + - name: 
segment.speed_limits.when.vehicle.unique + column: speed_limits.when.vehicle + check: unique + list_columns: *id024 + severity: error + - name: segment.subclass.valid + column: subclass + check: in + value: + - alley + - crosswalk + - cycle_crossing + - driveway + - link + - parking_aisle + - sidewalk + severity: error + - name: segment.width_rules.min_list_length + column: width_rules + check: min_list_length + value: 1 + severity: error + - name: segment.width_rules.unique + column: width_rules + check: unique + severity: error + - name: segment.width_rules.value.not_null + column: width_rules.value + check: not_null + list_columns: + - width_rules + when: + column: width_rules + check: not_null + severity: error + - name: segment.width_rules.value.positive + column: width_rules.value + check: gt + value: 0 + list_columns: + - width_rules + severity: error + - name: segment.class.not_null + column: class + check: not_null + severity: error + - name: segment.class.valid + column: class + check: in + value: + - funicular + - light_rail + - monorail + - narrow_gauge + - standard_gauge + - subway + - tram + - unknown + severity: error + - name: segment.rail_flags.min_list_length + column: rail_flags + check: min_list_length + value: 1 + severity: error + - name: segment.rail_flags.unique + column: rail_flags + check: unique + severity: error + - name: segment.rail_flags.values.not_null + column: rail_flags.values + check: not_null + list_columns: &id025 + - rail_flags + when: + column: rail_flags + check: not_null + severity: error + - name: segment.rail_flags.values.min_list_length + column: rail_flags.values + check: min_list_length + value: 1 + list_columns: *id025 + severity: error + - name: segment.rail_flags.values.valid + column: rail_flags.values + check: in + value: + - is_abandoned + - is_bridge + - is_covered + - is_disused + - is_freight + - is_passenger + - is_tunnel + - is_under_construction + list_columns: + - rail_flags + - rail_flags.values + 
severity: error + - name: segment.rail_flags.values.unique + column: rail_flags.values + check: unique + list_columns: *id025 + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/sources.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/sources.yaml new file mode 100644 index 000000000..f5b8c7a63 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/sources.yaml @@ -0,0 +1,103 @@ +version: '1' +datasets: +- name: sources + source_model: overture.schema.annex.models.Sources + id_column: id + rules: + - name: sources.datasets.not_null + column: datasets + check: not_null + severity: error + - name: sources.datasets.source_name.not_null + column: datasets.source_name + check: not_null + list_columns: &id001 + - datasets + severity: error + - name: sources.datasets.source_dataset_name.not_null + column: datasets.source_dataset_name + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.data_url.not_null + column: datasets.data_url + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.data_url_archived.not_null + column: datasets.data_url_archived + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.license_url.not_null + column: datasets.license_url + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.license_url_archived.not_null + column: datasets.license_url_archived + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.license_type.not_null + column: datasets.license_type + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.license_text.not_null + column: datasets.license_text + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.license_attribution.not_null + column: datasets.license_attribution + check: not_null + list_columns: *id001 + severity: error + - name: 
sources.datasets.coverage_bbox.not_null + column: datasets.coverage_bbox + check: not_null + list_columns: *id001 + severity: error + - name: sources.datasets.coverage_bbox.min_list_length + column: datasets.coverage_bbox + check: min_list_length + value: 4 + list_columns: *id001 + severity: error + - name: sources.datasets.coverage_bbox.max_list_length + column: datasets.coverage_bbox + check: max_list_length + value: 4 + list_columns: *id001 + severity: error + - name: sources.datasets.countries.pattern + column: datasets.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - datasets + - datasets.countries + severity: error + - name: sources.datasets.build_source.valid + column: datasets.build_source + check: in + value: + - OpenAddresses + - tf-data-platform + list_columns: + - datasets + severity: error + - name: sources.datasets.update_type.valid + column: datasets.update_type + check: in + value: + - continuous + - manual + list_columns: + - datasets + severity: error + - name: sources.license_priority.not_null + column: license_priority + check: not_null + severity: error diff --git a/packages/overture-schema-codegen/tests/golden/wassirman/water.yaml b/packages/overture-schema-codegen/tests/golden/wassirman/water.yaml new file mode 100644 index 000000000..1c653201f --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/wassirman/water.yaml @@ -0,0 +1,265 @@ +version: '1' +datasets: +- name: water + source_model: overture.schema.base.water.Water + id_column: id + rules: + - name: water.id.not_null + column: id + check: not_null + severity: error + - name: water.id.min_length + column: id + check: min_length + value: 1 + severity: error + - name: water.id.pattern + column: id + check: pattern + value: ^\S+$ + severity: error + - name: water.geometry.not_null + column: geometry + check: not_null + severity: error + - name: water.geometry.type + column: geometry + check: geometry_type + value: + - LineString + - MultiPolygon + - Point + - 
Polygon + severity: error + - name: water.version.not_null + column: version + check: not_null + severity: error + - name: water.version.gte + column: version + check: gte + value: 0 + severity: error + - name: water.sources.min_list_length + column: sources + check: min_list_length + value: 1 + severity: error + - name: water.sources.unique + column: sources + check: unique + severity: error + - name: water.sources.property.not_null + column: sources.property + check: not_null + list_columns: &id001 + - sources + when: + column: sources + check: not_null + severity: error + - name: water.sources.dataset.not_null + column: sources.dataset + check: not_null + list_columns: *id001 + when: + column: sources + check: not_null + severity: error + - name: water.sources.license.pattern + column: sources.license + check: pattern + value: ^(\S.*)?\S$ + list_columns: + - sources + severity: error + - name: water.sources.confidence.range + column: sources.confidence + check: between + value: + - 0.0 + - 1.0 + list_columns: + - sources + severity: error + - name: water.class.valid + column: class + check: in + value: + - basin + - bay + - blowhole + - canal + - cape + - ditch + - dock + - drain + - fairway + - fish_pass + - fishpond + - geyser + - hot_spring + - lagoon + - lake + - moat + - ocean + - oxbow + - pond + - reflecting_pool + - reservoir + - river + - salt_pond + - sea + - sewage + - shoal + - spring + - strait + - stream + - swimming_pool + - tidal_channel + - wastewater + - water + - water_storage + - waterfall + severity: error + - name: water.subtype.valid + column: subtype + check: in + value: + - canal + - human_made + - lake + - ocean + - physical + - pond + - reservoir + - river + - spring + - stream + - wastewater + - water + severity: error + - name: water.names.primary.not_null + column: names.primary + check: not_null + when: + column: names + check: not_null + severity: error + - name: water.names.primary.min_length + column: names.primary + check: 
min_length + value: 1 + severity: error + - name: water.names.primary.pattern + column: names.primary + check: pattern + value: ^(\S.*)?\S$ + severity: error + - name: water.names.rules.value.not_null + column: names.rules.value + check: not_null + list_columns: &id003 + - names.rules + when: + column: names.rules + check: not_null + severity: error + - name: water.names.rules.value.min_length + column: names.rules.value + check: min_length + value: 1 + list_columns: &id002 + - names.rules + severity: error + - name: water.names.rules.value.pattern + column: names.rules.value + check: pattern + value: ^(\S.*)?\S$ + list_columns: *id002 + severity: error + - name: water.names.rules.variant.not_null + column: names.rules.variant + check: not_null + list_columns: *id003 + when: + column: names.rules + check: not_null + severity: error + - name: water.names.rules.variant.valid + column: names.rules.variant + check: in + value: + - alternate + - common + - official + - short + list_columns: + - names.rules + severity: error + - name: water.names.rules.language.pattern + column: names.rules.language + check: pattern + value: ^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*$ + list_columns: + - names.rules + severity: error + - name: water.names.rules.perspectives.mode.not_null + column: names.rules.perspectives.mode + check: not_null + list_columns: &id004 + - names.rules + when: + column: names.rules.perspectives + check: not_null + severity: error + - name: water.names.rules.perspectives.mode.valid + column: names.rules.perspectives.mode + check: in + value: + - accepted_by + - disputed_by + list_columns: + - names.rules + severity: error + - name: water.names.rules.perspectives.countries.not_null + column: names.rules.perspectives.countries + check: not_null + list_columns: *id004 + when: + column: names.rules.perspectives 
+ check: not_null + severity: error + - name: water.names.rules.perspectives.countries.min_list_length + column: names.rules.perspectives.countries + check: min_list_length + value: 1 + list_columns: *id004 + severity: error + - name: water.names.rules.perspectives.countries.pattern + column: names.rules.perspectives.countries + check: pattern + value: ^[A-Z]{2}$ + list_columns: + - names.rules + - names.rules.perspectives.countries + severity: error + - name: water.names.rules.perspectives.countries.unique + column: names.rules.perspectives.countries + check: unique + list_columns: *id004 + severity: error + - name: water.names.rules.side.valid + column: names.rules.side + check: in + value: + - left + - right + list_columns: + - names.rules + severity: error + - name: water.wikidata.pattern + column: wikidata + check: pattern + value: ^Q\d+$ + severity: error diff --git a/packages/overture-schema-codegen/tests/test_golden_wassirman.py b/packages/overture-schema-codegen/tests/test_golden_wassirman.py new file mode 100644 index 000000000..3b41e5eda --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_golden_wassirman.py @@ -0,0 +1,71 @@ +"""Golden-file snapshot tests for wassirman validation IR output.""" + +from pathlib import Path + +import pytest +from codegen_test_support import assert_golden +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + is_model_class, + is_union_alias, +) +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.wassirman.pipeline import generate_validation_ir +from overture.schema.core.discovery import ModelKey, discover_models + +GOLDEN_DIR = Path(__file__).parent / "golden" / "wassirman" + + +def _entry_name(key: ModelKey, entry: object) -> str: + if isinstance(entry, type): + return entry.__name__ + 
return entry_point_class(key.entry_point) + + +@pytest.fixture(scope="module") +def all_models() -> dict: + return discover_models() + + +def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: + """Parametrize from discovered models so new types are picked up automatically.""" + if "model_name" not in metafunc.fixturenames: + return + models = discover_models() + cases = [ + (_entry_name(key, entry), f"{_entry_name(key, entry).lower()}.yaml") + for key, entry in models.items() + ] + metafunc.parametrize( + ("model_name", "golden_filename"), + cases, + ids=[name for name, _ in cases], + ) + + +def test_wassirman_golden( + model_name: str, + golden_filename: str, + update_golden: bool, + all_models: dict, +) -> None: + spec: FeatureSpec | None = None + for key, entry in all_models.items(): + if _entry_name(key, entry) == model_name: + if is_model_class(entry): + spec = extract_model(entry, entry_point=key.entry_point) + elif is_union_alias(entry): + spec = extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + break + if spec is None: + pytest.fail(f"Model {model_name} not found in discovered models") + + ir = generate_validation_ir([spec]) + actual = ir.to_yaml() + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py index b9d02d2ac..deb456bd8 100644 --- a/packages/overture-schema-codegen/tests/test_type_registry.py +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -6,6 +6,7 @@ PRIMITIVE_TYPES, TypeMapping, get_type_mapping, + is_storage_primitive_source, resolve_type_name, ) @@ -141,3 +142,23 @@ def test_ignores_is_optional(self) -> None: """resolve_type_name returns the base type regardless of is_optional.""" ti = self._make_type_info(is_optional=True) assert resolve_type_name(ti, "markdown") == "string" + + +class 
TestIsStoragePrimitiveSource: + def test_int32_is_storage_primitive(self) -> None: + assert is_storage_primitive_source("int32") is True + + def test_int64_is_storage_primitive(self) -> None: + assert is_storage_primitive_source("int64") is True + + def test_float64_is_storage_primitive(self) -> None: + assert is_storage_primitive_source("float64") is True + + def test_str_is_storage_primitive(self) -> None: + assert is_storage_primitive_source("str") is True + + def test_semantic_newtype_is_not(self) -> None: + assert is_storage_primitive_source("HexColor") is False + + def test_none_is_not(self) -> None: + assert is_storage_primitive_source(None) is False diff --git a/packages/overture-schema-codegen/tests/test_wassirman_ir.py b/packages/overture-schema-codegen/tests/test_wassirman_ir.py new file mode 100644 index 000000000..517bef8b1 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_wassirman_ir.py @@ -0,0 +1,94 @@ +"""Tests for wassirman IR data types.""" + +import yaml +from overture.schema.codegen.wassirman.ir import ( + ConditionIR, + DatasetIR, + RuleIR, + ValidationIR, +) + + +class TestRuleIR: + def test_to_dict_excludes_none(self) -> None: + rule = RuleIR( + name="t.id.not_null", check="not_null", severity="error", column="id" + ) + d = rule.to_dict() + assert d == { + "name": "t.id.not_null", + "column": "id", + "check": "not_null", + "severity": "error", + } + assert "value" not in d + assert "list_columns" not in d + assert "when" not in d + + def test_to_dict_with_all_fields(self) -> None: + rule = RuleIR( + name="t.x.not_null", + check="not_null", + severity="error", + column="x", + list_columns=["a"], + when=ConditionIR(column="a", check="not_null"), + ) + d = rule.to_dict() + assert d["list_columns"] == ["a"] + assert d["when"] == {"column": "a", "check": "not_null"} + + def test_to_dict_multi_field(self) -> None: + rule = RuleIR( + name="t.any_of", check="any_of", severity="error", columns=["a", "b"] + ) + d = rule.to_dict() + 
assert d["columns"] == ["a", "b"] + assert "column" not in d + + def test_to_dict_with_value(self) -> None: + rule = RuleIR( + name="t.x.gte", check="gte", severity="error", column="x", value=0 + ) + assert rule.to_dict()["value"] == 0 + + +class TestConditionIR: + def test_to_dict_no_value(self) -> None: + cond = ConditionIR(column="a", check="not_null") + assert cond.to_dict() == {"column": "a", "check": "not_null"} + + def test_to_dict_with_value(self) -> None: + cond = ConditionIR(column="a", check="eq", value="x") + assert cond.to_dict() == {"column": "a", "check": "eq", "value": "x"} + + +class TestDatasetIR: + def test_to_dict(self) -> None: + rule = RuleIR( + name="t.id.not_null", check="not_null", severity="error", column="id" + ) + ds = DatasetIR( + name="test", source_model="mod.Test", id_column="id", rules=[rule] + ) + d = ds.to_dict() + assert d["name"] == "test" + assert d["source_model"] == "mod.Test" + assert d["id_column"] == "id" + assert len(d["rules"]) == 1 # type: ignore[arg-type] + + +class TestValidationIR: + def test_to_yaml_roundtrips(self) -> None: + rule = RuleIR( + name="t.id.not_null", check="not_null", severity="error", column="id" + ) + ds = DatasetIR( + name="test", source_model="mod.Test", id_column="id", rules=[rule] + ) + ir = ValidationIR(datasets=[ds]) + text = ir.to_yaml() + parsed = yaml.safe_load(text) + assert parsed["version"] == "1" + assert len(parsed["datasets"]) == 1 + assert parsed["datasets"][0]["rules"][0]["check"] == "not_null" diff --git a/packages/overture-schema-codegen/tests/test_wassirman_pipeline.py b/packages/overture-schema-codegen/tests/test_wassirman_pipeline.py new file mode 100644 index 000000000..ed689e17e --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_wassirman_pipeline.py @@ -0,0 +1,38 @@ +"""Tests for the wassirman pipeline.""" + +from typing import Literal + +import pytest +import yaml +from overture.schema.codegen.extraction.model_extraction import extract_model +from 
overture.schema.codegen.wassirman.ir import ValidationIR +from overture.schema.codegen.wassirman.pipeline import generate_validation_ir +from pydantic import BaseModel + + +class TinyModel(BaseModel): + theme: Literal["test"] = "test" + type: Literal["tiny"] = "tiny" + id: str + + +@pytest.fixture +def tiny_ir() -> ValidationIR: + spec = extract_model(TinyModel) + return generate_validation_ir([spec]) + + +def test_generate_returns_validation_ir(tiny_ir: ValidationIR) -> None: + assert tiny_ir.version == "1" + assert len(tiny_ir.datasets) == 1 + assert tiny_ir.datasets[0].name == "tiny" + + +def test_source_model_fqn(tiny_ir: ValidationIR) -> None: + assert "TinyModel" in tiny_ir.datasets[0].source_model + + +def test_to_yaml_produces_valid_yaml(tiny_ir: ValidationIR) -> None: + parsed = yaml.safe_load(tiny_ir.to_yaml()) + assert parsed["version"] == "1" + assert parsed["datasets"][0]["name"] == "tiny" diff --git a/packages/overture-schema-codegen/tests/test_wassirman_walker.py b/packages/overture-schema-codegen/tests/test_wassirman_walker.py new file mode 100644 index 000000000..9dbd3f6a3 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_wassirman_walker.py @@ -0,0 +1,146 @@ +"""Tests for the wassirman walker.""" + +from __future__ import annotations + +from typing import Annotated, Literal + +from annotated_types import Ge, Le, MinLen +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.wassirman.ir import RuleIR +from overture.schema.codegen.wassirman.walker import walk_feature +from overture.schema.system.field_constraint import UniqueItemsConstraint +from pydantic import BaseModel + + +def _walk(model: type[BaseModel], dataset_name: str) -> list[RuleIR]: + spec = extract_model(model) + expand_model_tree(spec) + return walk_feature(spec, dataset_name) + + +class ScalarModel(BaseModel): + """Model with scalar fields for testing basic rule emission.""" + + theme: 
Literal["test"] = "test" + type: Literal["scalar"] = "scalar" + id: str + version: Annotated[int, Ge(0)] + name: str | None = None + score: Annotated[float, Ge(0.0), Le(1.0)] | None = None + + +class TestSkippedFields: + def test_theme_skipped(self) -> None: + rules = _walk(ScalarModel, "scalar") + names = [r.name for r in rules] + assert not any("theme" in n for n in names) + + def test_type_skipped(self) -> None: + rules = _walk(ScalarModel, "scalar") + names = [r.name for r in rules] + assert not any(n.startswith("scalar.type") for n in names) + + +class TestNotNullRules: + def test_required_field(self) -> None: + rules = _walk(ScalarModel, "scalar") + not_null_rules = [r for r in rules if r.name == "scalar.id.not_null"] + assert len(not_null_rules) == 1 + assert not_null_rules[0].check == "not_null" + assert not_null_rules[0].column == "id" + assert not_null_rules[0].when is None + + def test_optional_field_no_not_null(self) -> None: + rules = _walk(ScalarModel, "scalar") + assert not any(r.name == "scalar.name.not_null" for r in rules) + + +class TestNumericBounds: + def test_ge_emits_gte(self) -> None: + rules = _walk(ScalarModel, "scalar") + version_rules = [r for r in rules if r.column == "version" and r.check == "gte"] + assert len(version_rules) == 1 + assert version_rules[0].value == 0 + + def test_ge_le_collapses_to_between(self) -> None: + rules = _walk(ScalarModel, "scalar") + between_rules = [ + r for r in rules if r.column == "score" and r.check == "between" + ] + assert len(between_rules) == 1 + assert between_rules[0].value == [0.0, 1.0] + + +class NestedChild(BaseModel): + value: Annotated[str, MinLen(1)] + variant: Literal["a", "b"] + + +class ListParent(BaseModel): + theme: Literal["test"] = "test" + type: Literal["listy"] = "listy" + id: str + items: Annotated[list[NestedChild], MinLen(1), UniqueItemsConstraint()] | None = ( + None + ) + + +class OptionalParent(BaseModel): + theme: Literal["test"] = "test" + type: Literal["opty"] = "opty" + 
id: str + nested: NestedChild | None = None + + +class TestListColumns: + def test_list_field_min_list_length(self) -> None: + rules = _walk(ListParent, "listy") + min_len_rules = [ + r for r in rules if r.column == "items" and r.check == "min_list_length" + ] + assert len(min_len_rules) == 1 + assert min_len_rules[0].value == 1 + # Container-level check: no list_columns (items itself isn't inside another list) + assert min_len_rules[0].list_columns is None + + def test_list_element_gets_list_columns(self) -> None: + rules = _walk(ListParent, "listy") + value_rules = [r for r in rules if r.column == "items.value"] + assert len(value_rules) > 0 + for r in value_rules: + if r.list_columns is not None: + assert "items" in r.list_columns + + def test_unique_on_list(self) -> None: + rules = _walk(ListParent, "listy") + unique_rules = [r for r in rules if r.column == "items" and r.check == "unique"] + assert len(unique_rules) == 1 + + +class TestParentOptionalityGuard: + def test_required_child_under_optional_parent(self) -> None: + rules = _walk(OptionalParent, "opty") + value_not_null = [ + r for r in rules if r.column == "nested.value" and r.check == "not_null" + ] + assert len(value_not_null) == 1 + assert value_not_null[0].when is not None + assert value_not_null[0].when.column == "nested" + assert value_not_null[0].when.check == "not_null" + + def test_required_child_under_required_parent_no_guard(self) -> None: + class RequiredParent(BaseModel): + theme: Literal["test"] = "test" + type: Literal["reqp"] = "reqp" + id: str + nested: NestedChild + + rules = _walk(RequiredParent, "reqp") + value_not_null = [ + r for r in rules if r.column == "nested.value" and r.check == "not_null" + ] + assert len(value_not_null) == 1 + assert value_not_null[0].when is None