// =============================================================================
// Column Name Normalization
// =============================================================================

/// Rewrite `name` in place so that it uses the schema's spelling of the column,
/// resolving the reference case-insensitively.
///
/// SQL treats unquoted identifiers as case-insensitive, so a user may write
/// `VISUALISE CATEGORY AS x` even though the backend reports the column as
/// `category`. The validator and the generated SQL compare column names
/// case-sensitively, so the user-written name is reconciled with the schema
/// before either of them runs.
///
/// Resolution rules:
/// - An exact (case-sensitive) match always wins; `name` is left as written.
/// - Otherwise, if all case-insensitive matches share one spelling, `name` is
///   replaced by that spelling. A schema that lists the same spelling more
///   than once still counts as a single candidate.
/// - If differently-cased candidates exist (e.g. the schema has both `"Foo"`
///   and `"foo"` and the user wrote `FOO`), or nothing matches at all, `name`
///   is left untouched so the validator can report it with its usual error.
///
/// # Parameters
/// - `name`: the user-written column reference, updated in place.
/// - `schema_names`: the column names of the schema to resolve against.
fn normalize_column_ref(name: &mut String, schema_names: &[&str]) {
    // Exact spelling already present: nothing to reconcile.
    if schema_names.contains(&name.as_str()) {
        return;
    }

    let wanted = name.to_lowercase();
    let mut resolved: Option<&str> = None;
    for &candidate in schema_names {
        if candidate.to_lowercase() != wanted {
            continue;
        }
        match resolved {
            None => resolved = Some(candidate),
            // The same spelling listed twice is not an ambiguity.
            Some(previous) if previous == candidate => {}
            // Two different spellings match: ambiguous, leave `name` alone.
            Some(_) => return,
        }
    }

    if let Some(canonical) = resolved {
        // Reuse the existing allocation instead of building a new `String`.
        name.clear();
        name.push_str(canonical);
    }
}
+/// +/// Runs after `merge_global_mappings_into_layers` so every aesthetic that a +/// layer will consult is already attached to that layer's `mappings`; each +/// layer can then be normalized against its own schema. This matters for +/// multi-source layers (e.g. `MAPPING ... FROM temps` vs `... FROM ozone`), +/// where the schemas — and the column casings — can legitimately differ. +/// +/// Covers aesthetic `Column` values and `partition_by` per layer, plus +/// user-written facet variables on the plot-level `FACET` clause. +fn normalize_column_references(specs: &mut [Plot], layer_schemas: &[Schema]) { + for spec in specs { + for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { + if matches!(layer.source, Some(DataSource::Annotation)) { + continue; + } + let names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect(); + for value in layer.mappings.aesthetics.values_mut() { + if let AestheticValue::Column { name, .. } = value { + normalize_column_ref(name, &names); + } + } + for col in &mut layer.partition_by { + normalize_column_ref(col, &names); + } + } + + // Facet variables are plot-level. Normalize against the first layer + // whose schema contains the variable (case-insensitively). If no + // layer matches, leave it — `add_facet_mappings_to_layers` simply + // won't inject a mapping for layers that don't have the column. 
+ if let Some(facet) = spec.facet.as_mut() { + let normalize_var = |var: &mut String| { + for schema in layer_schemas { + let names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect(); + let before = var.clone(); + normalize_column_ref(var, &names); + if *var != before || names.contains(&var.as_str()) { + break; + } + } + }; + match &mut facet.layout { + crate::plot::FacetLayout::Wrap { variables } => { + variables.iter_mut().for_each(normalize_var); + } + crate::plot::FacetLayout::Grid { row, column } => { + row.iter_mut().for_each(normalize_var); + column.iter_mut().for_each(normalize_var); + } + } + } + } +} + // ============================================================================= // Global Mapping & Color Splitting // ============================================================================= @@ -1023,6 +1110,11 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result