diff --git a/Cargo.lock b/Cargo.lock index 2761357f..de3f46a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1413,6 +1413,43 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo-types" +version = "0.7.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94776032c45f950d30a13af6113c2ad5625316c9abfbccee4dd5a6695f8fe0f5" +dependencies = [ + "approx", + "num-traits", + "serde", +] + +[[package]] +name = "geojson" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e26f3c45b36fccc9cf2805e61d4da6bc4bbd5a3a9589b01afa3a40eff703bd79" +dependencies = [ + "log", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "geozero" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f28f34864745eb2f123c990c6ffd92c1584bd39439b3f27ff2a0f4ea5b309b" +dependencies = [ + "geojson", + "log", + "scroll", + "serde_json", + "thiserror 1.0.69", + "wkt", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1466,6 +1503,7 @@ dependencies = [ "const_format", "csscolorparser", "duckdb", + "geozero", "jsonschema", "odbc-api", "palette", @@ -3813,6 +3851,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" + [[package]] name = "seahash" version = "4.1.0" @@ -5477,6 +5521,18 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wkt" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d" +dependencies = [ + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.3" diff 
--git a/Cargo.toml b/Cargo.toml index 55e9f6f4..4de5d32f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,10 @@ proptest = "1.4" # Color interpolation palette = "0.7" +# Spatial +geozero = { version = "0.14", default-features = false } +hex = "0.4" + # Utilities regex = "1.10" chrono = "0.4" diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 3d4a4e37..8d6ca961 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -142,6 +142,7 @@ arrow rule errorbar + spatial @@ -188,6 +189,7 @@ slope intercept + geometry panel row diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index 223dcb01..6a881fab 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -33,6 +33,7 @@ There are many different layers to choose from when visualising your data. Some - [`boxplot`](layer/type/boxplot.qmd) displays continuous variables as 5-number summaries. - [`errorbar`](layer/type/errorbar.qmd) a line segment with hinges at the endpoints. - [`smooth`](layer/type/smooth.qmd) a trendline that follows the data shape. +- [`spatial`](layer/type/spatial.qmd) simple features from geometry. ### Position adjustments - [`stack`](layer/position/stack.qmd) places objects with a shared baseline on top of each other. diff --git a/doc/syntax/layer/type/spatial.qmd b/doc/syntax/layer/type/spatial.qmd new file mode 100644 index 00000000..2648360b --- /dev/null +++ b/doc/syntax/layer/type/spatial.qmd @@ -0,0 +1,84 @@ +--- +title: "Spatial" +--- + +> Layers are declared with the [`DRAW` clause](../../clause/draw.qmd). Read the documentation for this clause for a thorough description of how to use it. + +The spatial layer is used to render geographic geometries consisting of polygons, lines and points used to make maps like choropleths. +It differs from other layers in that it uses a special [simple features](https://en.wikipedia.org/wiki/Simple_Features) geometry column that defines the shapes. + +## Aesthetics +The following aesthetics are recognised by the spatial layer. 
+ +### Required +* `geometry`: a column of simple features. + +Note that the `geometry` column is required, but an attempt is made to detect such a column automatically. +In practice, this mapping does not often need to be declared. + +### Optional +* `stroke` The colour of the lines. +* `fill` The colour of the inner area. +* `colour` Shorthand for setting `stroke` and `fill` simultaneously. +* `opacity` The opacity of colours. +* `linewidth` The width of the lines. +* `linetype` The dash pattern of the line. + +## Settings +The spatial layer has no additional settings. + +## Data transformation +The spatial layer transforms the `geometry` column to [Well-Known Binary](https://libgeos.org/specifications/wkb/). + +## Orientation +The spatial layer has no orientations. + +## Examples + +Note that depending on your reader, you may need to activate modules for spatial analysis. + +```{ggsql} +-- For example, for DuckDB, one could use: +INSTALL spatial; +LOAD spatial; +``` + +A basic map of the world using built-in data. +Note that the geometry column is automatically detected. + +```{ggsql} +VISUALISE FROM ggsql:world + DRAW spatial +``` + +If the geometry column isn't automatically detected —for example because it has a non-standard name— you may need to declare the mapping explicitly. + +```{ggsql} +SELECT geom AS foo FROM ggsql:world +VISUALISE + DRAW spatial MAPPING foo AS geometry +``` + +Filtering on other columns. + +```{ggsql} +VISUALISE FROM ggsql:world + DRAW spatial FILTER continent == 'Asia' +``` + +Filtering based on spatial operations. + +```{ggsql} +VISUALISE FROM ggsql:world + DRAW spatial + FILTER ST_Intersects(geom, ST_MakeEnvelope(-20.0, -35.0, 55.0, 38.0)) +``` + +Make a choropleth map by mapping a variable to a fill aesthetic. 
+ +```{ggsql} +VISUALISE FROM ggsql:world + DRAW spatial + MAPPING population AS fill + SETTING opacity => 1 +``` \ No newline at end of file diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index 1a02214a..323623e9 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -269,7 +269,7 @@ { "comment": "Specialty and computed aesthetics", "name": "support.type.aesthetic.ggsql", - "match": "\\b(weight|coef|intercept|offset|density|count|intensity)\\b" + "match": "\\b(weight|coef|intercept|offset|density|count|intensity|geometry)\\b" }, { "comment": "Facet aesthetics", @@ -320,7 +320,7 @@ { "comment": "Geom types from grammar.js", "name": "support.type.geom.ggsql", - "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|errorbar)\\b" + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|errorbar|spatial)\\b" }, { "include": "#common-clause-patterns" } ] @@ -334,7 +334,7 @@ "patterns": [ { "name": "support.type.geom.ggsql", - "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|errorbar)\\b" + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|errorbar|spatial)\\b" }, { "include": "#common-clause-patterns" } ] diff --git a/src/Cargo.toml b/src/Cargo.toml index 239e82df..290fff6d 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -40,6 +40,9 @@ bytes = { workspace = true } # Writers plotters = { workspace = true, optional = true } +# Spatial +geozero = { workspace = true, optional = true, features = ["with-wkb", "with-geojson"] } + # Serialization serde.workspace = true serde_json.workspace = true @@ -64,13 +67,14 @@ tempfile = "3.8" ureq = "3" 
[features] -default = ["duckdb", "sqlite", "vegalite", "ipc", "parquet", "builtin-data", "odbc"] +default = ["duckdb", "sqlite", "vegalite", "ipc", "parquet", "builtin-data", "odbc", "spatial"] ipc = [] duckdb = ["dep:duckdb"] parquet = ["dep:parquet"] postgres = ["dep:postgres"] sqlite = ["dep:rusqlite"] odbc = ["dep:odbc-api", "dep:toml_edit"] +spatial = ["dep:geozero"] vegalite = [] ggplot2 = [] builtin-data = [] diff --git a/src/data/world.parquet b/src/data/world.parquet new file mode 100644 index 00000000..5baadbb0 Binary files /dev/null and b/src/data/world.parquet differ diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 82c9e1c0..d9fe2abc 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -221,6 +221,19 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema // Clear wildcard flag since it's been resolved layer.mappings.wildcard = false; + // Auto-detect geometry column when the geom declares one + if layer.geom.aesthetics().contains("geometry") + && !layer.mappings.aesthetics.contains_key("geometry") + { + if let Some(col) = detect_geometry_column(schema) { + layer + .mappings + .aesthetics + .entry("geometry".to_string()) + .or_insert(AestheticValue::standard_column(&col)); + } + } + // Remove null sentinel mappings (explicit "don't inherit" markers) layer .mappings @@ -230,6 +243,52 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema } } +/// Detect a geometry column by name and type. +/// +/// Returns the column name if exactly one candidate is found. Returns `None` +/// if zero or more than one column matches (ambiguous). 
+fn detect_geometry_column(schema: &Schema) -> Option { + use arrow::datatypes::DataType; + + fn looks_like_geometry(name: &str) -> bool { + matches!( + name.to_lowercase().as_str(), + "geom" | "geometry" | "wkb_geometry" | "the_geom" | "shape" + ) + } + + fn is_geometry_type(dtype: &DataType) -> bool { + matches!( + dtype, + DataType::Binary | DataType::LargeBinary | DataType::BinaryView + ) + } + + // Prefer columns that match both name and type + let candidates: Vec<_> = schema + .iter() + .filter(|c| looks_like_geometry(&c.name) && is_geometry_type(&c.dtype)) + .collect(); + + if candidates.len() == 1 { + return Some(candidates[0].name.clone()); + } + + // Fall back to name-only match (e.g. extension types we don't recognise) + if candidates.is_empty() { + let by_name: Vec<_> = schema + .iter() + .filter(|c| looks_like_geometry(&c.name)) + .collect(); + + if by_name.len() == 1 { + return Some(by_name[0].name.clone()); + } + } + + None +} + /// Resolve aesthetic aliases in a plot specification. 
/// /// For each alias defined in [`AESTHETIC_ALIASES`], splits the alias in scales, @@ -949,6 +1008,11 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result = data + .iter() + .filter(|r| r[naming::SOURCE_COLUMN] == layer_key.as_str()) + .collect(); + assert_eq!(spatial_rows.len(), 2); + + let feature = &spatial_rows[0]; + assert_eq!(feature["type"], "Feature"); + assert_eq!(feature["geometry"]["type"], "Polygon"); + } } diff --git a/src/parser/builder.rs b/src/parser/builder.rs index b3beff35..e8829454 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -636,6 +636,7 @@ fn parse_geom_type(text: &str) -> Result { "arrow" => Ok(Geom::arrow()), "rule" => Ok(Geom::rule()), "errorbar" => Ok(Geom::errorbar()), + "spatial" => Ok(Geom::spatial()), _ => Err(GgsqlError::ParseError(format!( "Unknown geom type: {}", text diff --git a/src/plot/layer/geom/mod.rs b/src/plot/layer/geom/mod.rs index 145f8089..693d2aaf 100644 --- a/src/plot/layer/geom/mod.rs +++ b/src/plot/layer/geom/mod.rs @@ -43,6 +43,7 @@ mod ribbon; mod rule; mod segment; mod smooth; +mod spatial; mod text; mod tile; mod violin; @@ -68,6 +69,7 @@ pub use ribbon::Ribbon; pub use rule::Rule; pub use segment::Segment; pub use smooth::Smooth; +pub use spatial::Spatial; pub use text::Text; pub use tile::Tile; pub use violin::Violin; @@ -97,6 +99,7 @@ pub enum GeomType { Arrow, Rule, ErrorBar, + Spatial, } impl std::fmt::Display for GeomType { @@ -120,6 +123,7 @@ impl std::fmt::Display for GeomType { GeomType::Arrow => "arrow", GeomType::Rule => "rule", GeomType::ErrorBar => "errorbar", + GeomType::Spatial => "spatial", }; write!(f, "{}", s) } @@ -350,6 +354,11 @@ impl Geom { Self(Arc::new(ErrorBar)) } + /// Create a Spatial geom + pub fn spatial() -> Self { + Self(Arc::new(Spatial)) + } + /// Create a Geom from a GeomType pub fn from_type(t: GeomType) -> Self { match t { @@ -371,6 +380,7 @@ impl Geom { GeomType::Arrow => Self::arrow(), GeomType::Rule => Self::rule(), 
GeomType::ErrorBar => Self::errorbar(), + GeomType::Spatial => Self::spatial(), } } @@ -583,6 +593,7 @@ mod tests { GeomType::Arrow, GeomType::Rule, GeomType::ErrorBar, + GeomType::Spatial, ]; // This test is rigged to trigger a compiler error when new variants are added. @@ -605,7 +616,8 @@ mod tests { | GeomType::Segment | GeomType::Arrow | GeomType::Rule - | GeomType::ErrorBar => {} + | GeomType::ErrorBar + | GeomType::Spatial => {} }; for geom_type in all_geom_types { diff --git a/src/plot/layer/geom/spatial.rs b/src/plot/layer/geom/spatial.rs new file mode 100644 index 00000000..74ae703c --- /dev/null +++ b/src/plot/layer/geom/spatial.rs @@ -0,0 +1,61 @@ +use super::{DefaultAesthetics, GeomTrait, GeomType, StatResult}; +use crate::plot::types::DefaultAestheticValue; +use crate::{naming, Mappings}; + +#[derive(Debug, Clone, Copy)] +pub struct Spatial; + +impl GeomTrait for Spatial { + fn geom_type(&self) -> GeomType { + GeomType::Spatial + } + + fn aesthetics(&self) -> DefaultAesthetics { + DefaultAesthetics { + defaults: &[ + ("geometry", DefaultAestheticValue::Required), + ("fill", DefaultAestheticValue::String("#747474")), + ("stroke", DefaultAestheticValue::String("black")), + ("opacity", DefaultAestheticValue::Number(0.8)), + ("linewidth", DefaultAestheticValue::Number(0.2)), + ("linetype", DefaultAestheticValue::String("solid")), + ], + } + } + + fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { + true + } + + fn apply_stat_transform( + &self, + query: &str, + _schema: &crate::plot::Schema, + _aesthetics: &Mappings, + _group_by: &[String], + _parameters: &std::collections::HashMap, + execute_query: &dyn Fn(&str) -> crate::Result, + dialect: &dyn crate::reader::SqlDialect, + ) -> crate::Result { + for stmt in dialect.sql_spatial_setup() { + execute_query(&stmt)?; + } + + // Geometry columns use database-native types that don't have an Arrow equivalent. + // Convert to standard WKB so the writer can parse them with geozero. 
+ let col = naming::quote_ident(&naming::aesthetic_column("geometry")); + let wkb_expr = dialect.sql_geometry_to_wkb(&col); + Ok(StatResult::Transformed { + query: format!("SELECT * REPLACE ({wkb_expr} AS {col}) FROM ({query})"), + stat_columns: vec![], + dummy_columns: vec![], + consumed_aesthetics: vec![], + }) + } +} + +impl std::fmt::Display for Spatial { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "spatial") + } +} diff --git a/src/reader/data.rs b/src/reader/data.rs index 1dd90934..2328760c 100644 --- a/src/reader/data.rs +++ b/src/reader/data.rs @@ -46,12 +46,16 @@ static AIRQUALITY: &[u8] = include_bytes!(concat!( "/data/airquality.parquet" )); +#[cfg(feature = "builtin-data")] +static WORLD: &[u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/data/world.parquet")); + /// Get the embedded parquet bytes for a known builtin dataset. #[cfg(feature = "builtin-data")] pub fn builtin_parquet_bytes(name: &str) -> Option<&'static [u8]> { match name { "penguins" => Some(PENGUINS), "airquality" => Some(AIRQUALITY), + "world" => Some(WORLD), _ => None, } } @@ -72,6 +76,13 @@ pub fn register_builtin_datasets_duckdb( use std::{env, fs}; let dataset_names = extract_builtin_dataset_names(sql)?; + + // Load spatial extension before registering datasets that contain + // geometry columns, so DuckDB reads them as GEOMETRY rather than BLOB. 
+ if dataset_names.iter().any(|n| n == "world") { + let _ = conn.execute("LOAD spatial", duckdb::params![]); + } + for name in dataset_names { let Some(parquet_bytes) = builtin_parquet_bytes(&name) else { continue; @@ -120,6 +131,7 @@ pub fn load_builtin_dataframe(name: &str) -> Result PENGUINS, "airquality" => AIRQUALITY, + "world" => WORLD, _ => { return Err(GgsqlError::ReaderError(format!( "Unknown builtin dataset: '{}'", @@ -160,7 +172,7 @@ pub fn load_builtin_dataframe(name: &str) -> Result bool { diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index aae89f20..d631b61c 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -34,6 +34,14 @@ impl super::SqlDialect for DuckDbDialect { format!("LEAST({})", exprs.join(", ")) } + fn sql_geometry_to_wkb(&self, column: &str) -> String { + format!("ST_AsWKB({column})") + } + + fn sql_spatial_setup(&self) -> Vec { + vec!["LOAD spatial".into()] + } + fn sql_generate_series(&self, n: usize) -> String { format!( "\"__ggsql_seq__\"(n) AS (SELECT generate_series FROM GENERATE_SERIES(0, {}))", @@ -211,21 +219,16 @@ impl Reader for DuckDBReader { // Rewrite ggsql:name → __ggsql_data_name__ in SQL let sql = super::data::rewrite_namespaced_sql(sql)?; - // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) - // DDL statements don't return rows, so we handle them specially - let trimmed = sql.trim().to_uppercase(); - let is_ddl = trimmed.starts_with("CREATE ") - || trimmed.starts_with("DROP ") - || trimmed.starts_with("INSERT ") - || trimmed.starts_with("UPDATE ") - || trimmed.starts_with("DELETE ") - || trimmed.starts_with("ALTER "); - - if is_ddl { - // For DDL, just execute and return an empty DataFrame + let first_word = sql.split_whitespace().next().unwrap_or("").to_uppercase(); + let returns_rows = matches!( + first_word.as_str(), + "SELECT" | "WITH" | "DESCRIBE" | "SHOW" | "EXPLAIN" | "FROM" + ); + + if !returns_rows { self.conn .execute(&sql, params![]) - .map_err(|e| 
GgsqlError::ReaderError(format!("Failed to execute DDL: {}", e)))?; + .map_err(|e| GgsqlError::ReaderError(format!("Failed to execute SQL: {}", e)))?; return Ok(DataFrame::empty()); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 16a96b66..232b20ee 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -156,6 +156,21 @@ pub trait SqlDialect { result } + /// SQL expression to convert a geometry column to WKB. + /// + /// Default uses `ST_AsBinary` (OGC standard). Override for backends + /// with different function names (e.g. DuckDB uses `ST_AsWKB`). + fn sql_geometry_to_wkb(&self, column: &str) -> String { + format!("ST_AsBinary({column})") + } + + /// SQL statements to run before spatial operations. + /// + /// Override for backends that need an extension loaded (e.g. DuckDB spatial). + fn sql_spatial_setup(&self) -> Vec { + vec![] + } + /// Generate a series of integers 0..n-1 as a CTE fragment. /// /// Returns CTE fragment(s) producing table `__ggsql_seq__` with column `n`. 
diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index 7023d04f..310cb1c4 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -362,19 +362,16 @@ impl Reader for SqliteReader { // Rewrite ggsql:name → __ggsql_data_name__ in SQL let sql = super::data::rewrite_namespaced_sql(sql)?; - // Check if this is a DDL statement - let trimmed = sql.trim().to_uppercase(); - let is_ddl = trimmed.starts_with("CREATE ") - || trimmed.starts_with("DROP ") - || trimmed.starts_with("INSERT ") - || trimmed.starts_with("UPDATE ") - || trimmed.starts_with("DELETE ") - || trimmed.starts_with("ALTER "); - - if is_ddl { + let first_word = sql.split_whitespace().next().unwrap_or("").to_uppercase(); + let returns_rows = matches!( + first_word.as_str(), + "SELECT" | "WITH" | "DESCRIBE" | "SHOW" | "EXPLAIN" | "FROM" + ); + + if !returns_rows { self.conn .execute_batch(&sql) - .map_err(|e| GgsqlError::ReaderError(format!("Failed to execute DDL: {}", e)))?; + .map_err(|e| GgsqlError::ReaderError(format!("Failed to execute SQL: {}", e)))?; return Ok(DataFrame::empty()); } diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 23fdf741..df1be824 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -45,6 +45,7 @@ pub fn geom_to_mark(geom: &Geom) -> Value { GeomType::Smooth => "line", GeomType::Rule => "rule", GeomType::ErrorBar => "rule", + GeomType::Spatial => "geoshape", _ => "point", // Default fallback }; json!({ @@ -2102,6 +2103,128 @@ impl GeomRenderer for BoxplotRenderer { } } +// ============================================================================= +// Spatial Renderer +// ============================================================================= + +struct SpatialRenderer; + +#[cfg(feature = "spatial")] +impl SpatialRenderer { + fn wkb_to_geojson(wkb_bytes: &[u8]) -> Result { + use geozero::geojson::GeoJsonWriter; + use geozero::wkb::Wkb; + use geozero::GeozeroGeometry; + use std::io::Cursor; + + let mut 
geojson_out = Vec::new(); + let wkb = Wkb(wkb_bytes); + wkb.process_geom(&mut GeoJsonWriter::new(Cursor::new(&mut geojson_out))) + .map_err(|e| { + GgsqlError::WriterError(format!("Failed to convert WKB to GeoJSON: {}", e)) + })?; + + serde_json::from_slice(&geojson_out) + .map_err(|e| GgsqlError::WriterError(format!("Invalid GeoJSON from WKB: {}", e))) + } + + fn parse_geometry_from_array(array: &arrow::array::ArrayRef, idx: usize) -> Result { + use arrow::datatypes::DataType; + + if array.is_null(idx) { + return Ok(Value::Null); + } + + match array.data_type() { + DataType::Binary => { + let bin = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GgsqlError::WriterError("Failed to read geometry as Binary".into()) + })?; + Self::wkb_to_geojson(bin.value(idx)) + } + DataType::LargeBinary => { + let bin = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GgsqlError::WriterError("Failed to read geometry as LargeBinary".into()) + })?; + Self::wkb_to_geojson(bin.value(idx)) + } + other => Err(GgsqlError::WriterError(format!( + "Geometry column has unsupported type {:?}; expected Binary (WKB)", + other + ))), + } + } +} + +impl GeomRenderer for SpatialRenderer { + fn prepare_data( + &self, + df: &DataFrame, + _layer: &Layer, + _data_key: &str, + _binned_columns: &HashMap>, + ) -> Result { + #[cfg(not(feature = "spatial"))] + { + return Err(GgsqlError::WriterError( + "Spatial visualization requires the 'spatial' feature to be enabled".to_string(), + )); + } + + #[cfg(feature = "spatial")] + { + let geometry_col = naming::aesthetic_column("geometry"); + + let col_names: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + + let mut features = Vec::with_capacity(df.height()); + + for row_idx in 0..df.height() { + let mut feature = serde_json::Map::new(); + feature.insert("type".to_string(), json!("Feature")); + + let mut properties = serde_json::Map::new(); + + for col_name in &col_names { + let col = 
df.column(col_name).map_err(|e| { + GgsqlError::WriterError(format!( + "Failed to get column '{}': {}", + col_name, e + )) + })?; + + if *col_name == geometry_col { + let geom = Self::parse_geometry_from_array(col, row_idx)?; + feature.insert("geometry".to_string(), geom); + } else { + let value = super::data::series_value_at(col, row_idx)?; + properties.insert(col_name.clone(), value.clone()); + feature.insert(col_name.clone(), value); + } + } + + feature.insert("properties".to_string(), Value::Object(properties)); + features.push(Value::Object(feature)); + } + + Ok(PreparedData::Single { + values: features, + metadata: Box::new(()), + }) + } + } +} + // ============================================================================= // Dispatcher // ============================================================================= @@ -2120,6 +2243,7 @@ pub fn get_renderer(geom: &Geom) -> Box { GeomType::Segment => Box::new(SegmentRenderer), GeomType::ErrorBar => Box::new(ErrorBarRenderer), GeomType::Rule => Box::new(RuleRenderer), + GeomType::Spatial => Box::new(SpatialRenderer), // All other geoms (Point, Area, Ribbon, Density, etc.) use the default renderer _ => Box::new(DefaultRenderer), } diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 69cf307d..cc33b9ec 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -304,6 +304,12 @@ fn build_layer_encoding( continue; } + // Skip geometry aesthetic - it is structural (consumed by SpatialRenderer + // to build GeoJSON Features), not a visual encoding channel. 
+ if aesthetic == "geometry" { + continue; + } + let mut channel_name = map_aesthetic_name(aesthetic, &aesthetic_ctx, coord_kind); // Opacity is retargeted to the fill when fill is supported if channel_name == "opacity" && layer.mappings.contains_key("fill") { diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 36b2504e..5531dcb6 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -154,19 +154,14 @@ module.exports = grammar({ )) )), - // Other SQL statements - DO NOT match if starts with keywords we handle - // explicitly (WITH, SELECT, CREATE, INSERT, UPDATE, DELETE, VISUALISE) - other_sql_statement: $ => { - const exclude_pattern = /[^\s;(),'"WwSsCcIiUuDdVv]+/; - return prec(-1, repeat1(choice( - $.sql_keyword, - token(exclude_pattern), // Tokens not starting with excluded letters - $.string, - $.number, - $.subquery, - ',', '(', ')', '*', '.', '=' - ))); - }, + other_sql_statement: $ => prec(-1, repeat1(choice( + $.sql_keyword, + token(/[^\s;(),'"]+/), + $.string, + $.number, + $.subquery, + ',', '(', ')', '*', '.', '=' + ))), // Subquery in parentheses - fully recursive, can contain any SQL // Prioritizes WITH/SELECT statements, falls back to token-by-token parsing @@ -504,7 +499,8 @@ module.exports = grammar({ geom_type: $ => choice( 'point', 'line', 'path', 'bar', 'area', 'tile', 'polygon', 'ribbon', 'histogram', 'density', 'smooth', 'boxplot', 'violin', - 'text', 'label', 'segment', 'arrow', 'rule', 'errorbar' + 'text', 'label', 'segment', 'arrow', 'rule', 'errorbar', + 'spatial' ), // MAPPING clause for aesthetic mappings: MAPPING col AS x, "blue" AS color [FROM source] @@ -686,7 +682,7 @@ module.exports = grammar({ // Text aesthetics 'label', 'typeface', 'fontweight', 'italic', 'fontsize', 'hjust', 'vjust', 'rotation', // Specialty aesthetics, - 'slope', + 'slope', 'geometry', // Facet aesthetics 'panel', 'row', 'column', // Computed variables diff --git a/tree-sitter-ggsql/queries/highlights.scm 
b/tree-sitter-ggsql/queries/highlights.scm index 6cb23d77..e3939fd0 100644 --- a/tree-sitter-ggsql/queries/highlights.scm +++ b/tree-sitter-ggsql/queries/highlights.scm @@ -26,6 +26,7 @@ "arrow" "rule" "errorbar" + "spatial" ] @type.builtin ; Aesthetic names @@ -74,6 +75,7 @@ "rotation" ; Specialty aesthetics "slope" + "geometry" ; Facet aesthetics "panel" "row"