diff --git a/Cargo.toml b/Cargo.toml index 508dd686..5cc6aea0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -234,3 +234,12 @@ incremental = false [profile.debugging] inherits = "dev" debug = true + +# core2 0.4.0 is yanked on crates.io but no 0.4.1+ exists, and libflate / +# libflate_lz77 (pulled in via the `jieba` feature of nodedb-fts → +# include-flate → include-flate-compress) still require `core2 ^0.4`. +# Point cargo at the upstream git source so fresh resolution (CI without +# a committed lockfile) succeeds — git sources bypass the registry's yank +# check. Safe to remove once libflate releases a bump off core2. +[patch.crates-io] +core2 = { git = "https://github.com/technocreatives/core2", rev = "545e84bcb0f235b12e21351e0c69767958efe2a7" } diff --git a/nodedb-fts/Cargo.toml b/nodedb-fts/Cargo.toml index 96019210..95a4c662 100644 --- a/nodedb-fts/Cargo.toml +++ b/nodedb-fts/Cargo.toml @@ -29,7 +29,7 @@ thiserror = { workspace = true } # Optional: dictionary-based CJK segmentation lindera = { version = "2.3", optional = true } -jieba-rs = { version = "0.8", optional = true } +jieba-rs = { version = "0.9", optional = true } icu_segmenter = { version = "1", optional = true } whatlang = { version = "0.18", optional = true } diff --git a/nodedb-query/src/expr.rs b/nodedb-query/src/expr.rs deleted file mode 100644 index 2264ea8a..00000000 --- a/nodedb-query/src/expr.rs +++ /dev/null @@ -1,625 +0,0 @@ -//! SqlExpr AST definition and core evaluation. - -use crate::value_ops::{ - coerced_eq, compare_values, is_truthy, to_value_number, value_to_display_string, value_to_f64, -}; -use nodedb_types::Value; - -/// A serializable SQL expression that can be evaluated against a document. -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum SqlExpr { - /// Column reference: extract field value from the document. - Column(String), - /// Literal value. - Literal(Value), - /// Binary operation: left op right. - BinaryOp { - left: Box, - op: BinaryOp, - right: Box, - }, - /// Unary negation: -expr or NOT expr. - Negate(Box), - /// Scalar function call. - Function { name: String, args: Vec }, - /// CAST(expr AS type). - Cast { - expr: Box, - to_type: CastType, - }, - /// CASE WHEN cond1 THEN val1 ... ELSE default END. - Case { - operand: Option>, - when_thens: Vec<(SqlExpr, SqlExpr)>, - else_expr: Option>, - }, - /// COALESCE(expr1, expr2, ...): first non-null value. - Coalesce(Vec), - /// NULLIF(expr1, expr2): returns NULL if expr1 = expr2, else expr1. - NullIf(Box, Box), - /// IS NULL / IS NOT NULL. - IsNull { expr: Box, negated: bool }, - /// OLD column reference: extract field value from the pre-update document. - /// Used in TRANSITION CHECK predicates. Resolves against the OLD row - /// when evaluated via `eval_with_old()`. Returns NULL in normal `eval()`. - OldColumn(String), -} - -/// Binary operators. -#[derive( - Debug, - Clone, - Copy, - serde::Serialize, - serde::Deserialize, - zerompk::ToMessagePack, - zerompk::FromMessagePack, -)] -#[msgpack(c_enum)] -pub enum BinaryOp { - Add, - Sub, - Mul, - Div, - Mod, - Eq, - NotEq, - Gt, - GtEq, - Lt, - LtEq, - And, - Or, - Concat, -} - -/// Target types for CAST. -#[derive( - Debug, - Clone, - serde::Serialize, - serde::Deserialize, - zerompk::ToMessagePack, - zerompk::FromMessagePack, -)] -#[msgpack(c_enum)] -pub enum CastType { - Int, - Float, - String, - Bool, -} - -/// A computed projection column: alias + expression. 
-#[derive( - Debug, - Clone, - serde::Serialize, - serde::Deserialize, - zerompk::ToMessagePack, - zerompk::FromMessagePack, -)] -pub struct ComputedColumn { - pub alias: String, - pub expr: SqlExpr, -} - -// ─── Manual zerompk impls for SqlExpr ──────────────────────────────────────── -// -// SqlExpr contains `nodedb_types::Value` (in the Literal variant) which implements -// `zerompk::ToMessagePack` and `zerompk::FromMessagePack` natively. -// -// Encoding format: each variant is an array `[tag_u8, field1, field2, ...]`. -// Tags: Column=0, Literal=1, BinaryOp=2, Negate=3, Function=4, Cast=5, -// Case=6, Coalesce=7, NullIf=8, IsNull=9, OldColumn=10. - -impl zerompk::ToMessagePack for SqlExpr { - fn write(&self, writer: &mut W) -> zerompk::Result<()> { - match self { - SqlExpr::Column(s) => { - writer.write_array_len(2)?; - writer.write_u8(0)?; - writer.write_string(s) - } - SqlExpr::Literal(v) => { - writer.write_array_len(2)?; - writer.write_u8(1)?; - v.write(writer) - } - SqlExpr::BinaryOp { left, op, right } => { - writer.write_array_len(4)?; - writer.write_u8(2)?; - left.write(writer)?; - op.write(writer)?; - right.write(writer) - } - SqlExpr::Negate(inner) => { - writer.write_array_len(2)?; - writer.write_u8(3)?; - inner.write(writer) - } - SqlExpr::Function { name, args } => { - writer.write_array_len(3)?; - writer.write_u8(4)?; - writer.write_string(name)?; - args.write(writer) - } - SqlExpr::Cast { expr, to_type } => { - writer.write_array_len(3)?; - writer.write_u8(5)?; - expr.write(writer)?; - to_type.write(writer) - } - SqlExpr::Case { - operand, - when_thens, - else_expr, - } => { - writer.write_array_len(4)?; - writer.write_u8(6)?; - operand.write(writer)?; - // Encode when_thens as array of 2-element arrays. - writer.write_array_len(when_thens.len())?; - for (cond, val) in when_thens { - writer.write_array_len(2)?; - cond.write(writer)?; - val.write(writer)?; - } - else_expr.write(writer) - } - SqlExpr::Coalesce(exprs) => { - writer.write_array_len(2)?; - writer.write_u8(7)?; - exprs.write(writer) - } - SqlExpr::NullIf(e1, e2) => { - writer.write_array_len(3)?; - writer.write_u8(8)?; - e1.write(writer)?; - e2.write(writer) - } - SqlExpr::IsNull { expr, negated } => { - writer.write_array_len(3)?; - writer.write_u8(9)?; - expr.write(writer)?; - writer.write_boolean(*negated) - } - SqlExpr::OldColumn(s) => { - writer.write_array_len(2)?; - writer.write_u8(10)?; - writer.write_string(s) - } - } - } -} - -impl<'a> zerompk::FromMessagePack<'a> for SqlExpr { - fn read>(reader: &mut R) -> zerompk::Result { - let len = reader.read_array_len()?; - if len == 0 { - return Err(zerompk::Error::ArrayLengthMismatch { - expected: 1, - actual: 0, - }); - } - let tag = reader.read_u8()?; - match tag { - 0 => { - // Column(String) - Ok(SqlExpr::Column(reader.read_string()?.into_owned())) - } - 1 => { - // Literal(Value) - let v = Value::read(reader)?; - Ok(SqlExpr::Literal(v)) - } - 2 => { - // BinaryOp { left, op, right } - let left = SqlExpr::read(reader)?; - let op = BinaryOp::read(reader)?; - let right = SqlExpr::read(reader)?; - Ok(SqlExpr::BinaryOp { - left: Box::new(left), - op, - right: Box::new(right), - }) - } - 3 => { - // Negate(Box) - let inner = SqlExpr::read(reader)?; - Ok(SqlExpr::Negate(Box::new(inner))) - } - 4 => { - // Function { name, args } - let name = reader.read_string()?.into_owned(); - let args = Vec::::read(reader)?; - Ok(SqlExpr::Function { name, args }) - } - 5 => { - // Cast { expr, to_type } - let expr = SqlExpr::read(reader)?; - let to_type = 
CastType::read(reader)?; - Ok(SqlExpr::Cast { - expr: Box::new(expr), - to_type, - }) - } - 6 => { - // Case { operand, when_thens, else_expr } - let operand = Option::>::read(reader)?; - let wt_len = reader.read_array_len()?; - let mut when_thens = Vec::with_capacity(wt_len); - for _ in 0..wt_len { - let pair_len = reader.read_array_len()?; - if pair_len != 2 { - return Err(zerompk::Error::ArrayLengthMismatch { - expected: 2, - actual: pair_len, - }); - } - let cond = SqlExpr::read(reader)?; - let val = SqlExpr::read(reader)?; - when_thens.push((cond, val)); - } - let else_expr = Option::>::read(reader)?; - Ok(SqlExpr::Case { - operand, - when_thens, - else_expr, - }) - } - 7 => { - // Coalesce(Vec) - let exprs = Vec::::read(reader)?; - Ok(SqlExpr::Coalesce(exprs)) - } - 8 => { - // NullIf(Box, Box) - let e1 = SqlExpr::read(reader)?; - let e2 = SqlExpr::read(reader)?; - Ok(SqlExpr::NullIf(Box::new(e1), Box::new(e2))) - } - 9 => { - // IsNull { expr, negated } - let expr = SqlExpr::read(reader)?; - let negated = reader.read_boolean()?; - Ok(SqlExpr::IsNull { - expr: Box::new(expr), - negated, - }) - } - 10 => { - // OldColumn(String) - Ok(SqlExpr::OldColumn(reader.read_string()?.into_owned())) - } - _ => Err(zerompk::Error::InvalidMarker(tag)), - } - } -} - -impl SqlExpr { - /// Evaluate this expression against a document. - /// - /// Returns a `Value`. Column references look up fields in the document. - /// Missing fields return `Null`. Arithmetic on non-numeric values returns `Null`. - pub fn eval(&self, doc: &Value) -> Value { - match self { - SqlExpr::Column(name) => doc.get(name).cloned().unwrap_or(Value::Null), - - SqlExpr::Literal(v) => v.clone(), - - SqlExpr::BinaryOp { left, op, right } => { - let l = left.eval(doc); - let r = right.eval(doc); - eval_binary_op(&l, *op, &r) - } - - SqlExpr::Negate(inner) => { - let v = inner.eval(doc); - if let Some(b) = v.as_bool() { - Value::Bool(!b) - } else { - match value_to_f64(&v, false) { - Some(n) => to_value_number(-n), - None => Value::Null, - } - } - } - - SqlExpr::Function { name, args } => { - let evaluated: Vec = args.iter().map(|a| a.eval(doc)).collect(); - crate::functions::eval_function(name, &evaluated) - } - - SqlExpr::Cast { expr, to_type } => { - let v = expr.eval(doc); - crate::cast::eval_cast(&v, to_type) - } - - SqlExpr::Case { - operand, - when_thens, - else_expr, - } => { - let op_val = operand.as_ref().map(|e| e.eval(doc)); - for (when_expr, then_expr) in when_thens { - let when_val = when_expr.eval(doc); - let matches = match &op_val { - Some(ov) => coerced_eq(ov, &when_val), - None => is_truthy(&when_val), - }; - if matches { - return then_expr.eval(doc); - } - } - match else_expr { - Some(e) => e.eval(doc), - None => Value::Null, - } - } - - SqlExpr::Coalesce(exprs) => { - for expr in exprs { - let v = expr.eval(doc); - if !v.is_null() { - return v; - } - } - Value::Null - } - - SqlExpr::NullIf(a, b) => { - let va = a.eval(doc); - let vb = b.eval(doc); - if coerced_eq(&va, &vb) { - Value::Null - } else { - va - } - } - - SqlExpr::IsNull { expr, negated } => { - let v = expr.eval(doc); - let is_null = v.is_null(); - Value::Bool(if *negated { !is_null } else { is_null }) - } - - SqlExpr::OldColumn(_) => Value::Null, - } - } - - /// Evaluate with access to both NEW and OLD documents (for TRANSITION CHECK). - /// - /// `Column(name)` resolves against `new_doc`. - /// `OldColumn(name)` resolves against `old_doc`. 
- pub fn eval_with_old(&self, new_doc: &Value, old_doc: &Value) -> Value { - match self { - SqlExpr::Column(name) => new_doc.get(name).cloned().unwrap_or(Value::Null), - SqlExpr::OldColumn(name) => old_doc.get(name).cloned().unwrap_or(Value::Null), - SqlExpr::Literal(v) => v.clone(), - SqlExpr::BinaryOp { left, op, right } => { - let l = left.eval_with_old(new_doc, old_doc); - let r = right.eval_with_old(new_doc, old_doc); - eval_binary_op(&l, *op, &r) - } - SqlExpr::Negate(inner) => { - let v = inner.eval_with_old(new_doc, old_doc); - if let Some(b) = v.as_bool() { - Value::Bool(!b) - } else { - match value_to_f64(&v, false) { - Some(n) => to_value_number(-n), - None => Value::Null, - } - } - } - SqlExpr::Function { name, args } => { - let evaluated: Vec = args - .iter() - .map(|a| a.eval_with_old(new_doc, old_doc)) - .collect(); - crate::functions::eval_function(name, &evaluated) - } - SqlExpr::Cast { expr, to_type } => { - let v = expr.eval_with_old(new_doc, old_doc); - crate::cast::eval_cast(&v, to_type) - } - SqlExpr::Case { - operand, - when_thens, - else_expr, - } => { - let op_val = operand.as_ref().map(|e| e.eval_with_old(new_doc, old_doc)); - for (when_expr, then_expr) in when_thens { - let when_val = when_expr.eval_with_old(new_doc, old_doc); - let matches = match &op_val { - Some(ov) => coerced_eq(ov, &when_val), - None => is_truthy(&when_val), - }; - if matches { - return then_expr.eval_with_old(new_doc, old_doc); - } - } - match else_expr { - Some(e) => e.eval_with_old(new_doc, old_doc), - None => Value::Null, - } - } - SqlExpr::Coalesce(exprs) => { - for expr in exprs { - let v = expr.eval_with_old(new_doc, old_doc); - if !v.is_null() { - return v; - } - } - Value::Null - } - SqlExpr::NullIf(a, b) => { - let va = a.eval_with_old(new_doc, old_doc); - let vb = b.eval_with_old(new_doc, old_doc); - if coerced_eq(&va, &vb) { - Value::Null - } else { - va - } - } - SqlExpr::IsNull { expr, negated } => { - let v = expr.eval_with_old(new_doc, old_doc); - let is_null = v.is_null(); - Value::Bool(if *negated { !is_null } else { is_null }) - } - } - } -} - -fn eval_binary_op(left: &Value, op: BinaryOp, right: &Value) -> Value { - match op { - BinaryOp::Add => match (value_to_f64(left, true), value_to_f64(right, true)) { - (Some(a), Some(b)) => to_value_number(a + b), - _ => Value::Null, - }, - BinaryOp::Sub => match (value_to_f64(left, true), value_to_f64(right, true)) { - (Some(a), Some(b)) => to_value_number(a - b), - _ => Value::Null, - }, - BinaryOp::Mul => match (value_to_f64(left, true), value_to_f64(right, true)) { - (Some(a), Some(b)) => to_value_number(a * b), - _ => Value::Null, - }, - BinaryOp::Div => match (value_to_f64(left, true), value_to_f64(right, true)) { - (Some(a), Some(b)) => { - if b == 0.0 { - Value::Null - } else { - to_value_number(a / b) - } - } - _ => Value::Null, - }, - BinaryOp::Mod => match (value_to_f64(left, true), value_to_f64(right, true)) { - (Some(a), Some(b)) => { - if b == 0.0 { - Value::Null - } else { - to_value_number(a % b) - } - } - _ => Value::Null, - }, - BinaryOp::Concat => { - let ls = value_to_display_string(left); - let rs = value_to_display_string(right); - Value::String(format!("{ls}{rs}")) - } - BinaryOp::Eq => Value::Bool(coerced_eq(left, right)), - BinaryOp::NotEq => Value::Bool(!coerced_eq(left, right)), - BinaryOp::Gt => Value::Bool(compare_values(left, right) == std::cmp::Ordering::Greater), - BinaryOp::GtEq => { - let c = compare_values(left, right); - Value::Bool(c == std::cmp::Ordering::Greater || c == 
std::cmp::Ordering::Equal) - } - BinaryOp::Lt => Value::Bool(compare_values(left, right) == std::cmp::Ordering::Less), - BinaryOp::LtEq => { - let c = compare_values(left, right); - Value::Bool(c == std::cmp::Ordering::Less || c == std::cmp::Ordering::Equal) - } - BinaryOp::And => Value::Bool(is_truthy(left) && is_truthy(right)), - BinaryOp::Or => Value::Bool(is_truthy(left) || is_truthy(right)), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn doc() -> Value { - Value::Object( - [ - ("name".to_string(), Value::String("Alice".into())), - ("age".to_string(), Value::Integer(30)), - ("price".to_string(), Value::Float(10.5)), - ("qty".to_string(), Value::Integer(4)), - ("active".to_string(), Value::Bool(true)), - ("email".to_string(), Value::Null), - ] - .into_iter() - .collect(), - ) - } - - #[test] - fn column_ref() { - let expr = SqlExpr::Column("name".into()); - assert_eq!(expr.eval(&doc()), Value::String("Alice".into())); - } - - #[test] - fn missing_column() { - let expr = SqlExpr::Column("missing".into()); - assert_eq!(expr.eval(&doc()), Value::Null); - } - - #[test] - fn literal() { - let expr = SqlExpr::Literal(Value::Integer(42)); - assert_eq!(expr.eval(&doc()), Value::Integer(42)); - } - - #[test] - fn add() { - let expr = SqlExpr::BinaryOp { - left: Box::new(SqlExpr::Column("price".into())), - op: BinaryOp::Add, - right: Box::new(SqlExpr::Literal(Value::Float(1.5))), - }; - assert_eq!(expr.eval(&doc()), Value::Integer(12)); - } - - #[test] - fn multiply() { - let expr = SqlExpr::BinaryOp { - left: Box::new(SqlExpr::Column("price".into())), - op: BinaryOp::Mul, - right: Box::new(SqlExpr::Column("qty".into())), - }; - assert_eq!(expr.eval(&doc()), Value::Integer(42)); - } - - #[test] - fn case_when() { - let expr = SqlExpr::Case { - operand: None, - when_thens: vec![( - SqlExpr::BinaryOp { - left: Box::new(SqlExpr::Column("age".into())), - op: BinaryOp::GtEq, - right: Box::new(SqlExpr::Literal(Value::Integer(18))), - }, - SqlExpr::Literal(Value::String("adult".into())), - )], - else_expr: Some(Box::new(SqlExpr::Literal(Value::String("minor".into())))), - }; - assert_eq!(expr.eval(&doc()), Value::String("adult".into())); - } - - #[test] - fn coalesce() { - let expr = SqlExpr::Coalesce(vec![ - SqlExpr::Column("email".into()), - SqlExpr::Literal(Value::String("default@example.com".into())), - ]); - assert_eq!( - expr.eval(&doc()), - Value::String("default@example.com".into()) - ); - } - - #[test] - fn is_null() { - let expr = SqlExpr::IsNull { - expr: Box::new(SqlExpr::Column("email".into())), - negated: false, - }; - assert_eq!(expr.eval(&doc()), Value::Bool(true)); - } -} diff --git a/nodedb-query/src/expr/binary.rs b/nodedb-query/src/expr/binary.rs new file mode 100644 index 00000000..34020b5b --- /dev/null +++ b/nodedb-query/src/expr/binary.rs @@ -0,0 +1,65 @@ +//! Binary-operator evaluation on `Value` operands. 
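+//!
+//! A quick sketch of the null semantics implemented below: arithmetic on a
+//! non-numeric operand, and division or modulo by zero, all evaluate to
+//! `Value::Null` rather than erroring.
+//!
+//! ```ignore
+//! assert_eq!(
+//!     eval_binary_op(&Value::Integer(1), BinaryOp::Div, &Value::Integer(0)),
+//!     Value::Null,
+//! );
+//! ```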
+ +use nodedb_types::Value; + +use crate::value_ops::{ + coerced_eq, compare_values, is_truthy, to_value_number, value_to_display_string, value_to_f64, +}; + +use super::types::BinaryOp; + +pub(super) fn eval_binary_op(left: &Value, op: BinaryOp, right: &Value) -> Value { + match op { + BinaryOp::Add => match (value_to_f64(left, true), value_to_f64(right, true)) { + (Some(a), Some(b)) => to_value_number(a + b), + _ => Value::Null, + }, + BinaryOp::Sub => match (value_to_f64(left, true), value_to_f64(right, true)) { + (Some(a), Some(b)) => to_value_number(a - b), + _ => Value::Null, + }, + BinaryOp::Mul => match (value_to_f64(left, true), value_to_f64(right, true)) { + (Some(a), Some(b)) => to_value_number(a * b), + _ => Value::Null, + }, + BinaryOp::Div => match (value_to_f64(left, true), value_to_f64(right, true)) { + (Some(a), Some(b)) => { + if b == 0.0 { + Value::Null + } else { + to_value_number(a / b) + } + } + _ => Value::Null, + }, + BinaryOp::Mod => match (value_to_f64(left, true), value_to_f64(right, true)) { + (Some(a), Some(b)) => { + if b == 0.0 { + Value::Null + } else { + to_value_number(a % b) + } + } + _ => Value::Null, + }, + BinaryOp::Concat => { + let ls = value_to_display_string(left); + let rs = value_to_display_string(right); + Value::String(format!("{ls}{rs}")) + } + BinaryOp::Eq => Value::Bool(coerced_eq(left, right)), + BinaryOp::NotEq => Value::Bool(!coerced_eq(left, right)), + BinaryOp::Gt => Value::Bool(compare_values(left, right) == std::cmp::Ordering::Greater), + BinaryOp::GtEq => { + let c = compare_values(left, right); + Value::Bool(c == std::cmp::Ordering::Greater || c == std::cmp::Ordering::Equal) + } + BinaryOp::Lt => Value::Bool(compare_values(left, right) == std::cmp::Ordering::Less), + BinaryOp::LtEq => { + let c = compare_values(left, right); + Value::Bool(c == std::cmp::Ordering::Less || c == std::cmp::Ordering::Equal) + } + BinaryOp::And => Value::Bool(is_truthy(left) && is_truthy(right)), + BinaryOp::Or => Value::Bool(is_truthy(left) || is_truthy(right)), + } +} diff --git a/nodedb-query/src/expr/codec.rs b/nodedb-query/src/expr/codec.rs new file mode 100644 index 00000000..98de1b2c --- /dev/null +++ b/nodedb-query/src/expr/codec.rs @@ -0,0 +1,181 @@ +//! Manual zerompk wire format for [`SqlExpr`]. +//! +//! Each variant encodes as an array `[tag_u8, field1, field2, ...]`. Tags +//! are stable and MUST NOT be renumbered — they are on-wire values in +//! physical-plan envelopes. `Value`, `BinaryOp`, and `CastType` implement +//! zerompk natively so they nest transparently. +//! +//! Tags: Column=0, Literal=1, BinaryOp=2, Negate=3, Function=4, Cast=5, +//! Case=6, Coalesce=7, NullIf=8, IsNull=9, OldColumn=10. 
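+//!
+//! ```ignore
+//! // Round-trip sketch (assumptions: `Vec<u8>` implements the writer trait
+//! // and `write_u8` packs small tags as msgpack positive fixints):
+//! // Column("age") ⇄ the array [0, "age"], i.e. bytes 92 00 a3 61 67 65.
+//! let mut buf = Vec::new();
+//! SqlExpr::Column("age".into()).write(&mut buf)?;
+//! ```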
+
+use nodedb_types::Value;
+
+use super::types::{BinaryOp, CastType, SqlExpr};
+
+impl zerompk::ToMessagePack for SqlExpr {
+    fn write<W: zerompk::Writer>(&self, writer: &mut W) -> zerompk::Result<()> {
+        match self {
+            SqlExpr::Column(s) => {
+                writer.write_array_len(2)?;
+                writer.write_u8(0)?;
+                writer.write_string(s)
+            }
+            SqlExpr::Literal(v) => {
+                writer.write_array_len(2)?;
+                writer.write_u8(1)?;
+                v.write(writer)
+            }
+            SqlExpr::BinaryOp { left, op, right } => {
+                writer.write_array_len(4)?;
+                writer.write_u8(2)?;
+                left.write(writer)?;
+                op.write(writer)?;
+                right.write(writer)
+            }
+            SqlExpr::Negate(inner) => {
+                writer.write_array_len(2)?;
+                writer.write_u8(3)?;
+                inner.write(writer)
+            }
+            SqlExpr::Function { name, args } => {
+                writer.write_array_len(3)?;
+                writer.write_u8(4)?;
+                writer.write_string(name)?;
+                args.write(writer)
+            }
+            SqlExpr::Cast { expr, to_type } => {
+                writer.write_array_len(3)?;
+                writer.write_u8(5)?;
+                expr.write(writer)?;
+                to_type.write(writer)
+            }
+            SqlExpr::Case {
+                operand,
+                when_thens,
+                else_expr,
+            } => {
+                writer.write_array_len(4)?;
+                writer.write_u8(6)?;
+                operand.write(writer)?;
+                writer.write_array_len(when_thens.len())?;
+                for (cond, val) in when_thens {
+                    writer.write_array_len(2)?;
+                    cond.write(writer)?;
+                    val.write(writer)?;
+                }
+                else_expr.write(writer)
+            }
+            SqlExpr::Coalesce(exprs) => {
+                writer.write_array_len(2)?;
+                writer.write_u8(7)?;
+                exprs.write(writer)
+            }
+            SqlExpr::NullIf(e1, e2) => {
+                writer.write_array_len(3)?;
+                writer.write_u8(8)?;
+                e1.write(writer)?;
+                e2.write(writer)
+            }
+            SqlExpr::IsNull { expr, negated } => {
+                writer.write_array_len(3)?;
+                writer.write_u8(9)?;
+                expr.write(writer)?;
+                writer.write_boolean(*negated)
+            }
+            SqlExpr::OldColumn(s) => {
+                writer.write_array_len(2)?;
+                writer.write_u8(10)?;
+                writer.write_string(s)
+            }
+        }
+    }
+}
+
+impl<'a> zerompk::FromMessagePack<'a> for SqlExpr {
+    fn read<R: zerompk::Reader<'a>>(reader: &mut R) -> zerompk::Result<Self> {
+        let len = reader.read_array_len()?;
+        if len == 0 {
+            return Err(zerompk::Error::ArrayLengthMismatch {
+                expected: 1,
+                actual: 0,
+            });
+        }
+        let tag = reader.read_u8()?;
+        match tag {
+            0 => Ok(SqlExpr::Column(reader.read_string()?.into_owned())),
+            1 => {
+                let v = Value::read(reader)?;
+                Ok(SqlExpr::Literal(v))
+            }
+            2 => {
+                let left = SqlExpr::read(reader)?;
+                let op = BinaryOp::read(reader)?;
+                let right = SqlExpr::read(reader)?;
+                Ok(SqlExpr::BinaryOp {
+                    left: Box::new(left),
+                    op,
+                    right: Box::new(right),
+                })
+            }
+            3 => {
+                let inner = SqlExpr::read(reader)?;
+                Ok(SqlExpr::Negate(Box::new(inner)))
+            }
+            4 => {
+                let name = reader.read_string()?.into_owned();
+                let args = Vec::<SqlExpr>::read(reader)?;
+                Ok(SqlExpr::Function { name, args })
+            }
+            5 => {
+                let expr = SqlExpr::read(reader)?;
+                let to_type = CastType::read(reader)?;
+                Ok(SqlExpr::Cast {
+                    expr: Box::new(expr),
+                    to_type,
+                })
+            }
+            6 => {
+                let operand = Option::<Box<SqlExpr>>::read(reader)?;
+                let wt_len = reader.read_array_len()?;
+                let mut when_thens = Vec::with_capacity(wt_len);
+                for _ in 0..wt_len {
+                    let pair_len = reader.read_array_len()?;
+                    if pair_len != 2 {
+                        return Err(zerompk::Error::ArrayLengthMismatch {
+                            expected: 2,
+                            actual: pair_len,
+                        });
+                    }
+                    let cond = SqlExpr::read(reader)?;
+                    let val = SqlExpr::read(reader)?;
+                    when_thens.push((cond, val));
+                }
+                let else_expr = Option::<Box<SqlExpr>>::read(reader)?;
+                Ok(SqlExpr::Case {
+                    operand,
+                    when_thens,
+                    else_expr,
+                })
+            }
+            7 => {
+                let exprs = Vec::<SqlExpr>::read(reader)?;
+                Ok(SqlExpr::Coalesce(exprs))
+            }
+            8 => {
+                let e1 = SqlExpr::read(reader)?;
+                let e2 = SqlExpr::read(reader)?;
+
Ok(SqlExpr::NullIf(Box::new(e1), Box::new(e2))) + } + 9 => { + let expr = SqlExpr::read(reader)?; + let negated = reader.read_boolean()?; + Ok(SqlExpr::IsNull { + expr: Box::new(expr), + negated, + }) + } + 10 => Ok(SqlExpr::OldColumn(reader.read_string()?.into_owned())), + _ => Err(zerompk::Error::InvalidMarker(tag)), + } + } +} diff --git a/nodedb-query/src/expr/eval.rs b/nodedb-query/src/expr/eval.rs new file mode 100644 index 00000000..8408b7b6 --- /dev/null +++ b/nodedb-query/src/expr/eval.rs @@ -0,0 +1,246 @@ +//! Row-scope evaluator for [`SqlExpr`]. +//! +//! `eval()` resolves column references against a single document. `eval_with_old()` +//! resolves `Column(..)` against the post-update ("new") document and `OldColumn(..)` +//! against the pre-update ("old") document — this is the path used by TRANSITION +//! CHECK and similar old/new diff predicates. + +use nodedb_types::Value; + +use crate::value_ops::{coerced_eq, is_truthy, to_value_number, value_to_f64}; + +use super::binary::eval_binary_op; +use super::types::SqlExpr; + +/// Row scope for `SqlExpr::eval_scope`: how `Column(..)` and `OldColumn(..)` +/// resolve to `Value`s. The shared evaluator walks the AST once and calls +/// into this scope for every leaf column reference — both `eval()` and +/// `eval_with_old()` delegate here instead of duplicating the walk. +struct RowScope<'a> { + new_doc: &'a Value, + /// Pre-update row, if this is an old/new evaluation (TRANSITION CHECK). + /// `None` means `OldColumn(..)` resolves to `Null`, matching plain `eval`. + old_doc: Option<&'a Value>, +} + +impl<'a> RowScope<'a> { + fn column(&self, name: &str) -> Value { + self.new_doc.get(name).cloned().unwrap_or(Value::Null) + } + + fn old_column(&self, name: &str) -> Value { + match self.old_doc { + Some(old) => old.get(name).cloned().unwrap_or(Value::Null), + None => Value::Null, + } + } +} + +impl SqlExpr { + /// Evaluate this expression against a document. + /// + /// Column references look up fields in the document. Missing fields + /// return `Null`. Arithmetic on non-numeric values returns `Null`. + /// `OldColumn(..)` resolves to `Null` (use `eval_with_old` for the + /// TRANSITION CHECK path). + pub fn eval(&self, doc: &Value) -> Value { + self.eval_scope(&RowScope { + new_doc: doc, + old_doc: None, + }) + } + + /// Evaluate with access to both NEW and OLD documents, used by + /// TRANSITION CHECK predicates. `Column(name)` resolves against + /// `new_doc`; `OldColumn(name)` resolves against `old_doc`. + pub fn eval_with_old(&self, new_doc: &Value, old_doc: &Value) -> Value { + self.eval_scope(&RowScope { + new_doc, + old_doc: Some(old_doc), + }) + } + + /// Shared walker: one match, one recursion scheme, parameterised by the + /// row-scope so `eval` and `eval_with_old` can't drift out of sync. 
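+    ///
+    /// For example, a TRANSITION CHECK predicate such as `qty >= OLD.qty`
+    /// is a single `BinaryOp` walk: the scope resolves `Column("qty")`
+    /// from the post-update row and `OldColumn("qty")` from the
+    /// pre-update row (or to `Null` when no OLD row is supplied).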
+    fn eval_scope(&self, scope: &RowScope<'_>) -> Value {
+        match self {
+            SqlExpr::Column(name) => scope.column(name),
+            SqlExpr::OldColumn(name) => scope.old_column(name),
+
+            SqlExpr::Literal(v) => v.clone(),
+
+            SqlExpr::BinaryOp { left, op, right } => {
+                let l = left.eval_scope(scope);
+                let r = right.eval_scope(scope);
+                eval_binary_op(&l, *op, &r)
+            }
+
+            SqlExpr::Negate(inner) => {
+                let v = inner.eval_scope(scope);
+                if let Some(b) = v.as_bool() {
+                    Value::Bool(!b)
+                } else {
+                    match value_to_f64(&v, false) {
+                        Some(n) => to_value_number(-n),
+                        None => Value::Null,
+                    }
+                }
+            }
+
+            SqlExpr::Function { name, args } => {
+                let evaluated: Vec<Value> = args.iter().map(|a| a.eval_scope(scope)).collect();
+                crate::functions::eval_function(name, &evaluated)
+            }
+
+            SqlExpr::Cast { expr, to_type } => {
+                let v = expr.eval_scope(scope);
+                crate::cast::eval_cast(&v, to_type)
+            }
+
+            SqlExpr::Case {
+                operand,
+                when_thens,
+                else_expr,
+            } => {
+                let op_val = operand.as_ref().map(|e| e.eval_scope(scope));
+                for (when_expr, then_expr) in when_thens {
+                    let when_val = when_expr.eval_scope(scope);
+                    let matches = match &op_val {
+                        Some(ov) => coerced_eq(ov, &when_val),
+                        None => is_truthy(&when_val),
+                    };
+                    if matches {
+                        return then_expr.eval_scope(scope);
+                    }
+                }
+                match else_expr {
+                    Some(e) => e.eval_scope(scope),
+                    None => Value::Null,
+                }
+            }
+
+            SqlExpr::Coalesce(exprs) => {
+                for expr in exprs {
+                    let v = expr.eval_scope(scope);
+                    if !v.is_null() {
+                        return v;
+                    }
+                }
+                Value::Null
+            }
+
+            SqlExpr::NullIf(a, b) => {
+                let va = a.eval_scope(scope);
+                let vb = b.eval_scope(scope);
+                if coerced_eq(&va, &vb) {
+                    Value::Null
+                } else {
+                    va
+                }
+            }
+
+            SqlExpr::IsNull { expr, negated } => {
+                let v = expr.eval_scope(scope);
+                let is_null = v.is_null();
+                Value::Bool(if *negated { !is_null } else { is_null })
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::types::BinaryOp;
+    use super::*;
+
+    fn doc() -> Value {
+        Value::Object(
+            [
+                ("name".to_string(), Value::String("Alice".into())),
+                ("age".to_string(), Value::Integer(30)),
+                ("price".to_string(), Value::Float(10.5)),
+                ("qty".to_string(), Value::Integer(4)),
+                ("active".to_string(), Value::Bool(true)),
+                ("email".to_string(), Value::Null),
+            ]
+            .into_iter()
+            .collect(),
+        )
+    }
+
+    #[test]
+    fn column_ref() {
+        let expr = SqlExpr::Column("name".into());
+        assert_eq!(expr.eval(&doc()), Value::String("Alice".into()));
+    }
+
+    #[test]
+    fn missing_column() {
+        let expr = SqlExpr::Column("missing".into());
+        assert_eq!(expr.eval(&doc()), Value::Null);
+    }
+
+    #[test]
+    fn literal() {
+        let expr = SqlExpr::Literal(Value::Integer(42));
+        assert_eq!(expr.eval(&doc()), Value::Integer(42));
+    }
+
+    #[test]
+    fn add() {
+        let expr = SqlExpr::BinaryOp {
+            left: Box::new(SqlExpr::Column("price".into())),
+            op: BinaryOp::Add,
+            right: Box::new(SqlExpr::Literal(Value::Float(1.5))),
+        };
+        assert_eq!(expr.eval(&doc()), Value::Integer(12));
+    }
+
+    #[test]
+    fn multiply() {
+        let expr = SqlExpr::BinaryOp {
+            left: Box::new(SqlExpr::Column("price".into())),
+            op: BinaryOp::Mul,
+            right: Box::new(SqlExpr::Column("qty".into())),
+        };
+        assert_eq!(expr.eval(&doc()), Value::Integer(42));
+    }
+
+    #[test]
+    fn case_when() {
+        let expr = SqlExpr::Case {
+            operand: None,
+            when_thens: vec![(
+                SqlExpr::BinaryOp {
+                    left: Box::new(SqlExpr::Column("age".into())),
+                    op: BinaryOp::GtEq,
+                    right: Box::new(SqlExpr::Literal(Value::Integer(18))),
+                },
+                SqlExpr::Literal(Value::String("adult".into())),
+            )],
+            else_expr: Some(Box::new(SqlExpr::Literal(Value::String("minor".into())))),
+        };
+        assert_eq!(expr.eval(&doc()), Value::String("adult".into()));
+    }
+
+    #[test]
+    fn coalesce() {
+        let expr = SqlExpr::Coalesce(vec![
+            SqlExpr::Column("email".into()),
+            SqlExpr::Literal(Value::String("default@example.com".into())),
+        ]);
+        assert_eq!(
+            expr.eval(&doc()),
+            Value::String("default@example.com".into())
+        );
+    }
+
+    #[test]
+    fn is_null() {
+        let expr = SqlExpr::IsNull {
+            expr: Box::new(SqlExpr::Column("email".into())),
+            negated: false,
+        };
+        assert_eq!(expr.eval(&doc()), Value::Bool(true));
+    }
+}
diff --git a/nodedb-query/src/expr/mod.rs b/nodedb-query/src/expr/mod.rs
new file mode 100644
index 00000000..2933d78d
--- /dev/null
+++ b/nodedb-query/src/expr/mod.rs
@@ -0,0 +1,13 @@
+//! SqlExpr AST, on-wire codec, and row-scope evaluator.
+//!
+//! This module is the canonical expression type shared between the planner,
+//! the Data Plane executor, the UPDATE assignment path, and the WHERE scan
+//! filter. It is also the payload carried through msgpack-encoded physical
+//! plans, so the zerompk codec must stay in lockstep with the AST variants.
+
+pub mod binary;
+pub mod codec;
+pub mod eval;
+pub mod types;
+
+pub use types::{BinaryOp, CastType, ComputedColumn, SqlExpr};
diff --git a/nodedb-query/src/expr/types.rs b/nodedb-query/src/expr/types.rs
new file mode 100644
index 00000000..92d8d332
--- /dev/null
+++ b/nodedb-query/src/expr/types.rs
@@ -0,0 +1,102 @@
+//! SqlExpr AST node definitions.
+
+use nodedb_types::Value;
+
+/// A serializable SQL expression that can be evaluated against a document.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub enum SqlExpr {
+    /// Column reference: extract field value from the document.
+    Column(String),
+    /// Literal value.
+    Literal(Value),
+    /// Binary operation: left op right.
+    BinaryOp {
+        left: Box<SqlExpr>,
+        op: BinaryOp,
+        right: Box<SqlExpr>,
+    },
+    /// Unary negation: -expr or NOT expr.
+    Negate(Box<SqlExpr>),
+    /// Scalar function call.
+    Function { name: String, args: Vec<SqlExpr> },
+    /// CAST(expr AS type).
+    Cast {
+        expr: Box<SqlExpr>,
+        to_type: CastType,
+    },
+    /// CASE WHEN cond1 THEN val1 ... ELSE default END.
+    Case {
+        operand: Option<Box<SqlExpr>>,
+        when_thens: Vec<(SqlExpr, SqlExpr)>,
+        else_expr: Option<Box<SqlExpr>>,
+    },
+    /// COALESCE(expr1, expr2, ...): first non-null value.
+    Coalesce(Vec<SqlExpr>),
+    /// NULLIF(expr1, expr2): returns NULL if expr1 = expr2, else expr1.
+    NullIf(Box<SqlExpr>, Box<SqlExpr>),
+    /// IS NULL / IS NOT NULL.
+    IsNull { expr: Box<SqlExpr>, negated: bool },
+    /// OLD column reference: extract field value from the pre-update document.
+    /// Used in TRANSITION CHECK predicates. Resolves against the OLD row
+    /// when evaluated via `eval_with_old()`. Returns NULL in normal `eval()`.
+    OldColumn(String),
+}
+
+/// Binary operators.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    serde::Serialize,
+    serde::Deserialize,
+    zerompk::ToMessagePack,
+    zerompk::FromMessagePack,
+)]
+#[msgpack(c_enum)]
+pub enum BinaryOp {
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Mod,
+    Eq,
+    NotEq,
+    Gt,
+    GtEq,
+    Lt,
+    LtEq,
+    And,
+    Or,
+    Concat,
+}
+
+/// Target types for CAST.
+#[derive(
+    Debug,
+    Clone,
+    serde::Serialize,
+    serde::Deserialize,
+    zerompk::ToMessagePack,
+    zerompk::FromMessagePack,
+)]
+#[msgpack(c_enum)]
+pub enum CastType {
+    Int,
+    Float,
+    String,
+    Bool,
+}
+
+/// A computed projection column: alias + expression.
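+/// For example, the projection `SELECT price * qty AS total` lowers to
+/// (sketch) `ComputedColumn { alias: "total", expr: price * qty }`, and the
+/// evaluator runs `expr` per row to produce the aliased output column.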
+#[derive( + Debug, + Clone, + serde::Serialize, + serde::Deserialize, + zerompk::ToMessagePack, + zerompk::FromMessagePack, +)] +pub struct ComputedColumn { + pub alias: String, + pub expr: SqlExpr, +} diff --git a/nodedb-query/src/msgpack_scan/filter.rs b/nodedb-query/src/msgpack_scan/filter.rs index 5e00308f..540b8679 100644 --- a/nodedb-query/src/msgpack_scan/filter.rs +++ b/nodedb-query/src/msgpack_scan/filter.rs @@ -28,6 +28,12 @@ impl ScanFilter { .iter() .any(|clause| clause.iter().all(|f| f.matches_binary(doc))); } + FilterOp::Expr => { + return match (self.expr.as_ref(), nodedb_types::value_from_msgpack(doc)) { + (Some(expr), Ok(value)) => crate::value_ops::is_truthy(&expr.eval(&value)), + _ => false, + }; + } _ => {} } @@ -58,6 +64,12 @@ impl ScanFilter { .iter() .any(|clause| clause.iter().all(|f| f.matches_binary_indexed(doc, idx))); } + FilterOp::Expr => { + return match (self.expr.as_ref(), nodedb_types::value_from_msgpack(doc)) { + (Some(expr), Ok(value)) => crate::value_ops::is_truthy(&expr.eval(&value)), + _ => false, + }; + } _ => {} } @@ -263,6 +275,7 @@ mod tests { op: op.into(), value, clauses: vec![], + expr: None, } } @@ -273,6 +286,18 @@ mod tests { assert!(!filter("age", "eq", nodedb_types::Value::Integer(30)).matches_binary(&doc)); } + #[test] + fn eq_coerces_string_to_integer() { + let doc = encode(&json!({"age": 25})); + assert!(filter("age", "eq", nodedb_types::Value::String("25".into())).matches_binary(&doc)); + } + + #[test] + fn gt_coerces_string_to_integer() { + let doc = encode(&json!({"score": "90"})); + assert!(filter("score", "gt", nodedb_types::Value::Integer(80)).matches_binary(&doc)); + } + #[test] fn eq_string() { let doc = encode(&json!({"name": "alice"})); @@ -381,7 +406,8 @@ mod tests { field: "status".into(), op: "in".into(), value: vals.clone(), - clauses: vec![] + clauses: vec![], + expr: None } .matches_binary(&doc) ); @@ -392,7 +418,8 @@ mod tests { field: "status".into(), op: "not_in".into(), value: vals, - clauses: vec![] + clauses: vec![], + expr: None } .matches_binary(&doc2) ); @@ -431,7 +458,8 @@ mod tests { field: "tags".into(), op: "array_contains_all".into(), value: needles, - clauses: vec![] + clauses: vec![], + expr: None } .matches_binary(&doc) ); @@ -449,7 +477,8 @@ mod tests { field: "tags".into(), op: "array_overlap".into(), value: needles, - clauses: vec![] + clauses: vec![], + expr: None } .matches_binary(&doc) ); @@ -466,6 +495,7 @@ mod tests { vec![filter("x", "eq", nodedb_types::Value::Integer(10))], vec![filter("x", "eq", nodedb_types::Value::Integer(5))], ], + expr: None, }; assert!(f.matches_binary(&doc)); } diff --git a/nodedb-query/src/scan_filter/like.rs b/nodedb-query/src/scan_filter/like.rs index bc8fe3ba..0ccf11b3 100644 --- a/nodedb-query/src/scan_filter/like.rs +++ b/nodedb-query/src/scan_filter/like.rs @@ -38,3 +38,21 @@ pub fn sql_like_match(input: &str, pattern: &str, case_insensitive: bool) -> boo j == pattern.len() } + +#[cfg(test)] +mod tests { + use super::sql_like_match; + + #[test] + fn like_basic() { + assert!(sql_like_match("hello world", "%world", false)); + assert!(sql_like_match("hello world", "hello%", false)); + assert!(!sql_like_match("hello world", "xyz%", false)); + } + + #[test] + fn ilike_case_insensitive() { + assert!(sql_like_match("Hello", "hello", true)); + assert!(sql_like_match("WORLD", "%world%", true)); + } +} diff --git a/nodedb-query/src/scan_filter/mod.rs b/nodedb-query/src/scan_filter/mod.rs index a8dbfd74..ddf16bb4 100644 --- a/nodedb-query/src/scan_filter/mod.rs +++ 
b/nodedb-query/src/scan_filter/mod.rs @@ -1,386 +1,15 @@ //! Post-scan filter evaluation. //! -//! `ScanFilter` represents a single filter predicate. `compare_json_values` -//! provides total ordering for JSON values used in sort and range comparisons. +//! `ScanFilter` represents a single filter predicate. //! //! Shared between Origin (Control Plane + Data Plane) and Lite. pub mod like; +pub mod op; pub mod parse; +pub mod types; pub use like::sql_like_match; +pub use op::FilterOp; pub use parse::parse_simple_predicates; - -/// Filter operator enum for O(1) dispatch instead of string comparison. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum FilterOp { - Eq, - Ne, - Gt, - Gte, - Lt, - Lte, - Contains, - Like, - NotLike, - Ilike, - NotIlike, - In, - NotIn, - IsNull, - IsNotNull, - ArrayContains, - ArrayContainsAll, - ArrayOverlap, - #[default] - MatchAll, - Exists, - NotExists, - Or, - /// Column-vs-column comparison: `field` op `value` where `value` is a - /// `Value::String` containing the name of the other column. The comparison - /// reads both fields from the same document row. - GtColumn, - GteColumn, - LtColumn, - LteColumn, - EqColumn, - NeColumn, -} - -impl FilterOp { - pub fn parse_op(s: &str) -> Self { - match s { - "eq" => Self::Eq, - "ne" | "neq" => Self::Ne, - "gt" => Self::Gt, - "gte" | "ge" => Self::Gte, - "lt" => Self::Lt, - "lte" | "le" => Self::Lte, - "contains" => Self::Contains, - "like" => Self::Like, - "not_like" => Self::NotLike, - "ilike" => Self::Ilike, - "not_ilike" => Self::NotIlike, - "in" => Self::In, - "not_in" => Self::NotIn, - "is_null" => Self::IsNull, - "is_not_null" => Self::IsNotNull, - "array_contains" => Self::ArrayContains, - "array_contains_all" => Self::ArrayContainsAll, - "array_overlap" => Self::ArrayOverlap, - "match_all" => Self::MatchAll, - "exists" => Self::Exists, - "not_exists" => Self::NotExists, - "or" => Self::Or, - "gt_col" => Self::GtColumn, - "gte_col" => Self::GteColumn, - "lt_col" => Self::LtColumn, - "lte_col" => Self::LteColumn, - "eq_col" => Self::EqColumn, - "ne_col" => Self::NeColumn, - _ => Self::MatchAll, - } - } - - pub fn as_str(&self) -> &'static str { - match self { - Self::Eq => "eq", - Self::Ne => "ne", - Self::Gt => "gt", - Self::Gte => "gte", - Self::Lt => "lt", - Self::Lte => "lte", - Self::Contains => "contains", - Self::Like => "like", - Self::NotLike => "not_like", - Self::Ilike => "ilike", - Self::NotIlike => "not_ilike", - Self::In => "in", - Self::NotIn => "not_in", - Self::IsNull => "is_null", - Self::IsNotNull => "is_not_null", - Self::ArrayContains => "array_contains", - Self::ArrayContainsAll => "array_contains_all", - Self::ArrayOverlap => "array_overlap", - Self::MatchAll => "match_all", - Self::Exists => "exists", - Self::NotExists => "not_exists", - Self::Or => "or", - Self::GtColumn => "gt_col", - Self::GteColumn => "gte_col", - Self::LtColumn => "lt_col", - Self::LteColumn => "lte_col", - Self::EqColumn => "eq_col", - Self::NeColumn => "ne_col", - } - } -} - -impl From<&str> for FilterOp { - fn from(s: &str) -> Self { - Self::parse_op(s) - } -} - -impl From for FilterOp { - fn from(s: String) -> Self { - Self::parse_op(&s) - } -} - -impl serde::Serialize for FilterOp { - fn serialize(&self, serializer: S) -> Result { - serializer.serialize_str(self.as_str()) - } -} - -impl<'de> serde::Deserialize<'de> for FilterOp { - fn deserialize>(deserializer: D) -> Result { - let s = String::deserialize(deserializer)?; - Ok(FilterOp::parse_op(&s)) - } -} - -/// A single filter predicate for 
document scan evaluation. -/// -/// Supports simple comparison operators (eq, ne, gt, gte, lt, lte, contains, -/// is_null, is_not_null) and disjunctive groups via the `"or"` operator. -/// -/// OR representation: `{"op": "or", "clauses": [[filter1, filter2], [filter3]]}` -/// means `(filter1 AND filter2) OR filter3`. Each clause is an AND-group; -/// the document matches if ANY clause group fully matches. -#[derive(Clone, serde::Serialize, serde::Deserialize, Default)] -pub struct ScanFilter { - #[serde(default)] - pub field: String, - pub op: FilterOp, - #[serde(default)] - pub value: nodedb_types::Value, - /// Disjunctive clause groups for OR predicates. - /// Each inner Vec is an AND-group. The document matches if ANY group matches. - #[serde(default)] - pub clauses: Vec>, -} - -impl zerompk::ToMessagePack for ScanFilter { - fn write(&self, writer: &mut W) -> zerompk::Result<()> { - writer.write_array_len(4)?; - self.field.write(writer)?; - writer.write_string(self.op.as_str())?; - // Convert nodedb_types::Value → serde_json::Value for wire compat. - let json_val: serde_json::Value = self.value.clone().into(); - nodedb_types::JsonValue(json_val).write(writer)?; - self.clauses.write(writer) - } -} - -impl<'a> zerompk::FromMessagePack<'a> for ScanFilter { - fn read>(reader: &mut R) -> zerompk::Result { - reader.check_array_len(4)?; - let field = String::read(reader)?; - let op_str = String::read(reader)?; - let jv = nodedb_types::JsonValue::read(reader)?; - let clauses = Vec::>::read(reader)?; - Ok(Self { - field, - op: FilterOp::parse_op(&op_str), - // Convert serde_json::Value → nodedb_types::Value at wire boundary. - value: nodedb_types::Value::from(jv.0), - clauses, - }) - } -} - -impl ScanFilter { - /// Evaluate this filter against a `nodedb_types::Value` document. - /// - /// Same semantics as `matches()` but operates on the native Value type - /// instead of serde_json::Value, avoiding lossy JSON roundtrips. 
- pub fn matches_value(&self, doc: &nodedb_types::Value) -> bool { - match self.op { - FilterOp::MatchAll | FilterOp::Exists | FilterOp::NotExists => return true, - FilterOp::Or => { - return self - .clauses - .iter() - .any(|clause| clause.iter().all(|f| f.matches_value(doc))); - } - _ => {} - } - - let field_val = match doc.get(&self.field) { - Some(v) => v, - None => return self.op == FilterOp::IsNull, - }; - - match self.op { - FilterOp::Eq => self.value.eq_coerced(field_val), - FilterOp::Ne => !self.value.eq_coerced(field_val), - FilterOp::Gt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Less, - FilterOp::Gte => { - let cmp = self.value.cmp_coerced(field_val); - cmp == std::cmp::Ordering::Less || cmp == std::cmp::Ordering::Equal - } - FilterOp::Lt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Greater, - FilterOp::Lte => { - let cmp = self.value.cmp_coerced(field_val); - cmp == std::cmp::Ordering::Greater || cmp == std::cmp::Ordering::Equal - } - FilterOp::Contains => { - if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { - s.contains(pattern) - } else { - false - } - } - FilterOp::Like => { - if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { - like::sql_like_match(s, pattern, false) - } else { - false - } - } - FilterOp::NotLike => { - if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { - !like::sql_like_match(s, pattern, false) - } else { - false - } - } - FilterOp::Ilike => { - if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { - like::sql_like_match(s, pattern, true) - } else { - false - } - } - FilterOp::NotIlike => { - if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { - !like::sql_like_match(s, pattern, true) - } else { - false - } - } - FilterOp::In => { - if let Some(mut iter) = self.value.as_array_iter() { - iter.any(|v| v.eq_coerced(field_val)) - } else { - false - } - } - FilterOp::NotIn => { - if let Some(mut iter) = self.value.as_array_iter() { - !iter.any(|v| v.eq_coerced(field_val)) - } else { - true - } - } - FilterOp::IsNull => field_val.is_null(), - FilterOp::IsNotNull => !field_val.is_null(), - FilterOp::ArrayContains => { - if let Some(arr) = field_val.as_array() { - arr.iter().any(|v| self.value.eq_coerced(v)) - } else { - false - } - } - FilterOp::ArrayContainsAll => { - if let (Some(field_arr), Some(mut needles)) = - (field_val.as_array(), self.value.as_array_iter()) - { - needles.all(|needle| field_arr.iter().any(|v| needle.eq_coerced(v))) - } else { - false - } - } - FilterOp::ArrayOverlap => { - if let (Some(field_arr), Some(mut needles)) = - (field_val.as_array(), self.value.as_array_iter()) - { - needles.any(|needle| field_arr.iter().any(|v| needle.eq_coerced(v))) - } else { - false - } - } - FilterOp::GtColumn - | FilterOp::GteColumn - | FilterOp::LtColumn - | FilterOp::LteColumn - | FilterOp::EqColumn - | FilterOp::NeColumn => { - let other_col = match &self.value { - nodedb_types::Value::String(s) => s.as_str(), - _ => return false, - }; - let other_val = match doc.get(other_col) { - Some(v) => v, - None => return false, - }; - match self.op { - FilterOp::GtColumn => { - field_val.cmp_coerced(other_val) == std::cmp::Ordering::Greater - } - FilterOp::GteColumn => { - field_val.cmp_coerced(other_val) != std::cmp::Ordering::Less - } - FilterOp::LtColumn => { - field_val.cmp_coerced(other_val) == std::cmp::Ordering::Less - } - FilterOp::LteColumn => { - field_val.cmp_coerced(other_val) != 
std::cmp::Ordering::Greater - } - FilterOp::EqColumn => field_val.eq_coerced(other_val), - FilterOp::NeColumn => !field_val.eq_coerced(other_val), - _ => false, - } - } - _ => false, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - #[test] - fn filter_eq_coercion() { - let doc = json!({"age": 25}); - let msgpack = nodedb_types::json_msgpack::json_to_msgpack(&doc).unwrap(); - let filter = ScanFilter { - field: "age".into(), - op: "eq".into(), - value: nodedb_types::Value::String("25".into()), - clauses: vec![], - }; - assert!(filter.matches_binary(&msgpack)); - } - - #[test] - fn filter_gt_coercion() { - let doc = json!({"score": "90"}); - let msgpack = nodedb_types::json_msgpack::json_to_msgpack(&doc).unwrap(); - let filter = ScanFilter { - field: "score".into(), - op: "gt".into(), - value: nodedb_types::Value::Integer(80), - clauses: vec![], - }; - assert!(filter.matches_binary(&msgpack)); - } - - #[test] - fn like_basic() { - assert!(sql_like_match("hello world", "%world", false)); - assert!(sql_like_match("hello world", "hello%", false)); - assert!(!sql_like_match("hello world", "xyz%", false)); - } - - #[test] - fn ilike_case_insensitive() { - assert!(sql_like_match("Hello", "hello", true)); - assert!(sql_like_match("WORLD", "%world%", true)); - } -} +pub use types::ScanFilter; diff --git a/nodedb-query/src/scan_filter/op.rs b/nodedb-query/src/scan_filter/op.rs new file mode 100644 index 00000000..27761ccd --- /dev/null +++ b/nodedb-query/src/scan_filter/op.rs @@ -0,0 +1,144 @@ +//! `FilterOp` enum and its string/serde conversions. +//! +//! `FilterOp` is an O(1)-dispatch discriminant used by the scan filter +//! evaluator. On-wire it travels as a lowercase string tag so physical +//! plans remain debuggable by hand. + +/// Filter operator enum for O(1) dispatch instead of string comparison. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum FilterOp { + Eq, + Ne, + Gt, + Gte, + Lt, + Lte, + Contains, + Like, + NotLike, + Ilike, + NotIlike, + In, + NotIn, + IsNull, + IsNotNull, + ArrayContains, + ArrayContainsAll, + ArrayOverlap, + #[default] + MatchAll, + Exists, + NotExists, + Or, + /// Arbitrary expression predicate: the filter's `expr` field holds a + /// `nodedb_query::expr::SqlExpr`. The scan evaluator runs the expression + /// against the full row and treats truthy results as a match. Used when + /// the planner cannot reduce the WHERE clause to a simple `(field, op, value)` + /// — e.g. `LOWER(col) = 'x'`, `qty + 1 = 5`, `NOT (col = 'x')`. + Expr, + /// Column-vs-column comparison: `field` op `value` where `value` is a + /// `Value::String` containing the name of the other column. The comparison + /// reads both fields from the same document row. 
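+    ///
+    /// For example (sketch of the serde shape): `{"field": "qty",
+    /// "op": "gt_col", "value": "reserved"}` matches rows where
+    /// `doc.qty > doc.reserved` under coerced ordering.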
+    GtColumn,
+    GteColumn,
+    LtColumn,
+    LteColumn,
+    EqColumn,
+    NeColumn,
+}
+
+impl FilterOp {
+    pub fn parse_op(s: &str) -> Self {
+        match s {
+            "eq" => Self::Eq,
+            "ne" | "neq" => Self::Ne,
+            "gt" => Self::Gt,
+            "gte" | "ge" => Self::Gte,
+            "lt" => Self::Lt,
+            "lte" | "le" => Self::Lte,
+            "contains" => Self::Contains,
+            "like" => Self::Like,
+            "not_like" => Self::NotLike,
+            "ilike" => Self::Ilike,
+            "not_ilike" => Self::NotIlike,
+            "in" => Self::In,
+            "not_in" => Self::NotIn,
+            "is_null" => Self::IsNull,
+            "is_not_null" => Self::IsNotNull,
+            "array_contains" => Self::ArrayContains,
+            "array_contains_all" => Self::ArrayContainsAll,
+            "array_overlap" => Self::ArrayOverlap,
+            "match_all" => Self::MatchAll,
+            "exists" => Self::Exists,
+            "not_exists" => Self::NotExists,
+            "or" => Self::Or,
+            "expr" => Self::Expr,
+            "gt_col" => Self::GtColumn,
+            "gte_col" => Self::GteColumn,
+            "lt_col" => Self::LtColumn,
+            "lte_col" => Self::LteColumn,
+            "eq_col" => Self::EqColumn,
+            "ne_col" => Self::NeColumn,
+            _ => Self::MatchAll,
+        }
+    }
+
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Eq => "eq",
+            Self::Ne => "ne",
+            Self::Gt => "gt",
+            Self::Gte => "gte",
+            Self::Lt => "lt",
+            Self::Lte => "lte",
+            Self::Contains => "contains",
+            Self::Like => "like",
+            Self::NotLike => "not_like",
+            Self::Ilike => "ilike",
+            Self::NotIlike => "not_ilike",
+            Self::In => "in",
+            Self::NotIn => "not_in",
+            Self::IsNull => "is_null",
+            Self::IsNotNull => "is_not_null",
+            Self::ArrayContains => "array_contains",
+            Self::ArrayContainsAll => "array_contains_all",
+            Self::ArrayOverlap => "array_overlap",
+            Self::MatchAll => "match_all",
+            Self::Exists => "exists",
+            Self::NotExists => "not_exists",
+            Self::Or => "or",
+            Self::Expr => "expr",
+            Self::GtColumn => "gt_col",
+            Self::GteColumn => "gte_col",
+            Self::LtColumn => "lt_col",
+            Self::LteColumn => "lte_col",
+            Self::EqColumn => "eq_col",
+            Self::NeColumn => "ne_col",
+        }
+    }
+}
+
+impl From<&str> for FilterOp {
+    fn from(s: &str) -> Self {
+        Self::parse_op(s)
+    }
+}
+
+impl From<String> for FilterOp {
+    fn from(s: String) -> Self {
+        Self::parse_op(&s)
+    }
+}
+
+impl serde::Serialize for FilterOp {
+    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        serializer.serialize_str(self.as_str())
+    }
+}
+
+impl<'de> serde::Deserialize<'de> for FilterOp {
+    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        let s = String::deserialize(deserializer)?;
+        Ok(FilterOp::parse_op(&s))
+    }
+}
diff --git a/nodedb-query/src/scan_filter/parse.rs b/nodedb-query/src/scan_filter/parse.rs
index 8379d14a..d4d5234c 100644
--- a/nodedb-query/src/scan_filter/parse.rs
+++ b/nodedb-query/src/scan_filter/parse.rs
@@ -42,6 +42,7 @@ fn parse_single_predicate(clause: &str) -> Option<ScanFilter> {
             op: super::FilterOp::parse_op(op),
             value: nodedb_types::Value::from(parse_predicate_value(raw_value)),
             clauses: Vec::new(),
+            expr: None,
         });
     }
 }
@@ -55,6 +56,7 @@ fn parse_single_predicate(clause: &str) -> Option<ScanFilter> {
             op: super::FilterOp::Like,
             value: nodedb_types::Value::from(parse_predicate_value(raw_value)),
             clauses: Vec::new(),
+            expr: None,
         });
     }
     if let Some(pos) = upper.find(" ILIKE ") {
@@ -65,6 +67,7 @@ fn parse_single_predicate(clause: &str) -> Option<ScanFilter> {
             op: super::FilterOp::Ilike,
             value: nodedb_types::Value::from(parse_predicate_value(raw_value)),
             clauses: Vec::new(),
+            expr: None,
         });
     }
diff --git a/nodedb-query/src/scan_filter/types.rs b/nodedb-query/src/scan_filter/types.rs
new file mode 100644
index 00000000..5b0a5ccf
--- /dev/null
+++ b/nodedb-query/src/scan_filter/types.rs
@@ -0,0 +1,221 @@
+//! `ScanFilter` record, its wire codec, and per-row evaluation against a
+//! `nodedb_types::Value` document.
+
+use crate::expr::SqlExpr;
+
+use super::like;
+use super::op::FilterOp;
+
+/// A single filter predicate for document scan evaluation.
+///
+/// Supports simple comparison operators (eq, ne, gt, gte, lt, lte, contains,
+/// is_null, is_not_null), disjunctive groups via the `"or"` operator, and
+/// full SqlExpr predicates via `FilterOp::Expr` for anything the planner
+/// cannot reduce to a simple `(field, op, value)` — scalar functions in
+/// WHERE, non-literal IN lists, column arithmetic, `NOT(...)`, etc.
+///
+/// OR representation: `{"op": "or", "clauses": [[filter1, filter2], [filter3]]}`
+/// means `(filter1 AND filter2) OR filter3`. Each clause is an AND-group;
+/// the document matches if ANY clause group fully matches.
+#[derive(Clone, serde::Serialize, serde::Deserialize, Default)]
+pub struct ScanFilter {
+    #[serde(default)]
+    pub field: String,
+    pub op: FilterOp,
+    #[serde(default)]
+    pub value: nodedb_types::Value,
+    /// Disjunctive clause groups for OR predicates.
+    /// Each inner Vec is an AND-group. The document matches if ANY group matches.
+    #[serde(default)]
+    pub clauses: Vec<Vec<ScanFilter>>,
+    /// Expression predicate payload. Only meaningful when `op == FilterOp::Expr`;
+    /// must be `None` for every other operator.
+    #[serde(default)]
+    pub expr: Option<SqlExpr>,
+}
+
+impl zerompk::ToMessagePack for ScanFilter {
+    fn write<W: zerompk::Writer>(&self, writer: &mut W) -> zerompk::Result<()> {
+        writer.write_array_len(5)?;
+        self.field.write(writer)?;
+        writer.write_string(self.op.as_str())?;
+        // Convert nodedb_types::Value → serde_json::Value for wire compat.
+        let json_val: serde_json::Value = self.value.clone().into();
+        nodedb_types::JsonValue(json_val).write(writer)?;
+        self.clauses.write(writer)?;
+        self.expr.write(writer)
+    }
+}
+
+impl<'a> zerompk::FromMessagePack<'a> for ScanFilter {
+    fn read<R: zerompk::Reader<'a>>(reader: &mut R) -> zerompk::Result<Self> {
+        reader.check_array_len(5)?;
+        let field = String::read(reader)?;
+        let op_str = String::read(reader)?;
+        let jv = nodedb_types::JsonValue::read(reader)?;
+        let clauses = Vec::<Vec<ScanFilter>>::read(reader)?;
+        let expr = Option::<SqlExpr>::read(reader)?;
+        Ok(Self {
+            field,
+            op: FilterOp::parse_op(&op_str),
+            // Convert serde_json::Value → nodedb_types::Value at wire boundary.
+            value: nodedb_types::Value::from(jv.0),
+            clauses,
+            expr,
+        })
+    }
+}
+
+impl ScanFilter {
+    /// Evaluate this filter against a `nodedb_types::Value` document.
+    ///
+    /// Same semantics as `matches()` but operates on the native Value type
+    /// instead of serde_json::Value, avoiding lossy JSON roundtrips.
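+    ///
+    /// ```ignore
+    /// // Sketch: an expression predicate the planner could not reduce to
+    /// // (field, op, value), e.g. LOWER(name) = 'alice'. `pred` and `doc`
+    /// // are placeholders; a truthy eval result means the row matches.
+    /// let f = ScanFilter { op: FilterOp::Expr, expr: Some(pred), ..Default::default() };
+    /// let matched = f.matches_value(&doc);
+    /// ```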
+ pub fn matches_value(&self, doc: &nodedb_types::Value) -> bool { + match self.op { + FilterOp::MatchAll | FilterOp::Exists | FilterOp::NotExists => return true, + FilterOp::Or => { + return self + .clauses + .iter() + .any(|clause| clause.iter().all(|f| f.matches_value(doc))); + } + FilterOp::Expr => { + return match &self.expr { + Some(expr) => crate::value_ops::is_truthy(&expr.eval(doc)), + None => false, + }; + } + _ => {} + } + + let field_val = match doc.get(&self.field) { + Some(v) => v, + None => return self.op == FilterOp::IsNull, + }; + + match self.op { + FilterOp::Eq => self.value.eq_coerced(field_val), + FilterOp::Ne => !self.value.eq_coerced(field_val), + FilterOp::Gt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Less, + FilterOp::Gte => { + let cmp = self.value.cmp_coerced(field_val); + cmp == std::cmp::Ordering::Less || cmp == std::cmp::Ordering::Equal + } + FilterOp::Lt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Greater, + FilterOp::Lte => { + let cmp = self.value.cmp_coerced(field_val); + cmp == std::cmp::Ordering::Greater || cmp == std::cmp::Ordering::Equal + } + FilterOp::Contains => { + if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { + s.contains(pattern) + } else { + false + } + } + FilterOp::Like => { + if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { + like::sql_like_match(s, pattern, false) + } else { + false + } + } + FilterOp::NotLike => { + if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { + !like::sql_like_match(s, pattern, false) + } else { + false + } + } + FilterOp::Ilike => { + if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { + like::sql_like_match(s, pattern, true) + } else { + false + } + } + FilterOp::NotIlike => { + if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) { + !like::sql_like_match(s, pattern, true) + } else { + false + } + } + FilterOp::In => { + if let Some(mut iter) = self.value.as_array_iter() { + iter.any(|v| v.eq_coerced(field_val)) + } else { + false + } + } + FilterOp::NotIn => { + if let Some(mut iter) = self.value.as_array_iter() { + !iter.any(|v| v.eq_coerced(field_val)) + } else { + true + } + } + FilterOp::IsNull => field_val.is_null(), + FilterOp::IsNotNull => !field_val.is_null(), + FilterOp::ArrayContains => { + if let Some(arr) = field_val.as_array() { + arr.iter().any(|v| self.value.eq_coerced(v)) + } else { + false + } + } + FilterOp::ArrayContainsAll => { + if let (Some(field_arr), Some(mut needles)) = + (field_val.as_array(), self.value.as_array_iter()) + { + needles.all(|needle| field_arr.iter().any(|v| needle.eq_coerced(v))) + } else { + false + } + } + FilterOp::ArrayOverlap => { + if let (Some(field_arr), Some(mut needles)) = + (field_val.as_array(), self.value.as_array_iter()) + { + needles.any(|needle| field_arr.iter().any(|v| needle.eq_coerced(v))) + } else { + false + } + } + FilterOp::GtColumn + | FilterOp::GteColumn + | FilterOp::LtColumn + | FilterOp::LteColumn + | FilterOp::EqColumn + | FilterOp::NeColumn => { + let other_col = match &self.value { + nodedb_types::Value::String(s) => s.as_str(), + _ => return false, + }; + let other_val = match doc.get(other_col) { + Some(v) => v, + None => return false, + }; + match self.op { + FilterOp::GtColumn => { + field_val.cmp_coerced(other_val) == std::cmp::Ordering::Greater + } + FilterOp::GteColumn => { + field_val.cmp_coerced(other_val) != std::cmp::Ordering::Less + } + FilterOp::LtColumn => { + 
field_val.cmp_coerced(other_val) == std::cmp::Ordering::Less
+                    }
+                    FilterOp::LteColumn => {
+                        field_val.cmp_coerced(other_val) != std::cmp::Ordering::Greater
+                    }
+                    FilterOp::EqColumn => field_val.eq_coerced(other_val),
+                    FilterOp::NeColumn => !field_val.eq_coerced(other_val),
+                    _ => false,
+                }
+            }
+            _ => false,
+        }
+    }
+}
diff --git a/nodedb-sql/src/engine_rules/document_schemaless.rs b/nodedb-sql/src/engine_rules/document_schemaless.rs
index 1f2ce02c..ad610a61 100644
--- a/nodedb-sql/src/engine_rules/document_schemaless.rs
+++ b/nodedb-sql/src/engine_rules/document_schemaless.rs
@@ -22,6 +22,7 @@ impl EngineRules for SchemalessRules {
             engine: EngineType::DocumentSchemaless,
             rows: p.rows,
             column_defaults: p.column_defaults,
+            on_conflict_updates: p.on_conflict_updates,
         }])
     }
 
diff --git a/nodedb-sql/src/engine_rules/document_strict.rs b/nodedb-sql/src/engine_rules/document_strict.rs
index 2ea6d89a..e09291ae 100644
--- a/nodedb-sql/src/engine_rules/document_strict.rs
+++ b/nodedb-sql/src/engine_rules/document_strict.rs
@@ -22,6 +22,7 @@ impl EngineRules for StrictRules {
             engine: EngineType::DocumentStrict,
             rows: p.rows,
             column_defaults: p.column_defaults,
+            on_conflict_updates: p.on_conflict_updates,
         }])
     }
 
diff --git a/nodedb-sql/src/engine_rules/mod.rs b/nodedb-sql/src/engine_rules/mod.rs
index 2c779a24..7fcf01c4 100644
--- a/nodedb-sql/src/engine_rules/mod.rs
+++ b/nodedb-sql/src/engine_rules/mod.rs
@@ -59,6 +59,10 @@ pub struct UpsertParams {
     pub columns: Vec<String>,
     pub rows: Vec<Vec<(String, SqlValue)>>,
     pub column_defaults: Vec<(String, String)>,
+    /// `ON CONFLICT (...) DO UPDATE SET` assignments. Empty for plain
+    /// `UPSERT INTO ...`; populated when the caller is
+    /// `INSERT ... ON CONFLICT ... DO UPDATE SET`.
+    pub on_conflict_updates: Vec<(String, SqlExpr)>,
 }
 
 /// Parameters for planning an AGGREGATE operation.
diff --git a/nodedb-sql/src/planner/dml.rs b/nodedb-sql/src/planner/dml.rs
index 0d5f83fe..32855bbe 100644
--- a/nodedb-sql/src/planner/dml.rs
+++ b/nodedb-sql/src/planner/dml.rs
@@ -8,8 +8,45 @@ use crate::parser::normalize::{normalize_ident, normalize_object_name};
 use crate::resolver::expr::{convert_expr, convert_value};
 use crate::types::*;
 
+/// Extract `ON CONFLICT (...) DO UPDATE SET` assignments from an AST
+/// insert, or `None` if this is a plain INSERT.
+fn extract_on_conflict_updates(ins: &ast::Insert) -> Result<Option<Vec<(String, SqlExpr)>>> {
+    let Some(on) = ins.on.as_ref() else {
+        return Ok(None);
+    };
+    let ast::OnInsert::OnConflict(oc) = on else {
+        return Ok(None);
+    };
+    let ast::OnConflictAction::DoUpdate(do_update) = &oc.action else {
+        // DO NOTHING maps to "ignore conflict" — currently unsupported.
+        return Err(SqlError::Unsupported {
+            detail: "ON CONFLICT DO NOTHING is not yet supported".into(),
+        });
+    };
+    let mut pairs = Vec::with_capacity(do_update.assignments.len());
+    for a in &do_update.assignments {
+        let name = match &a.target {
+            ast::AssignmentTarget::ColumnName(obj) => normalize_object_name(obj),
+            _ => {
+                return Err(SqlError::Unsupported {
+                    detail: "ON CONFLICT DO UPDATE SET target must be a column name".into(),
+                });
+            }
+        };
+        let expr = convert_expr(&a.value)?;
+        pairs.push((name, expr));
+    }
+    Ok(Some(pairs))
+}
+
 /// Plan an INSERT statement.
 pub fn plan_insert(ins: &ast::Insert, catalog: &dyn SqlCatalog) -> Result<Vec<SqlPlan>> {
+    // `INSERT ... ON CONFLICT DO UPDATE SET` reroutes to the upsert path
+    // with the assignments carried through. Detected before any other
+    // work so the plain-INSERT decode below never runs for a rerouted
+    // statement.
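
For a statement like `INSERT INTO counters (id, n) VALUES ('home', 1) ON CONFLICT (id) DO UPDATE SET n = n + 1`, the extractor above should yield a single assignment pair. A sketch of the expected shape, assuming `nodedb_sql::types::BinaryOp` exposes an `Add` variant mirroring the evaluator's:

    use nodedb_sql::types::{BinaryOp, SqlExpr, SqlValue};

    // extract_on_conflict_updates(&ins) → Ok(Some(vec![("n", n + 1)]))
    let expected: Vec<(String, SqlExpr)> = vec![(
        "n".to_string(),
        SqlExpr::BinaryOp {
            // Column shape mirrors the { table, name } form matched in filter.rs.
            left: Box::new(SqlExpr::Column { table: None, name: "n".into() }),
            op: BinaryOp::Add, // assumed variant name
            right: Box::new(SqlExpr::Literal(SqlValue::Int(1))),
        },
    )];
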
+    if let Some(on_conflict_updates) = extract_on_conflict_updates(ins)? {
+        return plan_upsert_with_on_conflict(ins, catalog, on_conflict_updates);
+    }
     let table_name = match &ins.table {
         ast::TableObject::TableName(name) => normalize_object_name(name),
         ast::TableObject::TableFunction(_) => {
@@ -190,6 +227,60 @@ pub fn plan_upsert(ins: &ast::Insert, catalog: &dyn SqlCatalog) -> Result<Vec<SqlPlan>> {
     })
 }
 
+/// Plan `INSERT ... ON CONFLICT (...) DO UPDATE SET` by routing through
+/// the upsert path with the extracted assignments attached.
+fn plan_upsert_with_on_conflict(
+    ins: &ast::Insert,
+    catalog: &dyn SqlCatalog,
+    on_conflict_updates: Vec<(String, SqlExpr)>,
+) -> Result<Vec<SqlPlan>> {
+    let table_name = match &ins.table {
+        ast::TableObject::TableName(name) => normalize_object_name(name),
+        ast::TableObject::TableFunction(_) => {
+            return Err(SqlError::Unsupported {
+                detail: "INSERT ... ON CONFLICT on a table function is not supported".into(),
+            });
+        }
+    };
+    let info = catalog
+        .get_collection(&table_name)?
+        .ok_or_else(|| SqlError::UnknownTable {
+            name: table_name.clone(),
+        })?;
+
+    let columns: Vec<String> = ins.columns.iter().map(normalize_ident).collect();
+
+    let source = ins.source.as_ref().ok_or_else(|| SqlError::Parse {
+        detail: "INSERT ... ON CONFLICT requires VALUES".into(),
+    })?;
+    let rows_ast = match &*source.body {
+        ast::SetExpr::Values(values) => &values.rows,
+        _ => {
+            return Err(SqlError::Unsupported {
+                detail: "INSERT ... ON CONFLICT source must be VALUES".into(),
+            });
+        }
+    };
+
+    let rows = convert_value_rows(&columns, rows_ast)?;
+    let column_defaults: Vec<(String, String)> = info
+        .columns
+        .iter()
+        .filter_map(|c| c.default.as_ref().map(|d| (c.name.clone(), d.clone())))
+        .collect();
+    let rules = engine_rules::resolve_engine_rules(info.engine);
+    rules.plan_upsert(engine_rules::UpsertParams {
+        collection: table_name,
+        columns,
+        rows,
+        column_defaults,
+        on_conflict_updates,
+    })
+}
diff --git a/nodedb-sql/src/types.rs b/nodedb-sql/src/types.rs
index 1b104816..9718275f 100644
--- a/nodedb-sql/src/types.rs
+++ b/nodedb-sql/src/types.rs
@@ -65,6 +65,11 @@ pub enum SqlPlan {
         engine: EngineType,
         rows: Vec<Vec<(String, SqlValue)>>,
         column_defaults: Vec<(String, String)>,
+        /// `ON CONFLICT (...) DO UPDATE SET field = expr` assignments.
+        /// When empty, upsert is a plain merge: new columns overwrite existing.
+        /// When non-empty, the engine applies these per-row against the
+        /// *existing* document instead of merging the inserted values.
+        on_conflict_updates: Vec<(String, SqlExpr)>,
     },
     InsertSelect {
         target: String,
diff --git a/nodedb/src/bridge/physical_plan/document.rs b/nodedb/src/bridge/physical_plan/document.rs
index c88aad22..56fdcbe8 100644
--- a/nodedb/src/bridge/physical_plan/document.rs
+++ b/nodedb/src/bridge/physical_plan/document.rs
@@ -2,6 +2,54 @@
 
 use nodedb_types::columnar::StrictSchema;
 
+/// Right-hand side of an UPDATE ... SET field = <...> assignment.
+///
+/// The planner turns each assignment into one of these before it crosses
+/// the SPSC bridge:
+///
+/// - `Literal` — pre-encoded msgpack bytes for a constant RHS. This is the
+///   fast path: the Data Plane can merge these at the binary level for
+///   non-strict collections without decoding the current row.
+/// - `Expr` — a `SqlExpr` that must be evaluated against the *current*
+///   document at apply time. Used for arithmetic (`col + 1`), functions
+///   (`LOWER(col)`, `NOW()`), `CASE`, concatenation, and anything else
+///   whose result depends on the row being updated.
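
Continuing the comment above, the two variants split a `SET` list as follows; a sketch using `sql_value_to_msgpack` and `sql_expr_to_bridge_expr`, the helpers this diff routes assignments through (`views_plus_one` stands in for the parsed `views + 1` expression tree):

    use nodedb_sql::types::SqlValue;

    // SET status = 'done' → constant RHS, encoded once at plan time. The
    // Data Plane can splice these bytes in without decoding the stored row.
    let fast = UpdateValue::Literal(sql_value_to_msgpack(&SqlValue::String("done".into())));

    // SET views = views + 1 → row-dependent RHS, shipped as an expression
    // and evaluated against the current document at apply time.
    let slow = UpdateValue::Expr(sql_expr_to_bridge_expr(&views_plus_one));
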
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub enum UpdateValue {
+    Literal(Vec<u8>),
+    Expr(crate::bridge::expr_eval::SqlExpr),
+}
+
+impl zerompk::ToMessagePack for UpdateValue {
+    fn write<W: zerompk::Writer>(&self, writer: &mut W) -> zerompk::Result<()> {
+        writer.write_array_len(2)?;
+        match self {
+            UpdateValue::Literal(bytes) => {
+                writer.write_u8(0)?;
+                bytes.write(writer)
+            }
+            UpdateValue::Expr(expr) => {
+                writer.write_u8(1)?;
+                expr.write(writer)
+            }
+        }
+    }
+}
+
+impl<'a> zerompk::FromMessagePack<'a> for UpdateValue {
+    fn read<R: zerompk::Reader<'a>>(reader: &mut R) -> zerompk::Result<Self> {
+        reader.check_array_len(2)?;
+        let tag = reader.read_u8()?;
+        match tag {
+            0 => Ok(UpdateValue::Literal(Vec::<u8>::read(reader)?)),
+            1 => Ok(UpdateValue::Expr(crate::bridge::expr_eval::SqlExpr::read(
+                reader,
+            )?)),
+            _ => Err(zerompk::Error::InvalidMarker(tag)),
+        }
+    }
+}
+
 /// Storage encoding mode for a document collection.
 ///
 /// Determines how documents are serialized before storage in the sparse engine.
@@ -139,8 +187,8 @@ pub enum DocumentOp {
     PointUpdate {
         collection: String,
         document_id: String,
-        /// Field name → new JSON value.
-        updates: Vec<(String, Vec<u8>)>,
+        /// Field name → assignment RHS (literal bytes or row-scope expression).
+        updates: Vec<(String, UpdateValue)>,
         /// If true, return the post-update document as payload (for RETURNING clause).
         returning: bool,
     },
@@ -217,18 +265,22 @@ pub enum DocumentOp {
         source_limit: usize,
     },
 
-    /// Upsert: insert or merge.
+    /// Upsert: insert or merge. When `on_conflict_updates` is non-empty,
+    /// the conflict branch evaluates those assignments against the
+    /// *existing* document instead of merging the inserted value —
+    /// the `INSERT ... ON CONFLICT DO UPDATE SET ...` path.
     Upsert {
         collection: String,
         document_id: String,
         value: Vec<u8>,
+        on_conflict_updates: Vec<(String, UpdateValue)>,
     },
 
     /// Bulk update: scan + apply field updates to all matches.
     BulkUpdate {
         collection: String,
         filters: Vec<Vec<u8>>,
-        updates: Vec<(String, Vec<u8>)>,
+        updates: Vec<(String, UpdateValue)>,
         /// If true, return updated documents as JSON array payload (for RETURNING clause).
returning: bool, }, diff --git a/nodedb/src/bridge/physical_plan/mod.rs b/nodedb/src/bridge/physical_plan/mod.rs index e54d9fa7..db258c9a 100644 --- a/nodedb/src/bridge/physical_plan/mod.rs +++ b/nodedb/src/bridge/physical_plan/mod.rs @@ -20,7 +20,7 @@ pub use columnar::ColumnarOp; pub use crdt::CrdtOp; pub use document::{ BalancedDef, DocumentOp, EnforcementOptions, GeneratedColumnSpec, MaterializedSumBinding, - PeriodLockConfig, StorageMode, + PeriodLockConfig, StorageMode, UpdateValue, }; pub use graph::GraphOp; pub use kv::KvOp; diff --git a/nodedb/src/control/planner/rls_injection.rs b/nodedb/src/control/planner/rls_injection.rs index cb7cbdfb..7a8ff70d 100644 --- a/nodedb/src/control/planner/rls_injection.rs +++ b/nodedb/src/control/planner/rls_injection.rs @@ -352,6 +352,7 @@ fn inject_permission_tree_for_plan( .collect(), ), clauses: Vec::new(), + expr: None, }; let filter_bytes = diff --git a/nodedb/src/control/planner/sql_plan_convert/aggregate.rs b/nodedb/src/control/planner/sql_plan_convert/aggregate.rs index 3b5169a9..d9f3ec6e 100644 --- a/nodedb/src/control/planner/sql_plan_convert/aggregate.rs +++ b/nodedb/src/control/planner/sql_plan_convert/aggregate.rs @@ -345,8 +345,43 @@ pub(super) fn serialize_window_functions( #[cfg(test)] mod tests { - use super::{extract_computed_columns, extract_projection_names}; - use nodedb_sql::types::{Projection, SqlExpr, WindowSpec}; + use super::{agg_expr_to_spec, extract_computed_columns, extract_projection_names}; + use nodedb_sql::types::{AggregateExpr, BinaryOp, Projection, SqlExpr, SqlValue, WindowSpec}; + + #[test] + fn aggregate_spec_preserves_alias_and_case_expression() { + let agg = AggregateExpr { + function: "sum".into(), + args: vec![SqlExpr::Case { + operand: None, + when_then: vec![( + SqlExpr::BinaryOp { + left: Box::new(SqlExpr::Column { + table: None, + name: "category".into(), + }), + op: BinaryOp::Eq, + right: Box::new(SqlExpr::Literal(SqlValue::String("tools".into()))), + }, + SqlExpr::Literal(SqlValue::Int(1)), + )], + else_expr: Some(Box::new(SqlExpr::Literal(SqlValue::Int(0)))), + }], + alias: "tools_count".into(), + distinct: false, + }; + + let spec = agg_expr_to_spec(&agg); + + assert_eq!(spec.function, "sum"); + assert_eq!(spec.alias, "sum(*)"); + assert_eq!(spec.user_alias.as_deref(), Some("tools_count")); + assert_eq!(spec.field, "*"); + assert!(matches!( + spec.expr, + Some(crate::bridge::expr_eval::SqlExpr::Case { .. 
}) + )); + } #[test] fn window_aliases_stay_in_projection_and_out_of_computed_columns() { diff --git a/nodedb/src/control/planner/sql_plan_convert/convert.rs b/nodedb/src/control/planner/sql_plan_convert/convert.rs index c34e28ac..028c6729 100644 --- a/nodedb/src/control/planner/sql_plan_convert/convert.rs +++ b/nodedb/src/control/planner/sql_plan_convert/convert.rs @@ -84,7 +84,15 @@ pub(super) fn convert_one( engine, rows, column_defaults, - } => super::dml::convert_upsert(collection, engine, rows, column_defaults, tenant_id), + on_conflict_updates, + } => super::dml::convert_upsert( + collection, + engine, + rows, + column_defaults, + on_conflict_updates, + tenant_id, + ), SqlPlan::KvInsert { collection, diff --git a/nodedb/src/control/planner/sql_plan_convert/dml.rs b/nodedb/src/control/planner/sql_plan_convert/dml.rs index f2c8ef0d..7a2cbe51 100644 --- a/nodedb/src/control/planner/sql_plan_convert/dml.rs +++ b/nodedb/src/control/planner/sql_plan_convert/dml.rs @@ -9,7 +9,7 @@ use crate::types::{TenantId, VShardId}; use super::super::physical::{PhysicalTask, PostSetOp}; use super::filter::serialize_filters; use super::value::{ - assignments_to_bytes, row_to_msgpack, rows_to_msgpack_array, sql_value_to_bytes, + assignments_to_update_values, row_to_msgpack, rows_to_msgpack_array, sql_value_to_bytes, sql_value_to_msgpack, sql_value_to_string, write_msgpack_map_header, write_msgpack_str, write_msgpack_value, }; @@ -110,11 +110,21 @@ pub(super) fn convert_upsert( engine: &EngineType, rows: &[Vec<(String, SqlValue)>], _column_defaults: &[(String, String)], + on_conflict_updates: &[(String, SqlExpr)], tenant_id: TenantId, ) -> crate::Result> { let vshard = VShardId::from_collection(collection); let mut tasks = Vec::new(); + // The ON CONFLICT assignments travel alongside the insert bytes. Each + // non-literal RHS becomes an `UpdateValue::Expr` that the Data Plane + // evaluates against the *existing* row at apply time. + let on_conflict_values = if on_conflict_updates.is_empty() { + Vec::new() + } else { + assignments_to_update_values(on_conflict_updates)? + }; + for row in rows { let doc_id = row .iter() @@ -132,6 +142,7 @@ pub(super) fn convert_upsert( collection: collection.into(), document_id: doc_id, value: value_bytes, + on_conflict_updates: on_conflict_values.clone(), }), post_set_op: PostSetOp::None, }); @@ -205,10 +216,24 @@ pub(super) fn convert_update( ) -> crate::Result> { let vshard = VShardId::from_collection(collection); let filter_bytes = serialize_filters(filters)?; - let updates = assignments_to_bytes(assignments)?; + let updates = assignments_to_update_values(assignments)?; // KV engine: route to FieldSet for point updates. if matches!(engine, EngineType::KeyValue) && !target_keys.is_empty() { + // KV FieldSet doesn't yet evaluate per-row expressions — any + // non-literal RHS must be rejected loudly rather than silently + // dropped (which would update no fields and return "ok"). 
+ if let Some((field, _)) = assignments + .iter() + .find(|(_, expr)| !matches!(expr, SqlExpr::Literal(_))) + { + return Err(crate::Error::BadRequest { + detail: format!( + "UPDATE with non-literal RHS on KV engine (field '{field}') \ + is not yet supported; use a literal value" + ), + }); + } let mut tasks = Vec::new(); for key in target_keys { let field_updates: Vec<(String, Vec)> = assignments diff --git a/nodedb/src/control/planner/sql_plan_convert/expr.rs b/nodedb/src/control/planner/sql_plan_convert/expr.rs index cada7997..b2567f6e 100644 --- a/nodedb/src/control/planner/sql_plan_convert/expr.rs +++ b/nodedb/src/control/planner/sql_plan_convert/expr.rs @@ -68,6 +68,127 @@ pub(super) fn sql_expr_to_bridge_expr(expr: &SqlExpr) -> crate::bridge::expr_eva } } SqlExpr::Wildcard => BExpr::Column("*".into()), + + // NOT e / -e → evaluator's Negate (handles both bool and numeric). + SqlExpr::UnaryOp { expr, .. } => BExpr::Negate(Box::new(sql_expr_to_bridge_expr(expr))), + + // `e IS NULL` / `e IS NOT NULL` — direct passthrough. + SqlExpr::IsNull { expr, negated } => BExpr::IsNull { + expr: Box::new(sql_expr_to_bridge_expr(expr)), + negated: *negated, + }, + + // `e BETWEEN low AND high` desugars to `e >= low AND e <= high` + // (or `e < low OR e > high` when negated). The evaluator has no + // native Between variant, so the planner must lower it here. + SqlExpr::Between { + expr, + low, + high, + negated, + } => { + let e = sql_expr_to_bridge_expr(expr); + let l = sql_expr_to_bridge_expr(low); + let h = sql_expr_to_bridge_expr(high); + if *negated { + let lt = BExpr::BinaryOp { + left: Box::new(e.clone()), + op: crate::bridge::expr_eval::BinaryOp::Lt, + right: Box::new(l), + }; + let gt = BExpr::BinaryOp { + left: Box::new(e), + op: crate::bridge::expr_eval::BinaryOp::Gt, + right: Box::new(h), + }; + BExpr::BinaryOp { + left: Box::new(lt), + op: crate::bridge::expr_eval::BinaryOp::Or, + right: Box::new(gt), + } + } else { + let ge = BExpr::BinaryOp { + left: Box::new(e.clone()), + op: crate::bridge::expr_eval::BinaryOp::GtEq, + right: Box::new(l), + }; + let le = BExpr::BinaryOp { + left: Box::new(e), + op: crate::bridge::expr_eval::BinaryOp::LtEq, + right: Box::new(h), + }; + BExpr::BinaryOp { + left: Box::new(ge), + op: crate::bridge::expr_eval::BinaryOp::And, + right: Box::new(le), + } + } + } + + // `e IN (a, b, c)` desugars to `e = a OR e = b OR e = c` — each + // element may itself be a non-literal expression, so we must + // recursively convert and OR the comparisons together. `NOT IN` + // is `e <> a AND e <> b AND e <> c`. + SqlExpr::InList { + expr, + list, + negated, + } => { + let target = sql_expr_to_bridge_expr(expr); + if list.is_empty() { + // Empty list: `e IN ()` = false, `e NOT IN ()` = true. + return BExpr::Literal(nodedb_types::Value::Bool(*negated)); + } + let (eq_op, combine_op) = if *negated { + ( + crate::bridge::expr_eval::BinaryOp::NotEq, + crate::bridge::expr_eval::BinaryOp::And, + ) + } else { + ( + crate::bridge::expr_eval::BinaryOp::Eq, + crate::bridge::expr_eval::BinaryOp::Or, + ) + }; + // Empty list is handled above, so `list` is guaranteed non-empty + // here: we reduce `(target eq list[0]) op (target eq list[1]) op ...` + // without touching `.unwrap()` or `.expect()`. 
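
Spelled out, the reduce below folds `x IN (1, 2, 3)` left-to-right into `((x = 1) OR (x = 2)) OR (x = 3)`. A sketch of the resulting tree for the two-element case, built from the bridge evaluator's expression type (variant shapes as used elsewhere in this diff):

    use crate::bridge::expr_eval::{BinaryOp, SqlExpr as BExpr};
    use nodedb_types::Value;

    // x IN (1, 2) lowers to (x = 1) OR (x = 2)
    let expected = BExpr::BinaryOp {
        left: Box::new(BExpr::BinaryOp {
            left: Box::new(BExpr::Column("x".into())),
            op: BinaryOp::Eq,
            right: Box::new(BExpr::Literal(Value::Integer(1))),
        }),
        op: BinaryOp::Or,
        right: Box::new(BExpr::BinaryOp {
            left: Box::new(BExpr::Column("x".into())),
            op: BinaryOp::Eq,
            right: Box::new(BExpr::Literal(Value::Integer(2))),
        }),
    };
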
+ list.iter() + .map(|item| BExpr::BinaryOp { + left: Box::new(target.clone()), + op: eq_op, + right: Box::new(sql_expr_to_bridge_expr(item)), + }) + .reduce(|acc, next| BExpr::BinaryOp { + left: Box::new(acc), + op: combine_op, + right: Box::new(next), + }) + // Unreachable: `list.is_empty()` returns early above. + .unwrap_or(BExpr::Literal(nodedb_types::Value::Bool(*negated))) + } + + // `e LIKE pattern` — no direct evaluator variant; route through a + // function call so the shared function dispatcher handles it. + SqlExpr::Like { + expr, + pattern, + negated, + } => { + let call = BExpr::Function { + name: "like".into(), + args: vec![ + sql_expr_to_bridge_expr(expr), + sql_expr_to_bridge_expr(pattern), + ], + }; + if *negated { + BExpr::Negate(Box::new(call)) + } else { + call + } + } + _ => BExpr::Literal(nodedb_types::Value::Null), } } diff --git a/nodedb/src/control/planner/sql_plan_convert/filter.rs b/nodedb/src/control/planner/sql_plan_convert/filter.rs index 2c47f6d2..5ccb90a3 100644 --- a/nodedb/src/control/planner/sql_plan_convert/filter.rs +++ b/nodedb/src/control/planner/sql_plan_convert/filter.rs @@ -1,8 +1,18 @@ //! Filter serialization: SqlPlan filters → ScanFilter msgpack bytes. +//! +//! This is the boundary between the Control Plane planner and the Data Plane +//! scan evaluator. Filter expressions the planner can reduce to simple +//! `(field, op, value)` triples travel as native `ScanFilter` records; any +//! expression the planner cannot reduce — scalar functions in WHERE, +//! non-literal BETWEEN bounds, column arithmetic, `NOT(...)`, IN with +//! computed elements — is shipped verbatim as a `FilterOp::Expr` carrying +//! a `nodedb_query::expr::SqlExpr`. The Data Plane evaluates that against +//! each candidate row via the shared evaluator. use nodedb_sql::planner::qualified_name; use nodedb_sql::types::{Filter, FilterExpr, SqlExpr, SqlValue}; +use super::expr::sql_expr_to_bridge_expr; use super::value::sql_value_to_nodedb_value; /// Convert SqlPlan filters to ScanFilter msgpack bytes. @@ -41,6 +51,7 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec { @@ -49,6 +60,7 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec { @@ -58,6 +70,7 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec { @@ -66,6 +79,7 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec { @@ -74,6 +88,7 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec filters @@ -90,26 +105,44 @@ fn filter_to_scan_filters(expr: &FilterExpr) -> Vec { - // Convert SqlExpr to ScanFilter via pattern matching. - sql_expr_to_scan_filters(sql_expr) - } + FilterExpr::Expr(sql_expr) => sql_expr_to_scan_filters(sql_expr), _ => vec![ScanFilter { field: String::new(), op: FilterOp::MatchAll, value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }], } } -/// Convert a raw SqlExpr (from WHERE clause) to ScanFilter list. -fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec { +/// Build a `ScanFilter` carrying a full expression predicate. Used whenever +/// the planner cannot reduce the WHERE expression to a simple +/// `(field, op, value)` tuple. +fn expr_filter(expr: &SqlExpr) -> nodedb_query::scan_filter::ScanFilter { + nodedb_query::scan_filter::ScanFilter { + field: String::new(), + op: nodedb_query::scan_filter::FilterOp::Expr, + value: nodedb_types::Value::Null, + clauses: Vec::new(), + expr: Some(sql_expr_to_bridge_expr(expr)), + } +} + +/// Convert a raw `SqlExpr` (from WHERE clause) to a `ScanFilter` list. 
+/// +/// Tries to produce simple, field-indexed filters for common cases (direct +/// comparisons, BETWEEN with literals, IN with literals) so the scanner can +/// use its fast pre-filtered path. Anything that doesn't fit — scalar +/// functions on the LHS, arithmetic, NOT, non-literal bounds — is shipped +/// as a single `FilterOp::Expr` carrying the whole expression tree. +fn sql_expr_to_scan_filters(root: &SqlExpr) -> Vec { use nodedb_query::scan_filter::{FilterOp, ScanFilter}; - match expr { + match root { SqlExpr::BinaryOp { left, op: nodedb_sql::types::BinaryOp::And, @@ -131,13 +164,18 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec { let field = match left.as_ref() { SqlExpr::Column { table, name } => qualified_name(table.as_deref(), name), SqlExpr::Function { name, args, .. } => { - // HAVING: COUNT(*) > 2 → field = "count(*)" + // HAVING fast path: COUNT(*) > 2 → field = "count(*)". + // Any other function goes through the generic evaluator. + if !is_aggregate_function(name) { + return vec![expr_filter(root)]; + } let arg = args .first() .map(|a| match a { @@ -150,12 +188,11 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec return vec![match_all()], + _ => return vec![expr_filter(root)], }; let value = match right.as_ref() { SqlExpr::Literal(v) => sql_value_to_nodedb_value(v), SqlExpr::Column { table, name } => { - // Column-vs-column comparison (e.g. scalar subquery result). let col_op = match op { nodedb_sql::types::BinaryOp::Gt => FilterOp::GtColumn, nodedb_sql::types::BinaryOp::Ge => FilterOp::GteColumn, @@ -163,16 +200,17 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec FilterOp::LteColumn, nodedb_sql::types::BinaryOp::Eq => FilterOp::EqColumn, nodedb_sql::types::BinaryOp::Ne => FilterOp::NeColumn, - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; return vec![ScanFilter { field, op: col_op, value: nodedb_types::Value::String(qualified_name(table.as_deref(), name)), clauses: Vec::new(), + expr: None, }]; } - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; let filter_op = match op { nodedb_sql::types::BinaryOp::Eq => FilterOp::Eq, @@ -181,19 +219,20 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec FilterOp::Gte, nodedb_sql::types::BinaryOp::Lt => FilterOp::Lt, nodedb_sql::types::BinaryOp::Le => FilterOp::Lte, - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; vec![ScanFilter { field, op: filter_op, value, clauses: Vec::new(), + expr: None, }] } SqlExpr::IsNull { expr, negated } => { let field = match expr.as_ref() { SqlExpr::Column { table, name } => qualified_name(table.as_deref(), name), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; let op = if *negated { FilterOp::IsNotNull @@ -205,6 +244,7 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec Vec { let field = match expr.as_ref() { SqlExpr::Column { table, name } => qualified_name(table.as_deref(), name), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; + // If any list element is non-literal, the IN set cannot be + // pre-materialized — fall back to expression evaluation so the + // computed values are honoured per row. 
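
The comment above marks the dividing line between the two wire forms: `status IN ('a', 'b')` pre-materializes into a plain `FilterOp::In` record, while `status IN ('a', UPPER(role))` ships the whole predicate as `FilterOp::Expr`. A test-style sketch of the fast case (assumes module-internal visibility of `sql_expr_to_scan_filters`):

    use nodedb_query::scan_filter::FilterOp;
    use nodedb_sql::types::{SqlExpr, SqlValue};

    let fast = SqlExpr::InList {
        expr: Box::new(SqlExpr::Column { table: None, name: "status".into() }),
        list: vec![
            SqlExpr::Literal(SqlValue::String("a".into())),
            SqlExpr::Literal(SqlValue::String("b".into())),
        ],
        negated: false,
    };
    let filters = sql_expr_to_scan_filters(&fast);
    assert!(matches!(filters[0].op, FilterOp::In)); // fast, pre-filtered path
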
+ if list.iter().any(|e| !matches!(e, SqlExpr::Literal(_))) { + return vec![expr_filter(root)]; + } let values: Vec = list .iter() .filter_map(|e| match e { @@ -232,6 +278,7 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec Vec { let field = match expr.as_ref() { SqlExpr::Column { table, name } => qualified_name(table.as_deref(), name), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; let pat = match pattern.as_ref() { SqlExpr::Literal(SqlValue::String(s)) => s.clone(), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; vec![ScanFilter { field, @@ -256,6 +303,7 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec Vec { let field = match expr.as_ref() { SqlExpr::Column { table, name } => qualified_name(table.as_deref(), name), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; let low_val = match low.as_ref() { SqlExpr::Literal(v) => sql_value_to_nodedb_value(v), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; let high_val = match high.as_ref() { SqlExpr::Literal(v) => sql_value_to_nodedb_value(v), - _ => return vec![match_all()], + _ => return vec![expr_filter(root)], }; if *negated { // NOT BETWEEN → lt OR gt (outside the range) @@ -288,14 +336,17 @@ fn sql_expr_to_scan_filters(expr: &SqlExpr) -> Vec Vec vec![match_all()], + _ => vec![expr_filter(root)], } } -fn match_all() -> nodedb_query::scan_filter::ScanFilter { - nodedb_query::scan_filter::ScanFilter { - field: String::new(), - op: nodedb_query::scan_filter::FilterOp::MatchAll, - value: nodedb_types::Value::Null, - clauses: Vec::new(), - } +/// Aggregate function names — these are the only functions whose reduction +/// through the HAVING fast path is sound. Anything else on the LHS of a +/// comparison must go through the generic expression evaluator. +fn is_aggregate_function(name: &str) -> bool { + matches!( + name.to_ascii_lowercase().as_str(), + "count" | "sum" | "avg" | "min" | "max" + ) } diff --git a/nodedb/src/control/planner/sql_plan_convert/value.rs b/nodedb/src/control/planner/sql_plan_convert/value.rs deleted file mode 100644 index a864151a..00000000 --- a/nodedb/src/control/planner/sql_plan_convert/value.rs +++ /dev/null @@ -1,509 +0,0 @@ -//! Value conversion utilities: SqlValue ↔ nodedb_types::Value, msgpack encoding, -//! time range extraction, and column default evaluation. 
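
A quick spot-check of the boundary `is_aggregate_function` draws, just above:

    assert!(is_aggregate_function("COUNT"));   // HAVING count(*) > 2 → field "count(*)"
    assert!(is_aggregate_function("Sum"));     // matching is case-insensitive
    assert!(!is_aggregate_function("lower"));  // scalar fn → FilterOp::Expr fallback
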
- -use nodedb_sql::types::{Filter, FilterExpr, SqlExpr, SqlValue}; - -pub(super) fn sql_value_to_nodedb_value(v: &SqlValue) -> nodedb_types::Value { - match v { - SqlValue::Int(i) => nodedb_types::Value::Integer(*i), - SqlValue::Float(f) => nodedb_types::Value::Float(*f), - SqlValue::String(s) => nodedb_types::Value::String(s.clone()), - SqlValue::Bool(b) => nodedb_types::Value::Bool(*b), - SqlValue::Null => nodedb_types::Value::Null, - SqlValue::Array(arr) => { - nodedb_types::Value::Array(arr.iter().map(sql_value_to_nodedb_value).collect()) - } - SqlValue::Bytes(b) => nodedb_types::Value::Bytes(b.clone()), - } -} - -pub(super) fn sql_value_to_string(v: &SqlValue) -> String { - match v { - SqlValue::String(s) => s.clone(), - SqlValue::Int(i) => i.to_string(), - SqlValue::Float(f) => f.to_string(), - SqlValue::Bool(b) => b.to_string(), - _ => String::new(), - } -} - -pub(super) fn sql_value_to_bytes(v: &SqlValue) -> Vec { - match v { - SqlValue::String(s) => s.as_bytes().to_vec(), - SqlValue::Bytes(b) => b.clone(), - SqlValue::Int(i) => i.to_string().as_bytes().to_vec(), - _ => sql_value_to_string(v).into_bytes(), - } -} - -/// Encode a SQL value as standard msgpack for field-level updates. -pub(super) fn sql_value_to_msgpack(v: &SqlValue) -> Vec { - let mut buf = Vec::with_capacity(16); - write_msgpack_value(&mut buf, v); - buf -} - -// ── Msgpack encoding ── - -pub(super) fn row_to_msgpack(row: &[(String, SqlValue)]) -> crate::Result> { - // Write standard msgpack map directly from SqlValue — no JSON or zerompk intermediary. - let mut buf = Vec::with_capacity(row.len() * 32); - write_msgpack_map_header(&mut buf, row.len()); - for (key, val) in row { - write_msgpack_str(&mut buf, key); - write_msgpack_value(&mut buf, val); - } - Ok(buf) -} - -pub(super) fn write_msgpack_map_header(buf: &mut Vec, len: usize) { - if len < 16 { - buf.push(0x80 | len as u8); - } else if len <= u16::MAX as usize { - buf.push(0xDE); - buf.extend_from_slice(&(len as u16).to_be_bytes()); - } else { - buf.push(0xDF); - buf.extend_from_slice(&(len as u32).to_be_bytes()); - } -} - -pub(super) fn write_msgpack_array_header(buf: &mut Vec, len: usize) { - if len < 16 { - buf.push(0x90 | len as u8); - } else if len <= u16::MAX as usize { - buf.push(0xDC); - buf.extend_from_slice(&(len as u16).to_be_bytes()); - } else { - buf.push(0xDD); - buf.extend_from_slice(&(len as u32).to_be_bytes()); - } -} - -pub(super) fn write_msgpack_str(buf: &mut Vec, s: &str) { - let bytes = s.as_bytes(); - let len = bytes.len(); - if len < 32 { - buf.push(0xA0 | len as u8); - } else if len <= u8::MAX as usize { - buf.push(0xD9); - buf.push(len as u8); - } else if len <= u16::MAX as usize { - buf.push(0xDA); - buf.extend_from_slice(&(len as u16).to_be_bytes()); - } else { - buf.push(0xDB); - buf.extend_from_slice(&(len as u32).to_be_bytes()); - } - buf.extend_from_slice(bytes); -} - -pub(super) fn write_msgpack_value(buf: &mut Vec, val: &SqlValue) { - match val { - SqlValue::Null => buf.push(0xC0), - SqlValue::Bool(true) => buf.push(0xC3), - SqlValue::Bool(false) => buf.push(0xC2), - SqlValue::Int(i) => { - let i = *i; - if (0..=127).contains(&i) { - buf.push(i as u8); - } else if (-32..0).contains(&i) { - buf.push(i as u8); // negative fixint - } else if i >= i8::MIN as i64 && i <= i8::MAX as i64 { - buf.push(0xD0); - buf.push(i as i8 as u8); - } else if i >= i16::MIN as i64 && i <= i16::MAX as i64 { - buf.push(0xD1); - buf.extend_from_slice(&(i as i16).to_be_bytes()); - } else if i >= i32::MIN as i64 && i <= i32::MAX as i64 { - 
buf.push(0xD2); - buf.extend_from_slice(&(i as i32).to_be_bytes()); - } else { - buf.push(0xD3); - buf.extend_from_slice(&i.to_be_bytes()); - } - } - SqlValue::Float(f) => { - buf.push(0xCB); - buf.extend_from_slice(&f.to_be_bytes()); - } - SqlValue::String(s) => write_msgpack_str(buf, s), - SqlValue::Array(arr) => { - let len = arr.len(); - if len < 16 { - buf.push(0x90 | len as u8); - } else if len <= u16::MAX as usize { - buf.push(0xDC); - buf.extend_from_slice(&(len as u16).to_be_bytes()); - } else { - buf.push(0xDD); - buf.extend_from_slice(&(len as u32).to_be_bytes()); - } - for item in arr { - write_msgpack_value(buf, item); - } - } - SqlValue::Bytes(b) => { - let len = b.len(); - if len <= u8::MAX as usize { - buf.push(0xC4); - buf.push(len as u8); - } else if len <= u16::MAX as usize { - buf.push(0xC5); - buf.extend_from_slice(&(len as u16).to_be_bytes()); - } else { - buf.push(0xC6); - buf.extend_from_slice(&(len as u32).to_be_bytes()); - } - buf.extend_from_slice(b); - } - } -} - -pub(super) fn assignments_to_bytes( - assignments: &[(String, SqlExpr)], -) -> crate::Result)>> { - let mut result = Vec::new(); - for (field, expr) in assignments { - let bytes = match expr { - SqlExpr::Literal(v) => sql_value_to_msgpack(v), - _ => { - // Non-literal expression — encode as string. - let mut buf = Vec::new(); - write_msgpack_str(&mut buf, &format!("{expr:?}")); - buf - } - }; - result.push((field.clone(), bytes)); - } - Ok(result) -} - -pub(super) fn rows_to_msgpack_array( - rows: &[&Vec<(String, SqlValue)>], - column_defaults: &[(String, String)], -) -> crate::Result> { - let arr: Vec = rows - .iter() - .map(|row| { - let mut map = std::collections::HashMap::new(); - for (key, val) in row.iter() { - map.insert(key.clone(), sql_value_to_nodedb_value(val)); - } - // Apply column defaults for missing fields. - for (col_name, default_expr) in column_defaults { - if !map.contains_key(col_name) - && let Some(val) = evaluate_default_expr(default_expr) - { - map.insert(col_name.clone(), val); - } - } - nodedb_types::Value::Object(map) - }) - .collect(); - let val = nodedb_types::Value::Array(arr); - nodedb_types::value_to_msgpack(&val).map_err(|e| crate::Error::Serialization { - format: "msgpack".into(), - detail: format!("columnar row batch: {e}"), - }) -} - -// ── Column default evaluation ── - -/// Evaluate a column DEFAULT expression at insert time. -/// -/// Supports ID generation functions and literal values: -/// - `UUID_V7` / `UUIDV7` → time-sortable UUID v7 -/// - `UUID_V4` / `UUIDV4` / `UUID` → random UUID v4 -/// - `ULID` → time-sortable ULID -/// - `CUID2` → collision-resistant unique ID -/// - `NANOID` → URL-friendly 21-char ID -/// - `NANOID(N)` → URL-friendly N-char ID -/// - Integer/float literals → numeric values -/// - Quoted strings → string values -pub(super) fn evaluate_default_expr(expr: &str) -> Option { - let upper = expr.trim().to_uppercase(); - match upper.as_str() { - // Bare keywords and function call forms. 
- "UUID_V7" | "UUIDV7" | "GEN_UUID_V7()" | "UUID_V7()" => { - Some(nodedb_types::Value::String(nodedb_types::id_gen::uuid_v7())) - } - "UUID_V4" | "UUIDV4" | "UUID" | "GEN_UUID_V4()" | "UUID_V4()" => { - Some(nodedb_types::Value::String(nodedb_types::id_gen::uuid_v4())) - } - "ULID" | "GEN_ULID()" | "ULID()" => { - Some(nodedb_types::Value::String(nodedb_types::id_gen::ulid())) - } - "CUID2" | "CUID2()" => Some(nodedb_types::Value::String(nodedb_types::id_gen::cuid2())), - "NANOID" | "NANOID()" => Some(nodedb_types::Value::String(nodedb_types::id_gen::nanoid())), - "NOW()" => { - let now = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default(); - Some(nodedb_types::Value::String( - chrono::DateTime::from_timestamp_millis(now.as_millis() as i64) - .map(|dt| dt.to_rfc3339()) - .unwrap_or_else(|| now.as_millis().to_string()), - )) - } - _ => { - // NANOID(N) — custom length - if upper.starts_with("NANOID(") && upper.ends_with(')') { - let len_str = &upper[7..upper.len() - 1]; - if let Ok(len) = len_str.parse::() { - return Some(nodedb_types::Value::String( - nodedb_types::id_gen::nanoid_with_length(len), - )); - } - } - // CUID2(N) — custom length - if upper.starts_with("CUID2(") && upper.ends_with(')') { - let len_str = &upper[6..upper.len() - 1]; - if let Ok(len) = len_str.parse::() { - return Some(nodedb_types::Value::String( - nodedb_types::id_gen::cuid2_with_length(len), - )); - } - } - // Numeric literal - if let Ok(i) = expr.trim().parse::() { - return Some(nodedb_types::Value::Integer(i)); - } - if let Ok(f) = expr.trim().parse::() { - return Some(nodedb_types::Value::Float(f)); - } - // Quoted string literal - let trimmed = expr.trim(); - if (trimmed.starts_with('\'') && trimmed.ends_with('\'')) - || (trimmed.starts_with('"') && trimmed.ends_with('"')) - { - return Some(nodedb_types::Value::String( - trimmed[1..trimmed.len() - 1].to_string(), - )); - } - None - } - } -} - -// ── Time range extraction ── - -/// Extract (min_ts_ms, max_ts_ms) time bounds from WHERE filters on timestamp columns. -/// -/// Recognizes patterns like `ts >= '2024-01-01' AND ts <= '2024-01-02'` and converts -/// timestamp strings to epoch milliseconds. -pub(super) fn extract_time_range(filters: &[Filter]) -> (i64, i64) { - let mut min_ts = i64::MIN; - let mut max_ts = i64::MAX; - - for filter in filters { - extract_time_bounds_from_filter(&filter.expr, &mut min_ts, &mut max_ts); - } - - (min_ts, max_ts) -} - -fn extract_time_bounds_from_filter(expr: &FilterExpr, min_ts: &mut i64, max_ts: &mut i64) { - match expr { - FilterExpr::Comparison { field, op, value } if is_time_field(field) => { - if let Some(ms) = sql_value_to_timestamp_ms(value) { - match op { - nodedb_sql::types::CompareOp::Ge | nodedb_sql::types::CompareOp::Gt => { - if ms > *min_ts { - *min_ts = ms; - } - } - nodedb_sql::types::CompareOp::Le | nodedb_sql::types::CompareOp::Lt => { - if ms < *max_ts { - *max_ts = ms; - } - } - nodedb_sql::types::CompareOp::Eq => { - *min_ts = ms; - *max_ts = ms; - } - _ => {} - } - } - } - FilterExpr::Between { field, low, high } if is_time_field(field) => { - if let Some(lo) = sql_value_to_timestamp_ms(low) { - *min_ts = lo; - } - if let Some(hi) = sql_value_to_timestamp_ms(high) { - *max_ts = hi; - } - } - FilterExpr::And(children) => { - for child in children { - extract_time_bounds_from_filter(&child.expr, min_ts, max_ts); - } - } - // Expr-based filters: walk the SqlExpr tree for timestamp comparisons. 
- FilterExpr::Expr(sql_expr) => { - extract_time_bounds_from_expr(sql_expr, min_ts, max_ts); - } - _ => {} - } -} - -fn extract_time_bounds_from_expr(expr: &SqlExpr, min_ts: &mut i64, max_ts: &mut i64) { - let SqlExpr::BinaryOp { left, op, right } = expr else { - return; - }; - match op { - nodedb_sql::types::BinaryOp::And => { - extract_time_bounds_from_expr(left, min_ts, max_ts); - extract_time_bounds_from_expr(right, min_ts, max_ts); - } - nodedb_sql::types::BinaryOp::Ge | nodedb_sql::types::BinaryOp::Gt => { - if let Some(field) = expr_column_name(left) - && is_time_field(&field) - && let Some(ms) = expr_to_timestamp_ms(right) - && ms > *min_ts - { - *min_ts = ms; - } - } - nodedb_sql::types::BinaryOp::Le | nodedb_sql::types::BinaryOp::Lt => { - if let Some(field) = expr_column_name(left) - && is_time_field(&field) - && let Some(ms) = expr_to_timestamp_ms(right) - && ms < *max_ts - { - *max_ts = ms; - } - } - _ => {} - } -} - -fn is_time_field(name: &str) -> bool { - let lower = name.to_lowercase(); - lower == "ts" - || lower == "timestamp" - || lower == "time" - || lower == "created_at" - || lower.ends_with("_at") - || lower.ends_with("_time") - || lower.ends_with("_ts") -} - -fn expr_column_name(expr: &SqlExpr) -> Option { - match expr { - SqlExpr::Column { name, .. } => Some(name.clone()), - _ => None, - } -} - -fn expr_to_timestamp_ms(expr: &SqlExpr) -> Option { - match expr { - SqlExpr::Literal(val) => sql_value_to_timestamp_ms(val), - _ => None, - } -} - -fn sql_value_to_timestamp_ms(val: &SqlValue) -> Option { - match val { - SqlValue::Int(ms) => Some(*ms), - SqlValue::String(s) => parse_timestamp_to_ms(s), - _ => None, - } -} - -fn parse_timestamp_to_ms(s: &str) -> Option { - // Try common timestamp formats. - // "2024-01-01 00:00:00" → epoch ms - if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { - return Some(dt.and_utc().timestamp_millis()); - } - // "2024-01-01T00:00:00" (ISO 8601) - if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { - return Some(dt.and_utc().timestamp_millis()); - } - // "2024-01-01" (date only) - if let Ok(d) = chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d") { - return Some(d.and_hms_opt(0, 0, 0)?.and_utc().timestamp_millis()); - } - // Raw milliseconds as string - s.parse::().ok() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn row_to_msgpack_produces_standard_format() { - let row = vec![ - ("count".to_string(), SqlValue::Int(1)), - ("label".to_string(), SqlValue::String("homepage".into())), - ]; - let bytes = row_to_msgpack(&row).unwrap(); - // First byte should be a fixmap header (0x82 = map with 2 entries). - assert_eq!( - bytes[0], 0x82, - "expected fixmap(2), got 0x{:02X}. bytes={bytes:?}", - bytes[0] - ); - // Should be decodable by json_from_msgpack (standard msgpack parser). - let json = nodedb_types::json_from_msgpack(&bytes).unwrap(); - let obj = json.as_object().unwrap(); - assert_eq!(obj["count"], 1); - assert_eq!(obj["label"], "homepage"); - } - - #[test] - fn write_msgpack_value_int() { - let mut buf = Vec::new(); - write_msgpack_value(&mut buf, &SqlValue::Int(42)); - // 42 fits in positive fixint (0x00-0x7F). - assert_eq!(buf, vec![42]); - } - - #[test] - fn write_msgpack_value_string() { - let mut buf = Vec::new(); - write_msgpack_value(&mut buf, &SqlValue::String("hi".into())); - // fixstr: 0xA0 | 2 = 0xA2, then "hi". 
-        assert_eq!(buf, vec![0xA2, b'h', b'i']);
-    }
-
-    #[test]
-    fn aggregate_spec_preserves_alias_and_case_expression() {
-        use super::super::aggregate::agg_expr_to_spec;
-        use nodedb_sql::types::AggregateExpr;
-
-        let agg = AggregateExpr {
-            function: "sum".into(),
-            args: vec![SqlExpr::Case {
-                operand: None,
-                when_then: vec![(
-                    SqlExpr::BinaryOp {
-                        left: Box::new(SqlExpr::Column {
-                            table: None,
-                            name: "category".into(),
-                        }),
-                        op: nodedb_sql::types::BinaryOp::Eq,
-                        right: Box::new(SqlExpr::Literal(SqlValue::String("tools".into()))),
-                    },
-                    SqlExpr::Literal(SqlValue::Int(1)),
-                )],
-                else_expr: Some(Box::new(SqlExpr::Literal(SqlValue::Int(0)))),
-            }],
-            alias: "tools_count".into(),
-            distinct: false,
-        };
-
-        let spec = agg_expr_to_spec(&agg);
-
-        assert_eq!(spec.function, "sum");
-        assert_eq!(spec.alias, "sum(*)");
-        assert_eq!(spec.user_alias.as_deref(), Some("tools_count"));
-        assert_eq!(spec.field, "*");
-        assert!(matches!(
-            spec.expr,
-            Some(crate::bridge::expr_eval::SqlExpr::Case { .. })
-        ));
-    }
-}
diff --git a/nodedb/src/control/planner/sql_plan_convert/value/assignments.rs b/nodedb/src/control/planner/sql_plan_convert/value/assignments.rs
new file mode 100644
index 00000000..73c504d6
--- /dev/null
+++ b/nodedb/src/control/planner/sql_plan_convert/value/assignments.rs
@@ -0,0 +1,28 @@
+//! UPDATE assignment serialization: `(field, SqlExpr)` pairs → wire-ready
+//! `UpdateValue` payloads.
+//!
+//! Literal RHS is pre-encoded as msgpack. Non-literal RHS (arithmetic,
+//! functions, CASE, concatenation, ...) is converted to the shared evaluator
+//! type `bridge::expr_eval::SqlExpr` and shipped to the Data Plane, where
+//! it is evaluated against the current row at apply time.
+
+use nodedb_sql::types::SqlExpr;
+
+use crate::bridge::physical_plan::UpdateValue;
+
+use super::super::expr::sql_expr_to_bridge_expr;
+use super::convert::sql_value_to_msgpack;
+
+pub(crate) fn assignments_to_update_values(
+    assignments: &[(String, SqlExpr)],
+) -> crate::Result<Vec<(String, UpdateValue)>> {
+    let mut result = Vec::with_capacity(assignments.len());
+    for (field, expr) in assignments {
+        let value = match expr {
+            SqlExpr::Literal(v) => UpdateValue::Literal(sql_value_to_msgpack(v)),
+            _ => UpdateValue::Expr(sql_expr_to_bridge_expr(expr)),
+        };
+        result.push((field.clone(), value));
+    }
+    Ok(result)
+}
diff --git a/nodedb/src/control/planner/sql_plan_convert/value/convert.rs b/nodedb/src/control/planner/sql_plan_convert/value/convert.rs
new file mode 100644
index 00000000..c704b03c
--- /dev/null
+++ b/nodedb/src/control/planner/sql_plan_convert/value/convert.rs
@@ -0,0 +1,45 @@
+//! Conversions from `nodedb_sql::types::SqlValue` into runtime / wire forms.
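
Usage sketch for `assignments_to_update_values` above, inside a planner function returning `crate::Result` (`views_plus_one` again stands in for a parsed `views + 1` expression tree):

    use nodedb_sql::types::{SqlExpr, SqlValue};

    let assignments: Vec<(String, SqlExpr)> = vec![
        ("status".to_string(), SqlExpr::Literal(SqlValue::String("done".into()))),
        ("views".to_string(), views_plus_one),
    ];
    let wire = assignments_to_update_values(&assignments)?;
    // wire[0]: UpdateValue::Literal(<msgpack "done">), binary-mergeable.
    // wire[1]: UpdateValue::Expr(..), evaluated per row on the Data Plane.
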
+
+use nodedb_sql::types::SqlValue;
+
+use super::msgpack_write::write_msgpack_value;
+
+pub(crate) fn sql_value_to_nodedb_value(v: &SqlValue) -> nodedb_types::Value {
+    match v {
+        SqlValue::Int(i) => nodedb_types::Value::Integer(*i),
+        SqlValue::Float(f) => nodedb_types::Value::Float(*f),
+        SqlValue::String(s) => nodedb_types::Value::String(s.clone()),
+        SqlValue::Bool(b) => nodedb_types::Value::Bool(*b),
+        SqlValue::Null => nodedb_types::Value::Null,
+        SqlValue::Array(arr) => {
+            nodedb_types::Value::Array(arr.iter().map(sql_value_to_nodedb_value).collect())
+        }
+        SqlValue::Bytes(b) => nodedb_types::Value::Bytes(b.clone()),
+    }
+}
+
+pub(crate) fn sql_value_to_string(v: &SqlValue) -> String {
+    match v {
+        SqlValue::String(s) => s.clone(),
+        SqlValue::Int(i) => i.to_string(),
+        SqlValue::Float(f) => f.to_string(),
+        SqlValue::Bool(b) => b.to_string(),
+        _ => String::new(),
+    }
+}
+
+pub(crate) fn sql_value_to_bytes(v: &SqlValue) -> Vec<u8> {
+    match v {
+        SqlValue::String(s) => s.as_bytes().to_vec(),
+        SqlValue::Bytes(b) => b.clone(),
+        SqlValue::Int(i) => i.to_string().as_bytes().to_vec(),
+        _ => sql_value_to_string(v).into_bytes(),
+    }
+}
+
+/// Encode a SQL value as standard msgpack for field-level updates.
+pub(crate) fn sql_value_to_msgpack(v: &SqlValue) -> Vec<u8> {
+    let mut buf = Vec::with_capacity(16);
+    write_msgpack_value(&mut buf, v);
+    buf
+}
diff --git a/nodedb/src/control/planner/sql_plan_convert/value/defaults.rs b/nodedb/src/control/planner/sql_plan_convert/value/defaults.rs
new file mode 100644
index 00000000..09958e90
--- /dev/null
+++ b/nodedb/src/control/planner/sql_plan_convert/value/defaults.rs
@@ -0,0 +1,71 @@
+//! Column DEFAULT expression evaluation at insert time.
+//!
+//! Supports ID generation functions (UUIDv4/v7, ULID, CUID2, NANOID), `NOW()`,
+//! and literal values. More complex defaults (arbitrary expressions) go
+//! through the shared SqlExpr evaluator path.
+
+pub(crate) fn evaluate_default_expr(expr: &str) -> Option<nodedb_types::Value> {
+    let upper = expr.trim().to_uppercase();
+    match upper.as_str() {
+        "UUID_V7" | "UUIDV7" | "GEN_UUID_V7()" | "UUID_V7()" => {
+            Some(nodedb_types::Value::String(nodedb_types::id_gen::uuid_v7()))
+        }
+        "UUID_V4" | "UUIDV4" | "UUID" | "GEN_UUID_V4()" | "UUID_V4()" => {
+            Some(nodedb_types::Value::String(nodedb_types::id_gen::uuid_v4()))
+        }
+        "ULID" | "GEN_ULID()" | "ULID()" => {
+            Some(nodedb_types::Value::String(nodedb_types::id_gen::ulid()))
+        }
+        "CUID2" | "CUID2()" => Some(nodedb_types::Value::String(nodedb_types::id_gen::cuid2())),
+        "NANOID" | "NANOID()" => Some(nodedb_types::Value::String(nodedb_types::id_gen::nanoid())),
+        "NOW()" => {
+            let now = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default();
+            Some(nodedb_types::Value::String(
+                chrono::DateTime::from_timestamp_millis(now.as_millis() as i64)
+                    .map(|dt| dt.to_rfc3339())
+                    .unwrap_or_else(|| now.as_millis().to_string()),
+            ))
+        }
+        _ => parse_parametric_or_literal(expr, &upper),
+    }
+}
+
+fn parse_parametric_or_literal(expr: &str, upper: &str) -> Option<nodedb_types::Value> {
+    // NANOID(N) — custom length.
+    if upper.starts_with("NANOID(") && upper.ends_with(')') {
+        let len_str = &upper[7..upper.len() - 1];
+        if let Ok(len) = len_str.parse::<usize>() {
+            return Some(nodedb_types::Value::String(
+                nodedb_types::id_gen::nanoid_with_length(len),
+            ));
+        }
+    }
+    // CUID2(N) — custom length.
+    if upper.starts_with("CUID2(") && upper.ends_with(')') {
+        let len_str = &upper[6..upper.len() - 1];
+        if let Ok(len) = len_str.parse::<usize>() {
+            return Some(nodedb_types::Value::String(
+                nodedb_types::id_gen::cuid2_with_length(len),
+            ));
+        }
+    }
+    // Numeric literal.
+    if let Ok(i) = expr.trim().parse::<i64>() {
+        return Some(nodedb_types::Value::Integer(i));
+    }
+    if let Ok(f) = expr.trim().parse::<f64>() {
+        return Some(nodedb_types::Value::Float(f));
+    }
+    // Quoted string literal.
+    let trimmed = expr.trim();
+    if (trimmed.starts_with('\'') && trimmed.ends_with('\''))
+        || (trimmed.starts_with('"') && trimmed.ends_with('"'))
+    {
+        return Some(nodedb_types::Value::String(
+            trimmed[1..trimmed.len() - 1].to_string(),
+        ));
+    }
+    None
+}
diff --git a/nodedb/src/control/planner/sql_plan_convert/value/mod.rs b/nodedb/src/control/planner/sql_plan_convert/value/mod.rs
new file mode 100644
index 00000000..b9695223
--- /dev/null
+++ b/nodedb/src/control/planner/sql_plan_convert/value/mod.rs
@@ -0,0 +1,21 @@
+//! Value conversion utilities: SqlValue ↔ nodedb_types::Value, msgpack encoding,
+//! column default evaluation, and WHERE-clause time-range extraction.
+
+pub(super) mod assignments;
+pub(super) mod convert;
+pub(super) mod defaults;
+pub(super) mod msgpack_write;
+pub(super) mod rows;
+pub(super) mod time_range;
+
+pub(super) use assignments::assignments_to_update_values;
+pub(super) use convert::{
+    sql_value_to_bytes, sql_value_to_msgpack, sql_value_to_nodedb_value, sql_value_to_string,
+};
+pub(super) use defaults::evaluate_default_expr;
+pub(super) use msgpack_write::{
+    row_to_msgpack, write_msgpack_array_header, write_msgpack_map_header, write_msgpack_str,
+    write_msgpack_value,
+};
+pub(super) use rows::rows_to_msgpack_array;
+pub(super) use time_range::extract_time_range;
diff --git a/nodedb/src/control/planner/sql_plan_convert/value/msgpack_write.rs b/nodedb/src/control/planner/sql_plan_convert/value/msgpack_write.rs
new file mode 100644
index 00000000..e53b8c7a
--- /dev/null
+++ b/nodedb/src/control/planner/sql_plan_convert/value/msgpack_write.rs
@@ -0,0 +1,152 @@
+//! Standard-msgpack writers for `SqlValue`.
+//!
+//! These are the *only* msgpack producers used by the DML path — no JSON or
+//! zerompk intermediary. Format matches the on-wire layout read by
+//! `json_from_msgpack` and the Data Plane row decoders.
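
A byte-level illustration of the writers in this file, consistent with the marker bytes asserted in the tests below: the one-entry map `{ "n": 7 }` encodes to exactly four bytes.

    use nodedb_sql::types::SqlValue;

    let bytes = row_to_msgpack(&[("n".to_string(), SqlValue::Int(7))]).unwrap();
    assert_eq!(
        bytes,
        vec![
            0x81,       // fixmap, 1 entry    (0x80 | len)
            0xA1, b'n', // fixstr, length 1   (0xA0 | len), then the byte 'n'
            0x07,       // positive fixint 7  (0x00..=0x7F)
        ]
    );
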
+ +use nodedb_sql::types::SqlValue; + +pub(crate) fn row_to_msgpack(row: &[(String, SqlValue)]) -> crate::Result> { + let mut buf = Vec::with_capacity(row.len() * 32); + write_msgpack_map_header(&mut buf, row.len()); + for (key, val) in row { + write_msgpack_str(&mut buf, key); + write_msgpack_value(&mut buf, val); + } + Ok(buf) +} + +pub(crate) fn write_msgpack_map_header(buf: &mut Vec, len: usize) { + if len < 16 { + buf.push(0x80 | len as u8); + } else if len <= u16::MAX as usize { + buf.push(0xDE); + buf.extend_from_slice(&(len as u16).to_be_bytes()); + } else { + buf.push(0xDF); + buf.extend_from_slice(&(len as u32).to_be_bytes()); + } +} + +pub(crate) fn write_msgpack_array_header(buf: &mut Vec, len: usize) { + if len < 16 { + buf.push(0x90 | len as u8); + } else if len <= u16::MAX as usize { + buf.push(0xDC); + buf.extend_from_slice(&(len as u16).to_be_bytes()); + } else { + buf.push(0xDD); + buf.extend_from_slice(&(len as u32).to_be_bytes()); + } +} + +pub(crate) fn write_msgpack_str(buf: &mut Vec, s: &str) { + let bytes = s.as_bytes(); + let len = bytes.len(); + if len < 32 { + buf.push(0xA0 | len as u8); + } else if len <= u8::MAX as usize { + buf.push(0xD9); + buf.push(len as u8); + } else if len <= u16::MAX as usize { + buf.push(0xDA); + buf.extend_from_slice(&(len as u16).to_be_bytes()); + } else { + buf.push(0xDB); + buf.extend_from_slice(&(len as u32).to_be_bytes()); + } + buf.extend_from_slice(bytes); +} + +pub(crate) fn write_msgpack_value(buf: &mut Vec, val: &SqlValue) { + match val { + SqlValue::Null => buf.push(0xC0), + SqlValue::Bool(true) => buf.push(0xC3), + SqlValue::Bool(false) => buf.push(0xC2), + SqlValue::Int(i) => write_msgpack_int(buf, *i), + SqlValue::Float(f) => { + buf.push(0xCB); + buf.extend_from_slice(&f.to_be_bytes()); + } + SqlValue::String(s) => write_msgpack_str(buf, s), + SqlValue::Array(arr) => { + write_msgpack_array_header(buf, arr.len()); + for item in arr { + write_msgpack_value(buf, item); + } + } + SqlValue::Bytes(b) => write_msgpack_bin(buf, b), + } +} + +fn write_msgpack_int(buf: &mut Vec, i: i64) { + if (0..=127).contains(&i) { + buf.push(i as u8); + } else if (-32..0).contains(&i) { + buf.push(i as u8); // negative fixint + } else if i >= i8::MIN as i64 && i <= i8::MAX as i64 { + buf.push(0xD0); + buf.push(i as i8 as u8); + } else if i >= i16::MIN as i64 && i <= i16::MAX as i64 { + buf.push(0xD1); + buf.extend_from_slice(&(i as i16).to_be_bytes()); + } else if i >= i32::MIN as i64 && i <= i32::MAX as i64 { + buf.push(0xD2); + buf.extend_from_slice(&(i as i32).to_be_bytes()); + } else { + buf.push(0xD3); + buf.extend_from_slice(&i.to_be_bytes()); + } +} + +fn write_msgpack_bin(buf: &mut Vec, b: &[u8]) { + let len = b.len(); + if len <= u8::MAX as usize { + buf.push(0xC4); + buf.push(len as u8); + } else if len <= u16::MAX as usize { + buf.push(0xC5); + buf.extend_from_slice(&(len as u16).to_be_bytes()); + } else { + buf.push(0xC6); + buf.extend_from_slice(&(len as u32).to_be_bytes()); + } + buf.extend_from_slice(b); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn row_to_msgpack_produces_standard_format() { + let row = vec![ + ("count".to_string(), SqlValue::Int(1)), + ("label".to_string(), SqlValue::String("homepage".into())), + ]; + let bytes = row_to_msgpack(&row).unwrap(); + assert_eq!( + bytes[0], 0x82, + "expected fixmap(2), got 0x{:02X}. 
bytes={bytes:?}", + bytes[0] + ); + let json = nodedb_types::json_from_msgpack(&bytes).unwrap(); + let obj = json.as_object().unwrap(); + assert_eq!(obj["count"], 1); + assert_eq!(obj["label"], "homepage"); + } + + #[test] + fn write_msgpack_value_int() { + let mut buf = Vec::new(); + write_msgpack_value(&mut buf, &SqlValue::Int(42)); + assert_eq!(buf, vec![42]); + } + + #[test] + fn write_msgpack_value_string() { + let mut buf = Vec::new(); + write_msgpack_value(&mut buf, &SqlValue::String("hi".into())); + assert_eq!(buf, vec![0xA2, b'h', b'i']); + } +} diff --git a/nodedb/src/control/planner/sql_plan_convert/value/rows.rs b/nodedb/src/control/planner/sql_plan_convert/value/rows.rs new file mode 100644 index 00000000..e8d36f4e --- /dev/null +++ b/nodedb/src/control/planner/sql_plan_convert/value/rows.rs @@ -0,0 +1,34 @@ +//! Batch-of-rows msgpack encoding for columnar INSERT paths. + +use nodedb_sql::types::SqlValue; + +use super::convert::sql_value_to_nodedb_value; +use super::defaults::evaluate_default_expr; + +pub(crate) fn rows_to_msgpack_array( + rows: &[&Vec<(String, SqlValue)>], + column_defaults: &[(String, String)], +) -> crate::Result> { + let arr: Vec = rows + .iter() + .map(|row| { + let mut map = std::collections::HashMap::new(); + for (key, val) in row.iter() { + map.insert(key.clone(), sql_value_to_nodedb_value(val)); + } + for (col_name, default_expr) in column_defaults { + if !map.contains_key(col_name) + && let Some(val) = evaluate_default_expr(default_expr) + { + map.insert(col_name.clone(), val); + } + } + nodedb_types::Value::Object(map) + }) + .collect(); + let val = nodedb_types::Value::Array(arr); + nodedb_types::value_to_msgpack(&val).map_err(|e| crate::Error::Serialization { + format: "msgpack".into(), + detail: format!("columnar row batch: {e}"), + }) +} diff --git a/nodedb/src/control/planner/sql_plan_convert/value/time_range.rs b/nodedb/src/control/planner/sql_plan_convert/value/time_range.rs new file mode 100644 index 00000000..e63a8739 --- /dev/null +++ b/nodedb/src/control/planner/sql_plan_convert/value/time_range.rs @@ -0,0 +1,139 @@ +//! Time-range extraction from WHERE filters on timestamp columns. +//! +//! Walks the `Filter` / `SqlExpr` tree looking for comparisons or BETWEEN on +//! recognized time fields (`ts`, `created_at`, anything ending in `_at`/`_time`/`_ts`), +//! returning `(min_ts_ms, max_ts_ms)` for the timeseries engine's block pruning. + +use nodedb_sql::types::{Filter, FilterExpr, SqlExpr, SqlValue}; + +/// Extract `(min_ts_ms, max_ts_ms)` time bounds from WHERE filters. 
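
Concretely, every recognized timestamp format normalizes to UTC epoch milliseconds, so `WHERE ts >= '2024-01-01' AND ts < '2024-02-01'` narrows a scan to `(1704067200000, 1706745600000)`. Note that `<` and `<=` both clamp `max_ts` to the literal; that is conservative for pruning, since the boundary block is scanned rather than skipped. Spot checks against `parse_timestamp_to_ms` below:

    assert_eq!(parse_timestamp_to_ms("2024-01-01"), Some(1_704_067_200_000));
    assert_eq!(parse_timestamp_to_ms("2024-01-01 00:00:00"), Some(1_704_067_200_000));
    assert_eq!(parse_timestamp_to_ms("1704067200000"), Some(1_704_067_200_000)); // raw ms passthrough
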
+pub(crate) fn extract_time_range(filters: &[Filter]) -> (i64, i64) { + let mut min_ts = i64::MIN; + let mut max_ts = i64::MAX; + + for filter in filters { + extract_time_bounds_from_filter(&filter.expr, &mut min_ts, &mut max_ts); + } + + (min_ts, max_ts) +} + +fn extract_time_bounds_from_filter(expr: &FilterExpr, min_ts: &mut i64, max_ts: &mut i64) { + match expr { + FilterExpr::Comparison { field, op, value } if is_time_field(field) => { + if let Some(ms) = sql_value_to_timestamp_ms(value) { + match op { + nodedb_sql::types::CompareOp::Ge | nodedb_sql::types::CompareOp::Gt => { + if ms > *min_ts { + *min_ts = ms; + } + } + nodedb_sql::types::CompareOp::Le | nodedb_sql::types::CompareOp::Lt => { + if ms < *max_ts { + *max_ts = ms; + } + } + nodedb_sql::types::CompareOp::Eq => { + *min_ts = ms; + *max_ts = ms; + } + _ => {} + } + } + } + FilterExpr::Between { field, low, high } if is_time_field(field) => { + if let Some(lo) = sql_value_to_timestamp_ms(low) { + *min_ts = lo; + } + if let Some(hi) = sql_value_to_timestamp_ms(high) { + *max_ts = hi; + } + } + FilterExpr::And(children) => { + for child in children { + extract_time_bounds_from_filter(&child.expr, min_ts, max_ts); + } + } + FilterExpr::Expr(sql_expr) => { + extract_time_bounds_from_expr(sql_expr, min_ts, max_ts); + } + _ => {} + } +} + +fn extract_time_bounds_from_expr(expr: &SqlExpr, min_ts: &mut i64, max_ts: &mut i64) { + let SqlExpr::BinaryOp { left, op, right } = expr else { + return; + }; + match op { + nodedb_sql::types::BinaryOp::And => { + extract_time_bounds_from_expr(left, min_ts, max_ts); + extract_time_bounds_from_expr(right, min_ts, max_ts); + } + nodedb_sql::types::BinaryOp::Ge | nodedb_sql::types::BinaryOp::Gt => { + if let Some(field) = expr_column_name(left) + && is_time_field(&field) + && let Some(ms) = expr_to_timestamp_ms(right) + && ms > *min_ts + { + *min_ts = ms; + } + } + nodedb_sql::types::BinaryOp::Le | nodedb_sql::types::BinaryOp::Lt => { + if let Some(field) = expr_column_name(left) + && is_time_field(&field) + && let Some(ms) = expr_to_timestamp_ms(right) + && ms < *max_ts + { + *max_ts = ms; + } + } + _ => {} + } +} + +fn is_time_field(name: &str) -> bool { + let lower = name.to_lowercase(); + lower == "ts" + || lower == "timestamp" + || lower == "time" + || lower == "created_at" + || lower.ends_with("_at") + || lower.ends_with("_time") + || lower.ends_with("_ts") +} + +fn expr_column_name(expr: &SqlExpr) -> Option { + match expr { + SqlExpr::Column { name, .. 
} => Some(name.clone()), + _ => None, + } +} + +fn expr_to_timestamp_ms(expr: &SqlExpr) -> Option { + match expr { + SqlExpr::Literal(val) => sql_value_to_timestamp_ms(val), + _ => None, + } +} + +fn sql_value_to_timestamp_ms(val: &SqlValue) -> Option { + match val { + SqlValue::Int(ms) => Some(*ms), + SqlValue::String(s) => parse_timestamp_to_ms(s), + _ => None, + } +} + +fn parse_timestamp_to_ms(s: &str) -> Option { + if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + return Some(dt.and_utc().timestamp_millis()); + } + if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { + return Some(dt.and_utc().timestamp_millis()); + } + if let Ok(d) = chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d") { + return Some(d.and_hms_opt(0, 0, 0)?.and_utc().timestamp_millis()); + } + s.parse::().ok() +} diff --git a/nodedb/src/control/security/predicate_eval.rs b/nodedb/src/control/security/predicate_eval.rs index 8cac2799..eb2d2613 100644 --- a/nodedb/src/control/security/predicate_eval.rs +++ b/nodedb/src/control/security/predicate_eval.rs @@ -30,6 +30,7 @@ pub fn substitute_to_scan_filters( op: "match_all".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]), RlsPredicate::AlwaysFalse => { @@ -39,6 +40,7 @@ pub fn substitute_to_scan_filters( op: "is_not_null".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]) } @@ -58,6 +60,7 @@ pub fn substitute_to_scan_filters( op: op.as_filter_op().into(), value: nodedb_types::Value::from(resolved), clauses: Vec::new(), + expr: None, }]) } @@ -91,6 +94,7 @@ pub fn substitute_to_scan_filters( op: "is_not_null".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]); } @@ -103,6 +107,7 @@ pub fn substitute_to_scan_filters( op: "or".into(), value: nodedb_types::Value::Null, clauses: clause_groups, + expr: None, }]) } @@ -173,6 +178,7 @@ fn substitute_contains( op: "match_all".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]) } else { Some(vec![ScanFilter { @@ -180,6 +186,7 @@ fn substitute_contains( op: "is_not_null".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]) } } else { @@ -197,6 +204,7 @@ fn substitute_contains( op: "match_all".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]) } else { Some(vec![ScanFilter { @@ -204,6 +212,7 @@ fn substitute_contains( op: "is_not_null".into(), value: nodedb_types::Value::Null, clauses: Vec::new(), + expr: None, }]) } } else { @@ -219,6 +228,7 @@ fn substitute_contains( op: "contains".into(), value: nodedb_types::Value::from(auth_val), clauses: Vec::new(), + expr: None, }]) } @@ -230,6 +240,7 @@ fn substitute_contains( op: "contains".into(), value: nodedb_types::Value::from(auth_val), clauses: Vec::new(), + expr: None, }]) } @@ -240,6 +251,7 @@ fn substitute_contains( op: "contains".into(), value: nodedb_types::Value::from(lit.clone()), clauses: Vec::new(), + expr: None, }]) } @@ -262,6 +274,7 @@ fn substitute_intersects( op: "any_in".into(), value: nodedb_types::Value::from(auth_val), clauses: Vec::new(), + expr: None, }]) } @@ -273,6 +286,7 @@ fn substitute_intersects( op: "any_in".into(), value: nodedb_types::Value::from(auth_val), clauses: Vec::new(), + expr: None, }]) } (PredicateValue::AuthFunc { .. 
@@ -282,6 +296,7 @@ fn substitute_intersects(
             op: "any_in".into(),
             value: nodedb_types::Value::from(auth_val),
             clauses: Vec::new(),
+            expr: None,
         }])
     }
 
@@ -302,6 +317,7 @@ fn substitute_intersects(
             op: "match_all".into(),
             value: nodedb_types::Value::Null,
             clauses: Vec::new(),
+            expr: None,
         }])
     } else {
         Some(vec![ScanFilter {
@@ -309,6 +325,7 @@ fn substitute_intersects(
             op: "is_not_null".into(),
             value: nodedb_types::Value::Null,
             clauses: Vec::new(),
+            expr: None,
         }])
     }
 }
@@ -332,6 +349,7 @@ fn substitute_intersects(
             op: "match_all".into(),
             value: nodedb_types::Value::Null,
             clauses: Vec::new(),
+            expr: None,
         }])
     } else {
         Some(vec![ScanFilter {
@@ -339,6 +357,7 @@ fn substitute_intersects(
             op: "is_not_null".into(),
             value: nodedb_types::Value::Null,
             clauses: Vec::new(),
+            expr: None,
         }])
     }
 }
diff --git a/nodedb/src/control/security/rls/eval.rs b/nodedb/src/control/security/rls/eval.rs
index 3e55ec17..1538e4b1 100644
--- a/nodedb/src/control/security/rls/eval.rs
+++ b/nodedb/src/control/security/rls/eval.rs
@@ -294,6 +294,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("active".into()),
             clauses: Vec::new(),
+            expr: None,
         };
         let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
         let mut policy = make_policy("require_active", "orders", PolicyType::Write);
diff --git a/nodedb/src/control/server/native/dispatch/plan_builder/document.rs b/nodedb/src/control/server/native/dispatch/plan_builder/document.rs
index 7fa9ef04..7119ff0d 100644
--- a/nodedb/src/control/server/native/dispatch/plan_builder/document.rs
+++ b/nodedb/src/control/server/native/dispatch/plan_builder/document.rs
@@ -170,13 +170,20 @@ pub(crate) fn build_batch_insert(
 pub(crate) fn build_update(fields: &TextFields, collection: &str) -> crate::Result<PhysicalPlan> {
     let doc_id = require_doc_id(fields)?;
-    let updates = fields
+    let updates: Vec<(String, crate::bridge::physical_plan::UpdateValue)> = fields
         .updates
         .as_ref()
         .ok_or_else(|| crate::Error::BadRequest {
             detail: "missing 'updates'".to_string(),
         })?
-        .clone();
+        .iter()
+        .map(|(f, b)| {
+            (
+                f.clone(),
+                crate::bridge::physical_plan::UpdateValue::Literal(b.clone()),
+            )
+        })
+        .collect();
     Ok(PhysicalPlan::Document(DocumentOp::PointUpdate {
         collection: collection.to_string(),
         document_id: doc_id,
@@ -208,6 +215,9 @@ pub(crate) fn build_upsert(fields: &TextFields, collection: &str) -> crate::Result<PhysicalPlan> {
         collection: collection.to_string(),
         document_id: doc_id,
         value,
+        // The native text protocol carries no ON CONFLICT clause; plain
+        // merge semantics apply.
+        on_conflict_updates: Vec::new(),
     }))
 }
 
@@ -222,13 +232,20 @@ pub(crate) fn build_bulk_update(
             detail: "missing 'filters'".to_string(),
         })?
         .clone();
-    let updates = fields
+    let updates: Vec<(String, crate::bridge::physical_plan::UpdateValue)> = fields
         .updates
         .as_ref()
         .ok_or_else(|| crate::Error::BadRequest {
             detail: "missing 'updates'".to_string(),
         })?
-        .clone();
+        .iter()
+        .map(|(f, b)| {
+            (
+                f.clone(),
+                crate::bridge::physical_plan::UpdateValue::Literal(b.clone()),
+            )
+        })
+        .collect();
     Ok(PhysicalPlan::Document(DocumentOp::BulkUpdate {
         collection: collection.to_string(),
         filters,
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter.rs
deleted file mode 100644
index d68d6296..00000000
--- a/nodedb/src/control/server/pgwire/ddl/collection/alter.rs
+++ /dev/null
@@ -1,441 +0,0 @@
-//! ALTER TABLE and ALTER COLLECTION enforcement DDL.
-
-use pgwire::api::results::{Response, Tag};
-use pgwire::error::PgWireResult;
-
-use crate::control::security::audit::AuditEvent;
-use crate::control::security::identity::AuthenticatedIdentity;
-use crate::control::state::SharedState;
-
-use super::super::super::types::sqlstate_error;
-use super::helpers::parse_origin_column_def;
-
-/// ALTER TABLE <table> ADD [COLUMN] <name> <type> [NOT NULL] [DEFAULT ...]
-pub async fn alter_table_add_column(
-    state: &SharedState,
-    identity: &AuthenticatedIdentity,
-    parts: &[&str],
-    sql: &str,
-) -> PgWireResult<Vec<Response>> {
-    let table_name = parts
-        .get(2)
-        .ok_or_else(|| sqlstate_error("42601", "ALTER TABLE requires a table name"))?
-        .to_lowercase();
-    let tenant_id = identity.tenant_id;
-
-    // Find column def after ADD [COLUMN].
-    let upper = sql.to_uppercase();
-    let add_pos = upper
-        .find("ADD COLUMN ")
-        .map(|p| p + 11)
-        .or_else(|| upper.find("ADD ").map(|p| p + 4))
-        .ok_or_else(|| sqlstate_error("42601", "expected ADD [COLUMN]"))?;
-
-    let col_def_str = sql[add_pos..].trim();
-    let column = parse_origin_column_def(col_def_str).map_err(|e| sqlstate_error("42601", &e))?;
-    let column_name = column.name.clone(); // Save before potential move.
-
-    // Validate: new column must be nullable or have a default.
-    if !column.nullable && column.default.is_none() {
-        return Err(sqlstate_error(
-            "42601",
-            &format!(
-                "ALTER ADD COLUMN '{}': non-nullable column must have a DEFAULT",
-                column.name
-            ),
-        ));
-    }
-
-    // Verify collection exists.
-    if let Some(catalog) = state.credentials.catalog() {
-        match catalog.get_collection(tenant_id.as_u32(), &table_name) {
-            Ok(Some(coll)) if coll.is_active => {
-                // Update the stored schema if it's a strict collection.
-                if coll.collection_type.is_strict()
-                    && let Some(config_json) = &coll.timeseries_config
-                    && let Ok(mut schema) =
-                        sonic_rs::from_str::<nodedb_types::columnar::StrictSchema>(config_json)
-                {
-                    if schema.columns.iter().any(|c| c.name == column.name) {
-                        return Err(sqlstate_error(
-                            "42P07",
-                            &format!("column '{}' already exists", column.name),
-                        ));
-                    }
-                    schema.columns.push(column);
-                    schema.version = schema.version.saturating_add(1);
-
-                    let mut updated = coll;
-                    updated.collection_type = nodedb_types::CollectionType::strict(schema.clone());
-                    updated.timeseries_config = sonic_rs::to_string(&schema).ok();
-                    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(
-                        Box::new(updated.clone()),
-                    );
-                    let log_index =
-                        crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
-                            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-                    if log_index == 0 {
-                        catalog
-                            .put_collection(&updated)
-                            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-                    }
-                }
-            }
-            _ => {
-                return Err(sqlstate_error(
-                    "42P01",
-                    &format!("collection '{table_name}' does not exist"),
-                ));
-            }
-        }
-    }
-
-    // Re-register the collection with the Data Plane to refresh the cached schema.
-    super::create::dispatch_register_if_needed(state, identity, parts, sql).await;
-
-    state.audit_record(
-        AuditEvent::AdminAction,
-        Some(tenant_id),
-        &identity.username,
-        &format!("ALTER TABLE '{table_name}' ADD COLUMN '{column_name}'"),
-    );
-
-    Ok(vec![Response::Execution(Tag::new("ALTER TABLE"))])
-}
-
-/// Handle ALTER COLLECTION enforcement commands: SET RETENTION, SET/RELEASE LEGAL_HOLD,
-/// SET APPEND_ONLY.
-pub fn alter_collection_enforcement(
-    state: &SharedState,
-    identity: &AuthenticatedIdentity,
-    sql: &str,
-    kind: &str,
-) -> PgWireResult<Vec<Response>> {
-    let tenant_id = identity.tenant_id.as_u32();
-    let parts: Vec<&str> = sql.split_whitespace().collect();
-    let upper = sql.to_uppercase();
-
-    let name = parts
-        .get(2)
-        .ok_or_else(|| sqlstate_error("42601", "missing collection name"))?
-        .to_lowercase();
-
-    let Some(catalog) = state.credentials.catalog() else {
-        return Err(sqlstate_error("XX000", "no catalog available"));
-    };
-
-    let mut coll = catalog
-        .get_collection(tenant_id, &name)
-        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
-        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{name}' not found")))?;
-
-    match kind {
-        "retention" => {
-            // ALTER COLLECTION x SET RETENTION = '7 years'
-            let value = extract_set_value(&upper, "RETENTION")
-                .ok_or_else(|| sqlstate_error("42601", "SET RETENTION requires = 'duration'"))?;
-
-            // Validate the retention period parses correctly.
-            crate::data::executor::enforcement::retention::parse_retention_period(&value)
-                .map_err(|e| sqlstate_error("22023", &e))?;
-
-            coll.retention_period = Some(value);
-        }
-        "legal_hold" => {
-            if upper.contains("LEGAL_HOLD = TRUE") || upper.contains("LEGAL_HOLD=TRUE") {
-                // ALTER COLLECTION x SET LEGAL_HOLD = TRUE TAG 'case-001'
-                let tag = extract_tag_value(&upper).ok_or_else(|| {
-                    sqlstate_error("42601", "SET LEGAL_HOLD = TRUE requires TAG 'name'")
-                })?;
-
-                // Check for duplicate tag.
-                if coll.legal_holds.iter().any(|h| h.tag == tag) {
-                    return Err(sqlstate_error(
-                        "23505",
-                        &format!("legal hold tag '{tag}' already exists on {name}"),
-                    ));
-                }
-
-                let now = std::time::SystemTime::now()
-                    .duration_since(std::time::UNIX_EPOCH)
-                    .unwrap_or_default()
-                    .as_millis() as u64;
-
-                coll.legal_holds
-                    .push(crate::control::security::catalog::LegalHold {
-                        tag,
-                        created_at: now,
-                        created_by: identity.username.clone(),
-                    });
-            } else if upper.contains("LEGAL_HOLD = FALSE") || upper.contains("LEGAL_HOLD=FALSE") {
-                // ALTER COLLECTION x SET LEGAL_HOLD = FALSE TAG 'case-001'
-                let tag = extract_tag_value(&upper).ok_or_else(|| {
-                    sqlstate_error("42601", "SET LEGAL_HOLD = FALSE requires TAG 'name'")
-                })?;
-
-                let before = coll.legal_holds.len();
-                coll.legal_holds.retain(|h| h.tag != tag);
-                if coll.legal_holds.len() == before {
-                    return Err(sqlstate_error(
-                        "42704",
-                        &format!("legal hold tag '{tag}' not found on {name}"),
-                    ));
-                }
-            } else {
-                return Err(sqlstate_error(
-                    "42601",
-                    "ALTER COLLECTION SET LEGAL_HOLD requires = TRUE TAG 'name' or = FALSE TAG 'name'",
-                ));
-            }
-        }
-        "append_only" => {
-            // ALTER COLLECTION x SET APPEND_ONLY = TRUE (one-way flag).
-            if coll.append_only {
-                return Err(sqlstate_error(
-                    "42710",
-                    &format!("collection '{name}' is already append-only"),
-                ));
-            }
-            coll.append_only = true;
-        }
-        "last_value_cache" => {
-            // ALTER COLLECTION x SET LAST_VALUE_CACHE = TRUE | FALSE
-            if !coll.collection_type.is_timeseries() {
-                return Err(sqlstate_error(
-                    "42809",
-                    &format!("'{name}' is not a timeseries collection"),
-                ));
-            }
-            let val = extract_set_value(&upper, "LAST_VALUE_CACHE").ok_or_else(|| {
-                sqlstate_error("42601", "SET LAST_VALUE_CACHE requires = TRUE or = FALSE")
-            })?;
-            coll.lvc_enabled = val.eq_ignore_ascii_case("TRUE");
-        }
-        _ => {
-            return Err(sqlstate_error(
-                "42601",
-                &format!("unknown ALTER COLLECTION enforcement kind: '{kind}'"),
-            ));
-        }
-    }
-
-    // Enforcement alters (retention, legal_hold, append_only, LVC)
-    // ship the whole updated `StoredCollection` through the
-    // generic `CatalogEntry::PutCollection` pipe.
-    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(coll.clone()));
-    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
-        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-    if log_index == 0 {
-        catalog
-            .put_collection(&coll)
-            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-    }
-
-    state.schema_version.bump();
-
-    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
-}
-
-/// Extract value from `SET KEY = 'value'` pattern.
-fn extract_set_value(upper: &str, key: &str) -> Option<String> {
-    let pattern = format!("{key} =");
-    let pos = upper
-        .find(&pattern)
-        .or_else(|| upper.find(&format!("{key}=")))?;
-    let after = upper[pos..].split('=').nth(1)?.trim();
-    let value = after.trim_start_matches('\'').trim_start_matches('"');
-    let end = value
-        .find('\'')
-        .or_else(|| value.find('"'))
-        .unwrap_or(value.len());
-    Some(value[..end].to_string())
-}
-
-/// Extract TAG value from `TAG 'name'` pattern.
-fn extract_tag_value(upper: &str) -> Option<String> {
-    let pos = upper.find("TAG ")?;
-    let after = upper[pos + 4..].trim();
-    let value = after.trim_start_matches('\'').trim_start_matches('"');
-    let end = value
-        .find('\'')
-        .or_else(|| value.find('"'))
-        .or_else(|| value.find(' '))
-        .unwrap_or(value.len());
-    if end == 0 {
-        return None;
-    }
-    Some(value[..end].to_string())
-}
-
-/// Handle `ALTER COLLECTION accounts ADD COLUMN balance DECIMAL DEFAULT 0
-/// AS MATERIALIZED_SUM SOURCE journal_entries ON journal_entries.account_id = accounts.id
-/// VALUE journal_entries.signed_amount`.
-pub fn add_materialized_sum(
-    state: &SharedState,
-    identity: &AuthenticatedIdentity,
-    sql: &str,
-) -> PgWireResult<Vec<Response>> {
-    let tenant_id = identity.tenant_id.as_u32();
-    let parts: Vec<&str> = sql.split_whitespace().collect();
-    let upper = sql.to_uppercase();
-
-    // Target collection name.
-    let target_coll = parts
-        .get(2)
-        .ok_or_else(|| sqlstate_error("42601", "missing collection name"))?
-        .to_lowercase();
-
-    // Target column name: token after ADD COLUMN (or just ADD).
-    let col_idx = parts
-        .iter()
-        .position(|p| p.eq_ignore_ascii_case("COLUMN"))
-        .or_else(|| parts.iter().position(|p| p.eq_ignore_ascii_case("ADD")))
-        .ok_or_else(|| sqlstate_error("42601", "missing ADD COLUMN"))?;
-    let target_column = parts
-        .get(col_idx + 1)
-        .ok_or_else(|| sqlstate_error("42601", "missing column name"))?
-        .to_lowercase();
-
-    // SOURCE <collection>
-    let source_idx = parts
-        .iter()
-        .position(|p| p.eq_ignore_ascii_case("SOURCE"))
-        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires SOURCE <collection>"))?;
-    let source_coll = parts
-        .get(source_idx + 1)
-        .ok_or_else(|| sqlstate_error("42601", "missing collection after SOURCE"))?
-        .to_lowercase();
-
-    // ON <source>.<col> = <target>.<col> — extract the join column from source side.
-    let on_idx = upper
-        .find(" ON ")
-        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires ON join_condition"))?;
-    let after_on = &sql[on_idx + 4..];
-    let join_column = parse_join_column(after_on, &source_coll)?;
-
-    // VALUE <expr> — extract the value expression.
-    let value_idx = upper
-        .find(" VALUE ")
-        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires VALUE expression"))?;
-    let value_expr_str = sql[value_idx + 7..].trim();
-    let value_expr = parse_value_expression(value_expr_str, &source_coll)?;
-
-    let def = crate::control::security::catalog::types::MaterializedSumDef {
-        target_collection: target_coll.clone(),
-        target_column: target_column.clone(),
-        source_collection: source_coll,
-        join_column,
-        value_expr,
-    };
-
-    // Store the definition on the TARGET collection.
-    let Some(catalog) = state.credentials.catalog() else {
-        return Err(sqlstate_error("XX000", "no catalog available"));
-    };
-
-    let mut coll = catalog
-        .get_collection(tenant_id, &target_coll)
-        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
-        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{target_coll}' not found")))?;
-
-    // Check for duplicate column binding.
-    if coll
-        .materialized_sums
-        .iter()
-        .any(|m| m.target_column == target_column)
-    {
-        return Err(sqlstate_error(
-            "42710",
-            &format!("materialized sum already defined for column '{target_column}'"),
-        ));
-    }
-
-    coll.materialized_sums.push(def);
-    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(coll.clone()));
-    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
-        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-    if log_index == 0 {
-        catalog
-            .put_collection(&coll)
-            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
-    }
-
-    state.schema_version.bump();
-
-    state.audit_record(
-        AuditEvent::ConfigChange,
-        Some(identity.tenant_id),
-        &identity.username,
-        &format!("ADD MATERIALIZED_SUM {target_column} on {target_coll}"),
-    );
-
-    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
-}
-
-/// Parse join column from `source.col = target.id` — returns `col` (the source side).
-fn parse_join_column(join_clause: &str, source_coll: &str) -> PgWireResult<String> {
-    let eq_parts: Vec<&str> = join_clause.splitn(2, '=').collect();
-    if eq_parts.len() != 2 {
-        return Err(sqlstate_error("42601", "ON clause requires '=' join"));
-    }
-
-    // Find the side that references the source collection.
-    let left = eq_parts[0].trim().to_lowercase();
-    let right = eq_parts[1].trim().to_lowercase();
-
-    let prefix = format!("{}.", source_coll);
-    let col = if left.starts_with(&prefix) {
-        left.strip_prefix(&prefix).unwrap_or(&left).to_string()
-    } else if right.starts_with(&prefix) {
-        right.strip_prefix(&prefix).unwrap_or(&right).to_string()
-    } else {
-        // No table prefix — assume left is source column.
-        left.split('.').next_back().unwrap_or(&left).to_string()
-    };
-
-    // Clean up — remove anything after the column name (e.g. trailing keywords).
-    let col = col.split_whitespace().next().unwrap_or(&col).to_string();
-
-    Ok(col)
-}
-
-/// Parse value expression — simple column reference or qualified `source.column`.
-fn parse_value_expression(
-    expr_str: &str,
-    source_coll: &str,
-) -> PgWireResult<crate::bridge::expr_eval::SqlExpr> {
-    use crate::bridge::expr_eval::SqlExpr;
-
-    let trimmed = expr_str.trim().trim_end_matches(';');
-    let lower = trimmed.to_lowercase();
-
-    // Strip source collection prefix if present: `journal_entries.signed_amount` → `signed_amount`.
-    let prefix = format!("{}.", source_coll);
-    let col_name = if lower.starts_with(&prefix) {
-        lower.strip_prefix(&prefix).unwrap_or(&lower).to_string()
-    } else {
-        // Could be a bare column name or a CASE expression.
-        lower.to_string()
-    };
-
-    // For simple column references, return Column(name).
-    // For complex expressions (CASE WHEN ...), parse recursively.
-    if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
-        Ok(SqlExpr::Column(col_name))
-    } else {
-        // Complex expression — for now, treat the whole thing as a column reference
-        // to the first word (simple heuristic). Full CASE parsing would require
-        // the DDL constraint parser infrastructure.
-        let first_word = col_name
-            .split_whitespace()
-            .next()
-            .unwrap_or(&col_name)
-            .to_string();
-        Err(sqlstate_error(
-            "0A000",
-            &format!(
-                "complex VALUE expressions not yet supported; use a pre-computed column. Got: '{first_word}...'"
-            ),
-        ))
-    }
-}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/add_column.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/add_column.rs
new file mode 100644
index 00000000..fe4245ee
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/add_column.rs
@@ -0,0 +1,103 @@
+//! `ALTER {TABLE,COLLECTION} <name> ADD [COLUMN] <col_def>` — append a column
+//! to a strict-document / columnar collection's schema.
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::audit::AuditEvent;
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+use super::super::helpers::parse_origin_column_def;
+
+/// ALTER TABLE <table> ADD [COLUMN] <name> <type> [NOT NULL] [DEFAULT ...]
+pub async fn alter_table_add_column(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    parts: &[&str],
+    sql: &str,
+) -> PgWireResult<Vec<Response>> {
+    let table_name = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "ALTER TABLE requires a table name"))?
+        .to_lowercase();
+    let tenant_id = identity.tenant_id;
+
+    // Find column def after ADD [COLUMN].
+    let upper = sql.to_uppercase();
+    let add_pos = upper
+        .find("ADD COLUMN ")
+        .map(|p| p + 11)
+        .or_else(|| upper.find("ADD ").map(|p| p + 4))
+        .ok_or_else(|| sqlstate_error("42601", "expected ADD [COLUMN]"))?;
+
+    let col_def_str = sql[add_pos..].trim();
+    let column = parse_origin_column_def(col_def_str).map_err(|e| sqlstate_error("42601", &e))?;
+    let column_name = column.name.clone();
+
+    // Validate: new column must be nullable or have a default.
+    if !column.nullable && column.default.is_none() {
+        return Err(sqlstate_error(
+            "42601",
+            &format!(
+                "ALTER ADD COLUMN '{}': non-nullable column must have a DEFAULT",
+                column.name
+            ),
+        ));
+    }
+
+    // Verify collection exists.
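+    // When no catalog handle is available, the schema mutation below is
+    // skipped entirely: the statement still dispatches a register refresh,
+    // writes an audit record, and reports success.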
+    if let Some(catalog) = state.credentials.catalog() {
+        match catalog.get_collection(tenant_id.as_u32(), &table_name) {
+            Ok(Some(coll)) if coll.is_active => {
+                if coll.collection_type.is_strict()
+                    && let Some(config_json) = &coll.timeseries_config
+                    && let Ok(mut schema) =
+                        sonic_rs::from_str::<nodedb_types::columnar::StrictSchema>(config_json)
+                {
+                    if schema.columns.iter().any(|c| c.name == column.name) {
+                        return Err(sqlstate_error(
+                            "42P07",
+                            &format!("column '{}' already exists", column.name),
+                        ));
+                    }
+                    schema.columns.push(column);
+                    schema.version = schema.version.saturating_add(1);
+
+                    let mut updated = coll;
+                    updated.collection_type = nodedb_types::CollectionType::strict(schema.clone());
+                    updated.timeseries_config = sonic_rs::to_string(&schema).ok();
+                    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(
+                        Box::new(updated.clone()),
+                    );
+                    let log_index =
+                        crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
+                            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+                    if log_index == 0 {
+                        catalog
+                            .put_collection(&updated)
+                            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+                    }
+                }
+            }
+            _ => {
+                return Err(sqlstate_error(
+                    "42P01",
+                    &format!("collection '{table_name}' does not exist"),
+                ));
+            }
+        }
+    }
+
+    super::super::create::dispatch_register_if_needed(state, identity, parts, sql).await;
+
+    state.audit_record(
+        AuditEvent::AdminAction,
+        Some(tenant_id),
+        &identity.username,
+        &format!("ALTER TABLE '{table_name}' ADD COLUMN '{column_name}'"),
+    );
+
+    Ok(vec![Response::Execution(Tag::new("ALTER TABLE"))])
+}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/alter_type.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/alter_type.rs
new file mode 100644
index 00000000..50dfdea7
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/alter_type.rs
@@ -0,0 +1,127 @@
+//! `ALTER COLLECTION <name> ALTER COLUMN <col> TYPE <new_type>` — change a
+//! column's declared type in a strict-document collection's schema.
+//!
+//! The current implementation only accepts type changes that map to the
+//! same underlying `ColumnType` discriminant as the existing column —
+//! equivalently, no-op aliases like INT → BIGINT (both map to `Int64`).
+//! Widening across discriminants would require a full online rewrite and
+//! is tracked as a separate enhancement.
+
+use std::str::FromStr;
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::audit::AuditEvent;
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+
+pub async fn alter_collection_alter_column_type(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    parts: &[&str],
+    sql: &str,
+) -> PgWireResult<Vec<Response>> {
+    let name = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "ALTER COLLECTION requires a name"))?
+        .to_lowercase();
+    let tenant_id = identity.tenant_id;
+
+    // Expect: ALTER COLLECTION <name> ALTER COLUMN <col> TYPE <type>.
+    let col_idx = parts
+        .iter()
+        .position(|p| p.eq_ignore_ascii_case("COLUMN"))
+        .ok_or_else(|| sqlstate_error("42601", "expected ALTER COLUMN <col> TYPE <type>"))?;
+    let column_name = parts
+        .get(col_idx + 1)
+        .ok_or_else(|| sqlstate_error("42601", "missing column name"))?
+        .to_lowercase();
+    match parts.get(col_idx + 2) {
+        Some(tok) if tok.eq_ignore_ascii_case("TYPE") => {}
+        _ => return Err(sqlstate_error("42601", "expected TYPE keyword")),
+    }
+    let new_type_str = parts
+        .get(col_idx + 3)
+        .ok_or_else(|| sqlstate_error("42601", "missing new type"))?
+        .trim_end_matches(';');
+
+    let new_type = nodedb_types::columnar::ColumnType::from_str(new_type_str)
+        .map_err(|e| sqlstate_error("42601", &format!("invalid type '{new_type_str}': {e}")))?;
+
+    let Some(catalog) = state.credentials.catalog() else {
+        return Err(sqlstate_error("XX000", "no catalog available"));
+    };
+
+    let coll = catalog
+        .get_collection(tenant_id.as_u32(), &name)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
+        .filter(|c| c.is_active)
+        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{name}' does not exist")))?;
+
+    if !coll.collection_type.is_strict() {
+        return Err(sqlstate_error(
+            "0A000",
+            "ALTER COLUMN TYPE is only supported on strict document collections",
+        ));
+    }
+
+    let mut schema: nodedb_types::columnar::StrictSchema = coll
+        .timeseries_config
+        .as_deref()
+        .and_then(|s| sonic_rs::from_str(s).ok())
+        .ok_or_else(|| sqlstate_error("XX000", "strict schema missing or malformed"))?;
+
+    let col = schema
+        .columns
+        .iter_mut()
+        .find(|c| c.name.eq_ignore_ascii_case(&column_name))
+        .ok_or_else(|| {
+            sqlstate_error(
+                "42703",
+                &format!("column '{column_name}' does not exist on '{name}'"),
+            )
+        })?;
+
+    // Reject a true type change that would require re-encoding existing rows.
+    if std::mem::discriminant(&col.column_type) != std::mem::discriminant(&new_type) {
+        return Err(sqlstate_error(
+            "0A000",
+            &format!(
+                "cross-type change from {:?} to {:?} requires an online rewrite; \
+                 only alias type changes (e.g. INT ↔ BIGINT) are supported today",
+                col.column_type, new_type
+            ),
+        ));
+    }
+    col.column_type = new_type;
+    schema.version = schema.version.saturating_add(1);
+
+    let mut updated = coll;
+    updated.collection_type = nodedb_types::CollectionType::strict(schema.clone());
+    updated.timeseries_config = sonic_rs::to_string(&schema).ok();
+
+    let entry =
+        crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(updated.clone()));
+    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    if log_index == 0 {
+        catalog
+            .put_collection(&updated)
+            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    }
+
+    super::super::create::dispatch_register_if_needed(state, identity, parts, sql).await;
+    state.schema_version.bump();
+
+    state.audit_record(
+        AuditEvent::AdminAction,
+        Some(tenant_id),
+        &identity.username,
+        &format!("ALTER COLLECTION '{name}' ALTER COLUMN '{column_name}' TYPE {new_type_str}"),
+    );
+
+    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
+}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/drop_column.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/drop_column.rs
new file mode 100644
index 00000000..6415a64e
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/drop_column.rs
@@ -0,0 +1,110 @@
+//! `ALTER COLLECTION <name> DROP COLUMN <col>` — remove a column from a
+//! strict-document collection's schema.
+//!
+//! Current scope: strict collections only. The column is removed from the
+//! schema metadata and the schema version is bumped; existing rows are not
+//! re-encoded, so any row written before the drop retains the column's
+//! physical bytes. Reads of those rows will see trailing bytes as extra
+//! tuple elements — acceptable for the "fix after drop, new writes work"
+//! workflow. A full online rewrite is tracked as a separate enhancement.
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::audit::AuditEvent;
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+
+pub async fn alter_collection_drop_column(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    parts: &[&str],
+    sql: &str,
+) -> PgWireResult<Vec<Response>> {
+    let name = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "ALTER COLLECTION requires a name"))?
+        .to_lowercase();
+    let tenant_id = identity.tenant_id;
+
+    // Locate "DROP COLUMN <name>" — `DROP` then `COLUMN` then the name.
+    let column_name = parts
+        .iter()
+        .position(|p| p.eq_ignore_ascii_case("COLUMN"))
+        .and_then(|i| parts.get(i + 1))
+        .ok_or_else(|| sqlstate_error("42601", "expected DROP COLUMN <name>"))?
+        .trim_end_matches(';')
+        .to_lowercase();
+
+    let Some(catalog) = state.credentials.catalog() else {
+        return Err(sqlstate_error("XX000", "no catalog available"));
+    };
+
+    let coll = catalog
+        .get_collection(tenant_id.as_u32(), &name)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
+        .filter(|c| c.is_active)
+        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{name}' does not exist")))?;
+
+    if !coll.collection_type.is_strict() {
+        return Err(sqlstate_error(
+            "0A000",
+            "DROP COLUMN is only supported on strict document collections",
+        ));
+    }
+
+    let mut schema: nodedb_types::columnar::StrictSchema = coll
+        .timeseries_config
+        .as_deref()
+        .and_then(|s| sonic_rs::from_str(s).ok())
+        .ok_or_else(|| sqlstate_error("XX000", "strict schema missing or malformed"))?;
+
+    let idx = schema
+        .columns
+        .iter()
+        .position(|c| c.name.eq_ignore_ascii_case(&column_name))
+        .ok_or_else(|| {
+            sqlstate_error(
+                "42703",
+                &format!("column '{column_name}' does not exist on '{name}'"),
+            )
+        })?;
+
+    if schema.columns[idx].primary_key {
+        return Err(sqlstate_error(
+            "42601",
+            &format!("cannot drop primary key column '{column_name}'"),
+        ));
+    }
+
+    schema.columns.remove(idx);
+    schema.version = schema.version.saturating_add(1);
+
+    let mut updated = coll;
+    updated.collection_type = nodedb_types::CollectionType::strict(schema.clone());
+    updated.timeseries_config = sonic_rs::to_string(&schema).ok();
+
+    let entry =
+        crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(updated.clone()));
+    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    if log_index == 0 {
+        catalog
+            .put_collection(&updated)
+            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    }
+
+    super::super::create::dispatch_register_if_needed(state, identity, parts, sql).await;
+    state.schema_version.bump();
+
+    state.audit_record(
+        AuditEvent::AdminAction,
+        Some(tenant_id),
+        &identity.username,
+        &format!("ALTER COLLECTION '{name}' DROP COLUMN '{column_name}'"),
+    );
+
+    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
+}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/enforcement.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/enforcement.rs
new file mode 100644
index 00000000..cbe6811b
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/enforcement.rs
@@ -0,0 +1,164 @@
+//! `ALTER COLLECTION ... SET {RETENTION,LEGAL_HOLD,APPEND_ONLY,LAST_VALUE_CACHE}`
+//! — non-schema enforcement knobs propagated through `CatalogEntry::PutCollection`.
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+
+/// Handle ALTER COLLECTION enforcement commands: SET RETENTION, SET/RELEASE LEGAL_HOLD,
+/// SET APPEND_ONLY, SET LAST_VALUE_CACHE.
+pub fn alter_collection_enforcement(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    sql: &str,
+    kind: &str,
+) -> PgWireResult<Vec<Response>> {
+    let tenant_id = identity.tenant_id.as_u32();
+    let parts: Vec<&str> = sql.split_whitespace().collect();
+    let upper = sql.to_uppercase();
+
+    let name = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "missing collection name"))?
+        .to_lowercase();
+
+    let Some(catalog) = state.credentials.catalog() else {
+        return Err(sqlstate_error("XX000", "no catalog available"));
+    };
+
+    let mut coll = catalog
+        .get_collection(tenant_id, &name)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
+        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{name}' not found")))?;
+
+    match kind {
+        "retention" => {
+            let value = extract_set_value(&upper, "RETENTION")
+                .ok_or_else(|| sqlstate_error("42601", "SET RETENTION requires = 'duration'"))?;
+
+            crate::data::executor::enforcement::retention::parse_retention_period(&value)
+                .map_err(|e| sqlstate_error("22023", &e))?;
+
+            coll.retention_period = Some(value);
+        }
+        "legal_hold" => {
+            if upper.contains("LEGAL_HOLD = TRUE") || upper.contains("LEGAL_HOLD=TRUE") {
+                let tag = extract_tag_value(&upper).ok_or_else(|| {
+                    sqlstate_error("42601", "SET LEGAL_HOLD = TRUE requires TAG 'name'")
+                })?;
+
+                if coll.legal_holds.iter().any(|h| h.tag == tag) {
+                    return Err(sqlstate_error(
+                        "23505",
+                        &format!("legal hold tag '{tag}' already exists on {name}"),
+                    ));
+                }
+
+                let now = std::time::SystemTime::now()
+                    .duration_since(std::time::UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as u64;
+
+                coll.legal_holds
+                    .push(crate::control::security::catalog::LegalHold {
+                        tag,
+                        created_at: now,
+                        created_by: identity.username.clone(),
+                    });
+            } else if upper.contains("LEGAL_HOLD = FALSE") || upper.contains("LEGAL_HOLD=FALSE") {
+                let tag = extract_tag_value(&upper).ok_or_else(|| {
+                    sqlstate_error("42601", "SET LEGAL_HOLD = FALSE requires TAG 'name'")
+                })?;
+
+                let before = coll.legal_holds.len();
+                coll.legal_holds.retain(|h| h.tag != tag);
+                if coll.legal_holds.len() == before {
+                    return Err(sqlstate_error(
+                        "42704",
+                        &format!("legal hold tag '{tag}' not found on {name}"),
+                    ));
+                }
+            } else {
+                return Err(sqlstate_error(
+                    "42601",
+                    "ALTER COLLECTION SET LEGAL_HOLD requires = TRUE TAG 'name' or = FALSE TAG 'name'",
+                ));
+            }
+        }
+        "append_only" => {
+            if coll.append_only {
+                return Err(sqlstate_error(
+                    "42710",
+                    &format!("collection '{name}' is already append-only"),
+                ));
+            }
+            coll.append_only = true;
+        }
+        "last_value_cache" => {
+            if !coll.collection_type.is_timeseries() {
+                return Err(sqlstate_error(
+                    "42809",
+                    &format!("'{name}' is not a timeseries collection"),
+                ));
+            }
+            let val = extract_set_value(&upper, "LAST_VALUE_CACHE").ok_or_else(|| {
+                sqlstate_error("42601", "SET LAST_VALUE_CACHE requires = TRUE or = FALSE")
+            })?;
+            coll.lvc_enabled = val.eq_ignore_ascii_case("TRUE");
+        }
+        _ => {
+            return Err(sqlstate_error(
+                "42601",
+                &format!("unknown ALTER COLLECTION enforcement kind: '{kind}'"),
+            ));
+        }
+    }
+
+    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(coll.clone()));
+    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    if log_index == 0 {
+        catalog
+            .put_collection(&coll)
+            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    }
+
+    state.schema_version.bump();
+
+    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
+}
+
+/// Extract value from `SET KEY = 'value'` pattern.
+fn extract_set_value(upper: &str, key: &str) -> Option<String> {
+    let pattern = format!("{key} =");
+    let pos = upper
+        .find(&pattern)
+        .or_else(|| upper.find(&format!("{key}=")))?;
+    let after = upper[pos..].split('=').nth(1)?.trim();
+    let value = after.trim_start_matches('\'').trim_start_matches('"');
+    let end = value
+        .find('\'')
+        .or_else(|| value.find('"'))
+        .unwrap_or(value.len());
+    Some(value[..end].to_string())
+}
+
+/// Extract TAG value from `TAG 'name'` pattern.
+fn extract_tag_value(upper: &str) -> Option<String> {
+    let pos = upper.find("TAG ")?;
+    let after = upper[pos + 4..].trim();
+    let value = after.trim_start_matches('\'').trim_start_matches('"');
+    let end = value
+        .find('\'')
+        .or_else(|| value.find('"'))
+        .or_else(|| value.find(' '))
+        .unwrap_or(value.len());
+    if end == 0 {
+        return None;
+    }
+    Some(value[..end].to_string())
+}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/materialized_sum.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/materialized_sum.rs
new file mode 100644
index 00000000..29e40e2d
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/materialized_sum.rs
@@ -0,0 +1,165 @@
+//! `ALTER COLLECTION accounts ADD COLUMN balance DECIMAL DEFAULT 0 AS MATERIALIZED_SUM ...`
+//! — ADD COLUMN variant that binds a computed balance to another collection's
+//! per-row contribution. Atomically maintained on INSERT into the source side.
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::audit::AuditEvent;
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+
+pub fn add_materialized_sum(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    sql: &str,
+) -> PgWireResult<Vec<Response>> {
+    let tenant_id = identity.tenant_id.as_u32();
+    let parts: Vec<&str> = sql.split_whitespace().collect();
+    let upper = sql.to_uppercase();
+
+    let target_coll = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "missing collection name"))?
+        .to_lowercase();
+
+    let col_idx = parts
+        .iter()
+        .position(|p| p.eq_ignore_ascii_case("COLUMN"))
+        .or_else(|| parts.iter().position(|p| p.eq_ignore_ascii_case("ADD")))
+        .ok_or_else(|| sqlstate_error("42601", "missing ADD COLUMN"))?;
+    let target_column = parts
+        .get(col_idx + 1)
+        .ok_or_else(|| sqlstate_error("42601", "missing column name"))?
+        .to_lowercase();
+
+    let source_idx = parts
+        .iter()
+        .position(|p| p.eq_ignore_ascii_case("SOURCE"))
+        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires SOURCE <collection>"))?;
+    let source_coll = parts
+        .get(source_idx + 1)
+        .ok_or_else(|| sqlstate_error("42601", "missing collection after SOURCE"))?
+        .to_lowercase();
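+
+    // Remaining grammar: ON <source>.<col> = <target>.<col> VALUE <expr>;
+    // `parse_join_column` below takes the source side of the ON condition.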
+    let on_idx = upper
+        .find(" ON ")
+        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires ON join_condition"))?;
+    let after_on = &sql[on_idx + 4..];
+    let join_column = parse_join_column(after_on, &source_coll)?;
+
+    let value_idx = upper
+        .find(" VALUE ")
+        .ok_or_else(|| sqlstate_error("42601", "MATERIALIZED_SUM requires VALUE expression"))?;
+    let value_expr_str = sql[value_idx + 7..].trim();
+    let value_expr = parse_value_expression(value_expr_str, &source_coll)?;
+
+    let def = crate::control::security::catalog::types::MaterializedSumDef {
+        target_collection: target_coll.clone(),
+        target_column: target_column.clone(),
+        source_collection: source_coll,
+        join_column,
+        value_expr,
+    };
+
+    let Some(catalog) = state.credentials.catalog() else {
+        return Err(sqlstate_error("XX000", "no catalog available"));
+    };
+
+    let mut coll = catalog
+        .get_collection(tenant_id, &target_coll)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?
+        .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{target_coll}' not found")))?;
+
+    if coll
+        .materialized_sums
+        .iter()
+        .any(|m| m.target_column == target_column)
+    {
+        return Err(sqlstate_error(
+            "42710",
+            &format!("materialized sum already defined for column '{target_column}'"),
+        ));
+    }
+
+    coll.materialized_sums.push(def);
+    let entry = crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(coll.clone()));
+    let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry)
+        .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    if log_index == 0 {
+        catalog
+            .put_collection(&coll)
+            .map_err(|e| sqlstate_error("XX000", &e.to_string()))?;
+    }
+
+    state.schema_version.bump();
+
+    state.audit_record(
+        AuditEvent::ConfigChange,
+        Some(identity.tenant_id),
+        &identity.username,
+        &format!("ADD MATERIALIZED_SUM {target_column} on {target_coll}"),
+    );
+
+    Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))])
+}
+
+/// Parse join column from `source.col = target.id` — returns `col` (the source side).
+fn parse_join_column(join_clause: &str, source_coll: &str) -> PgWireResult<String> {
+    let eq_parts: Vec<&str> = join_clause.splitn(2, '=').collect();
+    if eq_parts.len() != 2 {
+        return Err(sqlstate_error("42601", "ON clause requires '=' join"));
+    }
+
+    let left = eq_parts[0].trim().to_lowercase();
+    let right = eq_parts[1].trim().to_lowercase();
+
+    let prefix = format!("{}.", source_coll);
+    let col = if left.starts_with(&prefix) {
+        left.strip_prefix(&prefix).unwrap_or(&left).to_string()
+    } else if right.starts_with(&prefix) {
+        right.strip_prefix(&prefix).unwrap_or(&right).to_string()
+    } else {
+        left.split('.').next_back().unwrap_or(&left).to_string()
+    };
+
+    let col = col.split_whitespace().next().unwrap_or(&col).to_string();
+
+    Ok(col)
+}
+
+/// Parse value expression — simple column reference or qualified `source.column`.
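+///
+/// E.g. with source `journal_entries`, `journal_entries.signed_amount` parses
+/// to `SqlExpr::Column("signed_amount")`; anything that is not a bare
+/// identifier is rejected with SQLSTATE 0A000.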
+fn parse_value_expression(
+    expr_str: &str,
+    source_coll: &str,
+) -> PgWireResult<crate::bridge::expr_eval::SqlExpr> {
+    use crate::bridge::expr_eval::SqlExpr;
+
+    let trimmed = expr_str.trim().trim_end_matches(';');
+    let lower = trimmed.to_lowercase();
+
+    let prefix = format!("{}.", source_coll);
+    let col_name = if lower.starts_with(&prefix) {
+        lower.strip_prefix(&prefix).unwrap_or(&lower).to_string()
+    } else {
+        lower.to_string()
+    };
+
+    if col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
+        Ok(SqlExpr::Column(col_name))
+    } else {
+        let first_word = col_name
+            .split_whitespace()
+            .next()
+            .unwrap_or(&col_name)
+            .to_string();
+        Err(sqlstate_error(
+            "0A000",
+            &format!(
+                "complex VALUE expressions not yet supported; use a pre-computed column. Got: '{first_word}...'"
+            ),
+        ))
+    }
+}
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/mod.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/mod.rs
new file mode 100644
index 00000000..e342e5e8
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/mod.rs
@@ -0,0 +1,21 @@
+//! ALTER TABLE / ALTER COLLECTION DDL handlers.
+//!
+//! Each column operation (`ADD COLUMN`, `DROP COLUMN`, `RENAME COLUMN`,
+//! `ALTER COLUMN ... TYPE ...`) lives in its own file and shares the
+//! catalog mutation pattern: fetch `StoredCollection`, mutate the
+//! `StrictSchema`, bump version, propose through Raft, refresh the
+//! Data Plane cache.
+
+pub mod add_column;
+pub mod alter_type;
+pub mod drop_column;
+pub mod enforcement;
+pub mod materialized_sum;
+pub mod rename_column;
+
+pub use add_column::alter_table_add_column;
+pub use alter_type::alter_collection_alter_column_type;
+pub use drop_column::alter_collection_drop_column;
+pub use enforcement::alter_collection_enforcement;
+pub use materialized_sum::add_materialized_sum;
+pub use rename_column::alter_collection_rename_column;
diff --git a/nodedb/src/control/server/pgwire/ddl/collection/alter/rename_column.rs b/nodedb/src/control/server/pgwire/ddl/collection/alter/rename_column.rs
new file mode 100644
index 00000000..9c15499e
--- /dev/null
+++ b/nodedb/src/control/server/pgwire/ddl/collection/alter/rename_column.rs
@@ -0,0 +1,126 @@
+//! `ALTER COLLECTION <name> RENAME COLUMN <old> TO <new>` — rename a
+//! column in a strict-document collection's schema.
+//!
+//! Binary-tuple layout is positional, so a rename is pure metadata: no row
+//! re-encoding is required. The schema version is bumped so the Data Plane
+//! picks up the new name on the next register dispatch.
+
+use pgwire::api::results::{Response, Tag};
+use pgwire::error::PgWireResult;
+
+use crate::control::security::audit::AuditEvent;
+use crate::control::security::identity::AuthenticatedIdentity;
+use crate::control::state::SharedState;
+
+use super::super::super::super::types::sqlstate_error;
+
+pub async fn alter_collection_rename_column(
+    state: &SharedState,
+    identity: &AuthenticatedIdentity,
+    parts: &[&str],
+    sql: &str,
+) -> PgWireResult<Vec<Response>> {
+    let name = parts
+        .get(2)
+        .ok_or_else(|| sqlstate_error("42601", "ALTER COLLECTION requires a name"))?
+        .to_lowercase();
+    let tenant_id = identity.tenant_id;
+
+    // Expect: ALTER COLLECTION <name> RENAME COLUMN <old> TO <new>.
+    let col_idx = parts
+        .iter()
+        .position(|p| p.eq_ignore_ascii_case("COLUMN"))
+        .ok_or_else(|| sqlstate_error("42601", "expected RENAME COLUMN <old> TO <new>"))?;
+    let old_name = parts
+        .get(col_idx + 1)
+        .ok_or_else(|| sqlstate_error("42601", "missing old column name"))?
+        .to_lowercase();
+    // TO keyword.
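+    // E.g. for `ALTER COLLECTION users RENAME COLUMN email TO contact_email`,
+    // parts[col_idx + 1..] is ["email", "TO", "contact_email"].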
+ match parts.get(col_idx + 2) { + Some(tok) if tok.eq_ignore_ascii_case("TO") => {} + _ => { + return Err(sqlstate_error( + "42601", + "expected TO between old and new name", + )); + } + } + let new_name = parts + .get(col_idx + 3) + .ok_or_else(|| sqlstate_error("42601", "missing new column name"))? + .trim_end_matches(';') + .to_lowercase(); + + let Some(catalog) = state.credentials.catalog() else { + return Err(sqlstate_error("XX000", "no catalog available")); + }; + + let coll = catalog + .get_collection(tenant_id.as_u32(), &name) + .map_err(|e| sqlstate_error("XX000", &e.to_string()))? + .filter(|c| c.is_active) + .ok_or_else(|| sqlstate_error("42P01", &format!("collection '{name}' does not exist")))?; + + if !coll.collection_type.is_strict() { + return Err(sqlstate_error( + "0A000", + "RENAME COLUMN is only supported on strict document collections", + )); + } + + let mut schema: nodedb_types::columnar::StrictSchema = coll + .timeseries_config + .as_deref() + .and_then(|s| sonic_rs::from_str(s).ok()) + .ok_or_else(|| sqlstate_error("XX000", "strict schema missing or malformed"))?; + + if schema + .columns + .iter() + .any(|c| c.name.eq_ignore_ascii_case(&new_name)) + { + return Err(sqlstate_error( + "42P07", + &format!("column '{new_name}' already exists on '{name}'"), + )); + } + + let col = schema + .columns + .iter_mut() + .find(|c| c.name.eq_ignore_ascii_case(&old_name)) + .ok_or_else(|| { + sqlstate_error( + "42703", + &format!("column '{old_name}' does not exist on '{name}'"), + ) + })?; + col.name = new_name.clone(); + schema.version = schema.version.saturating_add(1); + + let mut updated = coll; + updated.collection_type = nodedb_types::CollectionType::strict(schema.clone()); + updated.timeseries_config = sonic_rs::to_string(&schema).ok(); + + let entry = + crate::control::catalog_entry::CatalogEntry::PutCollection(Box::new(updated.clone())); + let log_index = crate::control::metadata_proposer::propose_catalog_entry(state, &entry) + .map_err(|e| sqlstate_error("XX000", &e.to_string()))?; + if log_index == 0 { + catalog + .put_collection(&updated) + .map_err(|e| sqlstate_error("XX000", &e.to_string()))?; + } + + super::super::create::dispatch_register_if_needed(state, identity, parts, sql).await; + state.schema_version.bump(); + + state.audit_record( + AuditEvent::AdminAction, + Some(tenant_id), + &identity.username, + &format!("ALTER COLLECTION '{name}' RENAME COLUMN '{old_name}' TO '{new_name}'"), + ); + + Ok(vec![Response::Execution(Tag::new("ALTER COLLECTION"))]) +} diff --git a/nodedb/src/control/server/pgwire/ddl/collection/mod.rs b/nodedb/src/control/server/pgwire/ddl/collection/mod.rs index bd4fffcf..0fc0a286 100644 --- a/nodedb/src/control/server/pgwire/ddl/collection/mod.rs +++ b/nodedb/src/control/server/pgwire/ddl/collection/mod.rs @@ -14,7 +14,10 @@ pub mod upsert; pub mod vector_metadata; // Re-export all public functions so existing callers via `super::collection::*` continue to work. 
-pub use alter::{alter_collection_enforcement, alter_table_add_column}; +pub use alter::{ + alter_collection_alter_column_type, alter_collection_drop_column, alter_collection_enforcement, + alter_collection_rename_column, alter_table_add_column, +}; pub use create::{create_collection, dispatch_register_if_needed}; pub use describe::{describe_collection, show_collections}; pub use drop::drop_collection; diff --git a/nodedb/src/control/server/pgwire/ddl/rls/parse.rs b/nodedb/src/control/server/pgwire/ddl/rls/parse.rs index f8f5745d..ac7bcbfc 100644 --- a/nodedb/src/control/server/pgwire/ddl/rls/parse.rs +++ b/nodedb/src/control/server/pgwire/ddl/rls/parse.rs @@ -121,6 +121,7 @@ pub fn parse_create_rls_policy( op: op.into(), value: nodedb_types::Value::String(value_str), clauses: Vec::new(), + expr: None, }; let predicate = zerompk::to_msgpack_vec(&vec![filter]) .map_err(|e| sqlstate_error("XX000", &e.to_string()))?; diff --git a/nodedb/src/control/server/pgwire/ddl/router/schema.rs b/nodedb/src/control/server/pgwire/ddl/router/schema.rs index 03d90a2b..11ae2c65 100644 --- a/nodedb/src/control/server/pgwire/ddl/router/schema.rs +++ b/nodedb/src/control/server/pgwire/ddl/router/schema.rs @@ -88,6 +88,47 @@ pub(super) async fn dispatch( ); } + // ALTER COLLECTION ADD COLUMN — same catalog-generic handler, but only + // when this isn't a specialised variant (MATERIALIZED_SUM is routed + // earlier via the collaborative dispatcher). + if upper.starts_with("ALTER COLLECTION ") + && upper.contains("ADD COLUMN") + && !upper.contains("MATERIALIZED_SUM") + { + return Some( + super::super::collection::alter_table_add_column(state, identity, parts, sql).await, + ); + } + + // ALTER COLLECTION DROP COLUMN — strict-schema column removal. + if upper.starts_with("ALTER COLLECTION ") && upper.contains("DROP COLUMN") { + return Some( + super::super::collection::alter_collection_drop_column(state, identity, parts, sql) + .await, + ); + } + + // ALTER COLLECTION RENAME COLUMN — metadata-only rename. + if upper.starts_with("ALTER COLLECTION ") && upper.contains("RENAME COLUMN") { + return Some( + super::super::collection::alter_collection_rename_column(state, identity, parts, sql) + .await, + ); + } + + // ALTER COLLECTION ALTER COLUMN ... TYPE ... — type alias change. + if upper.starts_with("ALTER COLLECTION ") + && upper.contains("ALTER COLUMN") + && upper.contains(" TYPE ") + { + return Some( + super::super::collection::alter_collection_alter_column_type( + state, identity, parts, sql, + ) + .await, + ); + } + // RLS policies. 
     if upper.starts_with("CREATE RLS POLICY ") {
         return Some(super::super::rls::create_rls_policy(state, identity, parts));
diff --git a/nodedb/src/control/server/sync/security.rs b/nodedb/src/control/server/sync/security.rs
index 0e4737f9..21cde5fc 100644
--- a/nodedb/src/control/server/sync/security.rs
+++ b/nodedb/src/control/server/sync/security.rs
@@ -322,6 +322,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("active".into()),
             clauses: Vec::new(),
+            expr: None,
         };
         let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
 
@@ -355,6 +356,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("active".into()),
             clauses: Vec::new(),
+            expr: None,
         };
         let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
diff --git a/nodedb/src/control/server/sync/session.rs b/nodedb/src/control/server/sync/session.rs
index e2681058..5090ac34 100644
--- a/nodedb/src/control/server/sync/session.rs
+++ b/nodedb/src/control/server/sync/session.rs
@@ -833,6 +833,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("active".into()),
             clauses: Vec::new(),
+            expr: None,
         };
         let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
         rls_store
diff --git a/nodedb/src/control/trigger/dml_hook.rs b/nodedb/src/control/trigger/dml_hook.rs
index 7482e31c..76b541ad 100644
--- a/nodedb/src/control/trigger/dml_hook.rs
+++ b/nodedb/src/control/trigger/dml_hook.rs
@@ -74,6 +74,7 @@ fn classify_document_op(op: &DocumentOp) -> Option<DmlWriteInfo> {
             collection,
             document_id,
             value,
+            ..
         } => {
             let new_fields = deserialize_value_to_fields(value);
             Some(DmlWriteInfo {
@@ -190,13 +191,18 @@ pub fn patch_task_with_mutated_fields(
             *value = new_bytes;
         }
         PhysicalPlan::Document(DocumentOp::PointUpdate { updates, .. }) => {
-            // Re-derive field-level updates from the full mutated row.
+            // Re-derive field-level updates from the full mutated row. Trigger
+            // mutations are fully-evaluated post-trigger values, so they ship
+            // as `UpdateValue::Literal`.
             *updates = mutated
                 .iter()
                 .filter_map(|(k, v)| {
-                    nodedb_types::value_to_msgpack(v)
-                        .ok()
-                        .map(|b| (k.clone(), b))
+                    nodedb_types::value_to_msgpack(v).ok().map(|b| {
+                        (
+                            k.clone(),
+                            crate::bridge::physical_plan::UpdateValue::Literal(b),
+                        )
+                    })
                 })
                 .collect();
         }
diff --git a/nodedb/src/control/trigger/when_parse.rs b/nodedb/src/control/trigger/when_parse.rs
index 0701da9f..89110a97 100644
--- a/nodedb/src/control/trigger/when_parse.rs
+++ b/nodedb/src/control/trigger/when_parse.rs
@@ -95,6 +95,7 @@ pub fn try_parse_when_to_filter(condition: &str) -> Option<(WhenTarget, ScanFilter)> {
                 op: FilterOp::IsNotNull,
                 value: Value::Null,
                 clauses: vec![],
+                expr: None,
             },
         ));
     }
@@ -110,6 +111,7 @@ pub fn try_parse_when_to_filter(condition: &str) -> Option<(WhenTarget, ScanFilter)> {
                 op: FilterOp::IsNull,
                 value: Value::Null,
                 clauses: vec![],
+                expr: None,
             },
         ));
     }
@@ -144,6 +146,7 @@ pub fn try_parse_when_to_filter(condition: &str) -> Option<(WhenTarget, ScanFilter)> {
                 op: *op,
                 value,
                 clauses: vec![],
+                expr: None,
             },
         ));
     }
diff --git a/nodedb/src/control/wal_replication.rs b/nodedb/src/control/wal_replication.rs
index 406ce616..156ee38b 100644
--- a/nodedb/src/control/wal_replication.rs
+++ b/nodedb/src/control/wal_replication.rs
@@ -56,7 +56,7 @@ pub enum ReplicatedWrite {
     PointUpdate {
         collection: String,
         document_id: String,
-        updates: Vec<(String, Vec<u8>)>,
+        updates: Vec<(String, crate::bridge::physical_plan::UpdateValue)>,
     },
     VectorInsert {
         collection: String,
diff --git a/nodedb/src/data/executor/dispatch/document.rs b/nodedb/src/data/executor/dispatch/document.rs
index 66f74ae1..e565f9eb 100644
--- a/nodedb/src/data/executor/dispatch/document.rs
+++ b/nodedb/src/data/executor/dispatch/document.rs
@@ -95,7 +95,15 @@ impl CoreLoop {
                 collection,
                 document_id,
                 value,
-            } => self.execute_upsert(task, tid, collection, document_id, value),
+                on_conflict_updates,
+            } => self.execute_upsert(
+                task,
+                tid,
+                collection,
+                document_id,
+                value,
+                on_conflict_updates,
+            ),
 
             DocumentOp::Truncate { collection, .. } => self.execute_truncate(task, tid, collection),
diff --git a/nodedb/src/data/executor/handlers/bulk_dml.rs b/nodedb/src/data/executor/handlers/bulk_dml.rs
index 8e45e56d..413481a0 100644
--- a/nodedb/src/data/executor/handlers/bulk_dml.rs
+++ b/nodedb/src/data/executor/handlers/bulk_dml.rs
@@ -88,7 +88,7 @@ impl CoreLoop {
         tid: u32,
         collection: &str,
         filter_bytes: &[u8],
-        updates: &[(String, Vec<u8>)],
+        updates: &[(String, crate::bridge::physical_plan::UpdateValue)],
         returning: bool,
     ) -> Response {
         debug!(core = self.core_id, %collection, returning, "bulk update");
@@ -104,15 +104,20 @@ impl CoreLoop {
             return self.response_error(task, e);
         }
 
-        let filters: Vec<ScanFilter> = match zerompk::from_msgpack(filter_bytes) {
-            Ok(f) => f,
-            Err(e) => {
-                return self.response_error(
-                    task,
-                    ErrorCode::Internal {
-                        detail: format!("deserialize filters: {e}"),
-                    },
-                );
+        // Empty `filter_bytes` means "no WHERE clause" — match every row.
+        let filters: Vec<ScanFilter> = if filter_bytes.is_empty() {
+            Vec::new()
+        } else {
+            match zerompk::from_msgpack(filter_bytes) {
+                Ok(f) => f,
+                Err(e) => {
+                    return self.response_error(
+                        task,
+                        ErrorCode::Internal {
+                            detail: format!("deserialize filters: {e}"),
+                        },
+                    );
+                }
             }
         };
@@ -164,13 +169,25 @@ impl CoreLoop {
                     None => continue,
                 }
             };
+            // Snapshot the current row for expression evaluation. All
+            // expression assignments see the pre-update state — multiple
+            // assignments in the same UPDATE do not observe each other,
+            // matching PostgreSQL semantics.
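+            // E.g. `UPDATE t SET a = b, b = a` swaps the two columns rather
+            // than making them equal, because both expressions read the
+            // pre-update snapshot.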
+            let eval_doc: nodedb_types::Value = doc.clone().into();
             if let Some(obj) = doc.as_object_mut() {
-                for (field, value_bytes) in updates {
-                    let val: serde_json::Value =
-                        match nodedb_types::json_from_msgpack(value_bytes) {
-                            Ok(v) => v,
-                            Err(_) => continue,
-                        };
+                for (field, update_val) in updates {
+                    let val: serde_json::Value = match update_val {
+                        crate::bridge::physical_plan::UpdateValue::Literal(bytes) => {
+                            match nodedb_types::json_from_msgpack(bytes) {
+                                Ok(v) => v,
+                                Err(_) => continue,
+                            }
+                        }
+                        crate::bridge::physical_plan::UpdateValue::Expr(expr) => {
+                            let result: nodedb_types::Value = expr.eval(&eval_doc);
+                            result.into()
+                        }
+                    };
                     obj.insert(field.clone(), val);
                 }
             }
@@ -273,15 +290,20 @@ impl CoreLoop {
     ) -> Response {
         debug!(core = self.core_id, %collection, "bulk delete");
 
-        let filters: Vec<ScanFilter> = match zerompk::from_msgpack(filter_bytes) {
-            Ok(f) => f,
-            Err(e) => {
-                return self.response_error(
-                    task,
-                    ErrorCode::Internal {
-                        detail: format!("deserialize filters: {e}"),
-                    },
-                );
+        // Empty `filter_bytes` means "no WHERE clause" — match every row.
+        let filters: Vec<ScanFilter> = if filter_bytes.is_empty() {
+            Vec::new()
+        } else {
+            match zerompk::from_msgpack(filter_bytes) {
+                Ok(f) => f,
+                Err(e) => {
+                    return self.response_error(
+                        task,
+                        ErrorCode::Internal {
+                            detail: format!("deserialize filters: {e}"),
+                        },
+                    );
+                }
             }
         };
diff --git a/nodedb/src/data/executor/handlers/columnar_agg.rs b/nodedb/src/data/executor/handlers/columnar_agg.rs
index f1fbf11d..a49879a5 100644
--- a/nodedb/src/data/executor/handlers/columnar_agg.rs
+++ b/nodedb/src/data/executor/handlers/columnar_agg.rs
@@ -593,6 +593,7 @@ mod tests {
             op: "gt".into(),
             value: nodedb_types::Value::Float(5000.0),
             clauses: vec![],
+            expr: None,
         };
         let result = try_columnar_aggregate(
             &mt,
diff --git a/nodedb/src/data/executor/handlers/columnar_filter/mod.rs b/nodedb/src/data/executor/handlers/columnar_filter/mod.rs
index 7050e5ef..37229936 100644
--- a/nodedb/src/data/executor/handlers/columnar_filter/mod.rs
+++ b/nodedb/src/data/executor/handlers/columnar_filter/mod.rs
@@ -102,6 +102,7 @@ mod tests {
             op: "gt".into(),
             value: nodedb_types::Value::Float(200.0),
             clauses: vec![],
+            expr: None,
         };
         let mask = eval_filters_dense(&mt, &[f], 30).unwrap();
         let passing: usize = mask.iter().filter(|&&b| b).count();
@@ -117,6 +118,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("db-1".into()),
             clauses: vec![],
+            expr: None,
         };
         let mask = eval_filters_sparse(&mt, &[f], &indices).unwrap();
         let passing: usize = mask.iter().filter(|&&b| b).count();
@@ -132,6 +134,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("nonexistent".into()),
             clauses: vec![],
+            expr: None,
         };
         let mask = eval_filters_sparse(&mt, &[f], &indices).unwrap();
         let passing: usize = mask.iter().filter(|&&b| b).count();
@@ -148,12 +151,14 @@ mod tests {
                 op: "gte".into(),
                 value: nodedb_types::Value::Float(100.0),
                 clauses: vec![],
+                expr: None,
             },
             ScanFilter {
                 field: "host".into(),
                 op: "eq".into(),
                 value: nodedb_types::Value::String("web-1".into()),
                 clauses: vec![],
+                expr: None,
             },
         ];
         let mask = eval_filters_sparse(&mt, &filters, &indices).unwrap();
@@ -173,7 +178,9 @@ mod tests {
                 op: "gt".into(),
                 value: nodedb_types::Value::Float(100.0),
                 clauses: vec![],
+                expr: None,
             }]],
+            expr: None,
         };
         assert!(eval_filters_dense(&mt, &[f], 30).is_none());
     }
@@ -208,6 +215,7 @@ mod tests {
             op: "eq".into(),
             value: nodedb_types::Value::String("web-1".into()),
             clauses: vec![],
+            expr: None,
         };
         let mask = eval_filters_dense(&src, &[f], 6).unwrap();
let passing: usize = mask.iter().filter(|&&b| b).count(); @@ -243,6 +251,7 @@ mod tests { op: "eq".into(), value: nodedb_types::Value::String("z".into()), clauses: vec![], + expr: None, }; let mask = eval_filters_dense(&src, &[f], 4).unwrap(); assert!(mask.iter().all(|&b| !b)); @@ -276,6 +285,7 @@ mod tests { op: "contains".into(), value: nodedb_types::Value::String("web".into()), clauses: vec![], + expr: None, }; let mask = eval_filters_dense(&src, &[f], 6).unwrap(); let passing: usize = mask.iter().filter(|&&b| b).count(); @@ -310,6 +320,7 @@ mod tests { op: "eq".into(), value: nodedb_types::Value::String("alpha".into()), clauses: vec![], + expr: None, }; let bm = eval_filters_bitmask(&src, &[f], 8).unwrap(); // bits 0,2,4,6 should be set (alpha rows) @@ -344,6 +355,7 @@ mod tests { op: "ne".into(), value: nodedb_types::Value::String("y".into()), clauses: vec![], + expr: None, }; let bm = eval_filters_bitmask(&src, &[f], 5).unwrap(); let indices = nodedb_query::simd_filter::bitmask_to_indices(&bm); @@ -377,6 +389,7 @@ mod tests { op: "eq".into(), value: nodedb_types::Value::String("a".into()), clauses: vec![], + expr: None, }; let mask = eval_filters_dense(&src, &[f], 4).unwrap(); // Only rows 0 and 2 are valid and match. @@ -421,6 +434,7 @@ mod tests { op: "gt".into(), value: nodedb_types::Value::Float(500.0), clauses: vec![], + expr: None, }; let mask = eval_filters_sparse(&src, &[f], &indices).unwrap(); let passing: usize = mask.iter().filter(|&&b| b).count(); @@ -432,6 +446,7 @@ mod tests { op: "eq".into(), value: nodedb_types::Value::String("alpha".into()), clauses: vec![], + expr: None, }; let mask2 = eval_filters_sparse(&src, &[f2], &indices).unwrap(); let passing2: usize = mask2.iter().filter(|&&b| b).count(); diff --git a/nodedb/src/data/executor/handlers/generated.rs b/nodedb/src/data/executor/handlers/generated.rs index fd4db51b..55bdcc77 100644 --- a/nodedb/src/data/executor/handlers/generated.rs +++ b/nodedb/src/data/executor/handlers/generated.rs @@ -38,8 +38,10 @@ pub fn evaluate_generated_columns( /// Check that an UPDATE doesn't directly modify any generated column. /// /// Returns an error if any of the update field names matches a generated column. -pub fn check_generated_readonly( - update_fields: &[(String, Vec)], +/// Generic over the payload type so both `Vec` literal updates and the +/// new `UpdateValue` carrier can share one implementation. +pub fn check_generated_readonly( + update_fields: &[(String, V)], specs: &[GeneratedColumnSpec], ) -> Result<(), ErrorCode> { for (field, _) in update_fields { @@ -58,8 +60,8 @@ pub fn check_generated_readonly( /// Check if any of the updated fields are dependencies of generated columns. /// /// Returns `true` if generated columns need recomputation after this UPDATE. 
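Both helpers in this file are genericized the same way: the payload type parameter is never inspected, only the field names, so one body serves both the legacy Vec<u8> literals and the new carrier. A minimal standalone model (the local UpdateValue below is a stand-in; the real type lives in nodedb::bridge::physical_plan and its full definition is not part of this diff):

// Local stand-in: the real UpdateValue also carries an Expr(SqlExpr) variant, elided here.
enum UpdateValue {
    Literal(Vec<u8>),
}

// `V` is never inspected; only the field names are, so one body covers both payloads.
fn violates_readonly<V>(update_fields: &[(String, V)], generated: &[&str]) -> bool {
    update_fields
        .iter()
        .any(|(field, _)| generated.contains(&field.as_str()))
}

fn main() {
    let legacy: Vec<(String, Vec<u8>)> = vec![("price".into(), vec![])];
    let carrier: Vec<(String, UpdateValue)> =
        vec![("price_with_tax".into(), UpdateValue::Literal(vec![]))];
    assert!(!violates_readonly(&legacy, &["price_with_tax"]));
    assert!(violates_readonly(&carrier, &["price_with_tax"]));
}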
-pub fn needs_recomputation( - update_fields: &[(String, Vec)], +pub fn needs_recomputation( + update_fields: &[(String, V)], specs: &[GeneratedColumnSpec], ) -> bool { for (field, _) in update_fields { diff --git a/nodedb/src/data/executor/handlers/join/mod.rs b/nodedb/src/data/executor/handlers/join/mod.rs index 18456a51..059d8f5a 100644 --- a/nodedb/src/data/executor/handlers/join/mod.rs +++ b/nodedb/src/data/executor/handlers/join/mod.rs @@ -299,6 +299,7 @@ mod tests { op: FilterOp::LtColumn, value: nodedb_types::Value::String("b.id".into()), clauses: Vec::new(), + expr: None, }]; assert!(binary_row_matches_filters(&merged, &filters)); diff --git a/nodedb/src/data/executor/handlers/point.rs b/nodedb/src/data/executor/handlers/point.rs deleted file mode 100644 index f658d68b..00000000 --- a/nodedb/src/data/executor/handlers/point.rs +++ /dev/null @@ -1,696 +0,0 @@ -//! Point operation handlers: PointGet, PointPut, PointDelete, PointUpdate. - -use redb::WriteTransaction; -use tracing::{debug, warn}; - -use crate::bridge::envelope::{ErrorCode, Response}; -use crate::data::executor::core_loop::CoreLoop; -use crate::data::executor::task::ExecutionTask; - -impl CoreLoop { - pub(in crate::data::executor) fn execute_point_get( - &mut self, - task: &ExecutionTask, - tid: u32, - collection: &str, - document_id: &str, - rls_filters: &[u8], - ) -> Response { - debug!(core = self.core_id, %collection, %document_id, "point get"); - - // Check if this is a strict collection — affects decode format. - let config_key = format!("{tid}:{collection}"); - let strict_schema = self.doc_configs.get(&config_key).and_then(|c| { - if let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = c.storage_mode - { - Some(schema.clone()) - } else { - None - } - }); - - // Fetch data from cache or redb. - let cached = self - .doc_cache - .get(tid, collection, document_id) - .map(|v| v.to_vec()); - let data = if let Some(data) = cached { - data - } else { - match self.sparse.get(tid, collection, document_id) { - Ok(Some(data)) => { - self.doc_cache.put(tid, collection, document_id, &data); - data - } - Ok(None) => return self.response_with_payload(task, Vec::new()), - Err(e) => { - tracing::warn!(core = self.core_id, error = %e, "sparse get failed"); - return self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ); - } - } - }; - - // RLS post-fetch: evaluate filters against msgpack bytes. - if !rls_filters.is_empty() { - if let Some(ref schema) = strict_schema { - // Strict: decode Binary Tuple to msgpack for RLS evaluation. - if let Some(mp) = - super::super::strict_format::binary_tuple_to_msgpack(&data, schema) - && !super::rls_eval::rls_check_msgpack_bytes(rls_filters, &mp) - { - return self.response_with_payload(task, Vec::new()); - } - } else if !super::rls_eval::rls_check_msgpack_bytes(rls_filters, &data) { - return self.response_with_payload(task, Vec::new()); - } - } - - // For strict collections, return msgpack (decoded from Binary Tuple). 
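The fetch at the top of this handler is a standard read-through: consult the per-core document cache, fall back to redb on a miss, and warm the cache so the next point get for the same id is served from memory. The generic shape of that logic (illustrative, not the NodeDB API):

use std::collections::HashMap;
use std::hash::Hash;

fn read_through<K: Hash + Eq + Clone, V: Clone, E>(
    cache: &mut HashMap<K, V>,
    key: K,
    fetch: impl FnOnce(&K) -> Result<Option<V>, E>,
) -> Result<Option<V>, E> {
    if let Some(v) = cache.get(&key) {
        return Ok(Some(v.clone())); // cache hit: no storage round-trip
    }
    match fetch(&key)? {
        Some(v) => {
            cache.insert(key, v.clone()); // warm the cache for the next get
            Ok(Some(v))
        }
        None => Ok(None), // misses are not cached, matching the handler above
    }
}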
- if let Some(ref schema) = strict_schema - && let Some(mp) = super::super::strict_format::binary_tuple_to_msgpack(&data, schema) - { - return self.response_with_payload(task, mp); - } - - self.response_with_payload(task, data) - } - - pub(in crate::data::executor) fn execute_point_put( - &mut self, - task: &ExecutionTask, - tid: u32, - collection: &str, - document_id: &str, - value: &[u8], - ) -> Response { - debug!(core = self.core_id, %collection, %document_id, "point put"); - - // Unified write transaction: document + inverted index + stats in one commit. - let txn = match self.sparse.begin_write() { - Ok(t) => t, - Err(e) => { - return self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ); - } - }; - - if let Err(e) = self.apply_point_put(&txn, tid, collection, document_id, value) { - return self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ); - } - - if let Err(e) = txn.commit() { - return self.response_error( - task, - ErrorCode::Internal { - detail: format!("commit: {e}"), - }, - ); - } - - self.checkpoint_coordinator.mark_dirty("sparse", 1); - - // Emit write event to Event Plane (after successful commit). - // For strict collections, convert Binary Tuple → msgpack so the - // Event Plane can deserialize the payload for trigger dispatch. - let event_value = self.resolve_event_payload(tid, collection, value); - self.emit_write_event( - task, - collection, - crate::event::WriteOp::Insert, - document_id, - Some(event_value.as_deref().unwrap_or(value)), - None, - ); - - self.response_ok(task) - } - - pub(in crate::data::executor) fn execute_point_delete( - &mut self, - task: &ExecutionTask, - tid: u32, - collection: &str, - document_id: &str, - ) -> Response { - debug!(core = self.core_id, %collection, %document_id, "point delete"); - match self.sparse.delete(tid, collection, document_id) { - Ok(_) => { - // Cascade 1: Remove from full-text inverted index (tenant-scoped). - let scoped_coll = format!("{tid}:{collection}"); - if let Err(e) = self.inverted.remove_document(&scoped_coll, document_id) { - warn!(core = self.core_id, %collection, %document_id, error = %e, "inverted index removal failed"); - } - - // Cascade 2: Remove secondary index entries for this document. - // Secondary indexes use key format "{tenant}:{collection}:{field}:{value}:{doc_id}". - // We scan and delete all entries ending with this doc_id. - if let Err(e) = - self.sparse - .delete_indexes_for_document(tid, collection, document_id) - { - warn!(core = self.core_id, %collection, %document_id, error = %e, "secondary index cascade failed"); - } - - // Cascade 3: Remove graph edges where this document is src or dst. - let edges_removed = self.csr.remove_node_edges(document_id); - if edges_removed > 0 { - // Also remove from persistent edge store. - if let Err(e) = self.edge_store.delete_edges_for_node(document_id) { - warn!(core = self.core_id, %document_id, error = %e, "edge cascade failed"); - } - tracing::trace!(core = self.core_id, %document_id, edges_removed, "EDGE_CASCADE_DELETE"); - } - - // Cascade 4: Remove from spatial R-tree indexes + reverse map. 
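Cascade 4 keys R-tree entries by fnv1a_hash(document_id) rather than by the string id itself, so deletion only needs to recompute the same hash. For reference, standard 64-bit FNV-1a over bytes is two constants and a fold (whether crate::util::fnv1a_hash is exactly this variant is an assumption; correctness only requires that delete and insert agree):

fn fnv1a_hash(bytes: &[u8]) -> u64 {
    let mut h: u64 = 0xcbf2_9ce4_8422_2325; // FNV-1a 64-bit offset basis
    for &b in bytes {
        h ^= u64::from(b); // xor the byte in first,
        h = h.wrapping_mul(0x0000_0100_0000_01b3); // then multiply by the FNV prime
    }
    h
}

fn main() {
    // Same id always maps to the same entry id, so delete finds what put inserted.
    assert_eq!(fnv1a_hash(b"doc-42"), fnv1a_hash(b"doc-42"));
}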
- let entry_id = crate::util::fnv1a_hash(document_id.as_bytes()); - let prefix = format!("{tid}:{collection}:"); - let spatial_keys: Vec = self - .spatial_indexes - .keys() - .filter(|k| k.starts_with(&prefix)) - .cloned() - .collect(); - for key in spatial_keys { - if let Some(rtree) = self.spatial_indexes.get_mut(&key) { - rtree.delete(entry_id); - } - self.spatial_doc_map.remove(&(key, entry_id)); - } - - // Record deletion for edge referential integrity. - self.deleted_nodes.insert(document_id.to_string()); - - // Invalidate document cache. - self.doc_cache.invalidate(tid, collection, document_id); - - self.checkpoint_coordinator.mark_dirty("sparse", 1); - - // Emit delete event to Event Plane. - self.emit_write_event( - task, - collection, - crate::event::WriteOp::Delete, - document_id, - None, - None, // old_value: would require reading before delete; future batch adds this. - ); - - self.response_ok(task) - } - Err(e) => self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ), - } - } - - pub(in crate::data::executor) fn execute_point_update( - &mut self, - task: &ExecutionTask, - tid: u32, - collection: &str, - document_id: &str, - updates: &[(String, Vec)], - returning: bool, - ) -> Response { - debug!(core = self.core_id, %collection, %document_id, fields = updates.len(), returning, "point update"); - - let config_key = format!("{tid}:{collection}"); - let is_strict = self.doc_configs.get(&config_key).is_some_and(|c| { - matches!( - c.storage_mode, - crate::bridge::physical_plan::StorageMode::Strict { .. } - ) - }); - - // Reject direct updates to generated columns. - if let Some(config) = self.doc_configs.get(&config_key) - && let Err(e) = super::generated::check_generated_readonly( - updates, - &config.enforcement.generated_columns, - ) - { - return self.response_error(task, e); - } - - match self.sparse.get(tid, collection, document_id) { - Ok(Some(current_bytes)) => { - // Check if generated columns need recomputation. - let has_generated = self.doc_configs.get(&config_key).is_some_and(|c| { - !c.enforcement.generated_columns.is_empty() - && super::generated::needs_recomputation( - updates, - &c.enforcement.generated_columns, - ) - }); - - // Fast path: non-strict, no generated columns — merge at binary level. - let updated_bytes = if !is_strict && !has_generated { - let base_mp = super::super::doc_format::json_to_msgpack(¤t_bytes); - let update_pairs: Vec<(&str, &[u8])> = updates - .iter() - .map(|(field, val_bytes)| (field.as_str(), val_bytes.as_slice())) - .collect(); - nodedb_query::msgpack_scan::merge_fields(&base_mp, &update_pairs) - } else { - // Strict or generated columns: decode → mutate → re-encode. 
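This branch is the heart of PointUpdate's performance story: only a schemaless collection with no generated columns can patch field bytes straight into the stored msgpack; everything else must materialize the document. In the replacement point/update.rs later in this diff, a third condition (has_expr) joins the same check, since expression right-hand sides need a decoded row to evaluate against. Schematically:

enum WritePath {
    BinaryMerge,          // patch fields into the stored msgpack in place
    DecodeMutateReencode, // full round-trip through serde_json::Value
}

fn choose_path(is_strict: bool, has_generated: bool, has_expr: bool) -> WritePath {
    if !is_strict && !has_generated && !has_expr {
        WritePath::BinaryMerge
    } else {
        WritePath::DecodeMutateReencode
    }
}

fn main() {
    assert!(matches!(choose_path(false, false, false), WritePath::BinaryMerge));
    assert!(matches!(choose_path(true, false, false), WritePath::DecodeMutateReencode));
}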
- let mut doc = if is_strict { - if let Some(config) = self.doc_configs.get(&config_key) - && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = - config.storage_mode - { - match super::super::strict_format::binary_tuple_to_json( - ¤t_bytes, - schema, - ) { - Some(v) => v, - None => { - return self.response_error( - task, - ErrorCode::Internal { - detail: "failed to decode Binary Tuple for update" - .into(), - }, - ); - } - } - } else { - return self.response_error( - task, - ErrorCode::Internal { - detail: "strict config missing during update".into(), - }, - ); - } - } else { - match super::super::doc_format::decode_document(¤t_bytes) { - Some(v) => v, - None => { - return self.response_error( - task, - ErrorCode::Internal { - detail: "failed to parse document for update".into(), - }, - ); - } - } - }; - - // Apply field-level updates. - if let Some(obj) = doc.as_object_mut() { - for (field, value_bytes) in updates { - let val: serde_json::Value = - match nodedb_types::json_from_msgpack(value_bytes) { - Ok(v) => v, - Err(e) => { - return self.response_error( - task, - ErrorCode::Internal { - detail: format!( - "update field '{field}': msgpack decode: {e}" - ), - }, - ); - } - }; - obj.insert(field.clone(), val); - } - } - - // Recompute generated columns. - if has_generated - && let Some(config) = self.doc_configs.get(&config_key) - && let Err(e) = super::generated::evaluate_generated_columns( - &mut doc, - &config.enforcement.generated_columns, - ) - { - return self.response_error(task, e); - } - - // Re-encode. - if is_strict { - if let Some(config) = self.doc_configs.get(&config_key) - && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = - config.storage_mode - { - let ndb_val: nodedb_types::Value = doc.clone().into(); - match super::super::strict_format::value_to_binary_tuple( - &ndb_val, schema, - ) { - Ok(bytes) => bytes, - Err(e) => { - return self.response_error( - task, - ErrorCode::Internal { - detail: format!("strict re-encode: {e}"), - }, - ); - } - } - } else { - return self.response_error( - task, - ErrorCode::Internal { - detail: "strict config missing during re-encode".into(), - }, - ); - } - } else { - super::super::doc_format::encode_to_msgpack(&doc) - } - }; - - match self - .sparse - .put(tid, collection, document_id, &updated_bytes) - { - Ok(()) => { - self.doc_cache - .put(tid, collection, document_id, &updated_bytes); - - // Emit update event to Event Plane. - // Convert Binary Tuple → msgpack for strict collections. - let new_ev = self.resolve_event_payload(tid, collection, &updated_bytes); - let old_ev = self.resolve_event_payload(tid, collection, ¤t_bytes); - self.emit_write_event( - task, - collection, - crate::event::WriteOp::Update, - document_id, - Some(new_ev.as_deref().unwrap_or(&updated_bytes)), - Some(old_ev.as_deref().unwrap_or(¤t_bytes)), - ); - - if returning { - // Return post-update document as msgpack array. 
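The RETURNING payload assembled just below is a msgpack array of one element: the updated document with its id injected; the non-RETURNING branch instead answers with a one-entry map {"affected": 1}. In raw msgpack terms the array case is a single fixarray header byte in front of the document (illustrative; the real code goes through the nodedb_query::msgpack_scan helpers rather than hand-writing markers):

fn returning_payload(doc_with_id: &[u8]) -> Vec<u8> {
    let mut payload = Vec::with_capacity(doc_with_id.len() + 1);
    payload.push(0x91); // fixarray of length 1 (0x90 | 1)
    payload.extend_from_slice(doc_with_id);
    payload
}

fn main() {
    // 0x80 is an empty fixmap standing in for the re-encoded document.
    assert_eq!(returning_payload(&[0x80]), [0x91, 0x80]);
}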
- let with_id = nodedb_query::msgpack_scan::inject_str_field( - &updated_bytes, - "id", - document_id, - ); - let mut payload = Vec::with_capacity(with_id.len() + 4); - nodedb_query::msgpack_scan::write_array_header(&mut payload, 1); - payload.extend_from_slice(&with_id); - self.response_with_payload(task, payload) - } else { - let mut payload = Vec::with_capacity(16); - nodedb_query::msgpack_scan::write_map_header(&mut payload, 1); - nodedb_query::msgpack_scan::write_kv_i64(&mut payload, "affected", 1); - self.response_with_payload(task, payload) - } - } - Err(e) => self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ), - } - } - Ok(None) => { - // Document not found — return 0 affected rows, not an error. - let mut payload = Vec::with_capacity(16); - nodedb_query::msgpack_scan::write_map_header(&mut payload, 1); - nodedb_query::msgpack_scan::write_kv_i64(&mut payload, "affected", 0); - self.response_with_payload(task, payload) - } - Err(e) => self.response_error( - task, - ErrorCode::Internal { - detail: e.to_string(), - }, - ), - } - } - - /// Apply a PointPut within an externally-owned WriteTransaction. - /// - /// Stores the document, auto-indexes text fields, updates column stats, - /// and populates the document cache. Does NOT commit the transaction. - pub(in crate::data::executor) fn apply_point_put( - &mut self, - txn: &WriteTransaction, - tid: u32, - collection: &str, - document_id: &str, - value: &[u8], - ) -> crate::Result<()> { - // Evaluate generated columns before encoding. - let config_key = format!("{tid}:{collection}"); - let value = if let Some(config) = self.doc_configs.get(&config_key) - && !config.enforcement.generated_columns.is_empty() - { - if let Some(mut doc) = super::super::doc_format::decode_document(value) { - if let Err(e) = super::generated::evaluate_generated_columns( - &mut doc, - &config.enforcement.generated_columns, - ) { - return Err(crate::Error::Storage { - engine: "generated".into(), - detail: format!("generated column evaluation failed: {e:?}"), - }); - } - super::super::doc_format::encode_to_msgpack(&doc) - } else { - value.to_vec() - } - } else { - super::super::doc_format::canonicalize_document_for_storage(value) - }; - let value = &value; - - // Check if this collection uses strict (Binary Tuple) encoding. - let stored = if let Some(config) = self.doc_configs.get(&config_key) - && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = - config.storage_mode - { - super::super::strict_format::bytes_to_binary_tuple(value, schema).map_err(|e| { - crate::Error::Serialization { - format: "binary_tuple".into(), - detail: e, - } - })? - } else { - value.to_vec() - }; - - self.sparse - .put_in_txn(txn, tid, collection, document_id, &stored)?; - - // Text indexing and stats use the original JSON input, not the stored - // bytes — Binary Tuple requires a schema to decode, and the input JSON - // is already available here regardless of storage mode. 
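Concretely, the extraction that follows reduces a document to the space-joined concatenation of its top-level string fields. A worked example; note that with default features serde_json::Map is a BTreeMap, so iteration here is key-sorted (with the preserve_order feature it would be insertion-ordered):

fn main() {
    let doc = serde_json::json!({ "name": "Ada", "age": 36, "bio": "wrote programs" });
    let obj = doc.as_object().unwrap();
    let text: String = obj
        .values()
        .filter_map(|v| v.as_str()) // non-string fields contribute nothing
        .collect::<Vec<_>>()
        .join(" ");
    // Key-sorted order: age (skipped, not a string), bio, name.
    assert_eq!(text, "wrote programs Ada");
}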
- if let Some(doc) = super::super::doc_format::decode_document(value) { - if let Some(obj) = doc.as_object() { - let text_content: String = obj - .values() - .filter_map(|v| v.as_str()) - .collect::>() - .join(" "); - if !text_content.is_empty() - && let Err(e) = self.inverted.index_document_in_txn( - txn, - &config_key, // "{tid}:{collection}" — tenant-scoped - document_id, - &text_content, - ) - { - warn!(core = self.core_id, %collection, %document_id, error = %e, "inverted index update failed"); - } - } - - if let Err(e) = self - .stats_store - .observe_document_in_txn(txn, tid, collection, &doc) - { - warn!(core = self.core_id, %collection, error = %e, "column stats update failed"); - } - - let cache_prefix = format!("{tid}:{collection}\0"); - self.aggregate_cache - .retain(|k, _| !k.starts_with(&cache_prefix)); - } - - self.doc_cache.put(tid, collection, document_id, &stored); - - // Secondary index extraction: if this collection has registered index paths, - // extract values from the incoming document and store them in the INDEXES - // redb B-Tree for range-scan-based lookups. - let config_key = format!("{tid}:{collection}"); - if let Some(config) = self.doc_configs.get(&config_key) - && let Some(doc) = super::super::doc_format::decode_document(value) - { - let paths = config.index_paths.clone(); - self.apply_secondary_indexes(tid, collection, &doc, document_id, &paths); - } - - // Spatial index: detect geometry fields and insert into R-tree. - // Tries to parse each object field as a GeoJSON Geometry. - // If successful, computes bbox and inserts into the per-field R-tree. - // Also writes the document to columnar_memtables so that bare table scans - // and aggregates on spatial collections read from columnar (spatial extends columnar). - if let Some(doc) = super::super::doc_format::decode_document(value) - && let Some(obj) = doc.as_object() - { - let mut has_geometry = false; - for (field_name, field_value) in obj { - if let Ok(geom) = - serde_json::from_value::(field_value.clone()) - { - has_geometry = true; - let bbox = nodedb_types::bbox::geometry_bbox(&geom); - let index_key = format!("{tid}:{collection}:{field_name}"); - let entry_id = crate::util::fnv1a_hash(document_id.as_bytes()); - let rtree = self.spatial_indexes.entry(index_key.clone()).or_default(); - rtree.insert(crate::engine::spatial::RTreeEntry { id: entry_id, bbox }); - // Maintain reverse map: entry_id → document_id. - self.spatial_doc_map - .insert((index_key, entry_id), document_id.to_string()); - } - } - - // If document has geometry, also write to columnar memtable. - // This ensures bare scans + aggregates work via columnar path. - if has_geometry { - self.ingest_doc_to_columnar(collection, obj); - } - } - - // Vector index: if the strict schema declares Vector(dim) columns, - // extract float arrays and insert into HNSW so KNN search works. - // Collect vector fields from schema first (avoids borrow conflict). 
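The "collect first" comment above is the usual borrow-checker workaround: a reference into one part of self cannot stay alive across a call that needs &mut self. Cloning the small (name, dim) list ends the borrow before the mutating loop begins. Reduced to its shape (field and method names below are illustrative):

use std::collections::HashMap;

struct Engine {
    configs: HashMap<String, Vec<(String, u32)>>,
    index_sizes: HashMap<String, usize>,
}

impl Engine {
    fn register_index(&mut self, name: &str, dim: u32) {
        self.index_sizes.insert(name.to_string(), dim as usize);
    }

    fn ingest(&mut self, key: &str) {
        // Cloning here ends the shared borrow of self.configs; iterating a
        // &Vec borrowed from self while calling the &mut self method below
        // would be rejected (E0502).
        let fields: Vec<(String, u32)> = self.configs.get(key).cloned().unwrap_or_default();
        for (name, dim) in fields {
            self.register_index(&name, dim);
        }
    }
}

fn main() {
    let mut e = Engine {
        configs: HashMap::from([("k".to_string(), vec![("embedding".to_string(), 768)])]),
        index_sizes: HashMap::new(),
    };
    e.ingest("k");
    assert_eq!(e.index_sizes["embedding"], 768);
}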
- let vector_fields: Vec<(String, u32)> = self - .doc_configs - .get(&config_key) - .and_then(|config| { - if let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = - config.storage_mode - { - let fields: Vec<_> = schema - .columns - .iter() - .filter_map(|col| { - if let nodedb_types::columnar::ColumnType::Vector(dim) = col.column_type - { - Some((col.name.clone(), dim)) - } else { - None - } - }) - .collect(); - if fields.is_empty() { - None - } else { - Some(fields) - } - } else { - None - } - }) - .unwrap_or_default(); - - if !vector_fields.is_empty() { - // Decode from MessagePack (internal format) — not JSON. - if let Ok(ndb_val) = nodedb_types::value_from_msgpack(value) - && let nodedb_types::Value::Object(ref obj) = ndb_val - { - for (field_name, dim) in &vector_fields { - if let Some(nodedb_types::Value::Array(arr)) = obj.get(field_name) { - let floats: Vec = arr - .iter() - .filter_map(|v| match v { - nodedb_types::Value::Float(f) => Some(*f as f32), - nodedb_types::Value::Integer(i) => Some(*i as f32), - _ => None, - }) - .collect(); - if floats.len() == *dim as usize { - let index_key = Self::vector_index_key(tid, collection, field_name); - let params = self - .vector_params - .get(&index_key) - .cloned() - .unwrap_or_default(); - let coll = - self.vector_collections.entry(index_key).or_insert_with(|| { - nodedb_vector::VectorCollection::new(*dim as usize, params) - }); - coll.insert_with_doc_id(floats, document_id.to_string()); - } - } - } - } - } - - // Schemaless vector indexing: if no strict schema but vector_params exist - // for this collection, extract matching fields and index them. - if vector_fields.is_empty() { - let prefix = format!("{tid}:{collection}:"); - let bare_key = format!("{tid}:{collection}"); - let mut schemaless_keys: Vec<(String, String)> = self - .vector_params - .keys() - .filter(|k| k.starts_with(&prefix)) - .map(|k| { - let field = k[prefix.len()..].to_string(); - (k.clone(), field) - }) - .collect(); - // Also check for bare key (no field name) — default to "embedding". - if schemaless_keys.is_empty() && self.vector_params.contains_key(&bare_key) { - schemaless_keys.push((bare_key, "embedding".to_string())); - } - - if !schemaless_keys.is_empty() - && let Ok(ndb_val) = nodedb_types::value_from_msgpack(value) - && let nodedb_types::Value::Object(ref obj) = ndb_val - { - for (params_key, field_name) in &schemaless_keys { - if let Some(nodedb_types::Value::Array(arr)) = obj.get(field_name) { - let floats: Vec = arr - .iter() - .filter_map(|v| match v { - nodedb_types::Value::Float(f) => Some(*f as f32), - nodedb_types::Value::Integer(i) => Some(*i as f32), - _ => None, - }) - .collect(); - if !floats.is_empty() { - let params = self - .vector_params - .get(params_key) - .cloned() - .unwrap_or_default(); - // Use field-qualified key so search can find it. - let store_key = Self::vector_index_key(tid, collection, field_name); - let coll = - self.vector_collections.entry(store_key).or_insert_with(|| { - nodedb_vector::VectorCollection::new(floats.len(), params) - }); - coll.insert_with_doc_id(floats, document_id.to_string()); - } - } - } - } - } - - Ok(()) - } -} diff --git a/nodedb/src/data/executor/handlers/point/apply_put.rs b/nodedb/src/data/executor/handlers/point/apply_put.rs new file mode 100644 index 00000000..39207702 --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/apply_put.rs @@ -0,0 +1,265 @@ +//! Shared "apply a PointPut inside an externally-owned transaction" helper. +//! +//! 
This is called by PointPut and by any composite path (triggers, UPSERT) +//! that needs document write + index + stats side-effects atomically. + +use redb::WriteTransaction; +use tracing::warn; + +use crate::data::executor::core_loop::CoreLoop; + +impl CoreLoop { + /// Apply a PointPut within an externally-owned WriteTransaction. + /// + /// Stores the document, auto-indexes text fields, updates column stats, + /// and populates the document cache. Does NOT commit the transaction. + pub(in crate::data::executor) fn apply_point_put( + &mut self, + txn: &WriteTransaction, + tid: u32, + collection: &str, + document_id: &str, + value: &[u8], + ) -> crate::Result<()> { + // Evaluate generated columns before encoding. + let config_key = format!("{tid}:{collection}"); + let value = if let Some(config) = self.doc_configs.get(&config_key) + && !config.enforcement.generated_columns.is_empty() + { + if let Some(mut doc) = super::super::super::doc_format::decode_document(value) { + if let Err(e) = super::super::generated::evaluate_generated_columns( + &mut doc, + &config.enforcement.generated_columns, + ) { + return Err(crate::Error::Storage { + engine: "generated".into(), + detail: format!("generated column evaluation failed: {e:?}"), + }); + } + super::super::super::doc_format::encode_to_msgpack(&doc) + } else { + value.to_vec() + } + } else { + super::super::super::doc_format::canonicalize_document_for_storage(value) + }; + let value = &value; + + // Check if this collection uses strict (Binary Tuple) encoding. + let stored = if let Some(config) = self.doc_configs.get(&config_key) + && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = + config.storage_mode + { + super::super::super::strict_format::bytes_to_binary_tuple(value, schema).map_err( + |e| crate::Error::Serialization { + format: "binary_tuple".into(), + detail: e, + }, + )? + } else { + value.to_vec() + }; + + self.sparse + .put_in_txn(txn, tid, collection, document_id, &stored)?; + + // Text indexing and stats use the original JSON input, not the stored + // bytes — Binary Tuple requires a schema to decode, and the input JSON + // is already available here regardless of storage mode. + if let Some(doc) = super::super::super::doc_format::decode_document(value) { + if let Some(obj) = doc.as_object() { + let text_content: String = obj + .values() + .filter_map(|v| v.as_str()) + .collect::>() + .join(" "); + if !text_content.is_empty() + && let Err(e) = self.inverted.index_document_in_txn( + txn, + &config_key, // "{tid}:{collection}" — tenant-scoped + document_id, + &text_content, + ) + { + warn!(core = self.core_id, %collection, %document_id, error = %e, "inverted index update failed"); + } + } + + if let Err(e) = self + .stats_store + .observe_document_in_txn(txn, tid, collection, &doc) + { + warn!(core = self.core_id, %collection, error = %e, "column stats update failed"); + } + + let cache_prefix = format!("{tid}:{collection}\0"); + self.aggregate_cache + .retain(|k, _| !k.starts_with(&cache_prefix)); + } + + self.doc_cache.put(tid, collection, document_id, &stored); + + // Secondary index extraction: if this collection has registered index paths, + // extract values from the incoming document and store them in the INDEXES + // redb B-Tree for range-scan-based lookups. 
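The key format that makes these lookups range-scannable is spelled out in the delete cascade elsewhere in this diff: "{tenant}:{collection}:{field}:{value}:{doc_id}". With the document id as the suffix, an equality probe on (field, value) is a prefix scan over the B-Tree, and whole-document cleanup is a suffix match. Sketch:

fn index_key(tid: u32, coll: &str, field: &str, value: &str, doc_id: &str) -> String {
    format!("{tid}:{coll}:{field}:{value}:{doc_id}")
}

fn main() {
    let k = index_key(7, "users", "email", "ada@example.com", "u1");
    // Equality lookup on (field, value): iterate keys sharing this prefix.
    assert!(k.starts_with("7:users:email:ada@example.com:"));
    // Delete cascade: drop every index entry whose key ends in ":u1".
    assert!(k.ends_with(":u1"));
}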
+ let config_key = format!("{tid}:{collection}"); + if let Some(config) = self.doc_configs.get(&config_key) + && let Some(doc) = super::super::super::doc_format::decode_document(value) + { + let paths = config.index_paths.clone(); + self.apply_secondary_indexes(tid, collection, &doc, document_id, &paths); + } + + // Spatial index: detect geometry fields and insert into R-tree. + // Tries to parse each object field as a GeoJSON Geometry. + // If successful, computes bbox and inserts into the per-field R-tree. + // Also writes the document to columnar_memtables so that bare table scans + // and aggregates on spatial collections read from columnar (spatial extends columnar). + if let Some(doc) = super::super::super::doc_format::decode_document(value) + && let Some(obj) = doc.as_object() + { + let mut has_geometry = false; + for (field_name, field_value) in obj { + if let Ok(geom) = + serde_json::from_value::(field_value.clone()) + { + has_geometry = true; + let bbox = nodedb_types::bbox::geometry_bbox(&geom); + let index_key = format!("{tid}:{collection}:{field_name}"); + let entry_id = crate::util::fnv1a_hash(document_id.as_bytes()); + let rtree = self.spatial_indexes.entry(index_key.clone()).or_default(); + rtree.insert(crate::engine::spatial::RTreeEntry { id: entry_id, bbox }); + // Maintain reverse map: entry_id → document_id. + self.spatial_doc_map + .insert((index_key, entry_id), document_id.to_string()); + } + } + + // If document has geometry, also write to columnar memtable. + // This ensures bare scans + aggregates work via columnar path. + if has_geometry { + self.ingest_doc_to_columnar(collection, obj); + } + } + + // Vector index: if the strict schema declares Vector(dim) columns, + // extract float arrays and insert into HNSW so KNN search works. + // Collect vector fields from schema first (avoids borrow conflict). + let vector_fields: Vec<(String, u32)> = self + .doc_configs + .get(&config_key) + .and_then(|config| { + if let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = + config.storage_mode + { + let fields: Vec<_> = schema + .columns + .iter() + .filter_map(|col| { + if let nodedb_types::columnar::ColumnType::Vector(dim) = col.column_type + { + Some((col.name.clone(), dim)) + } else { + None + } + }) + .collect(); + if fields.is_empty() { + None + } else { + Some(fields) + } + } else { + None + } + }) + .unwrap_or_default(); + + if !vector_fields.is_empty() { + // Decode from MessagePack (internal format) — not JSON. + if let Ok(ndb_val) = nodedb_types::value_from_msgpack(value) + && let nodedb_types::Value::Object(ref obj) = ndb_val + { + for (field_name, dim) in &vector_fields { + if let Some(nodedb_types::Value::Array(arr)) = obj.get(field_name) { + let floats: Vec = arr + .iter() + .filter_map(|v| match v { + nodedb_types::Value::Float(f) => Some(*f as f32), + nodedb_types::Value::Integer(i) => Some(*i as f32), + _ => None, + }) + .collect(); + if floats.len() == *dim as usize { + let index_key = Self::vector_index_key(tid, collection, field_name); + let params = self + .vector_params + .get(&index_key) + .cloned() + .unwrap_or_default(); + let coll = + self.vector_collections.entry(index_key).or_insert_with(|| { + nodedb_vector::VectorCollection::new(*dim as usize, params) + }); + coll.insert_with_doc_id(floats, document_id.to_string()); + } + } + } + } + } + + // Schemaless vector indexing: if no strict schema but vector_params exist + // for this collection, extract matching fields and index them. 
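Both vector paths share the same extraction rule: numeric array elements are coerced to f32, everything else is dropped, and under a declared Vector(dim) a wrong-arity result is silently skipped rather than rejected. A standalone model with a local stand-in for nodedb_types::Value:

// Local stand-in for nodedb_types::Value, just to show the coercion rule.
enum Val {
    Float(f64),
    Integer(i64),
    Other, // strings, nulls, nested values...
}

fn extract_embedding(arr: &[Val], dim: usize) -> Option<Vec<f32>> {
    let floats: Vec<f32> = arr
        .iter()
        .filter_map(|v| match v {
            Val::Float(f) => Some(*f as f32),
            Val::Integer(i) => Some(*i as f32),
            Val::Other => None, // non-numeric elements are dropped, not errors
        })
        .collect();
    // Only index when the arity matches the declared Vector(dim).
    (floats.len() == dim).then_some(floats)
}

fn main() {
    assert_eq!(extract_embedding(&[Val::Float(0.5), Val::Integer(1)], 2), Some(vec![0.5, 1.0]));
    assert_eq!(extract_embedding(&[Val::Float(0.5), Val::Other], 2), None); // arity mismatch
}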
+ if vector_fields.is_empty() { + let prefix = format!("{tid}:{collection}:"); + let bare_key = format!("{tid}:{collection}"); + let mut schemaless_keys: Vec<(String, String)> = self + .vector_params + .keys() + .filter(|k| k.starts_with(&prefix)) + .map(|k| { + let field = k[prefix.len()..].to_string(); + (k.clone(), field) + }) + .collect(); + // Also check for bare key (no field name) — default to "embedding". + if schemaless_keys.is_empty() && self.vector_params.contains_key(&bare_key) { + schemaless_keys.push((bare_key, "embedding".to_string())); + } + + if !schemaless_keys.is_empty() + && let Ok(ndb_val) = nodedb_types::value_from_msgpack(value) + && let nodedb_types::Value::Object(ref obj) = ndb_val + { + for (params_key, field_name) in &schemaless_keys { + if let Some(nodedb_types::Value::Array(arr)) = obj.get(field_name) { + let floats: Vec = arr + .iter() + .filter_map(|v| match v { + nodedb_types::Value::Float(f) => Some(*f as f32), + nodedb_types::Value::Integer(i) => Some(*i as f32), + _ => None, + }) + .collect(); + if !floats.is_empty() { + let params = self + .vector_params + .get(params_key) + .cloned() + .unwrap_or_default(); + // Use field-qualified key so search can find it. + let store_key = Self::vector_index_key(tid, collection, field_name); + let coll = + self.vector_collections.entry(store_key).or_insert_with(|| { + nodedb_vector::VectorCollection::new(floats.len(), params) + }); + coll.insert_with_doc_id(floats, document_id.to_string()); + } + } + } + } + } + + Ok(()) + } +} diff --git a/nodedb/src/data/executor/handlers/point/delete.rs b/nodedb/src/data/executor/handlers/point/delete.rs new file mode 100644 index 00000000..b7c463da --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/delete.rs @@ -0,0 +1,91 @@ +//! PointDelete: remove one document plus its cascading side-effects across +//! inverted, secondary, graph, and spatial indexes. + +use tracing::{debug, warn}; + +use crate::bridge::envelope::{ErrorCode, Response}; +use crate::data::executor::core_loop::CoreLoop; +use crate::data::executor::task::ExecutionTask; + +impl CoreLoop { + pub(in crate::data::executor) fn execute_point_delete( + &mut self, + task: &ExecutionTask, + tid: u32, + collection: &str, + document_id: &str, + ) -> Response { + debug!(core = self.core_id, %collection, %document_id, "point delete"); + match self.sparse.delete(tid, collection, document_id) { + Ok(_) => { + // Cascade 1: Remove from full-text inverted index (tenant-scoped). + let scoped_coll = format!("{tid}:{collection}"); + if let Err(e) = self.inverted.remove_document(&scoped_coll, document_id) { + warn!(core = self.core_id, %collection, %document_id, error = %e, "inverted index removal failed"); + } + + // Cascade 2: Remove secondary index entries for this document. + // Secondary indexes use key format "{tenant}:{collection}:{field}:{value}:{doc_id}". + // We scan and delete all entries ending with this doc_id. + if let Err(e) = + self.sparse + .delete_indexes_for_document(tid, collection, document_id) + { + warn!(core = self.core_id, %collection, %document_id, error = %e, "secondary index cascade failed"); + } + + // Cascade 3: Remove graph edges where this document is src or dst. + let edges_removed = self.csr.remove_node_edges(document_id); + if edges_removed > 0 { + // Also remove from persistent edge store. 
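Note the error-handling posture running through this file: once the primary sparse.delete succeeds, each cascade failure is logged with warn! and the delete still reports success. That policy, reduced to a sketch:

fn run_cascades(cascades: Vec<(&str, Box<dyn Fn() -> Result<(), String>>)>) -> bool {
    let mut all_clean = true;
    for (name, cascade) in cascades {
        if let Err(e) = cascade() {
            // Real code uses tracing::warn!; a cascade never fails the delete.
            eprintln!("cascade '{name}' failed (continuing): {e}");
            all_clean = false;
        }
    }
    all_clean
}

fn main() {
    let cascades: Vec<(&str, Box<dyn Fn() -> Result<(), String>>)> = vec![
        ("inverted-index", Box::new(|| Ok(()))),
        ("secondary-index", Box::new(|| Err("disk hiccup".into()))),
        ("graph-edges", Box::new(|| Ok(()))),
    ];
    // A failed cascade is observable in logs, not in the client response.
    assert!(!run_cascades(cascades));
}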
+ if let Err(e) = self.edge_store.delete_edges_for_node(document_id) { + warn!(core = self.core_id, %document_id, error = %e, "edge cascade failed"); + } + tracing::trace!(core = self.core_id, %document_id, edges_removed, "EDGE_CASCADE_DELETE"); + } + + // Cascade 4: Remove from spatial R-tree indexes + reverse map. + let entry_id = crate::util::fnv1a_hash(document_id.as_bytes()); + let prefix = format!("{tid}:{collection}:"); + let spatial_keys: Vec = self + .spatial_indexes + .keys() + .filter(|k| k.starts_with(&prefix)) + .cloned() + .collect(); + for key in spatial_keys { + if let Some(rtree) = self.spatial_indexes.get_mut(&key) { + rtree.delete(entry_id); + } + self.spatial_doc_map.remove(&(key, entry_id)); + } + + // Record deletion for edge referential integrity. + self.deleted_nodes.insert(document_id.to_string()); + + // Invalidate document cache. + self.doc_cache.invalidate(tid, collection, document_id); + + self.checkpoint_coordinator.mark_dirty("sparse", 1); + + // Emit delete event to Event Plane. + self.emit_write_event( + task, + collection, + crate::event::WriteOp::Delete, + document_id, + None, + None, // old_value: would require reading before delete; future batch adds this. + ); + + self.response_ok(task) + } + Err(e) => self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ), + } + } +} diff --git a/nodedb/src/data/executor/handlers/point/get.rs b/nodedb/src/data/executor/handlers/point/get.rs new file mode 100644 index 00000000..1c67a8f1 --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/get.rs @@ -0,0 +1,82 @@ +//! PointGet: read one document by id, apply RLS filters, return bytes. + +use tracing::debug; + +use crate::bridge::envelope::{ErrorCode, Response}; +use crate::data::executor::core_loop::CoreLoop; +use crate::data::executor::task::ExecutionTask; + +impl CoreLoop { + pub(in crate::data::executor) fn execute_point_get( + &mut self, + task: &ExecutionTask, + tid: u32, + collection: &str, + document_id: &str, + rls_filters: &[u8], + ) -> Response { + debug!(core = self.core_id, %collection, %document_id, "point get"); + + // Check if this is a strict collection — affects decode format. + let config_key = format!("{tid}:{collection}"); + let strict_schema = self.doc_configs.get(&config_key).and_then(|c| { + if let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = c.storage_mode + { + Some(schema.clone()) + } else { + None + } + }); + + // Fetch data from cache or redb. + let cached = self + .doc_cache + .get(tid, collection, document_id) + .map(|v| v.to_vec()); + let data = if let Some(data) = cached { + data + } else { + match self.sparse.get(tid, collection, document_id) { + Ok(Some(data)) => { + self.doc_cache.put(tid, collection, document_id, &data); + data + } + Ok(None) => return self.response_with_payload(task, Vec::new()), + Err(e) => { + tracing::warn!(core = self.core_id, error = %e, "sparse get failed"); + return self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ); + } + } + }; + + // RLS post-fetch: evaluate filters against msgpack bytes. + if !rls_filters.is_empty() { + if let Some(ref schema) = strict_schema { + // Strict: decode Binary Tuple to msgpack for RLS evaluation. 
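One deliberate property of this read path: an RLS-denied row and a nonexistent row both come back as an empty payload, so a caller cannot probe for the existence of rows its policy hides. Schematic:

fn point_get(stored: Option<Vec<u8>>, rls_pass: impl Fn(&[u8]) -> bool) -> Vec<u8> {
    match stored {
        Some(bytes) if rls_pass(&bytes) => bytes,
        // Denied and absent are deliberately indistinguishable.
        _ => Vec::new(),
    }
}

fn main() {
    let deny = |_: &[u8]| false;
    let allow = |_: &[u8]| true;
    assert_eq!(point_get(Some(vec![1, 2, 3]), deny), point_get(None, allow));
}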
+ if let Some(mp) = + super::super::super::strict_format::binary_tuple_to_msgpack(&data, schema) + && !super::super::rls_eval::rls_check_msgpack_bytes(rls_filters, &mp) + { + return self.response_with_payload(task, Vec::new()); + } + } else if !super::super::rls_eval::rls_check_msgpack_bytes(rls_filters, &data) { + return self.response_with_payload(task, Vec::new()); + } + } + + // For strict collections, return msgpack (decoded from Binary Tuple). + if let Some(ref schema) = strict_schema + && let Some(mp) = + super::super::super::strict_format::binary_tuple_to_msgpack(&data, schema) + { + return self.response_with_payload(task, mp); + } + + self.response_with_payload(task, data) + } +} diff --git a/nodedb/src/data/executor/handlers/point/mod.rs b/nodedb/src/data/executor/handlers/point/mod.rs new file mode 100644 index 00000000..2eed2641 --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/mod.rs @@ -0,0 +1,12 @@ +//! Point operation handlers: PointGet, PointPut, PointDelete, PointUpdate, +//! plus the shared `apply_point_put` transaction helper. +//! +//! Each handler is a method on `CoreLoop`; files here contribute `impl CoreLoop` +//! blocks that share the same type. Dispatch sees them via the normal method +//! lookup — no re-export needed. + +pub mod apply_put; +pub mod delete; +pub mod get; +pub mod put; +pub mod update; diff --git a/nodedb/src/data/executor/handlers/point/put.rs b/nodedb/src/data/executor/handlers/point/put.rs new file mode 100644 index 00000000..fe786a55 --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/put.rs @@ -0,0 +1,69 @@ +//! PointPut: insert or overwrite one document, committing storage + indexes +//! + stats in a single redb transaction via `apply_point_put`. + +use tracing::debug; + +use crate::bridge::envelope::{ErrorCode, Response}; +use crate::data::executor::core_loop::CoreLoop; +use crate::data::executor::task::ExecutionTask; + +impl CoreLoop { + pub(in crate::data::executor) fn execute_point_put( + &mut self, + task: &ExecutionTask, + tid: u32, + collection: &str, + document_id: &str, + value: &[u8], + ) -> Response { + debug!(core = self.core_id, %collection, %document_id, "point put"); + + // Unified write transaction: document + inverted index + stats in one commit. + let txn = match self.sparse.begin_write() { + Ok(t) => t, + Err(e) => { + return self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ); + } + }; + + if let Err(e) = self.apply_point_put(&txn, tid, collection, document_id, value) { + return self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ); + } + + if let Err(e) = txn.commit() { + return self.response_error( + task, + ErrorCode::Internal { + detail: format!("commit: {e}"), + }, + ); + } + + self.checkpoint_coordinator.mark_dirty("sparse", 1); + + // Emit write event to Event Plane (after successful commit). + // For strict collections, convert Binary Tuple → msgpack so the + // Event Plane can deserialize the payload for trigger dispatch. + let event_value = self.resolve_event_payload(tid, collection, value); + self.emit_write_event( + task, + collection, + crate::event::WriteOp::Insert, + document_id, + Some(event_value.as_deref().unwrap_or(value)), + None, + ); + + self.response_ok(task) + } +} diff --git a/nodedb/src/data/executor/handlers/point/update.rs b/nodedb/src/data/executor/handlers/point/update.rs new file mode 100644 index 00000000..f32510c4 --- /dev/null +++ b/nodedb/src/data/executor/handlers/point/update.rs @@ -0,0 +1,263 @@ +//! 
PointUpdate: read-modify-write field-level changes to a single document. +//! +//! Each assignment is either a pre-encoded literal (fast binary merge when +//! possible) or a `SqlExpr` that must be evaluated against the *current* row — +//! the evaluator is `nodedb_query::expr::SqlExpr::eval`, shared with +//! computed-column, window, and typeguard paths. + +use tracing::debug; + +use crate::bridge::envelope::{ErrorCode, Response}; +use crate::bridge::physical_plan::UpdateValue; +use crate::data::executor::core_loop::CoreLoop; +use crate::data::executor::task::ExecutionTask; + +impl CoreLoop { + pub(in crate::data::executor) fn execute_point_update( + &mut self, + task: &ExecutionTask, + tid: u32, + collection: &str, + document_id: &str, + updates: &[(String, UpdateValue)], + returning: bool, + ) -> Response { + debug!( + core = self.core_id, + %collection, + %document_id, + fields = updates.len(), + returning, + "point update" + ); + + let config_key = format!("{tid}:{collection}"); + let is_strict = self.doc_configs.get(&config_key).is_some_and(|c| { + matches!( + c.storage_mode, + crate::bridge::physical_plan::StorageMode::Strict { .. } + ) + }); + + // Reject direct updates to generated columns. + if let Some(config) = self.doc_configs.get(&config_key) + && let Err(e) = super::super::generated::check_generated_readonly( + updates, + &config.enforcement.generated_columns, + ) + { + return self.response_error(task, e); + } + + // Any non-literal assignment forces the slow decode→eval→re-encode path, + // because we need the current document to evaluate against. + let has_expr = updates + .iter() + .any(|(_, v)| matches!(v, UpdateValue::Expr(_))); + + match self.sparse.get(tid, collection, document_id) { + Ok(Some(current_bytes)) => { + let has_generated = self.doc_configs.get(&config_key).is_some_and(|c| { + !c.enforcement.generated_columns.is_empty() + && super::super::generated::needs_recomputation( + updates, + &c.enforcement.generated_columns, + ) + }); + + // Fast path: non-strict, no generated columns, all literal — merge at binary level. + let updated_bytes = if !is_strict && !has_generated && !has_expr { + let base_mp = super::super::super::doc_format::json_to_msgpack(¤t_bytes); + let update_pairs: Vec<(&str, &[u8])> = updates + .iter() + .filter_map(|(field, v)| match v { + UpdateValue::Literal(bytes) => Some((field.as_str(), bytes.as_slice())), + UpdateValue::Expr(_) => None, + }) + .collect(); + nodedb_query::msgpack_scan::merge_fields(&base_mp, &update_pairs) + } else { + // Strict, generated, or expression RHS: decode → mutate → re-encode. + let mut doc = if is_strict { + if let Some(config) = self.doc_configs.get(&config_key) + && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = + config.storage_mode + { + match super::super::super::strict_format::binary_tuple_to_json( + ¤t_bytes, + schema, + ) { + Some(v) => v, + None => { + return self.response_error( + task, + ErrorCode::Internal { + detail: "failed to decode Binary Tuple for update" + .into(), + }, + ); + } + } + } else { + return self.response_error( + task, + ErrorCode::Internal { + detail: "strict config missing during update".into(), + }, + ); + } + } else { + match super::super::super::doc_format::decode_document(¤t_bytes) { + Some(v) => v, + None => { + return self.response_error( + task, + ErrorCode::Internal { + detail: "failed to parse document for update".into(), + }, + ); + } + } + }; + + // Apply field-level updates. 
Expressions are evaluated + // against the current-row snapshot, so a later assignment + // observing a column updated earlier in the same statement + // still sees the pre-update value — matches PostgreSQL. + let eval_doc: nodedb_types::Value = doc.clone().into(); + if let Some(obj) = doc.as_object_mut() { + for (field, update_val) in updates { + let val = match update_val { + UpdateValue::Literal(bytes) => { + match nodedb_types::json_from_msgpack(bytes) { + Ok(v) => v, + Err(e) => { + return self.response_error( + task, + ErrorCode::Internal { + detail: format!( + "update field '{field}': msgpack decode: {e}" + ), + }, + ); + } + } + } + UpdateValue::Expr(expr) => { + let result: nodedb_types::Value = expr.eval(&eval_doc); + // Convert nodedb_types::Value → serde_json::Value so the + // downstream re-encode path (strict or msgpack) can proceed + // through its existing json-based branches unchanged. + let json: serde_json::Value = result.into(); + json + } + }; + obj.insert(field.clone(), val); + } + } + + // Recompute generated columns. + if has_generated + && let Some(config) = self.doc_configs.get(&config_key) + && let Err(e) = super::super::generated::evaluate_generated_columns( + &mut doc, + &config.enforcement.generated_columns, + ) + { + return self.response_error(task, e); + } + + // Re-encode. + if is_strict { + if let Some(config) = self.doc_configs.get(&config_key) + && let crate::bridge::physical_plan::StorageMode::Strict { ref schema } = + config.storage_mode + { + let ndb_val: nodedb_types::Value = doc.clone().into(); + match super::super::super::strict_format::value_to_binary_tuple( + &ndb_val, schema, + ) { + Ok(bytes) => bytes, + Err(e) => { + return self.response_error( + task, + ErrorCode::Internal { + detail: format!("strict re-encode: {e}"), + }, + ); + } + } + } else { + return self.response_error( + task, + ErrorCode::Internal { + detail: "strict config missing during re-encode".into(), + }, + ); + } + } else { + super::super::super::doc_format::encode_to_msgpack(&doc) + } + }; + + match self + .sparse + .put(tid, collection, document_id, &updated_bytes) + { + Ok(()) => { + self.doc_cache + .put(tid, collection, document_id, &updated_bytes); + + // Emit update event to Event Plane. + // Convert Binary Tuple → msgpack for strict collections. 
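resolve_event_payload evidently returns Some only when a conversion was needed (the strict Binary Tuple case), which is why the emit call below can fall back to the raw stored bytes with unwrap_or. The selection in isolation (an assumption about the helper's contract, inferred from this call site):

fn event_payload<'a>(converted: &'a Option<Vec<u8>>, stored: &'a [u8]) -> &'a [u8] {
    // Some(_) = re-encoded msgpack for a strict collection; None = the stored
    // bytes are already msgpack and can go to the Event Plane as-is.
    converted.as_deref().unwrap_or(stored)
}

fn main() {
    assert_eq!(event_payload(&None, b"raw"), b"raw");
    assert_eq!(event_payload(&Some(vec![0x81]), b"raw"), &[0x81][..]);
}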
+ let new_ev = self.resolve_event_payload(tid, collection, &updated_bytes); + let old_ev = self.resolve_event_payload(tid, collection, ¤t_bytes); + self.emit_write_event( + task, + collection, + crate::event::WriteOp::Update, + document_id, + Some(new_ev.as_deref().unwrap_or(&updated_bytes)), + Some(old_ev.as_deref().unwrap_or(¤t_bytes)), + ); + + if returning { + let with_id = nodedb_query::msgpack_scan::inject_str_field( + &updated_bytes, + "id", + document_id, + ); + let mut payload = Vec::with_capacity(with_id.len() + 4); + nodedb_query::msgpack_scan::write_array_header(&mut payload, 1); + payload.extend_from_slice(&with_id); + self.response_with_payload(task, payload) + } else { + let mut payload = Vec::with_capacity(16); + nodedb_query::msgpack_scan::write_map_header(&mut payload, 1); + nodedb_query::msgpack_scan::write_kv_i64(&mut payload, "affected", 1); + self.response_with_payload(task, payload) + } + } + Err(e) => self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ), + } + } + Ok(None) => { + let mut payload = Vec::with_capacity(16); + nodedb_query::msgpack_scan::write_map_header(&mut payload, 1); + nodedb_query::msgpack_scan::write_kv_i64(&mut payload, "affected", 0); + self.response_with_payload(task, payload) + } + Err(e) => self.response_error( + task, + ErrorCode::Internal { + detail: e.to_string(), + }, + ), + } + } +} diff --git a/nodedb/src/data/executor/handlers/rls_eval.rs b/nodedb/src/data/executor/handlers/rls_eval.rs index e217e0aa..54579994 100644 --- a/nodedb/src/data/executor/handlers/rls_eval.rs +++ b/nodedb/src/data/executor/handlers/rls_eval.rs @@ -67,6 +67,7 @@ mod tests { op: op.into(), value, clauses: Vec::new(), + expr: None, }; zerompk::to_msgpack_vec(&vec![filter]).unwrap() } @@ -113,12 +114,14 @@ mod tests { op: "eq".into(), value: nodedb_types::Value::String("42".into()), clauses: Vec::new(), + expr: None, }, ScanFilter { field: "status".into(), op: "eq".into(), value: nodedb_types::Value::String("active".into()), clauses: Vec::new(), + expr: None, }, ]; let rls = zerompk::to_msgpack_vec(&filters).unwrap(); diff --git a/nodedb/src/data/executor/handlers/upsert.rs b/nodedb/src/data/executor/handlers/upsert.rs index ebdbb13e..6d914db3 100644 --- a/nodedb/src/data/executor/handlers/upsert.rs +++ b/nodedb/src/data/executor/handlers/upsert.rs @@ -25,8 +25,15 @@ impl CoreLoop { collection: &str, document_id: &str, value: &[u8], + on_conflict_updates: &[(String, crate::bridge::physical_plan::UpdateValue)], ) -> Response { - debug!(core = self.core_id, %collection, %document_id, "upsert"); + debug!( + core = self.core_id, + %collection, + %document_id, + has_on_conflict = !on_conflict_updates.is_empty(), + "upsert" + ); // Detect strict storage mode for this collection. let config_key = format!("{tid}:{collection}"); @@ -94,8 +101,15 @@ impl CoreLoop { } }; - // Merge: overlay new fields onto existing. - let merged = merge_values(existing_val, new_val); + // Conflict branch: if `ON CONFLICT DO UPDATE SET` assignments + // are present, evaluate each against the *existing* row and + // apply only those fields. Otherwise fall back to the plain + // merge semantics used by `UPSERT INTO` / no-action upserts. + let merged = if on_conflict_updates.is_empty() { + merge_values(existing_val, new_val) + } else { + apply_on_conflict_updates(existing_val, on_conflict_updates) + }; // Encode merged value for storage. 
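At the SQL surface, the conflict branch above is what separates "overlay the new row" from "compute from the old row". A hypothetical test in the style of nodedb/tests/sql_transactions.rs (the exact ON CONFLICT spelling the parser accepts, the INT column type, and the counters table are all assumptions):

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn on_conflict_update_evaluates_against_existing_row() {
    let server = TestServer::start().await;
    server
        .exec("CREATE COLLECTION counters TYPE DOCUMENT STRICT (id TEXT PRIMARY KEY, n INT)")
        .await
        .unwrap();
    server
        .exec("INSERT INTO counters (id, n) VALUES ('c1', 1)")
        .await
        .unwrap();
    // The conflicting insert's assignment reads the *existing* row snapshot:
    // n becomes 1 + 1 = 2; the incoming n = 0 is never merged, unlike the
    // plain-upsert overlay path.
    server
        .exec("INSERT INTO counters (id, n) VALUES ('c1', 0) ON CONFLICT DO UPDATE SET n = n + 1")
        .await
        .unwrap();
}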
let stored_bytes = if let Some(ref schema) = strict_schema { @@ -185,6 +199,40 @@ impl CoreLoop { } } +/// Apply `ON CONFLICT DO UPDATE SET` assignments against the existing row. +/// +/// Each assignment's RHS is evaluated via `SqlExpr::eval` — identical to +/// the UPDATE handler's path — so arithmetic (`n = n + 1`), functions +/// (`name = UPPER(name)`), `CASE`, and concatenation all work. Literal +/// assignments bypass the evaluator and decode their msgpack directly. +fn apply_on_conflict_updates( + existing: nodedb_types::Value, + updates: &[(String, crate::bridge::physical_plan::UpdateValue)], +) -> nodedb_types::Value { + let mut obj = match existing { + nodedb_types::Value::Object(map) => map, + // If the existing row isn't an object (shouldn't happen for + // document engines) fall back to the assignments as a blank slate. + _ => std::collections::HashMap::new(), + }; + // Snapshot the row before any assignment applies, so all assignments + // see the pre-update state — matches PostgreSQL semantics. + let snapshot = nodedb_types::Value::Object(obj.clone()); + for (field, update_val) in updates { + let new_val: nodedb_types::Value = match update_val { + crate::bridge::physical_plan::UpdateValue::Literal(bytes) => { + match nodedb_types::value_from_msgpack(bytes) { + Ok(v) => v, + Err(_) => continue, + } + } + crate::bridge::physical_plan::UpdateValue::Expr(expr) => expr.eval(&snapshot), + }; + obj.insert(field.clone(), new_val); + } + nodedb_types::Value::Object(obj) +} + /// Merge two `nodedb_types::Value` objects: overlay `new` fields onto `existing`. fn merge_values(existing: nodedb_types::Value, new: nodedb_types::Value) -> nodedb_types::Value { match (existing, new) { diff --git a/nodedb/src/storage/cold_filter.rs b/nodedb/src/storage/cold_filter.rs index 01a228cd..f3727f57 100644 --- a/nodedb/src/storage/cold_filter.rs +++ b/nodedb/src/storage/cold_filter.rs @@ -434,6 +434,7 @@ mod tests { op: "gt".into(), value: nodedb_types::Value::Integer(25), clauses: vec![], + expr: None, }]; let batches = read_parquet_filtered(&parquet, &filters, &[]).unwrap(); diff --git a/nodedb/tests/executor_tests/test_aggregate_aliases.rs b/nodedb/tests/executor_tests/test_aggregate_aliases.rs index 366bb4d3..706729a7 100644 --- a/nodedb/tests/executor_tests/test_aggregate_aliases.rs +++ b/nodedb/tests/executor_tests/test_aggregate_aliases.rs @@ -36,6 +36,7 @@ fn aggregate_output_uses_user_alias_but_having_reads_canonical_key() { op: FilterOp::Gt, value: nodedb_types::Value::Integer(1), clauses: Vec::new(), + expr: None, }]) .unwrap(); diff --git a/nodedb/tests/executor_tests/test_array_ops.rs b/nodedb/tests/executor_tests/test_array_ops.rs index e2501053..f69bb4db 100644 --- a/nodedb/tests/executor_tests/test_array_ops.rs +++ b/nodedb/tests/executor_tests/test_array_ops.rs @@ -31,6 +31,7 @@ fn filter(field: &str, op: &str, value: nodedb_types::Value) -> ScanFilter { op: op.into(), value, clauses: Vec::new(), + expr: None, } } diff --git a/nodedb/tests/executor_tests/test_columnar_aggregate.rs b/nodedb/tests/executor_tests/test_columnar_aggregate.rs index 5ae51bcd..c01a23bc 100644 --- a/nodedb/tests/executor_tests/test_columnar_aggregate.rs +++ b/nodedb/tests/executor_tests/test_columnar_aggregate.rs @@ -89,6 +89,7 @@ fn columnar_having_uses_canonical_key_but_output_keeps_user_alias() { op: FilterOp::Gt, value: nodedb_types::Value::Integer(1), clauses: Vec::new(), + expr: None, }]) .unwrap(); diff --git a/nodedb/tests/executor_tests/test_conditional_update.rs 
b/nodedb/tests/executor_tests/test_conditional_update.rs index 34e72882..1da07958 100644 --- a/nodedb/tests/executor_tests/test_conditional_update.rs +++ b/nodedb/tests/executor_tests/test_conditional_update.rs @@ -19,6 +19,7 @@ fn filter(field: &str, op: &str, value: nodedb_types::Value) -> ScanFilter { op: op.into(), value, clauses: Vec::new(), + expr: None, } } @@ -77,7 +78,9 @@ fn bulk_update_returns_affected_count() { let filter_bytes = zerompk::to_msgpack_vec(&filters).unwrap(); let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(99)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(99)).unwrap(), + ), )]; let payload = send_ok( @@ -116,7 +119,9 @@ fn conditional_decrement_stops_at_zero() { let new_stock = current_stock.saturating_sub(1); let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(new_stock)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(new_stock)).unwrap(), + ), )]; let payload = send_ok( @@ -159,7 +164,9 @@ fn bulk_update_zero_match_returns_zero_affected() { let filter_bytes = zerompk::to_msgpack_vec(&filters).unwrap(); let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(999)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(999)).unwrap(), + ), )]; let payload = send_ok( @@ -189,7 +196,9 @@ fn bulk_update_returning_returns_updated_documents() { let filter_bytes = zerompk::to_msgpack_vec(&filters).unwrap(); let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(0)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(0)).unwrap(), + ), )]; let payload = send_ok( @@ -224,7 +233,9 @@ fn bulk_update_returning_zero_match_returns_affected_zero() { let filter_bytes = zerompk::to_msgpack_vec(&filters).unwrap(); let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(999)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(999)).unwrap(), + ), )]; let payload = send_ok( @@ -251,7 +262,9 @@ fn point_update_returns_affected_count() { let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(5)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(5)).unwrap(), + ), )]; let payload = send_ok( @@ -279,7 +292,9 @@ fn point_update_returning_returns_updated_document() { let updates = vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(7)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(7)).unwrap(), + ), )]; let payload = send_ok( @@ -335,7 +350,9 @@ fn transaction_batch_does_not_abort_on_zero_row_update() { filters: filters_match, updates: vec![( "stock".to_string(), - nodedb_types::json_to_msgpack(&serde_json::json!(0)).unwrap(), + nodedb::bridge::physical_plan::UpdateValue::Literal( + nodedb_types::json_to_msgpack(&serde_json::json!(0)).unwrap(), + ), )], returning: false, }), @@ -344,7 +361,9 @@ fn transaction_batch_does_not_abort_on_zero_row_update() { filters: filters_nomatch, updates: vec![( "stock".to_string(), - 
diff --git a/nodedb/tests/executor_tests/test_cross_engine_validation.rs b/nodedb/tests/executor_tests/test_cross_engine_validation.rs
index 5afa7081..9fc451ce 100644
--- a/nodedb/tests/executor_tests/test_cross_engine_validation.rs
+++ b/nodedb/tests/executor_tests/test_cross_engine_validation.rs
@@ -123,6 +123,7 @@ fn cross_model_query_vector_graph_relational() {
         op: "gte".into(),
         value: nodedb_types::Value::Integer(2023),
         clauses: Vec::new(),
+        expr: None,
     }];
     let filter_bytes = zerompk::to_msgpack_vec(&filter).unwrap();
     let scan_payload = send_ok(
diff --git a/nodedb/tests/executor_tests/test_cross_type_join.rs b/nodedb/tests/executor_tests/test_cross_type_join.rs
index 6ac055b2..25efc5ac 100644
--- a/nodedb/tests/executor_tests/test_cross_type_join.rs
+++ b/nodedb/tests/executor_tests/test_cross_type_join.rs
@@ -428,6 +428,7 @@ fn single_core_self_join_respects_aliases_in_filter_and_projection() {
         op: FilterOp::LtColumn,
         value: nodedb_types::Value::String("b.id".into()),
         clauses: Vec::new(),
+        expr: None,
     }])
     .unwrap();
 
@@ -571,6 +572,7 @@ fn schemaless_self_join_matches_on_canonicalized_object_fields() {
         op: FilterOp::LtColumn,
         value: nodedb_types::Value::String("b.id".into()),
         clauses: Vec::new(),
+        expr: None,
     }])
     .unwrap();
 
@@ -651,6 +653,7 @@ fn cross_join_uses_inline_right_scalar_aggregate_for_post_filter() {
         op: FilterOp::GtColumn,
         value: nodedb_types::Value::String("avg_score".into()),
         clauses: Vec::new(),
+        expr: None,
     }])
     .unwrap();
 
@@ -738,6 +741,7 @@ fn cross_join_uses_unaliased_scalar_aggregate_key_for_post_filter() {
         op: FilterOp::GtColumn,
         value: nodedb_types::Value::String("avg(amount)".into()),
         clauses: Vec::new(),
+        expr: None,
     }])
     .unwrap();
 
@@ -843,6 +847,7 @@ fn semi_join_uses_nested_scalar_subquery_result_as_inline_right() {
         op: FilterOp::GtColumn,
         value: nodedb_types::Value::String("avg(amount)".into()),
         clauses: Vec::new(),
+        expr: None,
     }])
     .unwrap();
 
diff --git a/nodedb/tests/executor_tests/test_facet.rs b/nodedb/tests/executor_tests/test_facet.rs
index d0480fe7..206ab26d 100644
--- a/nodedb/tests/executor_tests/test_facet.rs
+++ b/nodedb/tests/executor_tests/test_facet.rs
@@ -69,6 +69,7 @@ fn filter(field: &str, op: &str, value: nodedb_types::Value) -> ScanFilter {
         op: op.into(),
         value,
         clauses: Vec::new(),
+        expr: None,
     }
 }
 
diff --git a/nodedb/tests/executor_tests/test_generated_columns.rs b/nodedb/tests/executor_tests/test_generated_columns.rs
index 35d52139..9982a849 100644
--- a/nodedb/tests/executor_tests/test_generated_columns.rs
+++ b/nodedb/tests/executor_tests/test_generated_columns.rs
@@ -186,7 +186,9 @@ fn update_recomputes_generated_column() {
             document_id: "p1".into(),
             updates: vec![(
                 "price".to_string(),
-                nodedb_types::json_to_msgpack(&serde_json::json!(200.0)).unwrap(),
+                nodedb::bridge::physical_plan::UpdateValue::Literal(
+                    nodedb_types::json_to_msgpack(&serde_json::json!(200.0)).unwrap(),
+                ),
             )],
             returning: false,
         }),
@@ -236,7 +238,9 @@ fn update_generated_column_directly_rejected() {
             document_id: "p1".into(),
             updates: vec![(
                 "price_with_tax".to_string(),
-                nodedb_types::json_to_msgpack(&serde_json::json!(999.0)).unwrap(),
+                nodedb::bridge::physical_plan::UpdateValue::Literal(
+                    nodedb_types::json_to_msgpack(&serde_json::json!(999.0)).unwrap(),
+                ),
             )],
             returning: false,
         }),
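// The `UpdateValue::Literal(json_to_msgpack(...))` wrapping repeated through
// these tests could be collapsed with a small helper; hypothetical, shown
// only to make the new encoding explicit:
fn lit(v: serde_json::Value) -> nodedb::bridge::physical_plan::UpdateValue {
    nodedb::bridge::physical_plan::UpdateValue::Literal(
        nodedb_types::json_to_msgpack(&v).unwrap(),
    )
}
// Usage: `updates: vec![("stock".to_string(), lit(serde_json::json!(99)))]`.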
diff --git a/nodedb/tests/executor_tests/test_security_and_isolation.rs b/nodedb/tests/executor_tests/test_security_and_isolation.rs
index 2161be55..766fa7a0 100644
--- a/nodedb/tests/executor_tests/test_security_and_isolation.rs
+++ b/nodedb/tests/executor_tests/test_security_and_isolation.rs
@@ -62,6 +62,7 @@ fn security_rls_policy_enforcement() {
         op: "eq".into(),
         value: nodedb_types::Value::String("approved".into()),
         clauses: Vec::new(),
+        expr: None,
     };
     let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
 
diff --git a/nodedb/tests/executor_tests/test_tenant_isolation_rls.rs b/nodedb/tests/executor_tests/test_tenant_isolation_rls.rs
index e4d2b1df..36cb49ec 100644
--- a/nodedb/tests/executor_tests/test_tenant_isolation_rls.rs
+++ b/nodedb/tests/executor_tests/test_tenant_isolation_rls.rs
@@ -18,6 +18,7 @@ fn rls_policies_isolated_between_tenants() {
         op: "eq".into(),
         value: nodedb_types::Value::String("approved".into()),
         clauses: Vec::new(),
+        expr: None,
     };
     let predicate = zerompk::to_msgpack_vec(&vec![filter]).unwrap();
 
@@ -72,6 +73,7 @@ fn rls_policy_listing_scoped() {
         op: "eq".into(),
         value: nodedb_types::Value::String("admin".into()),
         clauses: Vec::new(),
+        expr: None,
     };
     store
         .create_policy(RlsPolicy {
diff --git a/nodedb/tests/executor_tests/test_timeseries.rs b/nodedb/tests/executor_tests/test_timeseries.rs
index 7f7c2865..e35bf54a 100644
--- a/nodedb/tests/executor_tests/test_timeseries.rs
+++ b/nodedb/tests/executor_tests/test_timeseries.rs
@@ -290,6 +290,7 @@ fn where_predicate_filters_count() {
             op: "eq".into(),
             value: nodedb_types::Value::String("A".into()),
             clauses: vec![],
+            expr: None,
         }],
     );
     let filtered_count = filtered[0]["count(*)"].as_u64().unwrap();
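// Assumed shape of the filter struct after this change, inferred from the
// constructors above; only `expr` is new, and `field` is implied by the
// `filter(field, op, value)` helpers. Typed call sites use `FilterOp`
// variants where this sketch shows the string form of `op`.
pub struct ScanFilter {
    pub field: String,
    pub op: String,
    pub value: nodedb_types::Value,
    /// Nested sub-clauses for AND/OR trees.
    pub clauses: Vec<ScanFilter>,
    /// New: a full `SqlExpr` for predicates the (field, op, value) triple
    /// cannot express, e.g. `LOWER(name) = 'x'` or `qty + 1 = 5`.
    pub expr: Option<SqlExpr>,
}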
diff --git a/nodedb/tests/sql_transactions.rs b/nodedb/tests/sql_transactions.rs
index 048f705d..8f70a366 100644
--- a/nodedb/tests/sql_transactions.rs
+++ b/nodedb/tests/sql_transactions.rs
@@ -86,3 +86,143 @@ async fn alter_table_add_column_refreshes_strict_schema() {
         "expected row to include inserted id"
     );
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn alter_collection_add_column_refreshes_strict_schema() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION memories TYPE DOCUMENT STRICT (\
+             id TEXT PRIMARY KEY, \
+             name TEXT NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO memories (id, name) VALUES ('m1', 'first')")
+        .await
+        .unwrap();
+
+    // `ALTER COLLECTION ... ADD COLUMN` must reach the catalog-generic
+    // add-column handler — the same path exercised by `ALTER TABLE` above.
+    server
+        .exec("ALTER COLLECTION memories ADD COLUMN is_latest BOOL DEFAULT true")
+        .await
+        .unwrap();
+
+    server
+        .exec("INSERT INTO memories (id, name, is_latest) VALUES ('m2', 'second', false)")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT id FROM memories WHERE id = 'm2'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(rows[0].contains("m2"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn alter_collection_drop_column() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION memories TYPE DOCUMENT STRICT (\
+             id TEXT PRIMARY KEY, \
+             name TEXT NOT NULL, \
+             scratch TEXT)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO memories (id, name, scratch) VALUES ('m1', 'first', 'temp')")
+        .await
+        .unwrap();
+
+    server
+        .exec("ALTER COLLECTION memories DROP COLUMN scratch")
+        .await
+        .unwrap();
+
+    // New inserts without the dropped column still succeed, and old rows
+    // remain readable.
+    server
+        .exec("INSERT INTO memories (id, name) VALUES ('m2', 'second')")
+        .await
+        .unwrap();
+    let rows = server
+        .query_text("SELECT id FROM memories WHERE id = 'm2'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn alter_collection_rename_column() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION memories TYPE DOCUMENT STRICT (\
+             id TEXT PRIMARY KEY, \
+             name TEXT NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO memories (id, name) VALUES ('m1', 'first')")
+        .await
+        .unwrap();
+
+    server
+        .exec("ALTER COLLECTION memories RENAME COLUMN name TO title")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT title FROM memories WHERE id = 'm1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"title\":\"first\""),
+        "expected renamed column 'title' = 'first', got {:?}",
+        rows[0]
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn alter_collection_alter_column_type() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION measurements TYPE DOCUMENT STRICT (\
+             id TEXT PRIMARY KEY, \
+             value INT NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO measurements (id, value) VALUES ('m1', 42)")
+        .await
+        .unwrap();
+
+    server
+        .exec("ALTER COLLECTION measurements ALTER COLUMN value TYPE BIGINT")
+        .await
+        .unwrap();
+
+    // Re-insert using the widened type.
+    server
+        .exec("INSERT INTO measurements (id, value) VALUES ('m2', 9999999999)")
+        .await
+        .unwrap();
+    let rows = server
+        .query_text("SELECT id FROM measurements WHERE id = 'm2'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+}
diff --git a/nodedb/tests/sql_update_expressions.rs b/nodedb/tests/sql_update_expressions.rs
new file mode 100644
index 00000000..6f091ebb
--- /dev/null
+++ b/nodedb/tests/sql_update_expressions.rs
@@ -0,0 +1,327 @@
+//! Integration coverage for UPDATE statements whose RHS is a non-literal
+//! expression (column arithmetic, scalar functions, `NOW()`, ...).
+//!
+//! These must be evaluated against the current row by the executor — not
+//! serialized as `format!("{expr:?}")` and written back as a string. That
+//! failure mode errors loudly on strict collections (the re-encoder rejects
+//! the debug string) and silently corrupts schemaless collections.
+
+mod common;
+
+use common::pgwire_harness::TestServer;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_column_increment_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec("CREATE COLLECTION counters TYPE DOCUMENT STRICT (id STRING PRIMARY KEY, n INT)")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO counters (id, n) VALUES ('a', 1)")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE counters SET n = n + 1 WHERE id = 'a'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT n FROM counters WHERE id = 'a'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"n\":2"),
+        "expected n=2, got {:?}",
+        rows[0]
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_column_decrement_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec("CREATE COLLECTION inventory TYPE DOCUMENT STRICT (id STRING PRIMARY KEY, stock INT)")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO inventory (id, stock) VALUES ('item1', 10)")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE inventory SET stock = stock - 1 WHERE id = 'item1'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT stock FROM inventory WHERE id = 'item1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"stock\":9"),
+        "expected stock=9, got {:?}",
+        rows[0]
+    );
+}
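// What the old failure mode stored, concretely: for `SET n = n + 1` the RHS
// parses to a `SqlExpr::BinaryOp`, and formatting that with `{:?}` yields a
// debug string rather than a number. A sketch; the exact debug text depends
// on the derive output:
fn debug_string_failure_mode_sketch() {
    let rhs = SqlExpr::BinaryOp {
        left: Box::new(SqlExpr::Column("n".into())),
        op: BinaryOp::Add,
        right: Box::new(SqlExpr::Literal(nodedb_types::Value::Integer(1))),
    };
    // Old path: format!("{rhs:?}") => `BinaryOp { left: Column("n"), .. }`,
    // which a strict re-encoder rejects and a schemaless store keeps verbatim.
    // New path: evaluate against the row, e.g. rhs.eval(&row) => Integer(2)
    // when the document carries n = 1.
    let _ = rhs;
}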
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_column_increment_schemaless() {
+    let server = TestServer::start().await;
+
+    server.exec("CREATE COLLECTION counters").await.unwrap();
+    server
+        .exec("INSERT INTO counters (id, n) VALUES ('a', 1)")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE counters SET n = n + 1 WHERE id = 'a'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT n FROM counters WHERE id = 'a'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+
+    // Regression guard: the previous failure mode wrote a `format!("{expr:?}")`
+    // debug string ("BinaryOp { left: Column { .. }, ... }") into the column
+    // instead of evaluating the expression.
+    assert!(
+        !rows[0].contains("BinaryOp") && !rows[0].contains("Literal"),
+        "schemaless UPDATE stored stringified AST instead of evaluating: {:?}",
+        rows[0]
+    );
+    assert!(
+        rows[0].contains("\"n\":2"),
+        "expected n=2, got {:?}",
+        rows[0]
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_now_function_rhs_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION rows TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             updated_at TIMESTAMP)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO rows (id, updated_at) VALUES ('r1', '2020-01-01T00:00:00Z')")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE rows SET updated_at = NOW() WHERE id = 'r1'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT updated_at FROM rows WHERE id = 'r1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        !rows[0].contains("Function") && !rows[0].contains("Identifier"),
+        "updated_at stored stringified AST: {:?}",
+        rows[0]
+    );
+    // `rows[0]` is the rendered row text, so compare by substring: if the
+    // seeded timestamp is still present, NOW() was never evaluated.
+    assert!(!rows[0].contains("2020-01-01T00:00:00Z"), "NOW() was not evaluated");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_upper_function_rhs_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION users TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             name STRING NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO users (id, name) VALUES ('u1', 'alice')")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE users SET name = UPPER(name) WHERE id = 'u1'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT name FROM users WHERE id = 'u1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"name\":\"ALICE\""),
+        "expected name=ALICE, got {:?}",
+        rows[0]
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_string_concat_rhs_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION users TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             first STRING NOT NULL, \
+             last STRING NOT NULL, \
+             full STRING)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO users (id, first, last) VALUES ('u1', 'Ada', 'Lovelace')")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE users SET full = first || ' ' || last WHERE id = 'u1'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT full FROM users WHERE id = 'u1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"full\":\"Ada Lovelace\""),
+        "expected full='Ada Lovelace', got {:?}",
+        rows[0]
+    );
+}
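// The concatenation test above rides on `BinaryOp::Concat`. Assuming the
// parser left-folds `first || ' ' || last`, the AST handed to the evaluator
// looks like this sketch (the fold direction is an assumption):
fn concat_ast_sketch() -> SqlExpr {
    SqlExpr::BinaryOp {
        left: Box::new(SqlExpr::BinaryOp {
            left: Box::new(SqlExpr::Column("first".into())),
            op: BinaryOp::Concat,
            right: Box::new(SqlExpr::Literal(nodedb_types::Value::String(" ".into()))),
        }),
        op: BinaryOp::Concat,
        right: Box::new(SqlExpr::Column("last".into())),
    }
    // Evaluated against {"first": "Ada", "last": "Lovelace"} this yields
    // "Ada Lovelace", which is what the assertion checks for.
}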
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_case_expression_rhs_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION players TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             score INT NOT NULL, \
+             tier STRING)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO players (id, score) VALUES ('p1', 95)")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO players (id, score) VALUES ('p2', 60)")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE players SET tier = CASE WHEN score > 90 THEN 'gold' ELSE 'silver' END")
+        .await
+        .unwrap();
+
+    let gold = server
+        .query_text("SELECT id FROM players WHERE tier = 'gold'")
+        .await
+        .unwrap();
+    assert_eq!(gold.len(), 1);
+    assert!(gold[0].contains("p1"));
+
+    let silver = server
+        .query_text("SELECT id FROM players WHERE tier = 'silver'")
+        .await
+        .unwrap();
+    assert_eq!(silver.len(), 1);
+    assert!(silver[0].contains("p2"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn upsert_do_update_with_column_arithmetic() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION counters TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             n INT NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO counters (id, n) VALUES ('a', 5)")
+        .await
+        .unwrap();
+
+    // ON CONFLICT DO UPDATE with column arithmetic on the existing row.
+    // Hits the same expression-evaluator path as plain UPDATE.
+    server
+        .exec(
+            "INSERT INTO counters (id, n) VALUES ('a', 999) \
+             ON CONFLICT (id) DO UPDATE SET n = n + 10",
+        )
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT n FROM counters WHERE id = 'a'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"n\":15"),
+        "expected n=15 (5+10 via UPSERT DO UPDATE), got {:?}",
+        rows[0]
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_scalar_function_rhs_strict() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION scores TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             confidence DOUBLE)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO scores (id, confidence) VALUES ('s1', 0.8)")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE scores SET confidence = LEAST(confidence + 0.05, 1.0) WHERE id = 's1'")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT confidence FROM scores WHERE id = 's1'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(
+        rows[0].contains("\"confidence\":0.85"),
+        "expected confidence=0.85, got {:?}",
+        rows[0]
+    );
+}
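// Conceptually the UPSERT test above bottoms out in `apply_on_conflict_updates`
// from the core-loop change earlier in this diff. A sketch with assumed
// visibility (the helper is private to the executor):
fn upsert_increment_sketch() -> nodedb_types::Value {
    use std::collections::HashMap;
    let existing = nodedb_types::Value::Object(HashMap::from([
        ("id".to_string(), nodedb_types::Value::String("a".into())),
        ("n".to_string(), nodedb_types::Value::Integer(5)),
    ]));
    let n_plus_10 = SqlExpr::BinaryOp {
        left: Box::new(SqlExpr::Column("n".into())),
        op: BinaryOp::Add,
        right: Box::new(SqlExpr::Literal(nodedb_types::Value::Integer(10))),
    };
    // The conflicting VALUES ('a', 999) row is discarded; only the SET list
    // touches the stored document, yielding n = 15.
    apply_on_conflict_updates(
        existing,
        &[("n".to_string(), UpdateValue::Expr(n_plus_10))],
    )
}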
diff --git a/nodedb/tests/sql_where_expressions.rs b/nodedb/tests/sql_where_expressions.rs
new file mode 100644
index 00000000..a575129f
--- /dev/null
+++ b/nodedb/tests/sql_where_expressions.rs
@@ -0,0 +1,270 @@
+//! Integration coverage for non-trivial WHERE-clause expressions.
+//!
+//! Scalar functions (`LOWER`, `UPPER`, `LENGTH`, arithmetic, ...) on the
+//! left-hand side of a WHERE comparison must be evaluated by the scan
+//! filter — not silently dropped through a match-all fall-through.
+
+mod common;
+
+use common::pgwire_harness::TestServer;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn lower_function_in_where_matches_row() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION entities TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             canonical_name STRING NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name) VALUES ('ent_1', 'JavaScript')")
+        .await
+        .unwrap();
+
+    // Baseline: direct equality resolves through the normal filter path.
+    let direct = server
+        .query_text("SELECT id FROM entities WHERE canonical_name = 'JavaScript'")
+        .await
+        .unwrap();
+    assert_eq!(direct.len(), 1);
+
+    // Scalar function on the LHS must be evaluated per-row, not dropped.
+    let lowered = server
+        .query_text("SELECT id FROM entities WHERE LOWER(canonical_name) = 'javascript'")
+        .await
+        .unwrap();
+    assert_eq!(
+        lowered.len(),
+        1,
+        "LOWER(canonical_name) = 'javascript' should match the inserted row"
+    );
+    assert!(lowered[0].contains("ent_1"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn lower_function_in_where_excludes_nonmatching_row() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION entities TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             canonical_name STRING NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name) VALUES ('ent_1', 'JavaScript')")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name) VALUES ('ent_2', 'Python')")
+        .await
+        .unwrap();
+
+    let rows = server
+        .query_text("SELECT id FROM entities WHERE LOWER(canonical_name) = 'python'")
+        .await
+        .unwrap();
+    assert_eq!(rows.len(), 1);
+    assert!(rows[0].contains("ent_2"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn delete_with_scalar_function_in_where_is_scoped() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION entities TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             canonical_name STRING NOT NULL)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name) VALUES ('ent_1', 'JavaScript')")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name) VALUES ('ent_2', 'Python')")
+        .await
+        .unwrap();
+
+    // If the filter falls through to match-all, this wipes the table.
+    server
+        .exec("DELETE FROM entities WHERE LOWER(canonical_name) = 'javascript'")
+        .await
+        .unwrap();
+
+    let rows = server.query_text("SELECT id FROM entities").await.unwrap();
+    assert_eq!(rows.len(), 1, "DELETE should only remove the matching row");
+    assert!(rows[0].contains("ent_2"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn update_with_scalar_function_in_where_is_scoped() {
+    let server = TestServer::start().await;
+
+    server
+        .exec(
+            "CREATE COLLECTION entities TYPE DOCUMENT STRICT (\
+             id STRING PRIMARY KEY, \
+             canonical_name STRING NOT NULL, \
+             status STRING)",
+        )
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name, status) VALUES ('ent_1', 'JavaScript', 'new')")
+        .await
+        .unwrap();
+    server
+        .exec("INSERT INTO entities (id, canonical_name, status) VALUES ('ent_2', 'Python', 'new')")
+        .await
+        .unwrap();
+
+    server
+        .exec("UPDATE entities SET status = 'seen' WHERE LOWER(canonical_name) = 'javascript'")
+        .await
+        .unwrap();
+
+    let touched = server
+        .query_text("SELECT id FROM entities WHERE status = 'seen'")
+        .await
+        .unwrap();
+    assert_eq!(
+        touched.len(),
+        1,
+        "UPDATE should only affect the matching row — match-all would touch both"
+    );
+    assert!(touched[0].contains("ent_1"));
+}
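// Presumed wiring behind the scoping guarantees above: when a filter carries
// `expr`, the scan evaluates it per document instead of falling through to
// match-all. `is_truthy` is the value_ops helper the expression evaluator
// already uses; this function is a sketch, not the executor's actual code.
fn filter_matches(filter: &ScanFilter, doc: &nodedb_types::Value) -> bool {
    if let Some(expr) = &filter.expr {
        // LOWER(name) = 'x', qty + 1 = 5, NOT (...), and BETWEEN with column
        // bounds all reduce to "evaluate, then test truthiness".
        return is_truthy(&expr.eval(doc));
    }
    // ... the existing (field, op, value) comparison path ...
    false
}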
.query_text("SELECT id FROM items WHERE qty BETWEEN lo AND hi") + .await + .unwrap(); + assert_eq!(rows.len(), 1); + assert!(rows[0].contains("i1")); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn not_expression_in_where_is_evaluated() { + let server = TestServer::start().await; + + server + .exec( + "CREATE COLLECTION items TYPE DOCUMENT STRICT (\ + id STRING PRIMARY KEY, \ + status STRING NOT NULL)", + ) + .await + .unwrap(); + server + .exec("INSERT INTO items (id, status) VALUES ('a', 'open')") + .await + .unwrap(); + server + .exec("INSERT INTO items (id, status) VALUES ('b', 'closed')") + .await + .unwrap(); + + let rows = server + .query_text("SELECT id FROM items WHERE NOT (status = 'closed')") + .await + .unwrap(); + assert_eq!(rows.len(), 1); + assert!(rows[0].contains("a")); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn in_list_with_expression_element_is_evaluated() { + let server = TestServer::start().await; + + server + .exec( + "CREATE COLLECTION items TYPE DOCUMENT STRICT (\ + id STRING PRIMARY KEY, \ + qty INT NOT NULL)", + ) + .await + .unwrap(); + server + .exec("INSERT INTO items (id, qty) VALUES ('a', 3)") + .await + .unwrap(); + server + .exec("INSERT INTO items (id, qty) VALUES ('b', 7)") + .await + .unwrap(); + server + .exec("INSERT INTO items (id, qty) VALUES ('c', 9)") + .await + .unwrap(); + + // `2 + 1` must be evaluated; today it is silently filtered out of the IN set. + let rows = server + .query_text("SELECT id FROM items WHERE qty IN (2 + 1, 7)") + .await + .unwrap(); + assert_eq!(rows.len(), 2, "IN (2+1, 7) should match qty=3 and qty=7"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn column_arithmetic_in_where_is_evaluated() { + let server = TestServer::start().await; + + server + .exec( + "CREATE COLLECTION items TYPE DOCUMENT STRICT (\ + id STRING PRIMARY KEY, \ + qty INT NOT NULL)", + ) + .await + .unwrap(); + server + .exec("INSERT INTO items (id, qty) VALUES ('i1', 4)") + .await + .unwrap(); + server + .exec("INSERT INTO items (id, qty) VALUES ('i2', 9)") + .await + .unwrap(); + + // `qty + 1 = 5` — arithmetic on a column reference must not fall + // through to match-all. + let rows = server + .query_text("SELECT id FROM items WHERE qty + 1 = 5") + .await + .unwrap(); + assert_eq!(rows.len(), 1); + assert!(rows[0].contains("i1")); +}