From 67a675469c455e5ec29b78cecb5548dca1161832 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 11:00:35 -0400 Subject: [PATCH 01/25] feat(core): inline xsd:decimal encoding primitives Add the wire contract and packing primitives for storing small, exact xsd:decimal values inline in the (OType, ObjKey) pair instead of routing every decimal to the per-(graph, predicate) NumBig arena. - OType::XSD_DECIMAL_INLINE (0x0020) in the reserved embedded range, routed to a new DecodeKind::Decimal. Old binaries route unknown embedded payloads to Sentinel, never to the lossy f64 decimal lane, so the byte layout is not a misdecode hazard on its own. - ObjKind::NUM_DEC (0x15) as the late-materialized binding kind. - ObjKey::encode_decimal / decode_decimal: canonical equality-keyed packing [sign:1 | scale:6 | mantissa:57]. Trailing fractional zeros stripped, integer-valued decimals folded to scale 0, zero canonical, so equal values encode to identical bits. Returns None (arena fallback) when scale > 63 or |mantissa| >= 2^57. Equality-keyed, not order-preserving: this kind is excluded from o_key-order range pushdown. Nothing reads or writes the new encoding yet; resolver, query-constant, decode, and root-version wiring follow. --- fluree-db-core/src/o_type.rs | 15 ++- fluree-db-core/src/value_id.rs | 204 +++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 1 deletion(-) diff --git a/fluree-db-core/src/o_type.rs b/fluree-db-core/src/o_type.rs index bbbb40a14f..f77fef60e4 100644 --- a/fluree-db-core/src/o_type.rs +++ b/fluree-db-core/src/o_type.rs @@ -125,7 +125,14 @@ impl OType { /// Blank node (`_:b{id}`) — `o_key` is the atomic bnode integer. pub const BLANK_NODE: Self = Self(0x001F); - // Tag `00` payload range 0x0020–0x3FFF reserved for future embedded types. + /// `xsd:decimal` stored **inline** as an exact packed `(mantissa, scale)` + /// (see [`ObjKey::encode_decimal`]). 
Distinct from the lossy f64 + /// [`XSD_DECIMAL`](Self::XSD_DECIMAL) lane: this carries the exact value with + /// no arena handle. Only written by new-format index roots; large/high-precision + /// decimals still fall back to the NumBig arena ([`NUM_BIG_OVERFLOW`](Self::NUM_BIG_OVERFLOW)). + pub const XSD_DECIMAL_INLINE: Self = Self(0x0020); + + // Tag `00` payload range 0x0021–0x3FFF reserved for future embedded types. // ── Tag `10` — Fluree-reserved dictionary/arena-backed ───────────── @@ -341,6 +348,7 @@ impl OType { 0x001D => DecodeKind::Duration, 0x001E => DecodeKind::GeoPoint, 0x001F => DecodeKind::BlankNode, + 0x0020 => DecodeKind::Decimal, _ => DecodeKind::Sentinel, // future embedded types } } @@ -414,6 +422,9 @@ pub enum DecodeKind { NumBigArena, /// Spatial arena handle (per-predicate). SpatialArena, + /// Exact inline `xsd:decimal` — o_key is a packed `(sign, scale, mantissa)` + /// (see [`super::value_id::ObjKey::decode_decimal`]). Not arena-backed. + Decimal, } impl DecodeKind { @@ -444,6 +455,7 @@ impl DecodeKind { 21 => Some(Self::VectorArena), 22 => Some(Self::NumBigArena), 23 => Some(Self::SpatialArena), + 24 => Some(Self::Decimal), _ => None, } } @@ -486,6 +498,7 @@ impl fmt::Debug for OType { 0x001D => write!(f, "OType::XSD_DURATION"), 0x001E => write!(f, "OType::GEO_POINT"), 0x001F => write!(f, "OType::BLANK_NODE"), + 0x0020 => write!(f, "OType::XSD_DECIMAL_INLINE"), 0x8000 => write!(f, "OType::XSD_STRING"), 0x8001 => write!(f, "OType::XSD_ANY_URI"), 0x8002 => write!(f, "OType::XSD_NORMALIZED_STRING"), diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index 38b8e0d484..e1caa6c00c 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -116,6 +116,13 @@ impl ObjKind { /// Precision: approximately 0.3mm at the equator. pub const GEO_POINT: Self = Self(0x14); + /// Exact inline `xsd:decimal` — `o_key` is a packed `(sign, scale, mantissa)` + /// (see [`ObjKey::encode_decimal`]). 
Equality-keyed: the payload is canonical + /// but is **not** value-ordered, so this kind must be excluded from any + /// `o_key`-order range pushdown. Distinct from [`NUM_BIG`](Self::NUM_BIG) + /// (arena handle) — inline decimals carry the exact value with no arena. + pub const NUM_DEC: Self = Self(0x15); + /// Get the raw `u8` discriminant. #[inline] pub const fn as_u8(self) -> u8 { @@ -159,6 +166,7 @@ impl fmt::Debug for ObjKind { 0x12 => write!(f, "ObjKind::YearMonthDur"), 0x13 => write!(f, "ObjKind::DayTimeDur"), 0x14 => write!(f, "ObjKind::GeoPoint"), + 0x15 => write!(f, "ObjKind::NumDec"), 0xFF => write!(f, "ObjKind::Max"), n => write!(f, "ObjKind({n:#04x})"), } @@ -189,6 +197,25 @@ const SIGN_FLIP: u64 = 1u64 << 63; /// Sign bit mask for f64 bits. const F64_SIGN_BIT: u64 = 1u64 << 63; +// ---- Inline decimal (NumDec) packing layout: [sign:1 | scale:6 | mantissa:57] ---- + +/// Number of bits the inline-decimal mantissa magnitude occupies (low bits). +const DEC_MANTISSA_BITS: u64 = 57; +/// Mask selecting the 57-bit mantissa magnitude. +const DEC_MANTISSA_MASK: u64 = (1u64 << DEC_MANTISSA_BITS) - 1; +/// Bit offset of the 6-bit scale field. +const DEC_SCALE_SHIFT: u64 = DEC_MANTISSA_BITS; +/// Mask (pre-shift) selecting the 6-bit scale field. +const DEC_SCALE_MASK: u64 = (1u64 << 6) - 1; +/// Maximum representable scale (number of fractional digits). +const DEC_MAX_SCALE: u64 = DEC_SCALE_MASK; // 63 +/// Bit offset of the sign bit (high bit). +const DEC_SIGN_SHIFT: u64 = 63; +/// Largest `|negative scale|` we will fold back into the mantissa before giving +/// up. 10^19 already exceeds the 57-bit mantissa budget, so anything beyond this +/// cannot fit inline regardless. +const DEC_FOLD_EXP_LIMIT: i64 = 19; + /// Error returned when a value cannot be stored in the index. 
#[derive(Debug, Clone, PartialEq)] pub enum ObjKeyError { @@ -287,6 +314,81 @@ impl ObjKey { f64::from_bits(bits) } + // ---- Inline decimal encoding (NumDec) ---- + // + // Exact packed `xsd:decimal`: a canonical `(sign, scale, mantissa)` laid out + // as `[ sign:1 | scale:6 | mantissa:57 ]` in the 64-bit key. The packing is + // *equality-keyed*, not order-preserving — equal values produce identical + // bits, but raw `u64` ordering is NOT numeric ordering, so this kind must be + // excluded from any `o_key`-order range pushdown. + // + // A value is inline-eligible iff, after canonicalization (trailing fractional + // zeros stripped, integer-valued decimals folded to scale 0, zero canonical), + // it has `0 <= scale <= 63` and `|mantissa| < 2^57`. Anything else returns + // `None` and falls back to the NumBig arena, exactly like overflow integers. + + /// Encode a canonical `xsd:decimal` inline, or `None` if it does not fit. + /// + /// Canonicalization guarantees that two numerically-equal decimals (e.g. + /// `1.50` and `1.5`, or `1.00` and `1`) encode to identical bits, so the + /// packed key is a stable fact identity. + pub fn encode_decimal(value: &bigdecimal::BigDecimal) -> Option { + use num_bigint::Sign; + use num_traits::{ToPrimitive, Zero}; + + // Strip trailing fractional zeros; canonicalizes 1.50 -> 1.5, 1.00 -> 1, 100 -> 1E2 etc. + let normalized = value.normalized(); + let (mut mantissa, mut scale) = normalized.as_bigint_and_exponent(); + + if mantissa.is_zero() { + // Canonical zero: sign 0, scale 0, mantissa 0 (so 0, 0.0, -0.00 all match). + return Some(Self(0)); + } + + // A negative scale means the normalized form pushed trailing zeros into the + // exponent (e.g. 100 -> mantissa 1, scale -2). Fold them back into the + // mantissa at scale 0. Bound the exponent first: 10^19 already exceeds the + // 57-bit mantissa budget, so anything past that can never fit inline. 
+ if scale < 0 { + if scale < -DEC_FOLD_EXP_LIMIT { + return None; + } + mantissa *= num_bigint::BigInt::from(10).pow((-scale) as u32); + scale = 0; + } + + if scale > DEC_MAX_SCALE as i64 { + return None; + } + + let (sign, magnitude) = mantissa.into_parts(); + // |mantissa| must fit in 57 bits. + if magnitude.bits() > DEC_MANTISSA_BITS { + return None; + } + let magnitude = magnitude.to_u64()?; // always Some given the bit check above + + let sign_bit = u64::from(sign == Sign::Minus); + let key = (sign_bit << DEC_SIGN_SHIFT) + | ((scale as u64) << DEC_SCALE_SHIFT) + | magnitude; + Some(Self(key)) + } + + /// Decode an inline `xsd:decimal` previously produced by [`encode_decimal`]. + /// + /// [`encode_decimal`]: Self::encode_decimal + pub fn decode_decimal(self) -> bigdecimal::BigDecimal { + let magnitude = self.0 & DEC_MANTISSA_MASK; + let scale = ((self.0 >> DEC_SCALE_SHIFT) & DEC_SCALE_MASK) as i64; + let negative = (self.0 >> DEC_SIGN_SHIFT) & 1 == 1; + let mut mantissa = num_bigint::BigInt::from(magnitude); + if negative { + mantissa = -mantissa; + } + bigdecimal::BigDecimal::from_bigint(mantissa, scale) + } + // ---- Boolean encoding ---- /// Encode a boolean (false = 0, true = 1). @@ -1631,4 +1733,106 @@ mod tests { assert!(!dt.is_float_type(), "{dt} should not be float type"); } } + + // ---- Inline decimal (NumDec) encode/decode ---- + + fn bd(s: &str) -> bigdecimal::BigDecimal { + s.parse().unwrap() + } + + /// Round-trip an inline-eligible decimal: encode must succeed and decode back + /// to the numerically-equal value. 
+ fn assert_decimal_roundtrip(s: &str) { + let v = bd(s); + let key = ObjKey::encode_decimal(&v) + .unwrap_or_else(|| panic!("{s} should be inline-eligible")); + let back = key.decode_decimal(); + assert_eq!(back, v, "round-trip mismatch for {s}: got {back}"); + } + + #[test] + fn decimal_roundtrip_common_values() { + for s in [ + "0", "1", "-1", "19.99", "-19.99", "0.01", "-0.01", "3.14159", "100", + "1000000.5", "-1000000.5", "0.0000001", "12345678901234.56", + ] { + assert_decimal_roundtrip(s); + } + } + + #[test] + fn decimal_zero_is_canonical() { + // All spellings of zero encode to the same key. + let keys: Vec<_> = ["0", "0.0", "-0", "-0.00", "0.000000"] + .iter() + .map(|s| ObjKey::encode_decimal(&bd(s)).unwrap()) + .collect(); + for k in &keys { + assert_eq!(*k, keys[0], "zero spellings must share one key"); + } + assert_eq!(keys[0].decode_decimal(), bd("0")); + } + + #[test] + fn decimal_scale_variants_share_key() { + // 1.50 and 1.5 are the same value -> identical key (equality identity). + assert_eq!( + ObjKey::encode_decimal(&bd("1.50")).unwrap(), + ObjKey::encode_decimal(&bd("1.5")).unwrap(), + ); + // 1.00 and 1 fold to scale 0 -> identical key. + assert_eq!( + ObjKey::encode_decimal(&bd("1.00")).unwrap(), + ObjKey::encode_decimal(&bd("1")).unwrap(), + ); + } + + #[test] + fn decimal_integer_valued_folds_to_scale_zero() { + // Trailing-zero integers (normalized form has negative exponent) fold back. + for s in ["100", "1000", "100000000", "-100"] { + assert_decimal_roundtrip(s); + } + } + + #[test] + fn decimal_max_scale_boundary() { + // 63 fractional digits is the max scale that fits. + let s = format!("0.{}1", "0".repeat(62)); // scale 63 + let v = bd(&s); + let key = ObjKey::encode_decimal(&v).expect("scale 63 should fit"); + assert_eq!(key.decode_decimal(), v); + + // scale 64 does not fit. 
+ let s_over = format!("0.{}1", "0".repeat(63)); // scale 64 + assert!(ObjKey::encode_decimal(&bd(&s_over)).is_none()); + } + + #[test] + fn decimal_mantissa_boundary() { + // |mantissa| just below 2^57 fits; at/above 2^57 does not. + let max_mantissa = (1i128 << 57) - 1; + let fits = bd(&max_mantissa.to_string()); + assert!(ObjKey::encode_decimal(&fits).is_some()); + assert_eq!( + ObjKey::encode_decimal(&fits).unwrap().decode_decimal(), + fits + ); + + let over = bd(&(1i128 << 57).to_string()); + assert!(ObjKey::encode_decimal(&over).is_none()); + + // A fractional value whose mantissa overflows also falls back. + let frac_over = bd("1444115188.07585588"); // mantissa 144411518807585588 > 2^57 + assert!(ObjKey::encode_decimal(&frac_over).is_none()); + } + + #[test] + fn decimal_sign_distinguished() { + let pos = ObjKey::encode_decimal(&bd("19.99")).unwrap(); + let neg = ObjKey::encode_decimal(&bd("-19.99")).unwrap(); + assert_ne!(pos, neg); + assert_eq!(pos.decode_decimal(), bd("19.99")); + assert_eq!(neg.decode_decimal(), bd("-19.99")); + } } From 751ea0abbf19f88a3f8281ef6f667a77db99a5cf Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 11:09:59 -0400 Subject: [PATCH 02/25] fix(binary-index): wire inline decimal decode in decode_value_v3 Adding DecodeKind::Decimal broke exhaustiveness in decode_value_v3, the on-disk (OType, ObjKey) -> FlakeValue decode. Decode is meant to be unconditional (never policy-gated), so wire it now: NUM_DEC keys decode via ObjKey::decode_decimal into FlakeValue::Decimal. Also document the NUM_DEC carve-out in the ObjKey module docs: inline decimals are equality-keyed, not value-ordered, so raw u64 ordering of the packed payload is not numeric ordering and range pushdown must exclude this kind. Other read sites still route the kind through wildcard arms; they are unreachable until the write path emits inline decimals and will be wired in the decode slice. 
--- fluree-db-binary-index/src/read/binary_index_store.rs | 1 + fluree-db-core/src/value_id.rs | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fluree-db-binary-index/src/read/binary_index_store.rs b/fluree-db-binary-index/src/read/binary_index_store.rs index 347717983c..5f1eca98fb 100644 --- a/fluree-db-binary-index/src/read/binary_index_store.rs +++ b/fluree-db-binary-index/src/read/binary_index_store.rs @@ -789,6 +789,7 @@ impl BinaryIndexStore { DecodeKind::Bool => Ok(FlakeValue::Boolean(o_key != 0)), DecodeKind::I64 => Ok(FlakeValue::Long(key.decode_i64())), DecodeKind::F64 => Ok(FlakeValue::Double(key.decode_f64())), + DecodeKind::Decimal => Ok(FlakeValue::Decimal(Box::new(key.decode_decimal()))), DecodeKind::Date => { let days = key.decode_date(); let date = chrono::NaiveDate::from_num_days_from_ce_opt(days + 719_163).unwrap_or( diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index e1caa6c00c..34941934ab 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -15,6 +15,13 @@ //! `NumInt(3)` vs `NumF64(3.0)`) is a query-layer concern resolved via //! multi-scan merge, not an index property. //! +//! **Exception — [`ObjKind::NUM_DEC`]:** inline `xsd:decimal` keys are +//! *equality-keyed*, not value-ordered. Equal values encode to identical bits +//! (so equality, dedup, and joins are correct), but raw `u64` ordering of the +//! packed `(sign, scale, mantissa)` is NOT numeric ordering. Range/`FILTER` +//! pushdown that relies on `o_key` order must therefore exclude this kind. See +//! [`ObjKey::encode_decimal`]. +//! //! [`ValueTypeTag`] is a compact `u8` identifier for XSD/RDF datatypes, used as //! a tie-breaker in index sort keys so that values with the same `(ObjKind, //! 
ObjKey)` but different types (e.g., `xsd:integer 3` vs `xsd:long 3`) From 76ee0ed6b86bad513b215a149b2c6c88a5d179e6 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 11:46:33 -0400 Subject: [PATCH 03/25] feat(core,index): decimal encoding policy derived from root version Introduce DecimalEncoding { ArenaOnly, InlineWhenFits } as the encode-time policy for xsd:decimal, and make the FIR6 root version its carrier. - DecimalEncoding (core): sticky per-root policy. Default ArenaOnly. Decode is always capable of both schemes regardless of policy, so it governs writes only. - ROOT_V6_VERSION_INLINE_DECIMAL (3): the capability signal. A root that inlines decimals is written as v3; the layout is byte-identical to v2. Old binaries refuse a v3 root via the strict version check rather than misdecoding inline rows, giving the 'upgrade code first' safety property. - decimal_encoding_for_version / version_for_decimal_encoding: the two-way mapping. encode() derives the version byte from the policy; decode() accepts v2 and v3 and sets IndexRoot::decimal_encoding accordingly. - IndexRoot gains a sticky decimal_encoding field; all construction sites set ArenaOnly for now. Still inert: no write path emits InlineWhenFits, so every root is v2/arena-only and behavior is bit-for-bit unchanged. Tests cover the version<->policy round trip, byte-identical-except-version invariant, and unknown-version refusal. 
--- fluree-db-api/src/import.rs | 1 + .../src/format/index_root.rs | 100 +++++++++++++++++- fluree-db-core/src/lib.rs | 2 +- fluree-db-core/src/value_id.rs | 42 ++++++++ fluree-db-indexer/src/build/root_assembly.rs | 4 + fluree-db-indexer/src/drop.rs | 1 + fluree-db-indexer/src/gc/collector.rs | 1 + 7 files changed, 148 insertions(+), 3 deletions(-) diff --git a/fluree-db-api/src/import.rs b/fluree-db-api/src/import.rs index ac4aa24481..0c5703f13a 100644 --- a/fluree-db-api/src/import.rs +++ b/fluree-db-api/src/import.rs @@ -5820,6 +5820,7 @@ where garbage: None, sketch_ref: None, ns_split_mode: input.ns_split_mode, + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, }; // Encode and upload FIR6 root. diff --git a/fluree-db-binary-index/src/format/index_root.rs b/fluree-db-binary-index/src/format/index_root.rs index 66b44b96ab..5eb71787e6 100644 --- a/fluree-db-binary-index/src/format/index_root.rs +++ b/fluree-db-binary-index/src/format/index_root.rs @@ -117,8 +117,40 @@ pub const ROOT_V6_MAGIC: &[u8; 4] = b"FIR6"; /// be keyed by `(g_id, p_id, lang_id)` for multi-language full-text indexing. /// Pre-v2 roots are refused outright — operators upgrading must run a full /// reindex before queries resume. +/// +/// Version 3 enables inline `xsd:decimal` encoding ([`DecimalEncoding`]): leaf +/// data may carry [`OType::XSD_DECIMAL_INLINE`] rows. The root layout is +/// byte-identical to v2; the version is the capability signal. Old binaries +/// refuse a v3 root outright (the strict version check below) rather than +/// misdecoding inline-decimal leaf rows, which is the required "upgrade code +/// first" safety property. pub const ROOT_V6_VERSION: u8 = 2; +/// Root format version that enables inline `xsd:decimal` encoding. Written only +/// by a full reindex/import under [`DecimalEncoding::InlineWhenFits`]; see +/// [`IndexRoot::decimal_encoding`]. 
+pub const ROOT_V6_VERSION_INLINE_DECIMAL: u8 = 3; + +/// Derive the decimal-encoding policy from a decoded root format version. +#[inline] +pub const fn decimal_encoding_for_version(version: u8) -> fluree_db_core::DecimalEncoding { + if version >= ROOT_V6_VERSION_INLINE_DECIMAL { + fluree_db_core::DecimalEncoding::InlineWhenFits + } else { + fluree_db_core::DecimalEncoding::ArenaOnly + } +} + +/// The root format version that must be written for a given decimal-encoding +/// policy. Inverse of [`decimal_encoding_for_version`]. +#[inline] +pub const fn version_for_decimal_encoding(enc: fluree_db_core::DecimalEncoding) -> u8 { + match enc { + fluree_db_core::DecimalEncoding::InlineWhenFits => ROOT_V6_VERSION_INLINE_DECIMAL, + fluree_db_core::DecimalEncoding::ArenaOnly => ROOT_V6_VERSION, + } +} + /// Binary index root (`FIR6`). /// /// Contains all sections needed to load an index: dict refs, arena refs, @@ -164,6 +196,12 @@ pub struct IndexRoot { /// this flag on the first post-import write. pub lex_sorted_string_ids: bool, + /// How this root encodes `xsd:decimal` values. Derived from the format + /// version on decode ([`decimal_encoding_for_version`]) and mapped back to + /// the version on encode ([`version_for_decimal_encoding`]). Sticky: + /// incremental writes preserve it; only a full reindex changes it. + pub decimal_encoding: fluree_db_core::DecimalEncoding, + // ── Cumulative commit stats ──────────────────────────────────── pub total_commit_size: u64, pub total_asserts: u64, @@ -481,6 +519,14 @@ impl IndexRoot { const FLAG_HAS_SKETCH: u8 = 1 << 4; const FLAG_LEX_SORTED_STRING_IDS: u8 = 1 << 5; + /// The decimal-encoding policy this root writes under. Equivalent to reading + /// [`decimal_encoding`](Self::decimal_encoding) directly; provided as the + /// stable accessor for encode-path callers. + #[inline] + pub fn decimal_encoding(&self) -> fluree_db_core::DecimalEncoding { + self.decimal_encoding + } + /// Encode to the binary FIR6 wire format. 
/// /// Determinism: namespaces sorted by ns_code, named graphs by g_id, @@ -490,7 +536,9 @@ impl IndexRoot { // ---- Header (24 bytes) ---- buf.extend_from_slice(ROOT_V6_MAGIC); - buf.push(ROOT_V6_VERSION); + // The version byte is the inline-decimal capability signal: a root that + // inlines decimals is written as v3 so old binaries refuse it. + buf.push(version_for_decimal_encoding(self.decimal_encoding)); let flags = (if self.stats.is_some() { Self::FLAG_HAS_STATS } else { @@ -683,9 +731,13 @@ impl IndexRoot { ))); } let version = data[4]; - if version != ROOT_V6_VERSION { + // Accept v2 (arena-only) and v3 (inline-decimal-capable); the layouts are + // byte-identical, the version only signals whether inline-decimal leaf + // rows may appear. Any other version is refused. + if version != ROOT_V6_VERSION && version != ROOT_V6_VERSION_INLINE_DECIMAL { return Err(io_err(&format!("root v6: unsupported version {version}"))); } + let decimal_encoding = decimal_encoding_for_version(version); let flags = data[5]; let mut pos = 8; // skip pad(2) @@ -898,6 +950,7 @@ impl IndexRoot { subject_watermarks, string_watermark, lex_sorted_string_ids, + decimal_encoding, total_commit_size, total_asserts, total_retracts, @@ -1186,6 +1239,7 @@ mod tests { garbage: None, sketch_ref: None, ns_split_mode: fluree_db_core::ns_encoding::NsSplitMode::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, } } @@ -1199,6 +1253,10 @@ mod tests { assert_eq!(bytes[5], 0); // no optional sections let decoded = IndexRoot::decode(&bytes).unwrap(); + assert_eq!( + decoded.decimal_encoding(), + fluree_db_core::DecimalEncoding::ArenaOnly + ); assert_eq!(decoded.ledger_id, "test:main"); assert_eq!(decoded.index_t, 42); assert_eq!(decoded.base_t, 0); @@ -1211,6 +1269,44 @@ mod tests { assert!(decoded.stats.is_none()); } + #[test] + fn fir6_decimal_encoding_version_round_trip() { + use fluree_db_core::DecimalEncoding; + + // Arena-only root is written as v2 and decodes back to 
ArenaOnly. + let arena = minimal_root_v6(); + let arena_bytes = arena.encode(); + assert_eq!(arena_bytes[4], ROOT_V6_VERSION); + assert_eq!( + IndexRoot::decode(&arena_bytes).unwrap().decimal_encoding(), + DecimalEncoding::ArenaOnly + ); + + // Inline-decimal root is written as v3 (the capability signal) and + // decodes back to InlineWhenFits. + let mut inline = minimal_root_v6(); + inline.decimal_encoding = DecimalEncoding::InlineWhenFits; + let inline_bytes = inline.encode(); + assert_eq!(inline_bytes[4], ROOT_V6_VERSION_INLINE_DECIMAL); + assert_eq!( + IndexRoot::decode(&inline_bytes).unwrap().decimal_encoding(), + DecimalEncoding::InlineWhenFits + ); + + // The two roots are byte-identical except for the version byte: the v3 + // capability is purely a header signal, not a layout change. + assert_eq!(arena_bytes[0..4], inline_bytes[0..4]); // magic + assert_eq!(arena_bytes[5..], inline_bytes[5..]); // everything after version + } + + #[test] + fn fir6_unknown_version_refused() { + let mut bytes = minimal_root_v6().encode(); + bytes[4] = 99; // neither v2 nor v3 + let err = IndexRoot::decode(&bytes).unwrap_err(); + assert!(err.to_string().contains("unsupported version")); + } + #[test] fn fir6_round_trip_with_default_graph() { let mut root = minimal_root_v6(); diff --git a/fluree-db-core/src/lib.rs b/fluree-db-core/src/lib.rs index fb4baaf78b..859d8c5b58 100644 --- a/fluree-db-core/src/lib.rs +++ b/fluree-db-core/src/lib.rs @@ -194,7 +194,7 @@ pub use value::{ parse_decimal, parse_decimal_string, parse_double, parse_integer, parse_integer_string, FlakeValue, GeoPointBits, }; -pub use value_id::{ObjKey, ObjKeyError, ObjKind, ObjPair, ValueTypeTag}; +pub use value_id::{DecimalEncoding, ObjKey, ObjKeyError, ObjKind, ObjPair, ValueTypeTag}; /// Prelude module for convenient imports of storage traits and common types. 
/// diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index 34941934ab..63cf09047a 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -186,6 +186,48 @@ impl fmt::Display for ObjKind { } } +// ============================================================================ +// DecimalEncoding +// ============================================================================ + +/// How an index root encodes `xsd:decimal` object values. +/// +/// This is an **encode-time** policy derived from the active index root's +/// format version — it is never user-facing. Decode is always capable of both +/// schemes regardless of this policy, so new code reading any root is fully +/// backward-compatible. +/// +/// The policy is **sticky per root and preserved across incremental writes**: +/// extending an `ArenaOnly` root keeps writing decimals to the NumBig arena; +/// extending an `InlineWhenFits` root keeps inlining. Only a full reindex +/// changes the policy, because only a full reindex rewrites existing facts under +/// a new `(o_type, o_key)` identity. (Contrast `lex_sorted_string_ids`, which is +/// *cleared* on incremental writes; inline decimals are not broken by appends.) +/// +/// The hard invariant this protects: `(o_type, o_key)` is persisted fact +/// identity, so a single root must use **one** decimal encoding for all +/// inline-eligible values — never a mix — or a retract computed under one scheme +/// would miss an assert stored under the other. +#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)] +pub enum DecimalEncoding { + /// All `xsd:decimal` values route to the per-`(graph, predicate)` NumBig + /// arena. The behavior of every pre-inline index root, bit-for-bit unchanged. + #[default] + ArenaOnly, + /// Inline-eligible decimals (see [`ObjKey::encode_decimal`]) encode inline + /// under [`ObjKind::NUM_DEC`]; values that do not fit fall back to the arena, + /// exactly like overflow integers. 
+ InlineWhenFits, +} + +impl DecimalEncoding { + /// True if this policy may emit inline decimal keys on write. + #[inline] + pub const fn inlines(self) -> bool { + matches!(self, Self::InlineWhenFits) + } +} + // ============================================================================ // ObjKey // ============================================================================ diff --git a/fluree-db-indexer/src/build/root_assembly.rs b/fluree-db-indexer/src/build/root_assembly.rs index 68febc37ef..0cb62ae216 100644 --- a/fluree-db-indexer/src/build/root_assembly.rs +++ b/fluree-db-indexer/src/build/root_assembly.rs @@ -291,6 +291,10 @@ pub(crate) async fn encode_and_write_root_v6( prev_index: None, garbage: None, sketch_ref: inputs.sketch_ref, + // Sticky per root: extending an inline-decimal root keeps inlining, an + // arena-only root keeps the arena. Until the write path emits inline + // decimals, every root is arena-only. + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, }; // `IndexStats.size` is defined as total commit data size (bytes) for the ledger. 
diff --git a/fluree-db-indexer/src/drop.rs b/fluree-db-indexer/src/drop.rs index e5f4ecc632..e2ebd5921c 100644 --- a/fluree-db-indexer/src/drop.rs +++ b/fluree-db-indexer/src/drop.rs @@ -263,6 +263,7 @@ mod tests { sketch_ref: None, o_type_table: IndexRoot::build_o_type_table(&[], &[]), ns_split_mode: fluree_db_core::ns_encoding::NsSplitMode::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, }; root.encode() } diff --git a/fluree-db-indexer/src/gc/collector.rs b/fluree-db-indexer/src/gc/collector.rs index f2377654ab..764bc21ad6 100644 --- a/fluree-db-indexer/src/gc/collector.rs +++ b/fluree-db-indexer/src/gc/collector.rs @@ -412,6 +412,7 @@ mod tests { sketch_ref: None, o_type_table: IndexRoot::build_o_type_table(&[], &[]), ns_split_mode: fluree_db_core::ns_encoding::NsSplitMode::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, }; root.encode() } From 7028e17cdff84ac2fb5e224c46464c57560f5c04 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 12:09:48 -0400 Subject: [PATCH 04/25] feat(indexer): resolver emits inline decimals under InlineWhenFits Wire the decimal-encoding policy through the resolve path so the write side honors it. - OTypeRegistry maps ObjKind::NUM_DEC -> OType::XSD_DECIMAL_INLINE, the inline counterpart of NUM_BIG -> NUM_BIG_OVERFLOW. - CommitResolver and SharedResolverState carry a sticky decimal_encoding (default ArenaOnly) with a setter. The DecimalStr arms (both the commit and chunk resolvers) try ObjKey::encode_decimal first under InlineWhenFits and fall back to the NumBig arena when the value doesn't fit; under ArenaOnly they keep today's arena path. - SharedResolverState::from_index_root derives the policy from the base root being extended, so an incremental rebuild inherits the root's encoding and never mixes inline and arena under one (o_type, o_key) identity. 
Still inert at the system level: no writer sets InlineWhenFits and every root is built ArenaOnly, so production behavior is unchanged. Tested in isolation by setting the policy directly: small decimals -> NUM_DEC (decode round-trips), oversized -> NUM_BIG fallback, ArenaOnly -> NUM_BIG. --- fluree-db-core/src/o_type_registry.rs | 5 + .../src/run_index/resolve/resolver.rs | 126 +++++++++++++++--- 2 files changed, 114 insertions(+), 17 deletions(-) diff --git a/fluree-db-core/src/o_type_registry.rs b/fluree-db-core/src/o_type_registry.rs index e918987f1d..e0616cfbde 100644 --- a/fluree-db-core/src/o_type_registry.rs +++ b/fluree-db-core/src/o_type_registry.rs @@ -84,6 +84,7 @@ impl OTypeRegistry { ObjKind::VECTOR_ID => OType::VECTOR, ObjKind::JSON_ID => OType::RDF_JSON, ObjKind::NUM_BIG => OType::NUM_BIG_OVERFLOW, + ObjKind::NUM_DEC => OType::XSD_DECIMAL_INLINE, ObjKind::G_YEAR => OType::XSD_G_YEAR, ObjKind::G_YEAR_MONTH => OType::XSD_G_YEAR_MONTH, ObjKind::G_MONTH => OType::XSD_G_MONTH, @@ -308,6 +309,10 @@ mod tests { reg.resolve(ObjKind::NUM_BIG, DatatypeDictId::DECIMAL, 0), OType::NUM_BIG_OVERFLOW ); + assert_eq!( + reg.resolve(ObjKind::NUM_DEC, DatatypeDictId::DECIMAL, 0), + OType::XSD_DECIMAL_INLINE + ); assert_eq!( reg.resolve(ObjKind::GEO_POINT, DatatypeDictId::STRING, 0), OType::GEO_POINT diff --git a/fluree-db-indexer/src/run_index/resolve/resolver.rs b/fluree-db-indexer/src/run_index/resolve/resolver.rs index 2b0d638ac6..049c2e22a8 100644 --- a/fluree-db-indexer/src/run_index/resolve/resolver.rs +++ b/fluree-db-indexer/src/run_index/resolve/resolver.rs @@ -80,6 +80,10 @@ pub struct CommitResolver { /// be collected. Empty by default so the `@fulltext`-datatype path keeps /// working without any config setup. fulltext_hook_config: crate::fulltext_hook::FulltextHookConfig, + /// Decimal-encoding policy for this run, derived from the root being + /// extended. 
`ArenaOnly` by default so behavior is unchanged until a caller + /// opts a run into inline decimals. + decimal_encoding: fluree_db_core::DecimalEncoding, } impl CommitResolver { @@ -92,9 +96,15 @@ impl CommitResolver { spatial_hook: None, fulltext_hook: None, fulltext_hook_config: crate::fulltext_hook::FulltextHookConfig::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::default(), } } + /// Set the decimal-encoding policy for this run (sticky per root). + pub fn set_decimal_encoding(&mut self, enc: fluree_db_core::DecimalEncoding) { + self.decimal_encoding = enc; + } + /// Set the ID-based stats hook for per-op stats collection. pub fn set_stats_hook(&mut self, hook: crate::stats::IdStatsHook) { self.stats_hook = Some(hook); @@ -951,17 +961,29 @@ impl CommitResolver { } } RawObject::DecimalStr(s) => { - // All typed xsd:decimal values route to NumBig by default match s.parse::() { Ok(bd) => { - let handle = dicts - .numbigs - .entry(g_id) - .or_default() - .entry(p_id) - .or_default() - .get_or_insert_bigdec(&bd); - Ok((ObjKind::NUM_BIG, ObjKey::encode_u32_id(handle))) + // Under InlineWhenFits, small/exact decimals encode inline; + // large/high-precision values fall back to the arena. Under + // ArenaOnly every decimal routes to the arena. + let inline = self + .decimal_encoding + .inlines() + .then(|| ObjKey::encode_decimal(&bd)) + .flatten(); + match inline { + Some(key) => Ok((ObjKind::NUM_DEC, key)), + None => { + let handle = dicts + .numbigs + .entry(g_id) + .or_default() + .entry(p_id) + .or_default() + .get_or_insert_bigdec(&bd); + Ok((ObjKind::NUM_BIG, ObjKey::encode_u32_id(handle))) + } + } } Err(_) => { // Cannot parse as BigDecimal -- store as string @@ -1115,6 +1137,10 @@ pub struct SharedResolverState { /// for every `rdfs:subClassOf` / `rdfs:subPropertyOf` user-data op so rebuild /// can populate `IndexSchema` in the FIR6 root. 
pub schema_hook: Option, + /// Decimal-encoding policy for this rebuild, derived from the root being + /// extended. `ArenaOnly` by default so behavior is unchanged until a caller + /// opts the rebuild into inline decimals. + pub decimal_encoding: fluree_db_core::DecimalEncoding, } impl SharedResolverState { @@ -1161,6 +1187,7 @@ impl SharedResolverState { fulltext_hook: None, fulltext_hook_config: crate::fulltext_hook::FulltextHookConfig::default(), schema_hook: None, + decimal_encoding: fluree_db_core::DecimalEncoding::default(), } } @@ -1288,6 +1315,9 @@ impl SharedResolverState { fulltext_hook: None, fulltext_hook_config: crate::fulltext_hook::FulltextHookConfig::default(), schema_hook: None, + // Sticky: an incremental rebuild inherits the base root's policy so + // it never mixes inline and arena encodings under one identity. + decimal_encoding: root.decimal_encoding(), }) } @@ -1736,14 +1766,24 @@ impl SharedResolverState { } RawObject::DecimalStr(s) => match s.parse::<BigDecimal>() { Ok(bd) => { - let handle = self - .numbigs - .entry(g_id) - .or_default() - .entry(p_id) - .or_default() - .get_or_insert_bigdec(&bd); - Ok((ObjKind::NUM_BIG, ObjKey::encode_u32_id(handle))) + let inline = self + .decimal_encoding + .inlines() + .then(|| ObjKey::encode_decimal(&bd)) + .flatten(); + match inline { + Some(key) => Ok((ObjKind::NUM_DEC, key)), + None => { + let handle = self + .numbigs + .entry(g_id) + .or_default() + .entry(p_id) + .or_default() + .get_or_insert_bigdec(&bd); + Ok((ObjKind::NUM_BIG, ObjKey::encode_u32_id(handle))) + } + } } Err(_) => { let id = chunk.strings.get_or_insert(s.as_bytes()); @@ -2406,6 +2446,58 @@ mod tests { assert_eq!(collector.records.len(), 3); } + #[test] + fn resolve_decimal_inline_vs_arena() { + use fluree_db_core::value_id::{ObjKey, ObjKind}; + use fluree_db_core::DecimalEncoding; + + // Resolve a single decimal-valued flake under the given policy and return + // the emitted record's (o_kind, o_key).
+ let resolve = |dec: &str, enc: DecimalEncoding| -> (u8, u64) { + let flake = Flake::new( + Sid::new(101, "Item"), + Sid::new(101, "price"), + FlakeValue::Decimal(Box::new(dec.parse().unwrap())), + Sid::new(2, "decimal"), + 1, + true, + None, + ); + let blob = build_test_blob(&[flake], 1); + let commit_ops = load_commit_ops(&blob).unwrap(); + let mut dicts = GlobalDicts::new_memory("test:main"); + let mut resolver = CommitResolver::new(); + resolver + .ns_prefixes + .insert(101, "http://example.org/".to_string()); + resolver.set_decimal_encoding(enc); + let mut collector = RecordCollector::new(); + resolver + .resolve_commit_ops(&commit_ops, &mut dicts, &mut collector) + .unwrap(); + assert_eq!(collector.records.len(), 1); + let r = &collector.records[0]; + (r.o_kind, r.o_key) + }; + + // ArenaOnly: every decimal routes to the NumBig arena (handle in o_key). + let (kind, _) = resolve("19.99", DecimalEncoding::ArenaOnly); + assert_eq!(kind, ObjKind::NUM_BIG.as_u8()); + + // InlineWhenFits: a small decimal encodes inline and decodes back exactly. + let (kind, key) = resolve("19.99", DecimalEncoding::InlineWhenFits); + assert_eq!(kind, ObjKind::NUM_DEC.as_u8()); + assert_eq!( + ObjKey::from_u64(key).decode_decimal(), + "19.99".parse::<BigDecimal>().unwrap() + ); + + // InlineWhenFits: a value too large to fit (mantissa >= 2^57) falls back + // to the arena, exactly like overflow integers. + let (kind, _) = resolve("144115188075855872", DecimalEncoding::InlineWhenFits); + assert_eq!(kind, ObjKind::NUM_BIG.as_u8()); + } + #[test] fn test_resolve_ref_and_dedup() { let flakes = vec![ From bac556041c9088949598cf03a88832962d9de74f Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 12:25:23 -0400 Subject: [PATCH 05/25] feat(query): encode decimal query constants under the root's policy Query constants must encode the same way as stored rows or equality lookups and prefilters miss. Thread the loaded root's decimal policy through constant encoding.
- BinaryIndexStore mirrors the root's decimal_encoding (like lex_sorted_string_ids) and exposes it via decimal_encoding(). - value_to_otype_okey: under InlineWhenFits a fitting decimal constant encodes inline (XSD_DECIMAL_INLINE) to match the stored inline row; otherwise it falls back to the NumBig arena handle lookup, as before. - value_to_otype_okey_simple gains a Decimal arm (previously Unsupported): an inline-eligible decimal narrows the bound-object prefilter with no arena round-trip (issue #1328). Arena decimals stay Unsupported (un-narrowed scan, never NotFound) since this helper has no (graph, predicate) context. Inert under ArenaOnly: every loaded root is arena-only today, so decimal constants take the unchanged arena path. --- .../src/read/binary_index_store.rs | 12 ++++++++ fluree-db-query/src/binary_scan.rs | 30 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/fluree-db-binary-index/src/read/binary_index_store.rs b/fluree-db-binary-index/src/read/binary_index_store.rs index 5f1eca98fb..053072dd36 100644 --- a/fluree-db-binary-index/src/read/binary_index_store.rs +++ b/fluree-db-binary-index/src/read/binary_index_store.rs @@ -207,6 +207,9 @@ pub struct BinaryIndexStore { base_t: i64, language_tags: Vec, lex_sorted_string_ids: bool, + /// Decimal-encoding policy of the loaded root. Governs how query constants + /// encode so they match stored rows (inline vs NumBig arena). + decimal_encoding: fluree_db_core::DecimalEncoding, /// Ledger-fixed split mode for canonical IRI encoding. /// Set from the snapshot's `ns_split_mode` via `set_ns_split_mode()`. 
ns_split_mode: NsSplitMode, @@ -333,6 +336,7 @@ impl BinaryIndexStore { base_t: root.base_t, language_tags: root.language_tags.clone(), lex_sorted_string_ids: root.lex_sorted_string_ids, + decimal_encoding: root.decimal_encoding(), ns_split_mode: root.ns_split_mode, ns_split_mode_set: true, }) @@ -362,6 +366,13 @@ impl BinaryIndexStore { self.lex_sorted_string_ids } + /// The loaded root's decimal-encoding policy. Query constants must encode + /// under this policy so they match stored `(o_type, o_key)` rows. + #[inline] + pub fn decimal_encoding(&self) -> fluree_db_core::DecimalEncoding { + self.decimal_encoding + } + /// Get the branch manifest for a graph + sort order. pub fn branch_for_order( &self, @@ -2813,6 +2824,7 @@ mod tests { base_t: 0, language_tags: Vec::new(), lex_sorted_string_ids: false, + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, ns_split_mode: NsSplitMode::default(), ns_split_mode_set: true, } diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index 875e95c29a..fe4c619266 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -2674,7 +2674,18 @@ fn value_to_otype_okey( } find_numbig_okey(val, store, numbig_ctx) } - FlakeValue::Decimal(_) => find_numbig_okey(val, store, numbig_ctx), + FlakeValue::Decimal(bd) => { + // Mirror the resolver exactly: under InlineWhenFits a decimal that + // fits is stored inline (XSD_DECIMAL_INLINE), so the constant must + // encode the same way to match the stored row; values that don't fit + // (and every decimal under ArenaOnly) live in the NumBig arena. 
+ if store.decimal_encoding().inlines() { + if let Some(key) = ObjKey::encode_decimal(bd) { + return Ok((OType::XSD_DECIMAL_INLINE, key.as_u64())); + } + } + find_numbig_okey(val, store, numbig_ctx) + } // Not handled: Vector (arena + HNSW identity; raw-merge is the // intended lane) and generic Duration (its V3 decode is a stub — // the raw flake preserves the value, the binary row would not). @@ -3028,6 +3039,23 @@ pub(crate) fn value_to_otype_okey_simple( OType::XSD_G_MONTH_DAY, ObjKey::encode_g_month_day(g.month(), g.day()).as_u64(), )), + FlakeValue::Decimal(bd) => { + // An inline-eligible decimal under InlineWhenFits has a + // self-describing key, so the prefilter narrows with no arena + // round-trip (issue #1328). Arena decimals (too large, or any + // decimal under ArenaOnly) need a per-(graph, predicate) handle this + // helper has no context for, so leave the scan un-narrowed + // (Unsupported) — never NotFound, since the value may still exist. + if store.decimal_encoding().inlines() { + if let Some(key) = ObjKey::encode_decimal(bd) { + return Ok((OType::XSD_DECIMAL_INLINE, key.as_u64())); + } + } + Err(Error::new( + ErrorKind::Unsupported, + "arena decimal not encodable without (graph, predicate) context", + )) + } _ => Err(std::io::Error::new( std::io::ErrorKind::Unsupported, format!("unsupported FlakeValue variant for V6 fast-path: {val:?}"), From b93bde9a4ad463e4418ea0d247e953623d58d1bc Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 12:35:12 -0400 Subject: [PATCH 06/25] feat(query): materialize inline decimals at all decode sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete the decode side for inline xsd:decimal. 
The strategy is to materialize NUM_DEC to FlakeValue::Decimal everywhere rather than carrying it as an EncodedLit: inline decimals decode cheaply from o_key alone (no arena, unlike NUM_BIG), so materialization is not a cost, and it keeps them on the ordinary value path where equality/aggregate/sort already compare by canonical BigDecimal against decoded sources (VALUES, BIND, novelty, arena decimals). - late_materialized_object_binding declines NUM_DEC (returns None) so callers materialize via decode_value/decode_value_v3. - Verified every other decode site is already correct: join probe/bounds/ batched paths fall through to decode_value_v3; spool remap leaves the self-describing o_key untouched; decode_value_from_kind resolves NUM_DEC -> XSD_DECIMAL_INLINE then decodes; export's NotFound fallback only covers dict-backed kinds. - Confirmed the numeric MIN/MAX and scalar-agg fast paths exclude XSD_DECIMAL_INLINE via their is_numeric() gate — which is also required for correctness, since inline o_key order is not value order. --- fluree-db-query/src/object_binding.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fluree-db-query/src/object_binding.rs b/fluree-db-query/src/object_binding.rs index 41e8fd1f18..a588a01542 100644 --- a/fluree-db-query/src/object_binding.rs +++ b/fluree-db-query/src/object_binding.rs @@ -169,6 +169,13 @@ pub(crate) fn late_materialized_object_binding( t, }) } + // Inline decimals decode cheaply from `o_key` alone (no arena, unlike + // NUM_BIG), so we decline the encoded fast path and let the caller + // materialize a `FlakeValue::Decimal`. That keeps them on the ordinary + // value path for equality/aggregate surfaces, where they compare by + // canonical BigDecimal against decoded sources (VALUES, BIND, novelty, + // and arena-backed decimals on other roots). 
+ DecodeKind::Decimal => None, _ => None, } } @@ -439,6 +446,25 @@ mod tests { )); } + #[test] + fn late_materialized_object_binding_declines_inline_decimal() { + // Inline decimals decode cheaply, so the encoded fast path declines and + // the caller materializes a FlakeValue::Decimal — they never become an + // EncodedLit (which would need every equality surface to learn NUM_DEC). + use fluree_db_core::value_id::ObjKey; + let binding = late_materialized_object_binding( + OType::XSD_DECIMAL_INLINE.as_u16(), + ObjKey::encode_decimal(&"19.99".parse().unwrap()) + .unwrap() + .as_u64(), + 7, + 0, + u32::MAX, + None, + ); + assert!(binding.is_none()); + } + #[test] fn late_materialized_object_binding_keeps_datetime_encoded() { let binding = late_materialized_object_binding( From d5475e0b8d52ba73f477a626c368cc961b65d58b Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 12:48:04 -0400 Subject: [PATCH 07/25] feat(query): exclude inline decimals from o_key-order pushdown Inline decimals are equality-keyed, not value-ordered, so any fast path that compares or orders by o_key must exclude XSD_DECIMAL_INLINE or it would return wrong ranges/order/counts once inline decimals are written. - is_post_desc_orderable already excludes XSD_DECIMAL_INLINE (0x0020 is outside the is_numeric/is_temporal ranges); documented the exclusion so it is not later 'fixed' into the orderable set. This gates the reverse-POST ORDER BY LIMIT fast path. - fast_count numeric-compare overlay lane: XSD_DECIMAL_INLINE now joins the unsupported-numeric set (defer) instead of falling through to the non-numeric 'no match' arm, which would have undercounted COUNT(?o cmp k) over inline decimals. The base lane already deferred for any non integer/double o_type. - fast_star_const_order_topk numeric '>' filter now declines to its fallback when a row carries a numeric o_type it can't compare by o_key (inline decimal or arena NUM_BIG), instead of silently dropping the row. 
This also closes the pre-existing arena-decimal gap in that benchmark fast path. MIN/MAX and scalar-agg numeric fast paths already exclude it via is_numeric. --- fluree-db-query/src/fast_count.rs | 17 ++++++-- fluree-db-query/src/fast_path_common.rs | 5 ++- .../src/fast_star_const_order_topk.rs | 39 ++++++++++++++++--- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index cb329fd9b2..e7b091aa49 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -390,7 +390,10 @@ fn count_rows_for_predicate_numeric_compare_post( /// arena-keyed NUM_BIG): rows of these kinds force the count to defer. fn otype_unsupported_numeric(raw: u16) -> bool { let ot = OType::from_u16(raw); - (ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW) + // XSD_DECIMAL_INLINE is numeric but equality-keyed (o_key is not value + // ordered), so it can't be compared by o_key here — treat it as unsupported + // (defer), exactly like arena NUM_BIG and the non-canonical integer widths. 
+ (ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW || ot == OType::XSD_DECIMAL_INLINE) && !matches!(ot, OType::XSD_INTEGER | OType::XSD_DOUBLE) } @@ -660,7 +663,12 @@ fn count_numeric_compare_overlay_parallel( OType::XSD_INTEGER if tk_int.is_none() => return Ok(None), OType::XSD_DOUBLE if tk_dbl.is_none() => return Ok(None), OType::XSD_INTEGER | OType::XSD_DOUBLE => {} - ot if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => return Ok(None), + ot if ot.is_numeric() + || ot == OType::NUM_BIG_OVERFLOW + || ot == OType::XSD_DECIMAL_INLINE => + { + return Ok(None) + } _ => {} } } else if otype_unsupported_numeric(min_ot) || otype_unsupported_numeric(max_ot) { @@ -687,7 +695,10 @@ fn count_numeric_compare_overlay_parallel( let tk = match ot { OType::XSD_INTEGER => tk_int, OType::XSD_DOUBLE => tk_dbl, - _ if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => { + _ if ot.is_numeric() + || ot == OType::NUM_BIG_OVERFLOW + || ot == OType::XSD_DECIMAL_INLINE => + { saw_unsupported_numeric.store(true, std::sync::atomic::Ordering::Relaxed); return false; } diff --git a/fluree-db-query/src/fast_path_common.rs b/fluree-db-query/src/fast_path_common.rs index 3b8e7a0fc0..e512b5aede 100644 --- a/fluree-db-query/src/fast_path_common.rs +++ b/fluree-db-query/src/fast_path_common.rs @@ -283,7 +283,10 @@ pub fn cursor_projection_otype_okey() -> ColumnProjection { /// by insertion order, not lexicographic value order; /// - lang strings (tag `11`); /// - `GEO_POINT` (packed lat/long — not a linear value order) and `BLANK_NODE`; -/// - overflow big numerics / JSON / vector arena handles (equality-only). +/// - overflow big numerics / JSON / vector arena handles (equality-only); +/// - inline decimals (`XSD_DECIMAL_INLINE`, 0x0020): the packed +/// `(sign, scale, mantissa)` is equality-keyed, NOT value-ordered, so it is +/// outside the `is_numeric` range above and must never be admitted here. 
/// /// Within one `o_type`, this equals the SPARQL `ORDER BY` order; mixing /// `o_type`s under one predicate is rejected by the operator at runtime. diff --git a/fluree-db-query/src/fast_star_const_order_topk.rs b/fluree-db-query/src/fast_star_const_order_topk.rs index 23f97d557a..4e7004bec0 100644 --- a/fluree-db-query/src/fast_star_const_order_topk.rs +++ b/fluree-db-query/src/fast_star_const_order_topk.rs @@ -75,14 +75,20 @@ pub fn star_const_ordered_limit_operator( } // Apply numeric existence filter: keep subjects with any numeric value satisfying the threshold. - let filtered_subjects = filter_subjects_by_numeric_gt( + // `None` ⇒ a row carried a numeric o_type we can't compare by o_key + // (inline decimal / arena big numeric); decline so the fallback path + // evaluates the filter correctly. + let Some(filtered_subjects) = filter_subjects_by_numeric_gt( store, g_id, numeric_p_id, &candidates, ctx.to_t, &numeric_threshold, - )?; + )? + else { + return Ok(None); + }; if filtered_subjects.is_empty() { return Ok(Some(empty_batch(schema.clone())?)); } @@ -329,6 +335,13 @@ where Ok(()) } +/// Returns `Some(subjects)` whose value satisfies `> threshold`, or `None` to +/// decline the fast path (the caller falls back) when a row carries a numeric +/// o_type this lane can't compare by `o_key` — only `XSD_INTEGER`/`XSD_DOUBLE` +/// are o_key-order-comparable. Equality-keyed inline decimals +/// (`XSD_DECIMAL_INLINE`) and arena `NUM_BIG_OVERFLOW` would be silently +/// dropped by a naive `_ => false`, undercounting the filter; declining keeps +/// the result correct via the general path. fn filter_subjects_by_numeric_gt( store: &Arc, g_id: GraphId, @@ -336,12 +349,12 @@ fn filter_subjects_by_numeric_gt( subjects_sorted: &[u64], to_t: i64, threshold: &FlakeValue, -) -> Result> { +) -> Result>> { // Only support numeric thresholds used in benchmark filters. 
let (thr_i, thr_d) = match threshold { FlakeValue::Long(n) => (*n, *n as f64), FlakeValue::Double(d) => (*d as i64, *d), - _ => return Ok(Vec::new()), + _ => return Ok(Some(Vec::new())), }; let thr_i_key = fluree_db_core::value_id::ObjKey::encode_i64(thr_i).as_u64(); let thr_d_key = fluree_db_core::value_id::ObjKey::encode_f64(thr_d) @@ -349,6 +362,7 @@ fn filter_subjects_by_numeric_gt( .as_u64(); let mut keep: FxHashSet = FxHashSet::default(); + let mut saw_uncomparable_numeric = false; for_each_subject_row_psot( store, g_id, @@ -360,6 +374,17 @@ fn filter_subjects_by_numeric_gt( let over_threshold = match ot { OType::XSD_INTEGER => batch.o_key.get(i) > thr_i_key, OType::XSD_DOUBLE => batch.o_key.get(i) > thr_d_key, + // Numeric but not o_key-comparable (inline decimals, arena big + // numerics, other integer widths/floats): can't decide here. + _ if ot.is_numeric() + || ot == OType::NUM_BIG_OVERFLOW + || ot == OType::XSD_DECIMAL_INLINE => + { + saw_uncomparable_numeric = true; + false + } + // Genuinely non-numeric object: `?o > number` is a type + // mismatch, so it is correctly a non-match. _ => false, }; if over_threshold { @@ -369,9 +394,13 @@ fn filter_subjects_by_numeric_gt( }, )?; + if saw_uncomparable_numeric { + return Ok(None); + } + let mut out: Vec = keep.into_iter().collect(); out.sort_unstable(); - Ok(out) + Ok(Some(out)) } fn collect_label_pairs( From ef09dccd1c5bf6e45db23f7f54ef649c8d9f753d Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 13:09:03 -0400 Subject: [PATCH 08/25] feat(indexer,core): enable inline decimals on full reindex (v3 roots) Make the inline-decimal format live: a full rebuild now writes a v3 root under DecimalEncoding::InlineWhenFits, so small exact decimals are stored inline and only large/high-precision values fall back to the NumBig arena. Existing ledgers keep running on their current format and adopt inline decimals on their next full reindex. 
Single-source invariant: the full-rebuild resolver policy and the output root version come from one value (shared.decimal_encoding threaded into Fir6Inputs), so a reindex can never inline-encode while writing a v2 root. Incremental builds inherit the base root's policy (from_index_root for the resolver, from_old_root preserves decimal_encoding for the output), staying sticky. Bulk import stays arena-only for now (internally consistent; inlines on first reindex). Decode/serialize sites completed for the new format: - db.rs core snapshot metadata decoder accepts v3 (header layout unchanged). - build_o_type_table maps XSD_DECIMAL_INLINE -> xsd:decimal. - resolve_datatype_sid returns xsd:decimal for XSD_DECIMAL_INLINE, so decoded inline decimals carry the correct datatype on output. Tests: end-to-end round-trip across the inline/arena boundary, equality-constant match (issue #1328 narrowing), and a novelty-vs-inline differential proving the two representations are observably identical by value and datatype. 
--- fluree-db-api/tests/it_decimal_exactness.rs | 243 ++++++++++++++++++ .../src/format/index_root.rs | 20 +- .../src/read/binary_index_store.rs | 1 + fluree-db-core/src/db.rs | 14 +- fluree-db-indexer/src/build/rebuild.rs | 7 + fluree-db-indexer/src/build/root_assembly.rs | 11 +- 6 files changed, 279 insertions(+), 17 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 91784679e7..8f01184d4e 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -889,3 +889,246 @@ async fn sparql_delete_data_decimal_retracts_exactly() { "deleted decimal fact must not survive" ); } + +// ============================================================================= +// Inline xsd:decimal encoding (v3 root format) +// ============================================================================= + +/// Run a full rebuild, publish the new index, and return the decoded index root +/// so tests can assert the on-disk decimal-encoding format. 
+async fn full_rebuild_publish_decode_root( + fluree: &fluree_db_api::Fluree, + ledger_id: &str, +) -> fluree_db_binary_index::format::index_root::IndexRoot { + use fluree_db_core::storage::ContentStore; + let record = fluree + .nameservice() + .lookup(ledger_id) + .await + .expect("nameservice lookup") + .expect("ledger record"); + let result = fluree_db_indexer::rebuild_index_from_commits( + fluree.content_store(ledger_id), + ledger_id, + &record, + fluree_db_indexer::IndexerConfig::default(), + ) + .await + .expect("full rebuild"); + let root_bytes = fluree + .content_store(ledger_id) + .get(&result.root_id) + .await + .expect("fetch root bytes"); + fluree + .publisher() + .expect("read-write nameservice") + .publish_index(ledger_id, result.index_t, &result.root_id) + .await + .expect("publish index"); + fluree_db_binary_index::format::index_root::IndexRoot::decode(&root_bytes).expect("decode root") +} + +#[tokio::test] +async fn full_reindex_writes_inline_decimal_v3_format_and_roundtrips() { + // A full rebuild adopts the inline-decimal format: the root is v3 + // (InlineWhenFits), small exact decimals encode inline, and a value too + // large to fit inline falls back to the arena — all round-trip exactly. + let fluree = memory_fluree(); + let ledger_id = "decimal/inline-format:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + let result = run_sparql_update( + &fluree, + ledger, + r#" + PREFIX ex: + INSERT DATA { + ex:a ex:amount 19.99 . + ex:b ex:amount 0.0000001 . + ex:c ex:amount "1234567890123456789.5"^^ . + } + "#, + ) + .await; + let _ = result; + + let root = full_rebuild_publish_decode_root(&fluree, ledger_id).await; + assert_eq!( + root.decimal_encoding(), + fluree_db_core::DecimalEncoding::InlineWhenFits, + "a full rebuild must write the inline-decimal (v3) format" + ); + + let ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + let query = r" + PREFIX ex: + SELECT ?s ?amount WHERE { ?s ex:amount ?amount . 
} + "; + let result = support::query_sparql(&fluree, &ledger, query) + .await + .expect("query"); + let sparql_json = result + .to_sparql_json(&ledger.snapshot) + .expect("to_sparql_json"); + + let mut amounts = binding_values(&sparql_json, "amount"); + amounts.sort(); + // Two inline-eligible decimals + one arena-overflow decimal, all exact and + // in plain (non-exponent) form. + assert_eq!( + amounts, + vec![ + "0.0000001".to_string(), + "1234567890123456789.5".to_string(), + "19.99".to_string(), + ], + "inline + arena decimals must round-trip exactly after reindex" + ); +} + +#[tokio::test] +async fn inline_decimal_equality_constant_matches_after_reindex() { + // A decimal equality constant must encode the same way as the stored inline + // row so the bound-object lookup hits it (issue #1328 narrowing). + let fluree = memory_fluree(); + let ledger_id = "decimal/inline-eq:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:price 19.99 . + ex:b ex:price 20.00 . + } + ", + ) + .await; + + let root = full_rebuild_publish_decode_root(&fluree, ledger_id).await; + assert_eq!( + root.decimal_encoding(), + fluree_db_core::DecimalEncoding::InlineWhenFits + ); + + let ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + let query = r" + PREFIX ex: + SELECT ?s WHERE { ?s ex:price 19.99 . } + "; + let result = support::query_sparql(&fluree, &ledger, query) + .await + .expect("query"); + let sparql_json = result + .to_sparql_json(&ledger.snapshot) + .expect("to_sparql_json"); + assert_eq!( + binding_values(&sparql_json, "s"), + vec!["ex:a".to_string()], + "decimal equality constant must match the stored inline decimal" + ); +} + +/// Canonicalize a SPARQL bindings array for differential comparison: any literal +/// whose value parses as a `BigDecimal` is rewritten to its normalized form. 
+/// Indexing canonicalizes decimal scale (`10.50` -> `10.5`) for both the arena +/// and inline encodings, so a novelty-vs-indexed comparison must compare by +/// numeric value, not lexical form. Datatype and structure are preserved and +/// still compared exactly. +fn canon_decimal_bindings(bindings: &JsonValue) -> JsonValue { + let mut bindings = bindings.clone(); + if let Some(rows) = bindings.as_array_mut() { + for row in rows { + if let Some(obj) = row.as_object_mut() { + for (_var, cell) in obj.iter_mut() { + if let Some(v) = cell.get("value").and_then(|v| v.as_str()) { + if let Ok(bd) = v.parse::<BigDecimal>() { + cell["value"] = + JsonValue::String(bd.normalized().to_plain_string()); + } + } + } + } + } + } + bindings +} + +#[tokio::test] +async fn inline_decimal_results_match_novelty_differential() { + // Differential: the same query must return identical results whether the + // decimals are unindexed (novelty, canonical FlakeValue::Decimal) or indexed + // under the inline (v3) format. Proves inline encoding is observably + // identical to the canonical representation across SELECT / ORDER BY / FILTER + // / aggregation. + let fluree = memory_fluree(); + let ledger_id = "decimal/inline-differential:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + let result = run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:amount 19.99 . + ex:b ex:amount 0.01 . + ex:c ex:amount 10.50 . + ex:d ex:amount 100.00 . + } + ", + ) + .await; + let novelty_ledger = result.ledger; + + let queries = [ + // Plain projection + ORDER BY on the decimal value. + r"PREFIX ex: + SELECT ?amount WHERE { ?s ex:amount ?amount . } ORDER BY ?amount", + // FILTER comparison against a decimal threshold. + r"PREFIX ex: + SELECT ?amount WHERE { ?s ex:amount ?amount . FILTER(?amount > 10.0) } ORDER BY ?amount", + // Aggregation (SUM) + COUNT. + r"PREFIX ex: + SELECT (SUM(?amount) AS ?total) (COUNT(?amount) AS ?n) WHERE { ?s ex:amount ?amount .
}", + ]; + + // Results from the unindexed (novelty) state. + let mut novelty_results = Vec::new(); + for q in &queries { + let r = support::query_sparql(&fluree, &novelty_ledger, q) + .await + .expect("novelty query"); + novelty_results.push( + r.to_sparql_json(&novelty_ledger.snapshot) + .expect("to_sparql_json")["results"]["bindings"] + .clone(), + ); + } + + // Reindex into the inline (v3) format. + let root = full_rebuild_publish_decode_root(&fluree, ledger_id).await; + assert_eq!( + root.decimal_encoding(), + fluree_db_core::DecimalEncoding::InlineWhenFits + ); + let indexed_ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + + for (q, novelty_bindings) in queries.iter().zip(novelty_results) { + let r = support::query_sparql(&fluree, &indexed_ledger, q) + .await + .expect("indexed query"); + let indexed_bindings = r + .to_sparql_json(&indexed_ledger.snapshot) + .expect("to_sparql_json")["results"]["bindings"] + .clone(); + assert_eq!( + canon_decimal_bindings(&indexed_bindings), + canon_decimal_bindings(&novelty_bindings), + "inline-indexed results must match novelty results (by value + datatype) for query:\n{q}" + ); + } +} diff --git a/fluree-db-binary-index/src/format/index_root.rs b/fluree-db-binary-index/src/format/index_root.rs index 5eb71787e6..d6d8226595 100644 --- a/fluree-db-binary-index/src/format/index_root.rs +++ b/fluree-db-binary-index/src/format/index_root.rs @@ -372,6 +372,14 @@ impl IndexRoot { Some(geo::WKT_LITERAL), ), (OType::BLANK_NODE.as_u16(), DecodeKind::BlankNode, None), + // Inline exact xsd:decimal (v3 roots). Maps back to xsd:decimal so + // decoded values carry the correct datatype, distinct from the lossy + // f64 XSD_DECIMAL lane above. 
+ ( + OType::XSD_DECIMAL_INLINE.as_u16(), + DecodeKind::Decimal, + Some(xsd::DECIMAL), + ), ]; for &(o_type, decode_kind, dt_iri) in embedded_types { @@ -1430,8 +1438,8 @@ mod tests { #[test] fn o_type_table_built_in() { let table = IndexRoot::build_o_type_table(&[], &[]); - // Should contain all 31 embedded + 13 Fluree = 44 entries. - assert_eq!(table.len(), 44); + // Should contain all 32 embedded + 13 Fluree = 45 entries. + assert_eq!(table.len(), 45); // Spot-check a few entries. let int_entry = table @@ -1460,8 +1468,8 @@ mod tests { #[test] fn o_type_table_with_langs() { let table = IndexRoot::build_o_type_table(&[], &["en".to_string(), "fr".to_string()]); - // 44 built-in + 2 langString = 46. - assert_eq!(table.len(), 46); + // 45 built-in + 2 langString = 47. + assert_eq!(table.len(), 47); // lang_id is 1-based: first tag "en" gets lang_id=1 let en_entry = table @@ -1475,8 +1483,8 @@ mod tests { #[test] fn o_type_table_with_custom_types() { let table = IndexRoot::build_o_type_table(&["http://example.org/myType".to_string()], &[]); - // 44 built-in + 1 customer = 45. - assert_eq!(table.len(), 45); + // 45 built-in + 1 customer = 46. 
+ assert_eq!(table.len(), 46); let custom = table.last().unwrap(); assert!(OType::from_u16(custom.o_type).is_customer_datatype()); diff --git a/fluree-db-binary-index/src/read/binary_index_store.rs b/fluree-db-binary-index/src/read/binary_index_store.rs index 053072dd36..e110ccbece 100644 --- a/fluree-db-binary-index/src/read/binary_index_store.rs +++ b/fluree-db-binary-index/src/read/binary_index_store.rs @@ -1358,6 +1358,7 @@ impl BinaryIndexStore { OType::XSD_DOUBLE => Some(Sid::new(namespaces::XSD, xsd_names::DOUBLE)), OType::XSD_FLOAT => Some(Sid::new(namespaces::XSD, xsd_names::FLOAT)), OType::XSD_DECIMAL => Some(Sid::new(namespaces::XSD, xsd_names::DECIMAL)), + OType::XSD_DECIMAL_INLINE => Some(Sid::new(namespaces::XSD, xsd_names::DECIMAL)), OType::XSD_DATE => Some(Sid::new(namespaces::XSD, xsd_names::DATE)), OType::XSD_TIME => Some(Sid::new(namespaces::XSD, xsd_names::TIME)), OType::XSD_DATE_TIME => Some(Sid::new(namespaces::XSD, xsd_names::DATE_TIME)), diff --git a/fluree-db-core/src/db.rs b/fluree-db-core/src/db.rs index 79d03b7901..0d167e1038 100644 --- a/fluree-db-core/src/db.rs +++ b/fluree-db-core/src/db.rs @@ -519,13 +519,13 @@ fn decode_fir6_metadata(bytes: &[u8]) -> std::io::Result )); } let version = bytes[4]; - // FIR6 version 2 adds `lang_id` to each `FulltextArenaRef` so fulltext - // arenas can be keyed by `(g_id, p_id, lang_id)`. This helper doesn't - // parse arena refs — it only consumes the header bits it needs — so - // both versions are accepted here. The authoritative parser - // (`IndexRoot::decode` in `fluree-db-binary-index`) enforces version - // matching for the full-root deserialization path. - if version != 1 && version != 2 { + // FIR6 version 2 adds `lang_id` to each `FulltextArenaRef`; version 3 enables + // inline xsd:decimal encoding. Neither changes the header layout this helper + // reads — it only consumes the fixed header + optional-section bits, not arena + // refs or leaf data — so all three versions are accepted here. 
The + // authoritative parser (`IndexRoot::decode` in `fluree-db-binary-index`) + // enforces version matching for the full-root deserialization path. + if version != 1 && version != 2 && version != 3 { return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, format!("FIR6: unsupported version {version}"), diff --git a/fluree-db-indexer/src/build/rebuild.rs b/fluree-db-indexer/src/build/rebuild.rs index 0f1ef088e6..c5b55cc90d 100644 --- a/fluree-db-indexer/src/build/rebuild.rs +++ b/fluree-db-indexer/src/build/rebuild.rs @@ -217,6 +217,10 @@ where _span_b.record("fetch_concurrency", fetch_concurrency); let mut shared = SharedResolverState::new_for_ledger(&ledger_id); + // A full rebuild writes a fresh root, so it adopts the inline-decimal + // format: small exact decimals encode inline, the rest fall back to + // the arena. Existing ledgers keep their format until reindexed. + shared.decimal_encoding = fluree_db_core::DecimalEncoding::InlineWhenFits; // Pre-insert rdf:type into predicate dictionary so class tracking // works from the very first commit. @@ -1110,6 +1114,9 @@ where db_stats: Some(db_stats), db_schema, sketch_ref, + // Same source as the resolver above: the root version must match + // how decimals were just encoded. + decimal_encoding: shared.decimal_encoding, }; let result = super::root_assembly::encode_and_write_root_v6( diff --git a/fluree-db-indexer/src/build/root_assembly.rs b/fluree-db-indexer/src/build/root_assembly.rs index 0cb62ae216..42e3d9d6f6 100644 --- a/fluree-db-indexer/src/build/root_assembly.rs +++ b/fluree-db-indexer/src/build/root_assembly.rs @@ -219,6 +219,11 @@ pub(crate) struct Fir6Inputs { pub db_schema: Option, /// CAS reference for the serialized HLL sketch blob. pub sketch_ref: Option, + /// Decimal-encoding policy for this build. 
Must equal the resolver's policy + /// for this run (same source) so the written root version matches how the + /// resolver encoded decimals — a mismatch would split decimal identity + /// across the inline/arena boundary. + pub decimal_encoding: fluree_db_core::DecimalEncoding, } /// Encode an `IndexRoot` (FIR6), write to CAS, and return an `IndexResult`. @@ -291,10 +296,8 @@ pub(crate) async fn encode_and_write_root_v6( prev_index: None, garbage: None, sketch_ref: inputs.sketch_ref, - // Sticky per root: extending an inline-decimal root keeps inlining, an - // arena-only root keeps the arena. Until the write path emits inline - // decimals, every root is arena-only. - decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, + // Same source as the resolver's policy for this run (see Fir6Inputs). + decimal_encoding: inputs.decimal_encoding, }; // `IndexStats.size` is defined as total commit data size (bytes) for the ledger. From 6fcf026ae27ce232cc7fb4ed4245cf8149d68f73 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 13:18:10 -0400 Subject: [PATCH 09/25] fix(indexer): classify inline decimals as DECIMAL in rebuild stats The SPOT/rebuild stats path (stats_record_from_v2 -> otype_to_value_type_tag) mapped OType by value, but XSD_DECIMAL_INLINE had no arm and fell through to ValueTypeTag::UNKNOWN. Since a full rebuild now writes inline decimals, that path would report inline-decimal properties as UNKNOWN instead of DECIMAL, diverging from the incremental path (which derives the tag from the declared datatype IRI and is already correct). Map XSD_DECIMAL_INLINE to DECIMAL. Unlike NUM_BIG_OVERFLOW (which mixes BigInt and BigDecimal under one ObjKind and stays UNKNOWN), the inline lane carries only decimals, so the classification is unambiguous. 
--- fluree-db-indexer/src/stats/id_hook.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fluree-db-indexer/src/stats/id_hook.rs b/fluree-db-indexer/src/stats/id_hook.rs index 13c81aa19a..1175299252 100644 --- a/fluree-db-indexer/src/stats/id_hook.rs +++ b/fluree-db-indexer/src/stats/id_hook.rs @@ -179,6 +179,10 @@ fn otype_to_value_type_tag(ot: fluree_db_core::o_type::OType) -> ValueTypeTag { OType::XSD_DOUBLE => ValueTypeTag::DOUBLE, OType::XSD_FLOAT => ValueTypeTag::FLOAT, OType::XSD_DECIMAL => ValueTypeTag::DECIMAL, + // Inline exact decimals (v3 roots) unambiguously carry only + // `FlakeValue::Decimal` — unlike NUM_BIG_OVERFLOW below — so they map + // straight to DECIMAL here. + OType::XSD_DECIMAL_INLINE => ValueTypeTag::DECIMAL, // NUM_BIG_OVERFLOW is intentionally NOT mapped here: it carries both // `FlakeValue::Decimal` (arbitrary-precision xsd:decimal) and // `FlakeValue::BigInt` (xsd:integer overflow > i64) — they share @@ -859,4 +863,20 @@ mod tests { assert_eq!(props[&key].count, 5); assert_eq!(props[&key].last_modified_t, 3); } + + #[test] + fn inline_decimal_otype_classified_as_decimal() { + use fluree_db_core::o_type::OType; + // Both the lossy f64 decimal lane and the exact inline lane count as + // DECIMAL for datatype stats, so a reindexed (inline) ledger reports the + // same property datatype as before. + assert_eq!( + otype_to_value_type_tag(OType::XSD_DECIMAL), + ValueTypeTag::DECIMAL + ); + assert_eq!( + otype_to_value_type_tag(OType::XSD_DECIMAL_INLINE), + ValueTypeTag::DECIMAL + ); + } } From 6be5df43ed08d887fba5601356af92d718c2b01d Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 13:22:53 -0400 Subject: [PATCH 10/25] fix(query): key overlay translation cache on decimal policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The global overlay-translation cache keyed on (ledger, snapshot_t, overlay_epoch, store_max_t, to_t, g_id, index). 
A full reindex can replace an arena-only (v2) root with an inline-decimal (v3) root at the SAME index_t — a pure re-encode of the same committed data — so store_max_t doesn't change but a novelty decimal now translates to a different (o_type, o_key): an inline XSD_DECIMAL_INLINE key vs a NUM_BIG_OVERFLOW arena handle. A cached arena-keyed translation served against the inline root (or vice versa) would not match base rows, breaking overlay assert/retract identity. Add the store's decimal-encoding policy to the cache key. For the same committed data a deterministic reindex reassigns dict ids identically, so the policy is the one translation-affecting property that flips on a same-t re-encode. Also clarify the bulk-import comment: import writes arena-only roots and adopts inline decimals on first reindex (its object-resolution path is separate from the rebuild resolver). --- fluree-db-api/src/import.rs | 4 ++++ fluree-db-core/src/value_id.rs | 2 +- fluree-db-query/src/binary_scan.rs | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fluree-db-api/src/import.rs b/fluree-db-api/src/import.rs index 0c5703f13a..8862cfd2b6 100644 --- a/fluree-db-api/src/import.rs +++ b/fluree-db-api/src/import.rs @@ -5820,6 +5820,10 @@ where garbage: None, sketch_ref: None, ns_split_mode: input.ns_split_mode, + // Bulk import writes arena-only roots for now; inline-decimal encoding + // is adopted on the ledger's first full reindex. (The import build + // path resolves objects independently of the rebuild resolver, so + // enabling inline here is a separate, follow-up wiring.) 
decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, }; diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index 63cf09047a..8cd52bb72f 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -208,7 +208,7 @@ impl fmt::Display for ObjKind { /// identity, so a single root must use **one** decimal encoding for all /// inline-eligible values — never a mix — or a retract computed under one scheme /// would miss an assert stored under the other. -#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)] +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)] pub enum DecimalEncoding { /// All `xsd:decimal` values route to the per-`(graph, predicate)` NumBig /// arena. The behavior of every pre-inline index root, bit-for-bit unchanged. diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index fe4c619266..93a01b933c 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -1965,6 +1965,7 @@ impl Operator for BinaryScanOperator { to_t: ctx.to_t, g_id: self.g_id, index: self.index, + decimal_encoding: store_arc.decimal_encoding(), }; let entry = if let Some(hit) = global_translation_cache().get(&global_key) { hit @@ -2215,6 +2216,14 @@ pub struct GlobalTranslationKey { pub to_t: i64, pub g_id: GraphId, pub index: IndexType, + /// The base store's decimal-encoding policy. A full reindex can replace an + /// arena-only (v2) root with an inline-decimal (v3) root at the *same* + /// `index_t` (a pure re-encode of the same committed data), so `store_max_t` + /// alone can't tell the two apart. The two roots translate the same novelty + /// decimal to different `(o_type, o_key)` (NUM_BIG_OVERFLOW handle vs inline + /// XSD_DECIMAL_INLINE); keying on the policy prevents serving a stale + /// arena-keyed translation against an inline root (or vice versa). 
+ pub decimal_encoding: fluree_db_core::DecimalEncoding, } /// Cross-query LRU of translated overlay ops. From 67887a70d23c45312b13b392a0da18bd611ec5bc Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 13:40:57 -0400 Subject: [PATCH 11/25] feat(import): write inline decimals on bulk import (v3 roots) Bulk import has its own object-resolution path (ImportSink::resolve_object_value via the turtle parser -> SpoolContext, separate from the rebuild resolver), so it previously always routed decimals to the shared NumBig pool and wrote v2 arena-only roots. Wire it to the inline-decimal format like a full rebuild. - SpoolConfig/SpoolContext carry a decimal_encoding policy; the Decimal arm mirrors the resolver: under InlineWhenFits a fitting decimal encodes inline (NUM_DEC) with no pool handle, otherwise it falls back to the NumBig pool. - A single IMPORT_DECIMAL_ENCODING = InlineWhenFits constant feeds both the spool object resolution and the written root version, so they can't diverge (same single-source invariant as the rebuild path). Inline encoding also skips the shared NumBig pool insert (a locked global handle allocation) for the common small-decimal case, so it removes work from the import hot path rather than adding it. Test: bulk import of inline-eligible + arena-overflow decimals writes a v3 root and round-trips every value exactly with the xsd:decimal datatype. 
--- fluree-db-api/src/import.rs | 16 +++-- fluree-db-api/tests/it_import_v3.rs | 86 +++++++++++++++++++++++++++ fluree-db-transact/src/import_sink.rs | 40 ++++++++++--- 3 files changed, 129 insertions(+), 13 deletions(-) diff --git a/fluree-db-api/src/import.rs b/fluree-db-api/src/import.rs index 8862cfd2b6..fb8dbfad31 100644 --- a/fluree-db-api/src/import.rs +++ b/fluree-db-api/src/import.rs @@ -3587,6 +3587,7 @@ where numbig_pool: Arc::new(SharedNumBigPool::new()), vector_pool: Arc::new(SharedVectorArenaPool::new()), ns_alloc: Arc::clone(&shared_alloc), + decimal_encoding: IMPORT_DECIMAL_ENCODING, }); // Pre-insert rdf:type so we know the predicate ID before Phase A begins. @@ -5064,6 +5065,13 @@ struct IndexUploadResult { summary: Option, } +/// Decimal-encoding policy for a fresh bulk import. Like a full reindex, a new +/// import adopts the inline-decimal (v3) format. This is the single source for +/// BOTH the spool object resolution ([`SpoolConfig::decimal_encoding`]) and the +/// written root version — they must agree or decimal identity would split. +const IMPORT_DECIMAL_ENCODING: fluree_db_core::DecimalEncoding = + fluree_db_core::DecimalEncoding::InlineWhenFits; + #[allow(clippy::too_many_arguments)] async fn build_and_upload( storage: &S, @@ -5820,11 +5828,9 @@ where garbage: None, sketch_ref: None, ns_split_mode: input.ns_split_mode, - // Bulk import writes arena-only roots for now; inline-decimal encoding - // is adopted on the ledger's first full reindex. (The import build - // path resolves objects independently of the rebuild resolver, so - // enabling inline here is a separate, follow-up wiring.) - decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, + // Same source as the spool object resolution (SpoolConfig): the root + // version must match how the import encoded decimals. + decimal_encoding: IMPORT_DECIMAL_ENCODING, }; // Encode and upload FIR6 root. 
diff --git a/fluree-db-api/tests/it_import_v3.rs b/fluree-db-api/tests/it_import_v3.rs index 7d52379afb..f4ce0aa573 100644 --- a/fluree-db-api/tests/it_import_v3.rs +++ b/fluree-db-api/tests/it_import_v3.rs @@ -1321,3 +1321,89 @@ ex:remove a ex:User ; "rebuilt V3 index should only contain 'Keep' — 'Remove' should be filtered as retract-winner" ); } + +// ── Bulk import writes inline decimals (v3 root) and round-trips exactly ── +#[tokio::test] +async fn import_v3_inline_decimals_roundtrip() { + let db_dir = tempfile::tempdir().expect("db tmpdir"); + let data_dir = tempfile::tempdir().expect("data tmpdir"); + + // Two inline-eligible decimals + one too large to fit inline (mantissa + // exceeds 2^57, so it falls back to the NumBig arena). + let ttl = r#" +@prefix ex: . +@prefix xsd: . + +ex:a ex:amount 19.99 . +ex:b ex:amount 0.0000001 . +ex:c ex:amount "1234567890123456789.5"^^xsd:decimal . +"#; + let ttl_path = write_ttl(data_dir.path(), "decimals.ttl", ttl); + + let fluree = FlureeBuilder::file(db_dir.path().to_string_lossy().to_string()) + .build() + .expect("build file-backed Fluree"); + + let result = fluree + .create("test/v3-decimals:main") + .import(&ttl_path) + .threads(1) + .memory_budget_mb(128) + .cleanup(false) + .execute() + .await + .expect("decimal import should succeed"); + assert!(result.root_id.is_some(), "index should have been built"); + + // The import must write a v3 (inline-decimal) root: byte 4 of the FIR6 + // header is the version, and ROOT_V6_VERSION_INLINE_DECIMAL == 3. 
+ let fir6_files = find_files_with_magic(db_dir.path(), b"FIR6"); + assert!(!fir6_files.is_empty(), "expected a FIR6 root file"); + let root_bytes = std::fs::read(&fir6_files[0]).expect("read FIR6 root"); + assert_eq!( + root_bytes[4], 3, + "bulk import must write a v3 inline-decimal root" + ); + + let ledger = fluree + .ledger("test/v3-decimals:main") + .await + .expect("load decimal ledger"); + + let result = support::query_sparql( + &fluree, + &ledger, + r" + PREFIX ex: + SELECT ?amount WHERE { ?s ex:amount ?amount } ORDER BY ?amount + ", + ) + .await + .expect("decimal query"); + let json = result + .to_sparql_json(&ledger.snapshot) + .expect("format sparql json"); + let bindings = json["results"]["bindings"].as_array().expect("bindings"); + let mut amounts: Vec<&str> = bindings + .iter() + .map(|b| b["amount"]["value"].as_str().unwrap()) + .collect(); + amounts.sort(); + + // All three decimals — two inline, one arena — round-trip exactly in plain + // (non-exponent) form. + assert_eq!( + amounts, + vec!["0.0000001", "1234567890123456789.5", "19.99"], + "inline + arena decimals must round-trip exactly through bulk import" + ); + + // Datatype is xsd:decimal for all (inline lane resolves the datatype). + for b in bindings { + assert_eq!( + b["amount"]["datatype"].as_str().unwrap(), + "http://www.w3.org/2001/XMLSchema#decimal", + "imported decimals must carry xsd:decimal datatype" + ); + } +} diff --git a/fluree-db-transact/src/import_sink.rs b/fluree-db-transact/src/import_sink.rs index 96c216051e..7ef0a1b710 100644 --- a/fluree-db-transact/src/import_sink.rs +++ b/fluree-db-transact/src/import_sink.rs @@ -90,6 +90,10 @@ mod inner { pub vector_pool: Arc, /// Shared namespace allocator (for prefix lookup). pub ns_alloc: Arc, + /// Decimal-encoding policy for the index this import builds. Must match + /// the version of the root written for the import (same source) so inline + /// decimals and the root format agree. 
+ pub decimal_encoding: fluree_db_core::DecimalEncoding, } /// Result of finishing a [`SpoolContext`] via [`SpoolContext::finish`] — @@ -157,6 +161,9 @@ mod inner { next_lang_id: u16, /// Graph ID for all records in this chunk (0 = default). g_id: GraphId, + /// Decimal-encoding policy (from `SpoolConfig`): under `InlineWhenFits`, + /// small exact decimals encode inline instead of via the numbig pool. + decimal_encoding: fluree_db_core::DecimalEncoding, } impl SpoolContext { @@ -184,6 +191,7 @@ mod inner { ns_alloc: Arc::clone(&config.ns_alloc), ns_prefix_cache: FxHashMap::default(), languages: FxHashMap::default(), + decimal_encoding: config.decimal_encoding, next_lang_id: 1, // 0 = no language tag g_id, }) @@ -384,14 +392,29 @@ mod inner { } } FlakeValue::Decimal(dec) => { - // Use shared numbig pool for global handle. - let handle = - self.numbig_pool - .get_or_insert_bigdec(self.g_id, p_id, dec.as_ref()); - ( - ObjKind::NUM_BIG.as_u8(), - ObjKey::encode_u32_id(handle).as_u64(), - ) + // Under InlineWhenFits, a small exact decimal encodes inline — + // no numbig-pool handle, avoiding the shared-pool insert on the + // import hot path. Large/high-precision decimals (and every + // decimal under ArenaOnly) fall back to the pool. + let inline = self + .decimal_encoding + .inlines() + .then(|| ObjKey::encode_decimal(dec.as_ref())) + .flatten(); + match inline { + Some(key) => (ObjKind::NUM_DEC.as_u8(), key.as_u64()), + None => { + let handle = self.numbig_pool.get_or_insert_bigdec( + self.g_id, + p_id, + dec.as_ref(), + ); + ( + ObjKind::NUM_BIG.as_u8(), + ObjKey::encode_u32_id(handle).as_u64(), + ) + } + } } FlakeValue::Vector(v) => { // Use shared vector pool for global handle. 
@@ -823,6 +846,7 @@ mod inner { numbig_pool: Arc::new(SharedNumBigPool::new()), vector_pool: Arc::new(SharedVectorArenaPool::new()), ns_alloc: Arc::new(SharedNamespaceAllocator::from_registry(ns)), + decimal_encoding: fluree_db_core::DecimalEncoding::InlineWhenFits, } } From e0279b7693872c28a563fa4552123ae85553d510 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 16:45:39 -0400 Subject: [PATCH 12/25] style: rustfmt inline-decimal additions Apply cargo fmt to the inline-decimal encoding primitives and the decimal-exactness test additions (line wrapping only; no behavior change). --- fluree-db-api/tests/it_decimal_exactness.rs | 18 +++++++++++----- fluree-db-core/src/value_id.rs | 23 ++++++++++++++------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 8f01184d4e..a0f42718ea 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -960,7 +960,10 @@ async fn full_reindex_writes_inline_decimal_v3_format_and_roundtrips() { "a full rebuild must write the inline-decimal (v3) format" ); - let ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); let query = r" PREFIX ex: SELECT ?s ?amount WHERE { ?s ex:amount ?amount . } @@ -1014,7 +1017,10 @@ async fn inline_decimal_equality_constant_matches_after_reindex() { fluree_db_core::DecimalEncoding::InlineWhenFits ); - let ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); let query = r" PREFIX ex: SELECT ?s WHERE { ?s ex:price 19.99 . 
} @@ -1046,8 +1052,7 @@ fn canon_decimal_bindings(bindings: &JsonValue) -> JsonValue { for (_var, cell) in obj.iter_mut() { if let Some(v) = cell.get("value").and_then(|v| v.as_str()) { if let Ok(bd) = v.parse::() { - cell["value"] = - JsonValue::String(bd.normalized().to_plain_string()); + cell["value"] = JsonValue::String(bd.normalized().to_plain_string()); } } } @@ -1115,7 +1120,10 @@ async fn inline_decimal_results_match_novelty_differential() { root.decimal_encoding(), fluree_db_core::DecimalEncoding::InlineWhenFits ); - let indexed_ledger = fluree.ledger(ledger_id).await.expect("load reindexed ledger"); + let indexed_ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); for (q, novelty_bindings) in queries.iter().zip(novelty_results) { let r = support::query_sparql(&fluree, &indexed_ledger, q) diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index 8cd52bb72f..e736fe1467 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -418,9 +418,7 @@ impl ObjKey { let magnitude = magnitude.to_u64()?; // always Some given the bit check above let sign_bit = u64::from(sign == Sign::Minus); - let key = (sign_bit << DEC_SIGN_SHIFT) - | ((scale as u64) << DEC_SCALE_SHIFT) - | magnitude; + let key = (sign_bit << DEC_SIGN_SHIFT) | ((scale as u64) << DEC_SCALE_SHIFT) | magnitude; Some(Self(key)) } @@ -1793,8 +1791,8 @@ mod tests { /// to the numerically-equal value. 
fn assert_decimal_roundtrip(s: &str) { let v = bd(s); - let key = ObjKey::encode_decimal(&v) - .unwrap_or_else(|| panic!("{s} should be inline-eligible")); + let key = + ObjKey::encode_decimal(&v).unwrap_or_else(|| panic!("{s} should be inline-eligible")); let back = key.decode_decimal(); assert_eq!(back, v, "round-trip mismatch for {s}: got {back}"); } @@ -1802,8 +1800,19 @@ mod tests { #[test] fn decimal_roundtrip_common_values() { for s in [ - "0", "1", "-1", "19.99", "-19.99", "0.01", "-0.01", "3.14159", "100", - "1000000.5", "-1000000.5", "0.0000001", "12345678901234.56", + "0", + "1", + "-1", + "19.99", + "-19.99", + "0.01", + "-0.01", + "3.14159", + "100", + "1000000.5", + "-1000000.5", + "0.0000001", + "12345678901234.56", ] { assert_decimal_roundtrip(s); } From 68003dee37716c03bd9b6db514b7877f1fe4465f Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 16:58:45 -0400 Subject: [PATCH 13/25] docs,test: note CommitResolver is non-production; cover decimal fold-overflow - Document that CommitResolver is constructed only in tests; live indexing uses SharedResolverState (rebuild/incremental) and ImportSink (import). If it is ever wired into production it must call set_decimal_encoding from the root policy, since it defaults to ArenaOnly and would otherwise write arena decimals into a v3 inline root. - Add a unit test for the negative-scale fold path in encode_decimal: an integer-valued decimal (1e18/1e19) folds to scale 0 and overflows the 57-bit mantissa -> arena fallback, and values past the fold-exponent limit (1e20+) take the early fallback. The prior boundary tests only used scale-0 values. 
--- fluree-db-core/src/value_id.rs | 22 +++++++++++++++++++ .../src/run_index/resolve/resolver.rs | 9 ++++++++ 2 files changed, 31 insertions(+) diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index e736fe1467..f3cbba8ce1 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -1885,6 +1885,28 @@ mod tests { assert!(ObjKey::encode_decimal(&frac_over).is_none()); } + #[test] + fn decimal_fold_overflow_falls_back_to_arena() { + // Integer-valued decimals normalize to a negative exponent (1e18 -> + // mantissa 1, scale -18); encode folds them back to scale 0. The fold + // can push the mantissa over the 57-bit budget, which must fall back to + // the arena (None) — a path the scale-0 boundary cases don't exercise. + + // 1e17 (< 2^57 ≈ 1.44e17) folds and still fits inline, round-tripping. + assert!(ObjKey::encode_decimal(&bd("100000000000000000")).is_some()); + assert_decimal_roundtrip("100000000000000000"); + + // 1e18 / 1e19 fold (scale -18 / -19, within the fold limit) but the + // folded mantissa exceeds 57 bits → arena fallback. + assert!(ObjKey::encode_decimal(&bd("1000000000000000000")).is_none()); + assert!(ObjKey::encode_decimal(&bd("10000000000000000000")).is_none()); + + // Past the fold-exponent limit (scale < -DEC_FOLD_EXP_LIMIT): early + // arena fallback, without attempting the 10^n multiply. 
+ assert!(ObjKey::encode_decimal(&bd("100000000000000000000")).is_none()); // 1e20 + assert!(ObjKey::encode_decimal(&bd("1e30")).is_none()); + } + #[test] fn decimal_sign_distinguished() { let pos = ObjKey::encode_decimal(&bd("19.99")).unwrap(); diff --git a/fluree-db-indexer/src/run_index/resolve/resolver.rs b/fluree-db-indexer/src/run_index/resolve/resolver.rs index 049c2e22a8..08fa01d0ee 100644 --- a/fluree-db-indexer/src/run_index/resolve/resolver.rs +++ b/fluree-db-indexer/src/run_index/resolve/resolver.rs @@ -55,6 +55,15 @@ pub struct ResolvedCommit { } /// Resolves commit-local ops into globally-addressed RunRecords. +/// +/// **Not on any production indexing path.** Live indexing resolves through +/// [`SharedResolverState`] (full rebuild + incremental) or `ImportSink` (bulk +/// import); `CommitResolver` is currently constructed only in tests. If it is +/// ever wired into a production path, the caller MUST call +/// [`set_decimal_encoding`](Self::set_decimal_encoding) with the target root's +/// policy — it defaults to `ArenaOnly`, so without that call it would write +/// arena decimals into a root that may be inline-decimal (v3), splitting +/// `(o_type, o_key)` identity. pub struct CommitResolver { /// namespace_code -> prefix IRI. /// Seeded from `default_namespace_codes()`, updated by commit namespace_deltas. From 0576ce1ca4f0e8dbd10545d53dbe29ddd535e080 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 17:46:16 -0400 Subject: [PATCH 14/25] feat(core): order-preserving inline decimal codec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the equality-keyed inline decimal layout with an order-preserving base-10 float code, so inline decimals support range / ORDER BY pushdown in addition to equality. The key stays canonical (equal values -> identical bits), so everything built on the old codec (equality, dedup, joins, #1328 prefilter) is unaffected. 
Layout (magnitude is 63 bits, sign splits around the 2^63 midpoint): mag = [ exponent:6 (biased, exp10 in -32..=31) | significand:57 (17 digits) ] value > 0 -> key = 2^63 + mag value == 0 -> key = 2^63 (canonical midpoint) value < 0 -> key = 2^63 - 1 - mag (more negative -> smaller key) The significand is the coefficient normalized to 17 digits (MSD leading), so same-exponent significands compare as integers; negatives complement the magnitude like the f64 lane. Inline-eligible iff <= 17 significant digits and exp10 in -32..=31, else arena fallback (decimals beyond ~32 integer/fractional places spill — rare). Proven by property tests: order-preservation over a numerically-sorted value set and exhaustive pairwise (numeric cmp == key cmp), plus canonical-equality across scale variants and zero spellings, and round-trip exactness. This is the format-locking half. Query-side fast paths still exclude inline decimals from pushdown (safe — an ordered key is also a valid equality key); admitting them follows next. --- fluree-db-core/src/value_id.rs | 351 ++++++++++++++++++++++----------- 1 file changed, 232 insertions(+), 119 deletions(-) diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index f3cbba8ce1..d01e66427c 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -15,12 +15,10 @@ //! `NumInt(3)` vs `NumF64(3.0)`) is a query-layer concern resolved via //! multi-scan merge, not an index property. //! -//! **Exception — [`ObjKind::NUM_DEC`]:** inline `xsd:decimal` keys are -//! *equality-keyed*, not value-ordered. Equal values encode to identical bits -//! (so equality, dedup, and joins are correct), but raw `u64` ordering of the -//! packed `(sign, scale, mantissa)` is NOT numeric ordering. Range/`FILTER` -//! pushdown that relies on `o_key` order must therefore exclude this kind. See -//! [`ObjKey::encode_decimal`]. +//! [`ObjKind::NUM_DEC`] (inline `xsd:decimal`) upholds this contract too: its +//! 
key is canonical (equal values → identical bits) *and* order-preserving (raw +//! `u64` order == numeric order), via an order-preserving base-10 float layout. +//! See [`ObjKey::encode_decimal`]. //! //! [`ValueTypeTag`] is a compact `u8` identifier for XSD/RDF datatypes, used as //! a tie-breaker in index sort keys so that values with the same `(ObjKind, @@ -123,11 +121,12 @@ impl ObjKind { /// Precision: approximately 0.3mm at the equator. pub const GEO_POINT: Self = Self(0x14); - /// Exact inline `xsd:decimal` — `o_key` is a packed `(sign, scale, mantissa)` - /// (see [`ObjKey::encode_decimal`]). Equality-keyed: the payload is canonical - /// but is **not** value-ordered, so this kind must be excluded from any - /// `o_key`-order range pushdown. Distinct from [`NUM_BIG`](Self::NUM_BIG) - /// (arena handle) — inline decimals carry the exact value with no arena. + /// Exact inline `xsd:decimal` — `o_key` is an order-preserving base-10 float + /// code (see [`ObjKey::encode_decimal`]). Canonical (equal values → identical + /// bits) AND value-ordered (raw `u64` order == numeric order), so it supports + /// equality, dedup, joins, and range / ORDER BY pushdown. Distinct from + /// [`NUM_BIG`](Self::NUM_BIG) (arena handle) — inline decimals carry the exact + /// value with no arena. pub const NUM_DEC: Self = Self(0x15); /// Get the raw `u8` discriminant. @@ -246,24 +245,53 @@ const SIGN_FLIP: u64 = 1u64 << 63; /// Sign bit mask for f64 bits. const F64_SIGN_BIT: u64 = 1u64 << 63; -// ---- Inline decimal (NumDec) packing layout: [sign:1 | scale:6 | mantissa:57] ---- - -/// Number of bits the inline-decimal mantissa magnitude occupies (low bits). -const DEC_MANTISSA_BITS: u64 = 57; -/// Mask selecting the 57-bit mantissa magnitude. -const DEC_MANTISSA_MASK: u64 = (1u64 << DEC_MANTISSA_BITS) - 1; -/// Bit offset of the 6-bit scale field. -const DEC_SCALE_SHIFT: u64 = DEC_MANTISSA_BITS; -/// Mask (pre-shift) selecting the 6-bit scale field. 
-const DEC_SCALE_MASK: u64 = (1u64 << 6) - 1; -/// Maximum representable scale (number of fractional digits). -const DEC_MAX_SCALE: u64 = DEC_SCALE_MASK; // 63 -/// Bit offset of the sign bit (high bit). -const DEC_SIGN_SHIFT: u64 = 63; -/// Largest `|negative scale|` we will fold back into the mantissa before giving -/// up. 10^19 already exceeds the 57-bit mantissa budget, so anything beyond this -/// cannot fit inline regardless. -const DEC_FOLD_EXP_LIMIT: i64 = 19; +// ---- Inline decimal (NumDec) — ORDER-PRESERVING base-10 float layout ---- +// +// An inline decimal key is a canonical, order-preserving code: equal values +// produce identical bits (equality), and raw `u64` order equals numeric order +// (range/ORDER BY pushdown). The magnitude is laid out as a base-10 float +// +// magnitude = significand × 10^(exp10 - (DEC_DIGITS - 1)) +// +// where `significand` is `|mantissa|` normalized to exactly `DEC_DIGITS` +// decimal digits (MSD in the leading place, in `[10^(DEC_DIGITS-1), 10^DEC_DIGITS)`) +// and `exp10` is the base-10 exponent of the most significant digit. The packed +// magnitude code places the (biased) exponent above the significand: +// +// mag = (biased_exp << DEC_SIG_BITS) | significand (0 = the value zero) +// +// `mag` is monotonic in magnitude — a larger exponent dominates, and within one +// exponent a larger significand wins. Sign is then folded so the full `u64` +// order is numeric order, with zero at the exact midpoint: +// +// value > 0 → key = 2^63 + mag +// value == 0 → key = 2^63 +// value < 0 → key = 2^63 - 1 - mag (more negative ⇒ smaller key) +// +// Negatives complement the magnitude (like the f64 lane), so they sort below +// zero and more-negative values sort lower. Canonicalization (normalize the +// mantissa, strip trailing zeros) makes `1.5`, `1.50`, and `1.500` one code. 
+ +// Magnitude budget is 63 bits (the 64th splits sign around the pivot), packed as +// `[ exponent:6 | significand:57 ]` — all 63 bits used, no waste. + +/// Significant decimal digits an inline decimal carries. Values with more +/// significant digits spill to the NumBig arena. 17 digits matches the original +/// inline precision; `10^17 < 2^57` so the significand fits 57 bits exactly. +const DEC_DIGITS: u32 = 17; +/// Bits the `DEC_DIGITS`-digit significand occupies (low bits of the magnitude). +const DEC_SIG_BITS: u64 = 57; +/// Mask selecting the significand. +const DEC_SIG_MASK: u64 = (1u64 << DEC_SIG_BITS) - 1; +/// Exponent field width (bits), sitting just above the significand. +const DEC_EXP_BITS: u64 = 6; +/// Bias added to `exp10` so the stored exponent is non-negative. Representable +/// `exp10` range is `[-DEC_EXP_BIAS, DEC_EXP_BIAS - 1]` = `[-32, 31]`; values +/// outside (more than ~32 integer or fractional places) spill to the arena. +const DEC_EXP_BIAS: i64 = 1 << (DEC_EXP_BITS - 1); // 32 +/// Sign split point: non-negative keys are `>= DEC_SIGN_PIVOT`, negatives below. +/// Zero encodes exactly to this value (the midpoint). +const DEC_SIGN_PIVOT: u64 = 1u64 << 63; /// Error returned when a value cannot be stored in the index. #[derive(Debug, Clone, PartialEq)] @@ -363,62 +391,67 @@ impl ObjKey { f64::from_bits(bits) } - // ---- Inline decimal encoding (NumDec) ---- + // ---- Inline decimal encoding (NumDec) — order-preserving ---- // - // Exact packed `xsd:decimal`: a canonical `(sign, scale, mantissa)` laid out - // as `[ sign:1 | scale:6 | mantissa:57 ]` in the 64-bit key. The packing is - // *equality-keyed*, not order-preserving — equal values produce identical - // bits, but raw `u64` ordering is NOT numeric ordering, so this kind must be - // excluded from any `o_key`-order range pushdown. 
- // - // A value is inline-eligible iff, after canonicalization (trailing fractional - // zeros stripped, integer-valued decimals folded to scale 0, zero canonical), - // it has `0 <= scale <= 63` and `|mantissa| < 2^57`. Anything else returns - // `None` and falls back to the NumBig arena, exactly like overflow integers. - - /// Encode a canonical `xsd:decimal` inline, or `None` if it does not fit. - /// - /// Canonicalization guarantees that two numerically-equal decimals (e.g. - /// `1.50` and `1.5`, or `1.00` and `1`) encode to identical bits, so the - /// packed key is a stable fact identity. + // See the layout notes above the `DEC_*` constants. The key is canonical + // (equal values → identical bits) AND order-preserving (raw `u64` order == + // numeric order), so inline decimals support equality, dedup, joins, AND + // range / ORDER BY pushdown. A value is inline-eligible iff, after + // canonicalization, it has at most `DEC_DIGITS` significant digits and a + // base-10 exponent in `[-DEC_EXP_BIAS, DEC_EXP_BIAS - 1]`. Anything else + // returns `None` and falls back to the NumBig arena, like overflow integers. + + /// Encode a canonical `xsd:decimal` inline, order-preserving, or `None` if it + /// does not fit. Numerically-equal decimals (`1.50`, `1.5`, `1.500`) encode + /// to identical bits, and `a < b` numerically iff `encode(a) < encode(b)` as + /// `u64`. pub fn encode_decimal(value: &bigdecimal::BigDecimal) -> Option { use num_bigint::Sign; use num_traits::{ToPrimitive, Zero}; - // Strip trailing fractional zeros; canonicalizes 1.50 -> 1.5, 1.00 -> 1E2 etc. + // Canonicalize: strip trailing zeros so the significant-digit count and + // significand are unique for a given value. let normalized = value.normalized(); - let (mut mantissa, mut scale) = normalized.as_bigint_and_exponent(); + let (mantissa, scale) = normalized.as_bigint_and_exponent(); if mantissa.is_zero() { - // Canonical zero: sign 0, scale 0, mantissa 0 (so 0, 0.0, -0.00 all match). 
- return Some(Self(0)); - } - - // A negative scale means the normalized form pushed trailing zeros into the - // exponent (e.g. 100 -> mantissa 1, scale -2). Fold them back into the - // mantissa at scale 0. Bound the exponent first: 10^19 already exceeds the - // 57-bit mantissa budget, so anything past that can never fit inline. - if scale < 0 { - if scale < -DEC_FOLD_EXP_LIMIT { - return None; - } - mantissa *= num_bigint::BigInt::from(10).pow((-scale) as u32); - scale = 0; + // Zero is the canonical midpoint between negatives and positives. + return Some(Self(DEC_SIGN_PIVOT)); } - if scale > DEC_MAX_SCALE as i64 { + let (sign, magnitude) = mantissa.into_parts(); + // Anything larger than u64 has > 19 decimal digits, well past DEC_DIGITS, + // so a `to_u64` miss is itself the spill signal — no BigUint digit walk. + let mag_u64 = magnitude.to_u64()?; + let digits = mag_u64.ilog10() + 1; // mag_u64 > 0 here + if digits > DEC_DIGITS { + // More significant digits than the inline significand holds. return None; } - let (sign, magnitude) = mantissa.into_parts(); - // |mantissa| must fit in 57 bits. - if magnitude.bits() > DEC_MANTISSA_BITS { + // Base-10 exponent of the most significant digit: with `magnitude` having + // `digits` digits and value = magnitude × 10^-scale, the MSD place is + // `(digits - 1) - scale`. + let exp10 = (digits as i64 - 1) - scale; + if !(-DEC_EXP_BIAS..DEC_EXP_BIAS).contains(&exp10) { return None; } - let magnitude = magnitude.to_u64()?; // always Some given the bit check above - let sign_bit = u64::from(sign == Sign::Minus); - let key = (sign_bit << DEC_SIGN_SHIFT) | ((scale as u64) << DEC_SCALE_SHIFT) | magnitude; + // Left-align to exactly DEC_DIGITS digits so same-exponent significands + // compare as integers. `mag_u64 < 10^digits` and the pad is + // `DEC_DIGITS - digits`, so the product is `< 10^17 < 2^DEC_SIG_BITS` and + // well under `u64::MAX` — no overflow. 
+ let significand = mag_u64 * 10u64.pow(DEC_DIGITS - digits); + + let biased_exp = (exp10 + DEC_EXP_BIAS) as u64; + let mag = (biased_exp << DEC_SIG_BITS) | significand; + + let key = if sign == Sign::Minus { + // More negative ⇒ larger mag ⇒ smaller key, all below the pivot. + DEC_SIGN_PIVOT - 1 - mag + } else { + DEC_SIGN_PIVOT + mag + }; Some(Self(key)) } @@ -426,14 +459,32 @@ impl ObjKey { /// /// [`encode_decimal`]: Self::encode_decimal pub fn decode_decimal(self) -> bigdecimal::BigDecimal { - let magnitude = self.0 & DEC_MANTISSA_MASK; - let scale = ((self.0 >> DEC_SCALE_SHIFT) & DEC_SCALE_MASK) as i64; - let negative = (self.0 >> DEC_SIGN_SHIFT) & 1 == 1; - let mut mantissa = num_bigint::BigInt::from(magnitude); + use num_bigint::BigInt; + + if self.0 == DEC_SIGN_PIVOT { + return bigdecimal::BigDecimal::from(0); + } + + let (negative, mag) = if self.0 >= DEC_SIGN_PIVOT { + (false, self.0 - DEC_SIGN_PIVOT) + } else { + (true, DEC_SIGN_PIVOT - 1 - self.0) + }; + + let significand = mag & DEC_SIG_MASK; + let biased_exp = (mag >> DEC_SIG_BITS) as i64; + let exp10 = biased_exp - DEC_EXP_BIAS; + + // value = significand × 10^(exp10 - (DEC_DIGITS - 1)). + // As BigDecimal: mantissa = ±significand, scale = (DEC_DIGITS-1) - exp10. + let mut mantissa = BigInt::from(significand); if negative { mantissa = -mantissa; } - bigdecimal::BigDecimal::from_bigint(mantissa, scale) + let scale = (DEC_DIGITS as i64 - 1) - exp10; + // `normalized()` strips the left-alignment padding so output is minimal + // (1.5, not 1.500000000000000). + bigdecimal::BigDecimal::from_bigint(mantissa, scale).normalized() } // ---- Boolean encoding ---- @@ -1854,57 +1905,33 @@ mod tests { } #[test] - fn decimal_max_scale_boundary() { - // 63 fractional digits is the max scale that fits. - let s = format!("0.{}1", "0".repeat(62)); // scale 63 - let v = bd(&s); - let key = ObjKey::encode_decimal(&v).expect("scale 63 should fit"); - assert_eq!(key.decode_decimal(), v); - - // scale 64 does not fit. 
- let s_over = format!("0.{}1", "0".repeat(63)); // scale 64 - assert!(ObjKey::encode_decimal(&bd(&s_over)).is_none()); - } - - #[test] - fn decimal_mantissa_boundary() { - // |mantissa| just below 2^57 fits; at/above 2^57 does not. - let max_mantissa = (1i128 << 57) - 1; - let fits = bd(&max_mantissa.to_string()); - assert!(ObjKey::encode_decimal(&fits).is_some()); - assert_eq!( - ObjKey::encode_decimal(&fits).unwrap().decode_decimal(), - fits - ); + fn decimal_significant_digit_boundary() { + // Up to DEC_DIGITS (17) significant digits fit; an 18th spills. + let d17 = bd("12345678901234567"); // 17 significant digits + assert!(ObjKey::encode_decimal(&d17).is_some()); + assert_decimal_roundtrip("12345678901234567"); + assert_decimal_roundtrip("1.2345678901234567"); // same 17 digits, fractional - let over = bd(&(1i128 << 57).to_string()); - assert!(ObjKey::encode_decimal(&over).is_none()); + let d18 = bd("123456789012345678"); // 18 significant digits + assert!(ObjKey::encode_decimal(&d18).is_none()); + let d18_frac = bd("1.23456789012345678"); + assert!(ObjKey::encode_decimal(&d18_frac).is_none()); - // A fractional value whose mantissa overflows also falls back. - let frac_over = bd("1444115188.07585588"); // mantissa 144411518807585588 > 2^57 - assert!(ObjKey::encode_decimal(&frac_over).is_none()); + // A 20-digit value (exceeds u64) also spills via the to_u64 miss. + assert!(ObjKey::encode_decimal(&bd("12345678901234567890")).is_none()); } #[test] - fn decimal_fold_overflow_falls_back_to_arena() { - // Integer-valued decimals normalize to a negative exponent (1e18 -> - // mantissa 1, scale -18); encode folds them back to scale 0. The fold - // can push the mantissa over the 57-bit budget, which must fall back to - // the arena (None) — a path the scale-0 boundary cases don't exercise. + fn decimal_exponent_boundary() { + // exp10 in [-32, 31] fits; outside spills. A single-digit value's exp10 + // equals its power of ten. 
+ assert!(ObjKey::encode_decimal(&bd("1e31")).is_some()); // exp10 = 31 + assert_decimal_roundtrip("1e31"); + assert!(ObjKey::encode_decimal(&bd("1e32")).is_none()); // exp10 = 32, out - // 1e17 (< 2^57 ≈ 1.44e17) folds and still fits inline, round-tripping. - assert!(ObjKey::encode_decimal(&bd("100000000000000000")).is_some()); - assert_decimal_roundtrip("100000000000000000"); - - // 1e18 / 1e19 fold (scale -18 / -19, within the fold limit) but the - // folded mantissa exceeds 57 bits → arena fallback. - assert!(ObjKey::encode_decimal(&bd("1000000000000000000")).is_none()); - assert!(ObjKey::encode_decimal(&bd("10000000000000000000")).is_none()); - - // Past the fold-exponent limit (scale < -DEC_FOLD_EXP_LIMIT): early - // arena fallback, without attempting the 10^n multiply. - assert!(ObjKey::encode_decimal(&bd("100000000000000000000")).is_none()); // 1e20 - assert!(ObjKey::encode_decimal(&bd("1e30")).is_none()); + assert!(ObjKey::encode_decimal(&bd("1e-32")).is_some()); // exp10 = -32 + assert_decimal_roundtrip("1e-32"); + assert!(ObjKey::encode_decimal(&bd("1e-33")).is_none()); // exp10 = -33, out } #[test] @@ -1915,4 +1942,90 @@ mod tests { assert_eq!(pos.decode_decimal(), bd("19.99")); assert_eq!(neg.decode_decimal(), bd("-19.99")); } + + #[test] + fn decimal_zero_sits_between_signs() { + let neg = ObjKey::encode_decimal(&bd("-0.0001")).unwrap(); + let zero = ObjKey::encode_decimal(&bd("0")).unwrap(); + let pos = ObjKey::encode_decimal(&bd("0.0001")).unwrap(); + assert!(neg < zero, "negatives sort below zero"); + assert!(zero < pos, "zero sorts below positives"); + } + + /// Build a broad, deterministic spread of distinct decimal values for the + /// order-preservation property test: every (sign, coefficient, scale) combo + /// that stays inline-eligible. 
+ fn property_test_decimals() -> Vec { + use bigdecimal::BigDecimal; + use num_bigint::BigInt; + let coeffs: [i64; 9] = [1, 2, 7, 9, 15, 100, 999, 12345, 9999999999999999]; + let scales: [i64; 13] = [-15, -8, -3, -1, 0, 1, 2, 3, 5, 8, 12, 20, 28]; + let mut out = vec![BigDecimal::from(0)]; + for &c in &coeffs { + for &s in &scales { + let v = BigDecimal::from_bigint(BigInt::from(c), s); + out.push(v.clone()); + out.push(-v); + } + } + out + } + + #[test] + fn decimal_encoding_is_order_preserving() { + // The headline invariant: raw u64 key order == numeric order. Sort the + // value set numerically, then assert encoded keys are strictly ascending + // across distinct values (and equal across numerically-equal ones). + let mut values = property_test_decimals(); + // Numeric sort (BigDecimal Ord compares by value). + values.sort(); + + let mut prev: Option<(bigdecimal::BigDecimal, ObjKey)> = None; + for v in values { + let key = ObjKey::encode_decimal(&v) + .unwrap_or_else(|| panic!("{v} should be inline-eligible in the property set")); + // Decode must round-trip to the same numeric value. + assert_eq!(key.decode_decimal(), v, "round-trip failed for {v}"); + if let Some((pv, pk)) = prev { + use std::cmp::Ordering; + match v.cmp(&pv) { + Ordering::Greater => assert!( + key.as_u64() > pk.as_u64(), + "order broken: {pv} (key {}) !< {v} (key {})", + pk.as_u64(), + key.as_u64() + ), + Ordering::Equal => assert_eq!( + key.as_u64(), + pk.as_u64(), + "equal values must share a key: {pv} vs {v}" + ), + Ordering::Less => unreachable!("values are sorted ascending"), + } + } + prev = Some((v, key)); + } + } + + #[test] + fn decimal_encoding_pairwise_monotonic() { + // Exhaustive pairwise check: for every ordered pair, the sign of the + // numeric comparison matches the sign of the key comparison. 
+ let values = property_test_decimals(); + let keyed: Vec<_> = values + .iter() + .map(|v| (v.clone(), ObjKey::encode_decimal(v).expect("inline-eligible").as_u64())) + .collect(); + for (a, ka) in &keyed { + for (b, kb) in &keyed { + assert_eq!( + a.cmp(b), + ka.cmp(kb), + "numeric {a} vs {b} ({:?}) disagrees with key {ka} vs {kb} ({:?})", + a.cmp(b), + ka.cmp(kb), + ); + } + } + } } From b23c64fce7209589ab4cce455737dd5f3cd0e827 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 17:53:54 -0400 Subject: [PATCH 15/25] feat(query): admit inline decimals to range / ORDER BY pushdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that inline decimal keys are order-preserving, flip the fast-path guards that excluded them to instead treat them as o_key-order-comparable: - is_post_desc_orderable: admit XSD_DECIMAL_INLINE, so the reverse-POST ORDER BY ... LIMIT fast path eliminates the sort for decimal predicates. - fast_count numeric-compare (base + overlay lanes): encode the threshold into the decimal key space (encode_numeric_threshold_for_otype gains XSD_DECIMAL_INLINE arms for Decimal/Long/BigInt thresholds) and compare keys directly. New otype_okey_order_comparable helper centralizes the comparable set {INTEGER, DOUBLE, DECIMAL_INLINE}. - fast_star_const_order_topk: compare inline-decimal rows against an integer threshold's decimal key. Mixed-predicate safety is preserved: a predicate with both inline and arena (NUM_BIG) decimals spans two o_types, so the single-o_type uniformity checks still bail to the general path — an inline-only scan never drops arena rows. A double threshold against decimal rows, or a threshold too large to encode inline, also declines. Cross-form integer thresholds are exact (10 and 10.00 share a key). 
Integration test: ORDER BY (asc + DESC LIMIT), SELECT range FILTER, and COUNT with decimal and integer thresholds over a decimal predicate all return numerically correct results — including 0.05 vs 0.5, which the prior equality-keyed layout ordered wrong. --- fluree-db-api/tests/it_decimal_exactness.rs | 119 ++++++++++++++++++ fluree-db-query/src/fast_count.rs | 77 ++++++++---- fluree-db-query/src/fast_path_common.rs | 25 ++-- .../src/fast_star_const_order_topk.rs | 43 +++++-- 4 files changed, 217 insertions(+), 47 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index a0f42718ea..4d3dff05cc 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1140,3 +1140,122 @@ async fn inline_decimal_results_match_novelty_differential() { ); } } + +#[tokio::test] +async fn inline_decimal_order_by_and_range_are_numeric_after_reindex() { + // Order-preserving inline decimal keys: ORDER BY and range filters on a + // decimal predicate must use NUMERIC order, not scale-broken key order. + // 0.05 vs 0.5 (different scales) and negatives are the cases the old + // equality-keyed layout got wrong. + let fluree = memory_fluree(); + let ledger_id = "decimal/inline-order:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:v 0.5 . + ex:b ex:v 0.05 . + ex:c ex:v -1 . + ex:d ex:v 2 . + ex:e ex:v 19.99 . + ex:f ex:v -0.01 . + ex:g ex:v 1000.5 . + } + ", + ) + .await; + + let root = full_rebuild_publish_decode_root(&fluree, ledger_id).await; + assert_eq!( + root.decimal_encoding(), + fluree_db_core::DecimalEncoding::InlineWhenFits + ); + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); + + // 1. Plain ORDER BY ascending — full numeric order across signs and scales. 
+ let asc = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v } ORDER BY ?v", + ) + .await + .expect("order by asc"); + let asc_json = asc.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&asc_json, "v"), + vec!["-1", "-0.01", "0.05", "0.5", "2", "19.99", "1000.5"], + "ORDER BY must be numeric (0.05 < 0.5, negatives first)" + ); + + // 2. ORDER BY DESC LIMIT — exercises the reverse-POST top-k fast path. + let desc = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v } ORDER BY DESC(?v) LIMIT 3", + ) + .await + .expect("order by desc limit"); + let desc_json = desc.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&desc_json, "v"), + vec!["1000.5", "19.99", "2"], + "ORDER BY DESC LIMIT must return the numerically largest values" + ); + + // 3. SELECT with a range FILTER — numeric subset. + let filtered = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v FILTER(?v > 0.1) } ORDER BY ?v", + ) + .await + .expect("range filter"); + let filtered_json = filtered.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&filtered_json, "v"), + vec!["0.5", "2", "19.99", "1000.5"], + "FILTER(?v > 0.1) must exclude 0.05 and the negatives" + ); + + // 4. COUNT with a range FILTER — exercises the numeric-compare COUNT fast path. + let counted = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT (COUNT(?s) AS ?n) WHERE { ?s ex:v ?v FILTER(?v > 0.1) }", + ) + .await + .expect("count filter"); + let counted_json = counted.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&counted_json, "n"), + vec!["4"], + "COUNT over a decimal range filter must match the four values > 0.1" + ); + + // 5. COUNT with an integer threshold against decimal rows (cross-form). 
+ let counted_int = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT (COUNT(?s) AS ?n) WHERE { ?s ex:v ?v FILTER(?v >= 2) }", + ) + .await + .expect("count int threshold"); + let counted_int_json = counted_int.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&counted_int_json, "n"), + vec!["3"], + "FILTER(?v >= 2) over decimals must count 2, 19.99, 1000.5" + ); +} diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index e7b091aa49..d506e184e4 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -340,8 +340,8 @@ fn count_rows_for_predicate_numeric_compare_post( // Same o_type at both ends ⇒ uniform o_type (POST sorts o_type before o_key). if min_ot == max_ot { let otype = OType::from_u16(min_ot); - if !matches!(otype, OType::XSD_INTEGER | OType::XSD_DOUBLE) { - // Uniformly unsupported (e.g. all-decimal predicate): the leaf + if !otype_okey_order_comparable(otype) { + // Uniformly not o_key-comparable (e.g. arena NUM_BIG): the leaf // scan below would bail on its first leaflet anyway — defer // now without opening any leaves. return Ok(None); @@ -385,16 +385,24 @@ fn count_rows_for_predicate_numeric_compare_post( /// per-leaflet directory must be consulted for this predicate's first/last key). /// Returns `None` if there are no leaves (an empty predicate — the caller's total is /// 0) or, defensively, if a boundary leaf yields no matching leaflet. +/// o_types whose `o_key` order equals numeric order, so a `?o K` count can +/// compare encoded keys directly: canonical-width integers, doubles, and the +/// order-preserving inline decimals. (Other integer widths and arena NUM_BIG are +/// numeric but not o_key-comparable.) 
+fn otype_okey_order_comparable(ot: OType) -> bool { + matches!( + ot, + OType::XSD_INTEGER | OType::XSD_DOUBLE | OType::XSD_DECIMAL_INLINE + ) +} + /// True if this o_type is numeric but cannot be compared by encoded o_key in -/// the numeric-COUNT lanes (non-canonical integer widths, floats, decimals, -/// arena-keyed NUM_BIG): rows of these kinds force the count to defer. +/// the numeric-COUNT lanes (non-canonical integer widths, floats, arena-keyed +/// NUM_BIG): rows of these kinds force the count to defer. fn otype_unsupported_numeric(raw: u16) -> bool { let ot = OType::from_u16(raw); - // XSD_DECIMAL_INLINE is numeric but equality-keyed (o_key is not value - // ordered), so it can't be compared by o_key here — treat it as unsupported - // (defer), exactly like arena NUM_BIG and the non-canonical integer widths. (ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW || ot == OType::XSD_DECIMAL_INLINE) - && !matches!(ot, OType::XSD_INTEGER | OType::XSD_DOUBLE) + && !otype_okey_order_comparable(ot) } fn predicate_post_global_extent( @@ -498,7 +506,7 @@ fn count_numeric_compare_in_leaf_slice( return Ok(None); }; let otype = OType::from_u16(raw_otype); - if !matches!(otype, OType::XSD_INTEGER | OType::XSD_DOUBLE) { + if !otype_okey_order_comparable(otype) { return Ok(None); } let threshold_key = match encode_numeric_threshold_for_otype(otype, threshold)? { @@ -536,6 +544,13 @@ fn count_numeric_compare_in_leaf_slice( } fn encode_numeric_threshold_for_otype(otype: OType, threshold: &FlakeValue) -> Result> { + use bigdecimal::BigDecimal; + // Encode the threshold into the row o_type's key space. Inline decimals use + // the order-preserving decimal codec, so a `>`/`<` comparison of `o_key`s is + // exact: an integer/decimal threshold and a numerically-equal stored decimal + // encode identically, so cross-form (`?price > 10` over decimal rows) is + // correct. 
A threshold that doesn't fit inline (or a double threshold against + // decimal rows) yields `None` → the caller declines the fast path. let key = match (otype, threshold) { (OType::XSD_INTEGER, FlakeValue::Long(n)) => ObjKey::encode_i64(*n).as_u64(), (OType::XSD_DOUBLE, FlakeValue::Long(n)) => ObjKey::encode_f64(*n as f64) @@ -544,6 +559,22 @@ fn encode_numeric_threshold_for_otype(otype: OType, threshold: &FlakeValue) -> R (OType::XSD_DOUBLE, FlakeValue::Double(d)) => ObjKey::encode_f64(*d) .map_err(|_| QueryError::execution("cannot encode f64 threshold".to_string()))? .as_u64(), + (OType::XSD_DECIMAL_INLINE, FlakeValue::Decimal(d)) => match ObjKey::encode_decimal(d) { + Some(k) => k.as_u64(), + None => return Ok(None), + }, + (OType::XSD_DECIMAL_INLINE, FlakeValue::Long(n)) => { + match ObjKey::encode_decimal(&BigDecimal::from(*n)) { + Some(k) => k.as_u64(), + None => return Ok(None), + } + } + (OType::XSD_DECIMAL_INLINE, FlakeValue::BigInt(b)) => { + match ObjKey::encode_decimal(&BigDecimal::from(b.as_ref().clone())) { + Some(k) => k.as_u64(), + None => return Ok(None), + } + } _ => return Ok(None), }; Ok(Some(key)) @@ -649,9 +680,10 @@ fn count_numeric_compare_overlay_parallel( ) -> Result> { let tk_int = encode_numeric_threshold_for_otype(OType::XSD_INTEGER, threshold)?; let tk_dbl = encode_numeric_threshold_for_otype(OType::XSD_DOUBLE, threshold)?; + let tk_dec = encode_numeric_threshold_for_otype(OType::XSD_DECIMAL_INLINE, threshold)?; - // Pre-check the base predicate's POST extent: if the base rows are - // uniformly an unsupported o_type (e.g. all-decimal), or the threshold + // Pre-check the base predicate's POST extent: if the base rows are uniformly + // an o_type we can't compare by o_key (e.g. arena NUM_BIG), or the threshold // can't encode for the uniform supported type, the full scan below is // doomed — defer immediately instead of scanning every partition first. 
// (Unsupported values arriving only via novelty are still caught by the @@ -662,13 +694,9 @@ fn count_numeric_compare_overlay_parallel( match OType::from_u16(min_ot) { OType::XSD_INTEGER if tk_int.is_none() => return Ok(None), OType::XSD_DOUBLE if tk_dbl.is_none() => return Ok(None), - OType::XSD_INTEGER | OType::XSD_DOUBLE => {} - ot if ot.is_numeric() - || ot == OType::NUM_BIG_OVERFLOW - || ot == OType::XSD_DECIMAL_INLINE => - { - return Ok(None) - } + OType::XSD_DECIMAL_INLINE if tk_dec.is_none() => return Ok(None), + OType::XSD_INTEGER | OType::XSD_DOUBLE | OType::XSD_DECIMAL_INLINE => {} + ot if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => return Ok(None), _ => {} } } else if otype_unsupported_numeric(min_ot) || otype_unsupported_numeric(max_ot) { @@ -680,9 +708,10 @@ fn count_numeric_compare_overlay_parallel( } // Numeric o_types this lane can't compare by o_key (other integer-family - // widths, floats, decimals — arena-keyed NUM_BIG has no value order at - // all) must defer the whole count: treating them as non-matches would - // silently undercount. Mirrors the base lane's per-leaflet Ok(None) bail. + // widths, and arena-keyed NUM_BIG which has no value order at all) must defer + // the whole count: treating them as non-matches would silently undercount. + // Inline decimals ARE comparable (order-preserving key). Mirrors the base + // lane's per-leaflet Ok(None) bail. 
let saw_unsupported_numeric = std::sync::atomic::AtomicBool::new(false); let count = parallel_overlay_psot_filter_count( ctx, @@ -695,10 +724,8 @@ fn count_numeric_compare_overlay_parallel( let tk = match ot { OType::XSD_INTEGER => tk_int, OType::XSD_DOUBLE => tk_dbl, - _ if ot.is_numeric() - || ot == OType::NUM_BIG_OVERFLOW - || ot == OType::XSD_DECIMAL_INLINE => - { + OType::XSD_DECIMAL_INLINE => tk_dec, + _ if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => { saw_unsupported_numeric.store(true, std::sync::atomic::Ordering::Relaxed); return false; } diff --git a/fluree-db-query/src/fast_path_common.rs b/fluree-db-query/src/fast_path_common.rs index e512b5aede..5077506f0c 100644 --- a/fluree-db-query/src/fast_path_common.rs +++ b/fluree-db-query/src/fast_path_common.rs @@ -283,21 +283,28 @@ pub fn cursor_projection_otype_okey() -> ColumnProjection { /// by insertion order, not lexicographic value order; /// - lang strings (tag `11`); /// - `GEO_POINT` (packed lat/long — not a linear value order) and `BLANK_NODE`; -/// - overflow big numerics / JSON / vector arena handles (equality-only); -/// - inline decimals (`XSD_DECIMAL_INLINE`, 0x0020): the packed -/// `(sign, scale, mantissa)` is equality-keyed, NOT value-ordered, so it is -/// outside the `is_numeric` range above and must never be admitted here. +/// - overflow big numerics / JSON / vector arena handles (equality-only). +/// +/// Inline decimals (`XSD_DECIMAL_INLINE`) ARE admitted: their key is an +/// order-preserving base-10 float code (raw `u64` order == numeric order), so a +/// single-`o_type` scan yields them in value order like the other numerics. /// /// Within one `o_type`, this equals the SPARQL `ORDER BY` order; mixing -/// `o_type`s under one predicate is rejected by the operator at runtime. 
+/// `o_type`s under one predicate is rejected by the operator at runtime — which +/// also means a predicate with both inline and arena (NUM_BIG) decimals can't +/// use this path, so the inline-only scan never silently drops arena rows. #[inline] pub const fn is_post_desc_orderable(o_type: u16) -> bool { let ot = OType::from_u16(o_type); // XSD_BOOLEAN (0x0002), the signed/unsigned/constrained integers and floats - // (is_numeric: 0x0003..=0x0012), and the temporal + duration range - // (is_temporal: XSD_DATE 0x0013..=XSD_DURATION 0x001D). Excludes GEO_POINT - // (0x001E), BLANK_NODE (0x001F), and every dict-backed/lang/arena type. - o_type == OType::XSD_BOOLEAN.as_u16() || ot.is_numeric() || ot.is_temporal() + // (is_numeric: 0x0003..=0x0012), the temporal + duration range (is_temporal: + // XSD_DATE 0x0013..=XSD_DURATION 0x001D), and inline decimals + // (XSD_DECIMAL_INLINE 0x0020, order-preserving). Excludes GEO_POINT (0x001E), + // BLANK_NODE (0x001F), and every dict-backed/lang/arena type. + o_type == OType::XSD_BOOLEAN.as_u16() + || ot.is_numeric() + || ot.is_temporal() + || o_type == OType::XSD_DECIMAL_INLINE.as_u16() } // --------------------------------------------------------------------------- diff --git a/fluree-db-query/src/fast_star_const_order_topk.rs b/fluree-db-query/src/fast_star_const_order_topk.rs index 4e7004bec0..1d178475d2 100644 --- a/fluree-db-query/src/fast_star_const_order_topk.rs +++ b/fluree-db-query/src/fast_star_const_order_topk.rs @@ -337,11 +337,12 @@ where /// Returns `Some(subjects)` whose value satisfies `> threshold`, or `None` to /// decline the fast path (the caller falls back) when a row carries a numeric -/// o_type this lane can't compare by `o_key` — only `XSD_INTEGER`/`XSD_DOUBLE` -/// are o_key-order-comparable. 
Equality-keyed inline decimals -/// (`XSD_DECIMAL_INLINE`) and arena `NUM_BIG_OVERFLOW` would be silently -/// dropped by a naive `_ => false`, undercounting the filter; declining keeps -/// the result correct via the general path. +/// o_type this lane can't compare by `o_key` against this threshold — +/// `XSD_INTEGER`, `XSD_DOUBLE`, and order-preserving inline decimals +/// (`XSD_DECIMAL_INLINE`, when the threshold encodes to a decimal key) are +/// comparable. Arena `NUM_BIG_OVERFLOW` (and decimals under a double threshold) +/// would be silently dropped by a naive `_ => false`, undercounting the filter; +/// declining keeps the result correct via the general path. fn filter_subjects_by_numeric_gt( store: &Arc, g_id: GraphId, @@ -350,16 +351,26 @@ fn filter_subjects_by_numeric_gt( to_t: i64, threshold: &FlakeValue, ) -> Result>> { + use fluree_db_core::value_id::ObjKey; // Only support numeric thresholds used in benchmark filters. let (thr_i, thr_d) = match threshold { FlakeValue::Long(n) => (*n, *n as f64), FlakeValue::Double(d) => (*d as i64, *d), _ => return Ok(Some(Vec::new())), }; - let thr_i_key = fluree_db_core::value_id::ObjKey::encode_i64(thr_i).as_u64(); - let thr_d_key = fluree_db_core::value_id::ObjKey::encode_f64(thr_d) + let thr_i_key = ObjKey::encode_i64(thr_i).as_u64(); + let thr_d_key = ObjKey::encode_f64(thr_d) .map_err(|_| QueryError::execution("cannot encode f64 threshold".to_string()))? .as_u64(); + // Decimal threshold key, for comparing inline-decimal rows. Only an integer + // threshold maps cleanly onto the decimal key space (10 == 10.00); a double + // threshold against decimal rows isn't compared here (decimal rows decline). 
+ let thr_dec_key: Option = match threshold { + FlakeValue::Long(n) => { + ObjKey::encode_decimal(&bigdecimal::BigDecimal::from(*n)).map(ObjKey::as_u64) + } + _ => None, + }; let mut keep: FxHashSet = FxHashSet::default(); let mut saw_uncomparable_numeric = false; @@ -374,12 +385,18 @@ fn filter_subjects_by_numeric_gt( let over_threshold = match ot { OType::XSD_INTEGER => batch.o_key.get(i) > thr_i_key, OType::XSD_DOUBLE => batch.o_key.get(i) > thr_d_key, - // Numeric but not o_key-comparable (inline decimals, arena big - // numerics, other integer widths/floats): can't decide here. - _ if ot.is_numeric() - || ot == OType::NUM_BIG_OVERFLOW - || ot == OType::XSD_DECIMAL_INLINE => - { + // Inline decimals are order-preserving: compare keys when the + // threshold encodes to a decimal key, else decline. + OType::XSD_DECIMAL_INLINE => match thr_dec_key { + Some(k) => batch.o_key.get(i) > k, + None => { + saw_uncomparable_numeric = true; + false + } + }, + // Numeric but not o_key-comparable (arena big numerics, other + // integer widths/floats): can't decide here. 
+ _ if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => { saw_uncomparable_numeric = true; false } From c94833e68262964c70e629b1e1075b2542db87b6 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 17:56:45 -0400 Subject: [PATCH 16/25] style(core): digit separators in decimal property-test literal --- fluree-db-core/src/value_id.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index d01e66427c..3bdecd51ce 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -1958,7 +1958,7 @@ mod tests { fn property_test_decimals() -> Vec { use bigdecimal::BigDecimal; use num_bigint::BigInt; - let coeffs: [i64; 9] = [1, 2, 7, 9, 15, 100, 999, 12345, 9999999999999999]; + let coeffs: [i64; 9] = [1, 2, 7, 9, 15, 100, 999, 12345, 9_999_999_999_999_999]; let scales: [i64; 13] = [-15, -8, -3, -1, 0, 1, 2, 3, 5, 8, 12, 20, 28]; let mut out = vec![BigDecimal::from(0)]; for &c in &coeffs { From f6ca7f25e906708f25d8bd6febbbd6c2e5ba18d2 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 18:22:00 -0400 Subject: [PATCH 17/25] feat(query): admit inline decimals to numeric MIN/MAX fast path MIN/MAX(?decimal) can now read the predicate's boundary o_keys instead of scanning + decoding every row: the inline decimal key is order-preserving, so a predicate's first/last POST key is its min/max value. minmax_numeric_post admits XSD_DECIMAL_INLINE (the single-o_type checks still bail a mixed inline+arena predicate), and numeric_binding_from_otype_okey decodes the boundary key to a FlakeValue::Decimal. Integration test extended: MIN/MAX over the decimal predicate returns the numerically smallest/largest values (-1 and 1000.5). 
--- fluree-db-api/tests/it_decimal_exactness.rs | 21 +++++++++++++++++++++ fluree-db-query/src/fast_min_max_string.rs | 9 ++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 4d3dff05cc..16bc67a6bf 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1258,4 +1258,25 @@ async fn inline_decimal_order_by_and_range_are_numeric_after_reindex() { vec!["3"], "FILTER(?v >= 2) over decimals must count 2, 19.99, 1000.5" ); + + // 6. MIN / MAX — exercises the boundary-key numeric MIN/MAX fast path. + let minmax = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT (MIN(?v) AS ?lo) (MAX(?v) AS ?hi) WHERE { ?s ex:v ?v }", + ) + .await + .expect("min/max"); + let minmax_json = minmax.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&minmax_json, "lo"), + vec!["-1"], + "MIN over decimals must be the most negative value" + ); + assert_eq!( + binding_values(&minmax_json, "hi"), + vec!["1000.5"], + "MAX over decimals must be the largest value" + ); } diff --git a/fluree-db-query/src/fast_min_max_string.rs b/fluree-db-query/src/fast_min_max_string.rs index f69bcf6fff..5642b752e2 100644 --- a/fluree-db-query/src/fast_min_max_string.rs +++ b/fluree-db-query/src/fast_min_max_string.rs @@ -272,7 +272,10 @@ fn minmax_numeric_post( MinMaxMode::Max => read_ordered_key_v2(RunSortOrder::Post, &entry.last_key), }; let ot = OType::from_u16(rr.o_type); - if !ot.is_numeric() { + // Numeric kinds plus order-preserving inline decimals: their boundary + // o_key is the min/max value. (The single-o_type checks below ensure a + // predicate mixing inline and arena decimals declines.) 
+ if !ot.is_numeric() && ot != OType::XSD_DECIMAL_INLINE { return Ok(None); } @@ -307,6 +310,10 @@ fn numeric_binding_from_otype_okey(store: &BinaryIndexStore, o_type: u16, o_key: DecodeKind::F64 => { Binding::lit(FlakeValue::Double(ObjKey::from_u64(o_key).decode_f64()), dt) } + DecodeKind::Decimal => Binding::lit( + FlakeValue::Decimal(Box::new(ObjKey::from_u64(o_key).decode_decimal())), + dt, + ), _ => Binding::Unbound, } } From 3a5e4ccb63e9b60d0297a4ca99c6139af5f67a78 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 18:59:16 -0400 Subject: [PATCH 18/25] feat(query): range-scan pushdown for uniform inline-decimal predicates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A SELECT range filter on an object (FILTER(?o > k)) previously switched to the POST index but post-filtered every row — no o_key-range narrowing, because numeric comparison is cross-type (an integer bound matches integer, double, and decimal rows under different o_types, so narrowing to one o_type's key range would drop the others). Range narrowing was therefore gated to temporal types only. When the predicate's POST extent is uniformly XSD_DECIMAL_INLINE (min_o_type == max_o_type), every value is an inline decimal with no arena spill and no other types — so the cross-type hazard is absent and the scan can seek the decimal key range. The order-preserving codec makes that range a contiguous, value-sorted run, turning O(predicate size) into O(log n + |result|). - fast_count::predicate_uniform_o_type: cheap manifest-extent probe (the same one COUNT uses; opens <=2 boundary leaves only when a predicate shares a leaf) exposing the uniform-o_type precondition. encode_numeric_threshold_for_otype is now pub(crate) for encoding bounds into the decimal key space. 
- binary_scan open(): when the predicate is uniform inline-decimal and a bound is numeric, encode bounds as decimal keys and set the cursor's o_key range; the temporal narrowing path is unchanged (and skipped when this fires). The existing post-filter stays as the correctness backstop, and overlay ops are windowed to the range by existing machinery. Tests: a uniform-decimal predicate narrows and stays correct (range filter, COUNT, ORDER BY); a MIXED int+decimal predicate must NOT narrow — FILTER(?v > 4) keeps the integer 5 alongside decimals 7.5/10.5, and ORDER BY interleaves both types numerically. --- fluree-db-api/tests/it_decimal_exactness.rs | 69 +++++++++++ fluree-db-query/src/binary_scan.rs | 124 ++++++++++++++------ fluree-db-query/src/fast_count.rs | 23 +++- 3 files changed, 178 insertions(+), 38 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 16bc67a6bf..2584129116 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1280,3 +1280,72 @@ async fn inline_decimal_order_by_and_range_are_numeric_after_reindex() { "MAX over decimals must be the largest value" ); } + +#[tokio::test] +async fn mixed_int_decimal_predicate_range_filter_is_correct() { + // Correctness guard for decimal range-scan narrowing: a predicate with BOTH + // integer and inline-decimal values spans two o_types, so the uniform-extent + // precondition fails and the scan must NOT narrow to the decimal key range — + // doing so would drop the integer rows. The general post-filter must return + // every numerically-matching value regardless of type. + let fluree = memory_fluree(); + let ledger_id = "decimal/mixed-range:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:v 5 . + ex:b ex:v 10.5 . + ex:c ex:v 2 . + ex:d ex:v 7.5 . + ex:e ex:v 3 . 
+ } + ", + ) + .await; + + full_rebuild_publish_decode_root(&fluree, ledger_id).await; + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); + + // FILTER(?v > 4): must match the integer 5 AND the decimals 7.5, 10.5 — + // three values across two o_types. A decimal-only narrowed scan would miss 5. + let filtered = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v FILTER(?v > 4) } ORDER BY ?v", + ) + .await + .expect("mixed range filter"); + let json = filtered.to_sparql_json(&ledger.snapshot).expect("json"); + let mut got = binding_values(&json, "v"); + got.sort(); + assert_eq!( + got, + vec!["10.5", "5", "7.5"], + "range filter over a mixed int+decimal predicate must keep matches of both types" + ); + + // Full ORDER BY must still interleave both types numerically. + let ordered = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v } ORDER BY ?v", + ) + .await + .expect("mixed order by"); + let oj = ordered.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&oj, "v"), + vec!["2", "3", "5", "7.5", "10.5"], + "ORDER BY over mixed int+decimal must be numerically interleaved" + ); +} diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index 93a01b933c..9e3e522e92 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -1837,52 +1837,102 @@ impl Operator for BinaryScanOperator { let mut range_min_okey: Option = None; let mut range_max_okey: Option = None; let mut range_o_type: Option = None; - if order == RunSortOrder::Post && filter.p_id.is_some() && self.bound_o.is_none() { - if let Some(bounds) = self.object_bounds.as_ref() { - let supports_range = |ot: OType| -> bool { + if order == RunSortOrder::Post && self.bound_o.is_none() { + if let (Some(bounds), Some(p_id)) = (self.object_bounds.as_ref(), filter.p_id) { + // Only numeric bounds can target a decimal 
predicate; gate the + // manifest extent probe (which may open ≤2 boundary leaves) on + // that so temporal/string range scans don't pay for it. + let has_numeric_bound = |b: &Option<(FlakeValue, bool)>| { matches!( - ot, - OType::XSD_DATE - | OType::XSD_DATE_TIME - | OType::XSD_TIME - | OType::XSD_G_YEAR - | OType::XSD_G_YEAR_MONTH - | OType::XSD_G_MONTH - | OType::XSD_G_DAY - | OType::XSD_G_MONTH_DAY + b, + Some(( + FlakeValue::Long(_) | FlakeValue::BigInt(_) | FlakeValue::Decimal(_), + _ + )) ) }; - - let encode = |v: &FlakeValue| -> Option<(u16, u64)> { - let (ot, key) = value_to_otype_okey_simple(v, store_ref).ok()?; - supports_range(ot).then_some((ot.as_u16(), key)) - }; - - let mut ot: Option = None; - if let Some((v, _inclusive)) = bounds.lower.as_ref() { - if let Some((o_type, key)) = encode(v) { - ot = Some(o_type); - range_min_okey = Some(key); + // Numeric range narrowing is unsafe in general (cross-type: + // `?o > 10` matches integer 11 AND decimal 11.5, stored under + // different o_types). But when the predicate is *uniformly* + // inline decimal — manifest extent min_o_type == max_o_type == + // XSD_DECIMAL_INLINE, i.e. every value is an inline decimal with + // no arena spill and no other types — there are no other-typed + // rows to miss, so we can encode the bounds into the + // order-preserving decimal key space and seek the key range. + // The post-filter below stays as the correctness backstop. 
+ let uniform_dec = (has_numeric_bound(&bounds.lower) + || has_numeric_bound(&bounds.upper)) + && crate::fast_count::predicate_uniform_o_type(store_ref, self.g_id, p_id) + == Some(OType::XSD_DECIMAL_INLINE.as_u16()); + if uniform_dec { + let enc = |v: &FlakeValue| { + crate::fast_count::encode_numeric_threshold_for_otype( + OType::XSD_DECIMAL_INLINE, + v, + ) + .ok() + .flatten() + }; + if let Some((v, _inclusive)) = bounds.lower.as_ref() { + range_min_okey = enc(v); + } + if let Some((v, _inclusive)) = bounds.upper.as_ref() { + range_max_okey = enc(v); + } + if range_min_okey.is_some() || range_max_okey.is_some() { + range_o_type = Some(OType::XSD_DECIMAL_INLINE.as_u16()); + filter.o_type = Some(OType::XSD_DECIMAL_INLINE.as_u16()); } } - if let Some((v, _inclusive)) = bounds.upper.as_ref() { - if let Some((o_type, key)) = encode(v) { - if ot.is_some() && ot != Some(o_type) { - // Mixed type bounds; don't attempt range narrowing. - ot = None; - range_min_okey = None; - range_max_okey = None; - } else { + + // Temporal range narrowing (within-type comparison, always safe). + // Skipped if the uniform-decimal branch above already narrowed. 
+ if !uniform_dec { + let supports_range = |ot: OType| -> bool { + matches!( + ot, + OType::XSD_DATE + | OType::XSD_DATE_TIME + | OType::XSD_TIME + | OType::XSD_G_YEAR + | OType::XSD_G_YEAR_MONTH + | OType::XSD_G_MONTH + | OType::XSD_G_DAY + | OType::XSD_G_MONTH_DAY + ) + }; + + let encode = |v: &FlakeValue| -> Option<(u16, u64)> { + let (ot, key) = value_to_otype_okey_simple(v, store_ref).ok()?; + supports_range(ot).then_some((ot.as_u16(), key)) + }; + + let mut ot: Option = None; + if let Some((v, _inclusive)) = bounds.lower.as_ref() { + if let Some((o_type, key)) = encode(v) { ot = Some(o_type); - range_max_okey = Some(key); + range_min_okey = Some(key); + } + } + if let Some((v, _inclusive)) = bounds.upper.as_ref() { + if let Some((o_type, key)) = encode(v) { + if ot.is_some() && ot != Some(o_type) { + // Mixed type bounds; don't attempt range narrowing. + ot = None; + range_min_okey = None; + range_max_okey = None; + } else { + ot = Some(o_type); + range_max_okey = Some(key); + } } } - } - if let Some(o_type) = ot { - range_o_type = Some(o_type); - // Also set the filter o_type so directory-level pre-skip can eliminate non-matching leaflets. - filter.o_type = Some(o_type); + if let Some(o_type) = ot { + range_o_type = Some(o_type); + // Also set the filter o_type so directory-level pre-skip can eliminate non-matching leaflets. + filter.o_type = Some(o_type); + } } } } diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index d506e184e4..089fed2a01 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -405,6 +405,24 @@ fn otype_unsupported_numeric(raw: u16) -> bool { && !otype_okey_order_comparable(ot) } +/// The single `o_type` shared by every row of `p_id` in POST order, or `None` +/// if the predicate is empty or has mixed o_types. Read from the leaf manifest +/// (plus ≤2 boundary leaves) — cheap, no full scan. 
A uniform +/// `XSD_DECIMAL_INLINE` result means every value under the predicate is an +/// inline decimal with no arena spill and no other types, which is the +/// precondition for safely narrowing a numeric range scan by `o_key`. +pub(crate) fn predicate_uniform_o_type( + store: &BinaryIndexStore, + g_id: GraphId, + p_id: u32, +) -> Option { + let leaves = leaf_entries_for_predicate(store, g_id, RunSortOrder::Post, p_id); + match predicate_post_global_extent(store, p_id, leaves).ok()? { + Some((min_ot, _, max_ot, _)) if min_ot == max_ot => Some(min_ot), + _ => None, + } +} + fn predicate_post_global_extent( store: &BinaryIndexStore, p_id: u32, @@ -543,7 +561,10 @@ fn count_numeric_compare_in_leaf_slice( Ok(Some(total)) } -fn encode_numeric_threshold_for_otype(otype: OType, threshold: &FlakeValue) -> Result> { +pub(crate) fn encode_numeric_threshold_for_otype( + otype: OType, + threshold: &FlakeValue, +) -> Result> { use bigdecimal::BigDecimal; // Encode the threshold into the row o_type's key space. Inline decimals use // the order-preserving decimal codec, so a `>`/`<` comparison of `o_key`s is From c5058abea631c7a0b122946de0a2da913d608c54 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 20:10:53 -0400 Subject: [PATCH 19/25] feat(query): generalize range pushdown to integer and double predicates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline integer (encode_i64) and double/float (encode_f64) keys have always been order-preserving, so the uniform-predicate range pushdown built for decimals extends to them with no format change — it works on any existing index, including un-reindexed ledgers. - otype_okey_order_comparable now admits all inline integer subtypes (is_integer; overflow carries a different arena o_type, so an integer-subtype o_type guarantees an inline encode_i64 key) plus xsd:double/xsd:float and inline decimals. 
- encode_numeric_threshold_for_otype encodes thresholds into the integer (encode_i64) and float (encode_f64) key spaces, not just decimal. - The COUNT overlay lane family-routes per row (any integer subtype -> the i64 threshold key, double/float -> the f64 key) instead of matching only XSD_INTEGER, so xsd:long/int/short counts now push down too. - BinaryScanOperator SELECT range narrowing accepts any uniform order-preserving numeric predicate (not just decimal), encoding bounds into the predicate's o_type key space. Test: a uniform xsd:integer predicate narrows correctly (ORDER BY across negatives, range FILTER, COUNT). --- fluree-db-api/tests/it_decimal_exactness.rs | 80 ++++++++++++++ fluree-db-query/src/binary_scan.rs | 53 ++++----- fluree-db-query/src/fast_count.rs | 113 ++++++++++++-------- 3 files changed, 176 insertions(+), 70 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 2584129116..ee36d8e1a8 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1349,3 +1349,83 @@ async fn mixed_int_decimal_predicate_range_filter_is_correct() { "ORDER BY over mixed int+decimal must be numerically interleaved" ); } + +#[tokio::test] +async fn inline_integer_range_pushdown_is_correct() { + // Integer keys are order-preserving (encode_i64), so a uniform-integer + // predicate gets the same range/ORDER BY/COUNT pushdown as decimals — and it + // needs no format change (works on any index). This checks correctness of + // the generalized path on a uniform xsd:integer predicate. + let fluree = memory_fluree(); + let ledger_id = "decimal/inline-int-range:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:n 5 . + ex:b ex:n 100 . + ex:c ex:n -3 . + ex:d ex:n 42 . + ex:e ex:n 0 . 
+ } + ", + ) + .await; + + full_rebuild_publish_decode_root(&fluree, ledger_id).await; + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); + + // ORDER BY: numeric order across negatives. + let asc = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?n WHERE { ?s ex:n ?n } ORDER BY ?n", + ) + .await + .expect("order by"); + let asc_json = asc.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&asc_json, "n"), + vec!["-3", "0", "5", "42", "100"], + "integer ORDER BY must be numeric" + ); + + // Range FILTER (the new narrowing path) + COUNT. + let filtered = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?n WHERE { ?s ex:n ?n FILTER(?n > 4) } ORDER BY ?n", + ) + .await + .expect("range filter"); + let fj = filtered.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&fj, "n"), + vec!["5", "42", "100"], + "integer range filter must narrow to values > 4" + ); + + let counted = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT (COUNT(?s) AS ?c) WHERE { ?s ex:n ?n FILTER(?n >= 0) }", + ) + .await + .expect("count"); + let cj = counted.to_sparql_json(&ledger.snapshot).expect("json"); + assert_eq!( + binding_values(&cj, "c"), + vec!["4"], + "COUNT(?n >= 0) over integers must be 4 (0, 5, 42, 100)" + ); +} diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index 9e3e522e92..cc0f3c1c15 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -1839,39 +1839,44 @@ impl Operator for BinaryScanOperator { let mut range_o_type: Option = None; if order == RunSortOrder::Post && self.bound_o.is_none() { if let (Some(bounds), Some(p_id)) = (self.object_bounds.as_ref(), filter.p_id) { - // Only numeric bounds can target a decimal predicate; gate the + // Only numeric bounds can target a numeric predicate; gate the // manifest extent probe (which may open ≤2 boundary 
leaves) on // that so temporal/string range scans don't pay for it. let has_numeric_bound = |b: &Option<(FlakeValue, bool)>| { matches!( b, Some(( - FlakeValue::Long(_) | FlakeValue::BigInt(_) | FlakeValue::Decimal(_), + FlakeValue::Long(_) + | FlakeValue::BigInt(_) + | FlakeValue::Decimal(_) + | FlakeValue::Double(_), _ )) ) }; // Numeric range narrowing is unsafe in general (cross-type: // `?o > 10` matches integer 11 AND decimal 11.5, stored under - // different o_types). But when the predicate is *uniformly* - // inline decimal — manifest extent min_o_type == max_o_type == - // XSD_DECIMAL_INLINE, i.e. every value is an inline decimal with - // no arena spill and no other types — there are no other-typed - // rows to miss, so we can encode the bounds into the - // order-preserving decimal key space and seek the key range. - // The post-filter below stays as the correctness backstop. - let uniform_dec = (has_numeric_bound(&bounds.lower) - || has_numeric_bound(&bounds.upper)) - && crate::fast_count::predicate_uniform_o_type(store_ref, self.g_id, p_id) - == Some(OType::XSD_DECIMAL_INLINE.as_u16()); - if uniform_dec { + // different o_types). But when the predicate is *uniformly* one + // order-preserving numeric type — manifest extent + // min_o_type == max_o_type and that type is o_key-ordered + // (any inline integer subtype, double/float, or inline decimal) — + // there are no other-typed rows to miss, so we encode the bounds + // into that type's key space and seek the key range. The + // post-filter below stays as the correctness backstop. 
+ let numeric_uniform_ot = if has_numeric_bound(&bounds.lower) + || has_numeric_bound(&bounds.upper) + { + crate::fast_count::predicate_uniform_o_type(store_ref, self.g_id, p_id) + .map(OType::from_u16) + .filter(|ot| crate::fast_count::otype_okey_order_comparable(*ot)) + } else { + None + }; + if let Some(pred_ot) = numeric_uniform_ot { let enc = |v: &FlakeValue| { - crate::fast_count::encode_numeric_threshold_for_otype( - OType::XSD_DECIMAL_INLINE, - v, - ) - .ok() - .flatten() + crate::fast_count::encode_numeric_threshold_for_otype(pred_ot, v) + .ok() + .flatten() }; if let Some((v, _inclusive)) = bounds.lower.as_ref() { range_min_okey = enc(v); @@ -1880,14 +1885,14 @@ impl Operator for BinaryScanOperator { range_max_okey = enc(v); } if range_min_okey.is_some() || range_max_okey.is_some() { - range_o_type = Some(OType::XSD_DECIMAL_INLINE.as_u16()); - filter.o_type = Some(OType::XSD_DECIMAL_INLINE.as_u16()); + range_o_type = Some(pred_ot.as_u16()); + filter.o_type = Some(pred_ot.as_u16()); } } // Temporal range narrowing (within-type comparison, always safe). - // Skipped if the uniform-decimal branch above already narrowed. - if !uniform_dec { + // Skipped if the numeric-uniform branch above already narrowed. + if numeric_uniform_ot.is_none() { let supports_range = |ot: OType| -> bool { matches!( ot, diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index 089fed2a01..533d44ad53 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -385,15 +385,21 @@ fn count_rows_for_predicate_numeric_compare_post( /// per-leaflet directory must be consulted for this predicate's first/last key). /// Returns `None` if there are no leaves (an empty predicate — the caller's total is /// 0) or, defensively, if a boundary leaf yields no matching leaflet. 
-/// o_types whose `o_key` order equals numeric order, so a `?o K` count can -/// compare encoded keys directly: canonical-width integers, doubles, and the -/// order-preserving inline decimals. (Other integer widths and arena NUM_BIG are -/// numeric but not o_key-comparable.) -fn otype_okey_order_comparable(ot: OType) -> bool { - matches!( - ot, - OType::XSD_INTEGER | OType::XSD_DOUBLE | OType::XSD_DECIMAL_INLINE - ) +/// o_types whose `o_key` order equals numeric order, so a `?o K` scan can +/// compare encoded keys directly: +/// - **all inline integer subtypes** (`is_integer`): every inline integer is +/// `encode_i64`-ordered; values that overflow `i64` carry the arena +/// `NUM_BIG_OVERFLOW` o_type instead, so an integer-subtype o_type guarantees +/// an inline, order-preserving key. +/// - **`xsd:double` / `xsd:float`**: `encode_f64` is total-order. +/// - **inline decimals** (`XSD_DECIMAL_INLINE`): order-preserving base-10 float. +/// +/// Arena `NUM_BIG_OVERFLOW` is numeric but equality-only, so it is excluded. +pub(crate) fn otype_okey_order_comparable(ot: OType) -> bool { + ot.is_integer() + || ot == OType::XSD_DOUBLE + || ot == OType::XSD_FLOAT + || ot == OType::XSD_DECIMAL_INLINE } /// True if this o_type is numeric but cannot be compared by encoded o_key in @@ -573,11 +579,17 @@ pub(crate) fn encode_numeric_threshold_for_otype( // correct. A threshold that doesn't fit inline (or a double threshold against // decimal rows) yields `None` → the caller declines the fast path. let key = match (otype, threshold) { - (OType::XSD_INTEGER, FlakeValue::Long(n)) => ObjKey::encode_i64(*n).as_u64(), - (OType::XSD_DOUBLE, FlakeValue::Long(n)) => ObjKey::encode_f64(*n as f64) - .map_err(|_| QueryError::execution("cannot encode f64 threshold".to_string()))? - .as_u64(), - (OType::XSD_DOUBLE, FlakeValue::Double(d)) => ObjKey::encode_f64(*d) + // Integer-family rows: every inline integer subtype is encode_i64-ordered. 
+ // A non-integer bound (decimal/double) against integer rows can't encode + // exactly here → None → caller post-filters. + (ot, FlakeValue::Long(n)) if ot.is_integer() => ObjKey::encode_i64(*n).as_u64(), + // Float-family rows: encode_f64 (total-order). + (OType::XSD_DOUBLE | OType::XSD_FLOAT, FlakeValue::Long(n)) => { + ObjKey::encode_f64(*n as f64) + .map_err(|_| QueryError::execution("cannot encode f64 threshold".to_string()))? + .as_u64() + } + (OType::XSD_DOUBLE | OType::XSD_FLOAT, FlakeValue::Double(d)) => ObjKey::encode_f64(*d) .map_err(|_| QueryError::execution("cannot encode f64 threshold".to_string()))? .as_u64(), (OType::XSD_DECIMAL_INLINE, FlakeValue::Decimal(d)) => match ObjKey::encode_decimal(d) { @@ -699,39 +711,55 @@ fn count_numeric_compare_overlay_parallel( compare: NumericCompareOp, threshold: &FlakeValue, ) -> Result> { - let tk_int = encode_numeric_threshold_for_otype(OType::XSD_INTEGER, threshold)?; - let tk_dbl = encode_numeric_threshold_for_otype(OType::XSD_DOUBLE, threshold)?; + // One threshold key per order-preserving family. All integer subtypes share + // the encode_i64 key; double/float share encode_f64; inline decimals their + // own codec. `None` means the threshold doesn't encode in that family. + let tk_i64 = encode_numeric_threshold_for_otype(OType::XSD_INTEGER, threshold)?; + let tk_f64 = encode_numeric_threshold_for_otype(OType::XSD_DOUBLE, threshold)?; let tk_dec = encode_numeric_threshold_for_otype(OType::XSD_DECIMAL_INLINE, threshold)?; + // Map a row o_type to its threshold key: `Some(tk)` if the type is + // o_key-comparable (tk may itself be `None` if the threshold didn't encode + // for that family), `None` if the type isn't comparable at all. 
+ let tk_for = |ot: OType| -> Option> { + if ot.is_integer() { + Some(tk_i64) + } else if ot == OType::XSD_DOUBLE || ot == OType::XSD_FLOAT { + Some(tk_f64) + } else if ot == OType::XSD_DECIMAL_INLINE { + Some(tk_dec) + } else { + None + } + }; + // Pre-check the base predicate's POST extent: if the base rows are uniformly // an o_type we can't compare by o_key (e.g. arena NUM_BIG), or the threshold - // can't encode for the uniform supported type, the full scan below is + // can't encode for the uniform supported family, the full scan below is // doomed — defer immediately instead of scanning every partition first. // (Unsupported values arriving only via novelty are still caught by the // per-row flag; novelty is small, so that residual pass is bounded.) let post_leaves = leaf_entries_for_predicate(store, g_id, RunSortOrder::Post, p_id); if let Some((min_ot, _, max_ot, _)) = predicate_post_global_extent(store, p_id, post_leaves)? { if min_ot == max_ot { - match OType::from_u16(min_ot) { - OType::XSD_INTEGER if tk_int.is_none() => return Ok(None), - OType::XSD_DOUBLE if tk_dbl.is_none() => return Ok(None), - OType::XSD_DECIMAL_INLINE if tk_dec.is_none() => return Ok(None), - OType::XSD_INTEGER | OType::XSD_DOUBLE | OType::XSD_DECIMAL_INLINE => {} - ot if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => return Ok(None), - _ => {} + let ot = OType::from_u16(min_ot); + match tk_for(ot) { + Some(Some(_)) => {} // comparable, threshold encodes → proceed + Some(None) => return Ok(None), // comparable family but threshold didn't encode + None if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => return Ok(None), + None => {} // non-numeric uniform → every row a non-match, fine } } else if otype_unsupported_numeric(min_ot) || otype_unsupported_numeric(max_ot) { // Mixed base with an unsupported-numeric boundary (e.g. integer - // rows + decimals): doomed regardless of novelty — defer before - // scanning any partition. 
+ // rows + arena NUM_BIG): doomed regardless of novelty — defer. return Ok(None); } } - // Numeric o_types this lane can't compare by o_key (other integer-family - // widths, and arena-keyed NUM_BIG which has no value order at all) must defer - // the whole count: treating them as non-matches would silently undercount. - // Inline decimals ARE comparable (order-preserving key). Mirrors the base + // Numeric o_types this lane can't compare by o_key (arena-keyed NUM_BIG, + // which has no value order) must defer the whole count: treating them as + // non-matches would silently undercount. All inline integer subtypes, + // doubles/floats, and inline decimals ARE comparable. Mirrors the base // lane's per-leaflet Ok(None) bail. let saw_unsupported_numeric = std::sync::atomic::AtomicBool::new(false); let count = parallel_overlay_psot_filter_count( @@ -742,27 +770,20 @@ fn count_numeric_compare_overlay_parallel( p_id, |_s, o_type, o_key| { let ot = OType::from_u16(o_type); - let tk = match ot { - OType::XSD_INTEGER => tk_int, - OType::XSD_DOUBLE => tk_dbl, - OType::XSD_DECIMAL_INLINE => tk_dec, - _ if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => { + match tk_for(ot) { + Some(Some(tk)) => okey_matches(compare, o_key, tk), + Some(None) => { + // Comparable family but the threshold didn't encode for it + // (e.g. decimal threshold vs integer rows): defer. saw_unsupported_numeric.store(true, std::sync::atomic::Ordering::Relaxed); - return false; + false } - // Genuinely non-numeric object: comparison errors => not a match - _ => return false, - }; - match tk { - Some(tk) => okey_matches(compare, o_key, tk), - None => { - // Threshold not encodable for this row's o_type (e.g. a - // decimal threshold against integer rows): the comparison - // is still numerically valid, so defer rather than - // undercount — mirrors the base lane. 
+ None if ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW => { saw_unsupported_numeric.store(true, std::sync::atomic::Ordering::Relaxed); false } + // Genuinely non-numeric object: comparison errors => not a match. + None => false, } }, )?; From bd30abfeec49320d09aef50e083eecae113421e8 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 20:27:31 -0400 Subject: [PATCH 20/25] fix(query): close range-pushdown gaps from review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Overlay/novelty safety (correctness): numeric range narrowing in the scan operator is now gated on overlay_free_single_graph(). The base manifest extent only proves the BASE rows are uniform; novelty can add a matching value of a different type (e.g. integer 100 to a decimal predicate) whose translated overlay op sorts outside the narrowed o_type/o_key window and would be dropped before the post-filter. With overlay present we fall back to the full base scan + merge + post-filter. (Temporal narrowing needs no gate: a cross-type value can't satisfy a temporal filter, so dropping it is harmless.) - Decimal/big-int COUNT pushdown now actually engages: the numeric-compare detector (extract_simple_numeric_compare_threshold) extracted only Long/Double constants, so FILTER(?v > 0.1) always deferred. Extract Decimal and BigInt too — the fast paths already encode them into the matching key space. - fast_star_const_order_topk numeric filter now declines (Ok(None) -> fallback) on a non-Long/Double threshold instead of returning an empty set; with decimal thresholds now extractable, the empty-set path would have silently undercounted. - Refresh stale docs: OType::XSD_DECIMAL_INLINE and DecodeKind::Decimal now describe the order-preserving base-10 float key; the scan-operator comment documents the temporal + uniform-numeric-no-overlay narrowing cases.
Test: a uniform-decimal base predicate with a cross-type novelty integer keeps that integer under a range filter (would be dropped without the overlay gate). --- fluree-db-api/tests/it_decimal_exactness.rs | 69 +++++++++++++++++++ fluree-db-core/src/o_type.rs | 15 ++-- fluree-db-query/src/binary_scan.rs | 43 ++++++++---- fluree-db-query/src/execute/operator_tree.rs | 6 ++ .../src/fast_star_const_order_topk.rs | 7 +- 5 files changed, 117 insertions(+), 23 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index ee36d8e1a8..bf8a9354b9 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1429,3 +1429,72 @@ async fn inline_integer_range_pushdown_is_correct() { "COUNT(?n >= 0) over integers must be 4 (0, 5, 42, 100)" ); } + +#[tokio::test] +async fn range_narrowing_keeps_cross_type_novelty() { + // Regression guard for the overlay/novelty hazard in numeric range + // narrowing. The base predicate is uniformly inline-decimal (so narrowing + // WOULD fire on a clean index), but novelty then adds a matching value of a + // DIFFERENT type (an integer) for the same predicate. The integer's overlay + // op sorts outside the decimal o_type/o_key window, so narrowing must be + // disabled while overlay is present — otherwise the integer is dropped + // before the post-filter sees it. With the overlay gate, the full scan + + // merge + post-filter keeps it. + let fluree = memory_fluree(); + let ledger_id = "decimal/xtype-novelty:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:v 0.5 . + ex:b ex:v 100.5 . + ex:c ex:v 10.5 . + } + ", + ) + .await; + + // Reindex: the base predicate is now uniformly inline-decimal (v3). 
+ let root = full_rebuild_publish_decode_root(&fluree, ledger_id).await; + assert_eq!( + root.decimal_encoding(), + fluree_db_core::DecimalEncoding::InlineWhenFits + ); + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); + + // Add a NOVELTY integer (different o_type) that matches the filter, without + // reindexing — it lives in the overlay as XSD_INTEGER. + let result = run_sparql_update( + &fluree, + ledger, + r"PREFIX ex: INSERT DATA { ex:d ex:v 100 . }", + ) + .await; + let ledger = result.ledger; + + // FILTER(?v > 50): the indexed decimal 100.5 AND the novelty integer 100. + let r = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v FILTER(?v > 50) } ORDER BY ?v", + ) + .await + .expect("cross-type range filter"); + let json = r.to_sparql_json(&ledger.snapshot).expect("json"); + let mut got = binding_values(&json, "v"); + got.sort(); + assert_eq!( + got, + vec!["100", "100.5"], + "range filter must keep a cross-type novelty match (integer 100) on a \ + uniform-decimal base predicate — narrowing must not drop it" + ); +} diff --git a/fluree-db-core/src/o_type.rs b/fluree-db-core/src/o_type.rs index f77fef60e4..b5b9d8c226 100644 --- a/fluree-db-core/src/o_type.rs +++ b/fluree-db-core/src/o_type.rs @@ -125,11 +125,12 @@ impl OType { /// Blank node (`_:b{id}`) — `o_key` is the atomic bnode integer. pub const BLANK_NODE: Self = Self(0x001F); - /// `xsd:decimal` stored **inline** as an exact packed `(mantissa, scale)` - /// (see [`ObjKey::encode_decimal`]). Distinct from the lossy f64 - /// [`XSD_DECIMAL`](Self::XSD_DECIMAL) lane: this carries the exact value with - /// no arena handle. Only written by new-format index roots; large/high-precision - /// decimals still fall back to the NumBig arena ([`NUM_BIG_OVERFLOW`](Self::NUM_BIG_OVERFLOW)). 
+ /// `xsd:decimal` stored **inline** as an exact, order-preserving base-10 + /// float key (see [`ObjKey::encode_decimal`]) — canonical *and* value-ordered. + /// Distinct from the lossy f64 [`XSD_DECIMAL`](Self::XSD_DECIMAL) lane: this + /// carries the exact value with no arena handle. Only written by new-format + /// index roots; large/high-precision decimals still fall back to the NumBig + /// arena ([`NUM_BIG_OVERFLOW`](Self::NUM_BIG_OVERFLOW)). pub const XSD_DECIMAL_INLINE: Self = Self(0x0020); // Tag `00` payload range 0x0021–0x3FFF reserved for future embedded types. @@ -422,8 +423,8 @@ pub enum DecodeKind { NumBigArena, /// Spatial arena handle (per-predicate). SpatialArena, - /// Exact inline `xsd:decimal` — o_key is a packed `(sign, scale, mantissa)` - /// (see [`super::value_id::ObjKey::decode_decimal`]). Not arena-backed. + /// Exact inline `xsd:decimal` — o_key is an order-preserving base-10 float + /// code (see [`super::value_id::ObjKey::decode_decimal`]). Not arena-backed. Decimal, } diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index cc0f3c1c15..dd4d520985 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -1829,11 +1829,15 @@ impl Operator for BinaryScanOperator { Arc::clone(branch_ref); // If this scan has range bounds on the object variable and we're scanning in POST order, - // narrow the cursor's leaf range by object-key range. - // - // IMPORTANT: SPARQL numeric comparisons are cross-type (integer bounds match double - // values), and ObjKey encodings differ between types. For correctness, we only apply - // range narrowing for temporal types where cross-type comparison does not apply. + // narrow the cursor's leaf range by object-key range. Two cases are safe: + // - **Temporal** types: comparison is within-type, so a cross-type value + // can't satisfy the filter — dropping it via narrowing is harmless. 
+ // - **A uniform, order-preserving numeric predicate with no overlay**: + // no other-typed base rows exist and no novelty can introduce a + // cross-type match (see the numeric block below). + // SPARQL numeric comparison is otherwise cross-type (an integer bound + // matches double/decimal values under different o_types), so numeric + // narrowing is gated on those preconditions. let mut range_min_okey: Option = None; let mut range_max_okey: Option = None; let mut range_o_type: Option = None; @@ -1856,15 +1860,26 @@ impl Operator for BinaryScanOperator { }; // Numeric range narrowing is unsafe in general (cross-type: // `?o > 10` matches integer 11 AND decimal 11.5, stored under - // different o_types). But when the predicate is *uniformly* one - // order-preserving numeric type — manifest extent - // min_o_type == max_o_type and that type is o_key-ordered - // (any inline integer subtype, double/float, or inline decimal) — - // there are no other-typed rows to miss, so we encode the bounds - // into that type's key space and seek the key range. The - // post-filter below stays as the correctness backstop. - let numeric_uniform_ot = if has_numeric_bound(&bounds.lower) - || has_numeric_bound(&bounds.upper) + // different o_types). It's safe only when the predicate is + // *uniformly* one order-preserving numeric type AND there is no + // overlay: + // + // - **Uniform base** (manifest extent min_o_type == max_o_type, + // o_key-ordered — any inline integer subtype, double/float, or + // inline decimal) means no other-typed *base* rows to miss. + // - **Overlay-free** is required because novelty can add a + // matching value of a *different* type (e.g. integer 100 to a + // decimal predicate). Its translated overlay op sorts outside + // the narrowed o_type/o_key window and would be dropped before + // the post-filter could rescue it. (Temporal narrowing doesn't + // need this: cross-type values can't satisfy a temporal filter, + // so dropping them is harmless.) 
With overlay present we fall + // back to the full base scan + overlay merge + post-filter. + // + // The post-filter below stays as the correctness backstop. + let numeric_uniform_ot = if (has_numeric_bound(&bounds.lower) + || has_numeric_bound(&bounds.upper)) + && ctx.overlay_free_single_graph() { crate::fast_count::predicate_uniform_o_type(store_ref, self.g_id, p_id) .map(OType::from_u16) diff --git a/fluree-db-query/src/execute/operator_tree.rs b/fluree-db-query/src/execute/operator_tree.rs index b7011b2d27..965b8a6262 100644 --- a/fluree-db-query/src/execute/operator_tree.rs +++ b/fluree-db-query/src/execute/operator_tree.rs @@ -414,9 +414,15 @@ fn extract_simple_numeric_compare_threshold( if args.len() != 2 { return None; } + // Long/Double, plus exact integer (BigInt) and decimal constants — the + // numeric-compare fast paths now encode all of these into the matching + // order-preserving key space, so `FILTER(?v > 0.1)` and `FILTER(?n > big)` + // can take the pushdown rather than always deferring to the general scan. let const_to_flake = |c: &FlakeValue| match c { FlakeValue::Long(n) => Some(fluree_db_core::FlakeValue::Long(*n)), FlakeValue::Double(d) => Some(fluree_db_core::FlakeValue::Double(*d)), + FlakeValue::Decimal(d) => Some(fluree_db_core::FlakeValue::Decimal(d.clone())), + FlakeValue::BigInt(b) => Some(fluree_db_core::FlakeValue::BigInt(b.clone())), _ => None, }; let direct_op = match *func { diff --git a/fluree-db-query/src/fast_star_const_order_topk.rs b/fluree-db-query/src/fast_star_const_order_topk.rs index 1d178475d2..8901519dac 100644 --- a/fluree-db-query/src/fast_star_const_order_topk.rs +++ b/fluree-db-query/src/fast_star_const_order_topk.rs @@ -352,11 +352,14 @@ fn filter_subjects_by_numeric_gt( threshold: &FlakeValue, ) -> Result>> { use fluree_db_core::value_id::ObjKey; - // Only support numeric thresholds used in benchmark filters. + // This lane derives its row-comparison keys from a Long/Double threshold. 
+ // Any other threshold type (e.g. an xsd:decimal constant, now that the + // detector can extract them) must DECLINE to the fallback — returning an + // empty set would silently undercount instead of evaluating the filter. let (thr_i, thr_d) = match threshold { FlakeValue::Long(n) => (*n, *n as f64), FlakeValue::Double(d) => (*d as i64, *d), - _ => return Ok(Some(Vec::new())), + _ => return Ok(None), }; let thr_i_key = ObjKey::encode_i64(thr_i).as_u64(); let thr_d_key = ObjKey::encode_f64(thr_d) From efb340d4ea79f427682cb6ce36ff2f64c0029a6a Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 20:54:27 -0400 Subject: [PATCH 21/25] test,docs: integer-base cross-type novelty guard; refresh uniform-o_type doc - Add a regression test mirroring the decimal cross-type novelty guard with the base/overlay types swapped: a uniform xsd:integer base predicate plus a matching xsd:decimal novelty value must keep the decimal under a range filter (would be dropped if narrowing ignored the overlay). Confirms the overlay gate is type-agnostic across the generalized integer/double/decimal path. - Update predicate_uniform_o_type docs: the precondition covers any order-preserving numeric type, not just inline decimals, and notes the caller must also ensure no overlay. 
--- fluree-db-api/tests/it_decimal_exactness.rs | 57 +++++++++++++++++++++ fluree-db-query/src/fast_count.rs | 10 ++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index bf8a9354b9..325ac03d36 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1498,3 +1498,60 @@ async fn range_narrowing_keeps_cross_type_novelty() { uniform-decimal base predicate — narrowing must not drop it" ); } + +#[tokio::test] +async fn integer_range_narrowing_keeps_cross_type_novelty() { + // The overlay gate is type-agnostic: a uniform-INTEGER base predicate with a + // matching DECIMAL novelty value must not drop the decimal. (Mirror of + // range_narrowing_keeps_cross_type_novelty with the base/overlay types + // swapped, covering the generalized integer/double pushdown path.) + let fluree = memory_fluree(); + let ledger_id = "decimal/int-xtype-novelty:main"; + let ledger = genesis_ledger(&fluree, ledger_id); + + run_sparql_update( + &fluree, + ledger, + r" + PREFIX ex: + INSERT DATA { + ex:a ex:v 5 . + ex:b ex:v 100 . + ex:c ex:v 50 . + } + ", + ) + .await; + + full_rebuild_publish_decode_root(&fluree, ledger_id).await; + let ledger = fluree + .ledger(ledger_id) + .await + .expect("load reindexed ledger"); + + // Novelty decimal (different o_type) that matches the filter. + let result = run_sparql_update( + &fluree, + ledger, + r"PREFIX ex: INSERT DATA { ex:d ex:v 75.5 . 
}", + ) + .await; + let ledger = result.ledger; + + let r = support::query_sparql( + &fluree, + &ledger, + r"PREFIX ex: + SELECT ?v WHERE { ?s ex:v ?v FILTER(?v > 60) } ORDER BY ?v", + ) + .await + .expect("cross-type range filter"); + let json = r.to_sparql_json(&ledger.snapshot).expect("json"); + let mut got = binding_values(&json, "v"); + got.sort(); + assert_eq!( + got, + vec!["100", "75.5"], + "integer-base range filter must keep a cross-type novelty decimal (75.5)" + ); +} diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index 533d44ad53..81fa40d8e0 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -413,10 +413,12 @@ fn otype_unsupported_numeric(raw: u16) -> bool { /// The single `o_type` shared by every row of `p_id` in POST order, or `None` /// if the predicate is empty or has mixed o_types. Read from the leaf manifest -/// (plus ≤2 boundary leaves) — cheap, no full scan. A uniform -/// `XSD_DECIMAL_INLINE` result means every value under the predicate is an -/// inline decimal with no arena spill and no other types, which is the -/// precondition for safely narrowing a numeric range scan by `o_key`. +/// (plus ≤2 boundary leaves) — cheap, no full scan. A uniform result in an +/// order-preserving numeric type (any inline integer subtype, double/float, or +/// inline decimal — see [`otype_okey_order_comparable`]) means every value +/// shares that type with no arena spill and no other types, which is the base +/// precondition for narrowing a numeric range scan by `o_key`. (The caller must +/// additionally ensure no overlay, since novelty can add a cross-type value.) 
pub(crate) fn predicate_uniform_o_type( store: &BinaryIndexStore, g_id: GraphId, From d5a2527bb1838a333a17405d04ef18ce8e5ee2bb Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 13 Jun 2026 21:13:26 -0400 Subject: [PATCH 22/25] docs: refresh otype_unsupported_numeric comment after numeric generalization The comment claimed non-canonical integer widths and floats force the count to defer; those are now o_key-order-comparable. Document that this is really the arena NUM_BIG_OVERFLOW lane plus the dormant lossy-f64 XSD_DECIMAL lane. --- fluree-db-query/src/fast_count.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fluree-db-query/src/fast_count.rs b/fluree-db-query/src/fast_count.rs index 81fa40d8e0..60e1bf09fe 100644 --- a/fluree-db-query/src/fast_count.rs +++ b/fluree-db-query/src/fast_count.rs @@ -402,9 +402,11 @@ pub(crate) fn otype_okey_order_comparable(ot: OType) -> bool { || ot == OType::XSD_DECIMAL_INLINE } -/// True if this o_type is numeric but cannot be compared by encoded o_key in -/// the numeric-COUNT lanes (non-canonical integer widths, floats, arena-keyed -/// NUM_BIG): rows of these kinds force the count to defer. +/// True if this o_type is numeric but NOT o_key-order-comparable, so rows of it +/// force the numeric-COUNT lanes to defer. With all inline integer subtypes, +/// `xsd:double`/`xsd:float`, and inline decimals now comparable +/// ([`otype_okey_order_comparable`]), this is the arena `NUM_BIG_OVERFLOW` lane +/// (equality-only) and the dormant lossy-f64 `XSD_DECIMAL` lane. 
fn otype_unsupported_numeric(raw: u16) -> bool { let ot = OType::from_u16(raw); (ot.is_numeric() || ot == OType::NUM_BIG_OVERFLOW || ot == OType::XSD_DECIMAL_INLINE) From 2f2295e67824474d3d08d81c08d74a02a0a1c261 Mon Sep 17 00:00:00 2001 From: bplatz Date: Tue, 23 Jun 2026 17:29:04 -0400 Subject: [PATCH 23/25] chore: reconcile IndexRoot test literals after main merge Add the new decimal_encoding field to IndexRoot test-helper literals pulled in from main, and reflow a decimal property-test closure. --- fluree-db-binary-index/src/format/expanded_cas.rs | 1 + fluree-db-core/src/value_id.rs | 7 ++++++- fluree-db-indexer/src/run_index/build/incremental_root.rs | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fluree-db-binary-index/src/format/expanded_cas.rs b/fluree-db-binary-index/src/format/expanded_cas.rs index 5526cb641d..cdef28c0a7 100644 --- a/fluree-db-binary-index/src/format/expanded_cas.rs +++ b/fluree-db-binary-index/src/format/expanded_cas.rs @@ -291,6 +291,7 @@ mod tests { had_annotation_arena: false, o_type_table: IndexRoot::build_o_type_table(&[], &[]), ns_split_mode: fluree_db_core::ns_encoding::NsSplitMode::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, } } diff --git a/fluree-db-core/src/value_id.rs b/fluree-db-core/src/value_id.rs index 3bdecd51ce..d292844d72 100644 --- a/fluree-db-core/src/value_id.rs +++ b/fluree-db-core/src/value_id.rs @@ -2014,7 +2014,12 @@ mod tests { let values = property_test_decimals(); let keyed: Vec<_> = values .iter() - .map(|v| (v.clone(), ObjKey::encode_decimal(v).expect("inline-eligible").as_u64())) + .map(|v| { + ( + v.clone(), + ObjKey::encode_decimal(v).expect("inline-eligible").as_u64(), + ) + }) .collect(); for (a, ka) in &keyed { for (b, kb) in &keyed { diff --git a/fluree-db-indexer/src/run_index/build/incremental_root.rs b/fluree-db-indexer/src/run_index/build/incremental_root.rs index 074129412b..8746fd07c3 100644 --- 
a/fluree-db-indexer/src/run_index/build/incremental_root.rs +++ b/fluree-db-indexer/src/run_index/build/incremental_root.rs @@ -375,6 +375,7 @@ mod tests { had_annotation_arena: false, o_type_table: IndexRoot::build_o_type_table(&[], &[]), ns_split_mode: NsSplitMode::default(), + decimal_encoding: fluree_db_core::DecimalEncoding::ArenaOnly, } } From 1b6e057ec1d0957b2cbdaa64f37f059e330e9d39 Mon Sep 17 00:00:00 2001 From: bplatz Date: Tue, 23 Jun 2026 18:57:48 -0400 Subject: [PATCH 24/25] fix(import): don't block the runtime worker on parsed-chunk recv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parallel remote-import commit loop received parsed chunks with a direct blocking std::mpsc recv on the tokio worker thread. On a single-worker (current_thread) runtime, this parks the worker and starves the spawned remote producer task — which drives storage reads and re-parks waiting for channel capacity. With max-inflight backpressure of one chunk (small memory budget, e.g. CI runners) the producer can never be re-polled, so the import hangs forever. This surfaced as the load-dependent, Linux-CI-only timeout of remote_import_matches_local_flake_count. Receive off the worker via spawn_blocking, mirroring the serial arm which already documents and avoids the same hazard. Add a max_inflight_chunks builder setter and a deterministic regression test (yielding storage + single in-flight) that hangs on the old code and passes on the new. --- fluree-db-api/src/import.rs | 32 +++++++- fluree-db-api/tests/it_import_remote.rs | 103 ++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/fluree-db-api/src/import.rs b/fluree-db-api/src/import.rs index c7e21edfa5..3fa8c517bd 100644 --- a/fluree-db-api/src/import.rs +++ b/fluree-db-api/src/import.rs @@ -2476,6 +2476,15 @@ impl<'a> ImportBuilder<'a> { self } + /// Set the maximum number of chunks held in flight (the memory-budget + /// backpressure knob). 
`0` = derive from the budget. Forcing `1` makes the + /// remote producer re-park between chunks, exercising the single-worker + /// runtime handoff path; used by regression tests for that path. + pub fn max_inflight_chunks(mut self, n: usize) -> Self { + self.config.max_inflight_chunks = n; + self + } + /// Set the maximum flakes per chunk. 0 = no limit. Default: 20_000_000. pub fn chunk_max_flakes(mut self, n: usize) -> Self { self.config.chunk_max_flakes = n; @@ -3372,8 +3381,27 @@ where let mut pending: std::collections::BTreeMap = std::collections::BTreeMap::new(); - for recv_result in &result_rx { - let (idx, parsed) = recv_result.map_err(ImportError::Transact)?; + // Receive parsed chunks off the runtime worker. A direct blocking + // `recv()` on this `std` channel would park the single current-thread + // runtime worker, starving the spawned remote producer task (which + // drives storage reads and may be re-parked waiting for channel + // capacity) and the sort/write tasks — deadlocking the whole import on + // a single-worker runtime. The serial arm below documents and avoids + // the same hazard via `spawn_blocking`. + let result_rx = Arc::new(std::sync::Mutex::new(result_rx)); + loop { + let recv_result = { + let rx = Arc::clone(&result_rx); + tokio::task::spawn_blocking(move || rx.lock().unwrap().recv()) + .await + .map_err(|e| { + ImportError::Transact(format!("parsed-chunk receive task panicked: {e}")) + })? + }; + let (idx, parsed) = match recv_result { + Ok(chunk) => chunk.map_err(ImportError::Transact)?, + Err(_) => break, // All parse workers dropped their senders. 
+ }; pending.insert(idx, parsed); while let Some(parsed) = pending.remove(&next_expected) { diff --git a/fluree-db-api/tests/it_import_remote.rs b/fluree-db-api/tests/it_import_remote.rs index eea69e426c..133d601187 100644 --- a/fluree-db-api/tests/it_import_remote.rs +++ b/fluree-db-api/tests/it_import_remote.rs @@ -9,6 +9,7 @@ mod support; +use async_trait::async_trait; use fluree_db_api::{FlureeBuilder, RemoteObject, RemoteSource}; use fluree_db_core::{MemoryStorage, StorageRead, StorageWrite}; use serde_json::json; @@ -458,3 +459,105 @@ async fn import_from_storage_rejects_mixed_formats() { "expected MixedFormats error, got: {msg}" ); } + +// ============================================================================ +// Regression: single-worker runtime must not deadlock when the remote producer +// re-parks between chunks (in-flight backpressure == 1). +// ============================================================================ + +/// A `StorageRead` that yields to the runtime before serving each read, +/// mimicking a real async backend (File/S3) that hands control to the IO +/// driver mid-read. `MemoryStorage` resolves reads synchronously, so it cannot +/// reproduce the producer re-park that the commit consumer depends on the +/// runtime worker to drive. +#[derive(Debug)] +struct YieldingStorage { + inner: Arc, +} + +#[async_trait] +impl StorageRead for YieldingStorage { + async fn read_bytes(&self, address: &str) -> fluree_db_core::error::Result> { + // Hand control back to the runtime so the producer task genuinely + // depends on the worker being free to make progress. 
+ tokio::task::yield_now().await; + self.inner.read_bytes(address).await + } + + async fn exists(&self, address: &str) -> fluree_db_core::error::Result { + self.inner.exists(address).await + } + + async fn list_prefix(&self, prefix: &str) -> fluree_db_core::error::Result> { + self.inner.list_prefix(prefix).await + } +} + +/// Reproduces the channel-fed remote-import deadlock deterministically. +/// +/// With `max_inflight_chunks == 1` the remote producer can hold only one chunk +/// in flight, so it re-parks (waiting for channel capacity, and on each +/// yielding read) and relies on the single current-thread runtime worker to be +/// re-polled. If the in-order commit consumer blocks that worker on a plain +/// `std::sync::mpsc::recv`, the producer can never advance and the import hangs +/// forever — the load/low-RAM-dependent failure seen on CI. The consumer must +/// receive off the worker (via `spawn_blocking`) for this to complete. +/// +/// Pre-fix: this test hangs (caught by nextest's `terminate-after`). +/// Post-fix: it completes in well under a second. 
+#[tokio::test] +async fn remote_import_single_inflight_yielding_storage_does_not_deadlock() { + const N: usize = 6; + + let inner = Arc::new(MemoryStorage::new()); + let mut objects = Vec::with_capacity(N); + for i in 0..N { + let addr = format!("imports/chunk_{i:04}.ttl"); + let body = format!( + "{TTL_PREFIX}\n\ + ex:user{i} a ex:User ;\n\ + schema:name \"User{i}\" ;\n\ + schema:age {age} .\n", + age = 20 + i + ); + inner.write_bytes(&addr, body.as_bytes()).await.unwrap(); + objects.push(RemoteObject { + address: addr, + size_bytes: body.len() as u64, + }); + } + + let db_dir = tempfile::tempdir().expect("db tmpdir"); + let fluree = FlureeBuilder::file(db_dir.path().to_string_lossy().to_string()) + .build() + .expect("build file-backed Fluree"); + + let storage_dyn: Arc = Arc::new(YieldingStorage { inner }); + let result = fluree + .create("test/remote-single-inflight:main") + .import_from_storage(storage_dyn, RemoteSource::OrderedObjects(objects)) + .threads(2) + .max_inflight_chunks(1) + .cleanup(false) + .execute() + .await + .expect("remote import must not deadlock with single in-flight chunk"); + + assert_eq!(result.t, N as i64, "every chunk commits one transaction"); + assert!(result.flake_count > 0); + + let ledger = fluree + .ledger("test/remote-single-inflight:main") + .await + .expect("load ledger"); + let q = json!({ + "@context": { "ex": "http://example.org/ns/", "schema": "http://schema.org/" }, + "select": ["?name"], + "where": { "schema:name": "?name" } + }); + let qr = support::query_jsonld(&fluree, &ledger, &q) + .await + .expect("query names"); + let names = extract_sorted_strings(&qr.to_jsonld(&ledger.snapshot).unwrap()); + assert_eq!(names.len(), N, "all {N} subjects imported"); +} From 7be883733b54eef3718389f8f9232bde5c96152a Mon Sep 17 00:00:00 2001 From: bplatz Date: Tue, 30 Jun 2026 07:08:51 -0400 Subject: [PATCH 25/25] test: align decimal exactness test with canonical inline encoding The inline decimal codec is canonical 
(order-preserving, trailing zeros stripped), so 19.90 -> 19.9 and 24.50 -> 24.5 on both the indexed and novelty serving paths, matching the XSD canonical form of xsd:decimal. Update the indexed value to 19.90 so the arena path positively asserts canonicalization too, and refresh the now-stale lexical-preservation expectations and comments. --- fluree-db-api/tests/it_decimal_exactness.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index e4f4f3a16d..91972bf1b2 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -1752,7 +1752,7 @@ async fn integer_valued_double_over_indexed_predicate_is_not_corrupted() { /// Issue #1329: JSON-LD decimal rendering must be consistent regardless of /// whether the value is served from the binary index (arena-decoded) or from /// novelty (raw flake merge). The reported bug rendered index-served decimals -/// as `{"@value": "19.99", "@type": ""}` (empty type) and novelty-served ones +/// as `{"@value": "19.90", "@type": ""}` (empty type) and novelty-served ones /// as a bare string with no `@type`. #[tokio::test] async fn jsonld_decimal_renders_consistently_across_index_and_novelty() { @@ -1774,13 +1774,15 @@ async fn jsonld_decimal_renders_consistently_across_index_and_novelty() { .run_until(async move { let ledger = genesis_ledger(&fluree, ledger_id); - // Indexed base: ex:a is arena-backed after the index build. + // Indexed base: ex:a is arena-backed after the index build. The + // trailing zero (19.90) exercises canonicalization on the indexed + // path — the inline decimal code strips it, so it renders as 19.9. let result = run_sparql_update( &fluree, ledger, r" PREFIX ex: - INSERT DATA { ex:a ex:price 19.99 . } + INSERT DATA { ex:a ex:price 19.90 . 
} ", ) .await; @@ -1817,8 +1819,8 @@ async fn jsonld_decimal_renders_consistently_across_index_and_novelty() { // Both the arena-served (indexed) and novelty-served decimals must // render in the SAME shape. Before the fix the indexed copy lost its - // datatype and rendered as `{"@value":"19.99","@type":""}` while the - // novelty copy rendered as the bare string `"24.50"` (issue #1329). + // datatype and rendered as `{"@value":"19.90","@type":""}` while the + // novelty copy rendered as a bare string (issue #1329). let mut by_id = std::collections::HashMap::new(); for node in rows { let id = node["@id"].as_str().expect("@id").to_string(); @@ -1839,7 +1841,10 @@ async fn jsonld_decimal_renders_consistently_across_index_and_novelty() { } // Consistency: identical JSON shape across the two paths (xsd:decimal - // is an inferable datatype, so both render as the exact bare string). + // is an inferable datatype, so both render as a bare string). The + // inline decimal code is canonical (order-preserving, trailing zeros + // stripped), so both paths drop the trailing zero — 19.90 → 19.9 and + // 24.50 → 24.5 — matching the XSD canonical form of xsd:decimal. assert_eq!( indexed.is_object(), novel.is_object(), @@ -1848,12 +1853,12 @@ async fn jsonld_decimal_renders_consistently_across_index_and_novelty() { ); assert_eq!( indexed, - &JsonValue::String("19.99".to_string()), + &JsonValue::String("19.9".to_string()), "indexed decimal" ); assert_eq!( novel, - &JsonValue::String("24.50".to_string()), + &JsonValue::String("24.5".to_string()), "novelty decimal" );