From 8b6eb436d819668918992d74fdfaabd902145a07 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 18 Jun 2026 13:43:39 -0400 Subject: [PATCH 1/2] fix(query): stop corrupting integer-valued doubles read through the overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a double/float predicate already has indexed data, a newly inserted integer-valued double (e.g. 55000.0) was read back as a tiny subnormal (2.71736e-319). The novelty overlay encoder optimized integral doubles to encode_i64 but paired the key with the datatype-derived OType (XSD_DOUBLE/ XSD_FLOAT), whose decode kind is F64 — so the reader ran decode_f64 over the i64-encoded bits, i.e. f64::from_bits(N). The persisted commits were always correct; the corruption was purely on the read path, so affected data is recoverable without re-ingestion. value_to_otype_okey now always encodes finite doubles with encode_f64, matching the encode-side guards already in resolver.rs and import_sink.rs. The unused twin value_to_obj_pair gets the same guard defensively. Adds a regression test that indexes a double, inserts an integer-valued double, and asserts an exact round-trip. 
--- fluree-db-api/tests/it_decimal_exactness.rs | 74 +++++++++++++++++++++ fluree-db-query/src/binary_scan.rs | 12 ++-- fluree-db-query/src/dict_overlay.rs | 12 ++-- 3 files changed, 85 insertions(+), 13 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 91784679e7..14f99fab86 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -889,3 +889,77 @@ async fn sparql_delete_data_decimal_retracts_exactly() { "deleted decimal fact must not survive" ); } + +#[tokio::test] +async fn integer_valued_double_over_indexed_predicate_is_not_corrupted() { + // Regression (fluree/db-r#142): an integer-valued double inserted into a + // predicate that already has INDEXED double/float data was silently + // corrupted to a tiny subnormal. The novelty overlay encoder paired the + // datatype-derived OType (F64 decode) with an i64-encoded key, so the + // reader ran decode_f64 over integer bits (55000.0 -> 2.71736e-319). + // The trigger needs a persisted index for the predicate; novelty-only + // ledgers encode the value correctly. + let fluree = FlureeBuilder::memory() + .with_ledger_cache_config(fluree_db_api::LedgerManagerConfig::default()) + .build_memory(); + let ledger_id = "double/indexed-overlay:main"; + + let (local, handle) = start_background_indexer_local( + fluree.backend().clone(), + Arc::new(fluree.nameservice_mode().clone()), + fluree_db_indexer::IndexerConfig::small(), + ); + + local + .run_until(async move { + let ledger = genesis_ledger(&fluree, ledger_id); + + // Seed + index an integer-valued double for ex:amount. + let result = run_sparql_update( + &fluree, + ledger, + r#" + PREFIX ex: + PREFIX xsd: + INSERT DATA { ex:seed ex:amount "28575.0"^^xsd:double . 
} + "#, + ) + .await; + trigger_index_and_wait(&handle, ledger_id, result.receipt.t).await; + let ledger = fluree.ledger(ledger_id).await.expect("load ledger"); + + // Insert a NEW integer-valued double into the now-indexed predicate; + // it lands in novelty and is read back through the overlay merge. + let result = run_sparql_update( + &fluree, + ledger, + r#" + PREFIX ex: + PREFIX xsd: + INSERT DATA { ex:a ex:amount "55000.0"^^xsd:double . } + "#, + ) + .await; + let ledger = result.ledger; + + let query = r" + PREFIX ex: + SELECT ?amount WHERE { ex:a ex:amount ?amount . } + "; + let result = support::query_sparql(&fluree, &ledger, query) + .await + .expect("query"); + let sparql_json = result + .to_sparql_json(&ledger.snapshot) + .expect("to_sparql_json"); + let values = binding_values(&sparql_json, "amount"); + assert_eq!(values.len(), 1, "expected exactly one row, got {values:?}"); + let got: f64 = values[0].parse().expect("double result"); + assert_eq!( + got, 55000.0, + "integer-valued double over an indexed predicate must round-trip \ + exactly (was corrupted to a subnormal), got {got:e}" + ); + }) + .await; +} diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index 875e95c29a..1a30e74085 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -2586,12 +2586,12 @@ fn value_to_otype_okey( "datatype not resolvable to OType for Double value", ) })?; - if d.is_finite() && d.fract() == 0.0 { - let as_i64 = *d as i64; - if (as_i64 as f64) == *d { - return Ok((ot, ObjKey::encode_i64(as_i64).as_u64())); - } - } + // Do NOT optimize integral doubles to encode_i64: `ot` is the + // datatype-derived OType (e.g. XSD_DOUBLE), whose decode kind is F64. + // Pairing it with an i64-encoded key makes the reader run decode_f64 + // over integer bits, corrupting the value to a tiny subnormal + // (55000.0 -> 2.71736e-319). Mirrors the encode-side guards in + // resolver.rs / import_sink.rs. 
(fluree/db-r#142) if d.is_finite() { match ObjKey::encode_f64(*d) { Ok(key) => Ok((ot, key.as_u64())), diff --git a/fluree-db-query/src/dict_overlay.rs b/fluree-db-query/src/dict_overlay.rs index 8e51fdbf30..f4543e5ca3 100644 --- a/fluree-db-query/src/dict_overlay.rs +++ b/fluree-db-query/src/dict_overlay.rs @@ -483,13 +483,11 @@ impl DictOverlay { FlakeValue::Long(n) => Ok((ObjKind::NUM_INT, ObjKey::encode_i64(*n))), FlakeValue::Double(d) => { - // Integer-valued doubles that fit i64 → NUM_INT - if d.is_finite() && d.fract() == 0.0 { - let as_i64 = *d as i64; - if (as_i64 as f64) == *d { - return Ok((ObjKind::NUM_INT, ObjKey::encode_i64(as_i64))); - } - } + // Do NOT optimize integral doubles to NUM_INT: when paired with a + // float/double datatype the decode resolves an F64 OType and runs + // decode_f64 over the i64-encoded bits, corrupting the value to a + // tiny subnormal. Mirrors value_to_otype_okey and the encode-side + // guards in resolver.rs / import_sink.rs. (fluree/db-r#142) if d.is_finite() { match ObjKey::encode_f64(*d) { Ok(key) => Ok((ObjKind::NUM_F64, key)), From 15ea8d945e807f0965f312c9cbdec11ad86ff7c2 Mon Sep 17 00:00:00 2001 From: bplatz Date: Fri, 19 Jun 2026 21:18:56 -0400 Subject: [PATCH 2/2] test(query): harden integer-double regression; tidy overlay encoders Address review feedback on the integer-valued-double overlay fix: - it_decimal_exactness: assert the returned binding stays xsd:double (no silent downgrade to integer/long) and add boundary companions -55000.0 (sign-flip branch) and 2^53 (top of the range where every integer is an exact double). - dict_overlay: value_to_obj_pair is now pub(crate) + #[expect(dead_code)] with a note explaining it is kept for parity with value_to_otype_okey. - binary_scan / dict_overlay: drop the redundant is_finite() pre-check in the Double arm; encode_f64 already rejects NaN/Inf, so its Err arm is the single reachable "unrepresentable double -> NULL sentinel" path. 
--- fluree-db-api/tests/it_decimal_exactness.rs | 85 ++++++++++++++++----- fluree-db-query/src/binary_scan.rs | 11 +-- fluree-db-query/src/dict_overlay.rs | 21 ++--- 3 files changed, 80 insertions(+), 37 deletions(-) diff --git a/fluree-db-api/tests/it_decimal_exactness.rs b/fluree-db-api/tests/it_decimal_exactness.rs index 14f99fab86..2277e83276 100644 --- a/fluree-db-api/tests/it_decimal_exactness.rs +++ b/fluree-db-api/tests/it_decimal_exactness.rs @@ -56,6 +56,20 @@ fn binding_values(sparql_json: &JsonValue, var: &str) -> Vec { .collect() } +fn binding_datatypes(sparql_json: &JsonValue, var: &str) -> Vec { + sparql_json["results"]["bindings"] + .as_array() + .expect("bindings array") + .iter() + .map(|b| { + b[var]["datatype"] + .as_str() + .expect("binding datatype string") + .to_string() + }) + .collect() +} + fn memory_fluree() -> MemoryFluree { assert_index_defaults(); FlureeBuilder::memory().build_memory() @@ -928,38 +942,67 @@ async fn integer_valued_double_over_indexed_predicate_is_not_corrupted() { trigger_index_and_wait(&handle, ledger_id, result.receipt.t).await; let ledger = fluree.ledger(ledger_id).await.expect("load ledger"); - // Insert a NEW integer-valued double into the now-indexed predicate; - // it lands in novelty and is read back through the overlay merge. + // Insert NEW integer-valued doubles into the now-indexed predicate; + // they land in novelty and are read back through the overlay merge. + // Boundary companions exercise the encode_f64/decode_f64 sign-flip + // branch (-55000.0) and 2^53, the top of the contiguous range in + // which every integer is exactly representable as a double. let result = run_sparql_update( &fluree, ledger, r#" PREFIX ex: PREFIX xsd: - INSERT DATA { ex:a ex:amount "55000.0"^^xsd:double . } + INSERT DATA { + ex:a ex:amount "55000.0"^^xsd:double . + ex:b ex:amount "-55000.0"^^xsd:double . + ex:c ex:amount "9.007199254740992e15"^^xsd:double . 
+ } "#, ) .await; let ledger = result.ledger; - let query = r" - PREFIX ex: - SELECT ?amount WHERE { ex:a ex:amount ?amount . } - "; - let result = support::query_sparql(&fluree, &ledger, query) - .await - .expect("query"); - let sparql_json = result - .to_sparql_json(&ledger.snapshot) - .expect("to_sparql_json"); - let values = binding_values(&sparql_json, "amount"); - assert_eq!(values.len(), 1, "expected exactly one row, got {values:?}"); - let got: f64 = values[0].parse().expect("double result"); - assert_eq!( - got, 55000.0, - "integer-valued double over an indexed predicate must round-trip \ - exactly (was corrupted to a subnormal), got {got:e}" - ); + // (subject, expected exact f64) for each inserted integral double. + let cases = [ + ("ex:a", 55000.0), + ("ex:b", -55000.0), + ("ex:c", 9.007_199_254_740_992e15), + ]; + for (subject, expected) in cases { + let query = format!( + "PREFIX ex: + SELECT ?amount WHERE {{ {subject} ex:amount ?amount . }}" + ); + let result = support::query_sparql(&fluree, &ledger, &query) + .await + .expect("query"); + let sparql_json = result + .to_sparql_json(&ledger.snapshot) + .expect("to_sparql_json"); + + let values = binding_values(&sparql_json, "amount"); + assert_eq!( + values.len(), + 1, + "{subject}: expected exactly one row, got {values:?}" + ); + let got: f64 = values[0].parse().expect("double result"); + assert_eq!( + got, expected, + "{subject}: integer-valued double over an indexed predicate must \ + round-trip exactly (was corrupted to a subnormal), got {got:e}" + ); + + // Lock in that the uniform-f64 encoding does not silently downgrade + // the reported datatype to xsd:integer/xsd:long. 
+ let datatypes = binding_datatypes(&sparql_json, "amount"); + assert_eq!( + datatypes, + vec!["http://www.w3.org/2001/XMLSchema#double".to_string()], + "{subject}: datatype must stay xsd:double" + ); + } }) .await; } diff --git a/fluree-db-query/src/binary_scan.rs b/fluree-db-query/src/binary_scan.rs index 1a30e74085..1664333821 100644 --- a/fluree-db-query/src/binary_scan.rs +++ b/fluree-db-query/src/binary_scan.rs @@ -2592,13 +2592,10 @@ fn value_to_otype_okey( // over integer bits, corrupting the value to a tiny subnormal // (55000.0 -> 2.71736e-319). Mirrors the encode-side guards in // resolver.rs / import_sink.rs. (fluree/db-r#142) - if d.is_finite() { - match ObjKey::encode_f64(*d) { - Ok(key) => Ok((ot, key.as_u64())), - Err(_) => Ok((OType::NULL, 0)), - } - } else { - Ok((OType::NULL, 0)) + match ObjKey::encode_f64(*d) { + Ok(key) => Ok((ot, key.as_u64())), + // NaN/Inf can't be order-encoded → NULL sentinel. + Err(_) => Ok((OType::NULL, 0)), } } FlakeValue::Ref(sid) => { diff --git a/fluree-db-query/src/dict_overlay.rs b/fluree-db-query/src/dict_overlay.rs index f4543e5ca3..e07f23c052 100644 --- a/fluree-db-query/src/dict_overlay.rs +++ b/fluree-db-query/src/dict_overlay.rs @@ -476,7 +476,14 @@ impl DictOverlay { /// /// Unlike `BinaryIndexStore::value_to_obj_pair()`, this never returns `None` /// for representable values. - pub fn value_to_obj_pair(&mut self, val: &FlakeValue) -> io::Result<(ObjKind, ObjKey)> { + /// + /// Kept for: parity with the live `value_to_otype_okey` encoder in + /// `binary_scan.rs` — both must apply the same integral-double guard so a + /// future overlay write path can reuse this twin without reintroducing the + /// subnormal corruption (fluree/db-r#142). + /// Use when: a `DictOverlay`-based write path needs (ObjKind, ObjKey) pairs. 
+ #[expect(dead_code)] + pub(crate) fn value_to_obj_pair(&mut self, val: &FlakeValue) -> io::Result<(ObjKind, ObjKey)> { match val { FlakeValue::Null => Ok((ObjKind::NULL, ObjKey::from_u64(0))), FlakeValue::Boolean(b) => Ok((ObjKind::BOOL, ObjKey::encode_bool(*b))), @@ -488,14 +495,10 @@ impl DictOverlay { // decode_f64 over the i64-encoded bits, corrupting the value to a // tiny subnormal. Mirrors value_to_otype_okey and the encode-side // guards in resolver.rs / import_sink.rs. (fluree/db-r#142) - if d.is_finite() { - match ObjKey::encode_f64(*d) { - Ok(key) => Ok((ObjKind::NUM_F64, key)), - Err(_) => Ok((ObjKind::NULL, ObjKey::from_u64(0))), - } - } else { - // NaN/Inf → NULL sentinel (can't represent in index) - Ok((ObjKind::NULL, ObjKey::from_u64(0))) + match ObjKey::encode_f64(*d) { + Ok(key) => Ok((ObjKind::NUM_F64, key)), + // NaN/Inf can't be order-encoded → NULL sentinel. + Err(_) => Ok((ObjKind::NULL, ObjKey::from_u64(0))), } }