From e46f4ffa087b4c5b0fa01f2391a85e89ce588edb Mon Sep 17 00:00:00 2001 From: Jack White Date: Thu, 2 Jul 2026 21:04:19 -0400 Subject: [PATCH 1/2] fix(query): make cross-ledger GRAPH queries work over indexed data (#1405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two composed failures prevented property paths + cross-graph joins from working over an indexed, multi-ledger dataset: Bug 2 — a GRAPH-scoped query over an INDEXED multi-ledger dataset hit an internal invariant ("EncodedSid/EncodedPid reached stamp_provenance"). Root cause: `DatasetOperator` computed `multi_ledger` from the *active* graphs only, so a single default graph (alongside named graphs from other ledgers) was treated as single-ledger — the binary store stayed enabled and its scans emitted late `Binding::EncodedSid`, which then seeded a GRAPH block / crossed a boundary and reached provenance stamping, which cannot decode them. Fix: also treat the scan as multi-ledger when the whole dataset spans ledgers, forcing full `Binding::Sid` materialization (which stamps to `IriMatch`). Bug 3 — a cross-`GRAPH` join or path over DIVERGENT namespace codes silently returned []. The materialization fix above resolves the join/seed cases (bound keys now cross as `IriMatch` and re-encode). For property paths specifically, the operator also matched pattern *constants* (endpoints) and *predicates* against the primary/lowering snapshot's codes rather than the per-GRAPH graph's, so a divergent-namespace endpoint/predicate found nothing. Fix: re-encode path predicate and constant-endpoint SIDs into the active graph's dict (`reencode_pred` + the `Ref::Sid` arm of `resolve_sid`), reusing the same decode-primary/encode-target idiom as `binary_scan`'s `reencode_sid`. Single-ledger and single-graph queries are unchanged (re-encode round-trips to the same SID; materialization only kicks in for multi-ledger datasets). The union-path guard (failure 1 in #1405) is intentionally left in place. Adds tests/it_multi_graph_property_path.rs: the q1-q3d repro plus a usage matrix (A1-A8) covering object-position joins, multi-value completeness, precision, strict paths, three-ledger chains, single-ledger regression, FILTER EXISTS, and unbounded closures — all over indexed, divergent-namespace ledgers. Co-Authored-By: Claude Opus 4.8 --- fluree-db-api/Cargo.toml | 4 + .../tests/it_multi_graph_property_path.rs | 704 ++++++++++++++++++ fluree-db-query/src/dataset_operator.rs | 14 +- fluree-db-query/src/property_path.rs | 46 +- 4 files changed, 762 insertions(+), 6 deletions(-) create mode 100644 fluree-db-api/tests/it_multi_graph_property_path.rs diff --git a/fluree-db-api/Cargo.toml b/fluree-db-api/Cargo.toml index 234e751c68..e8f68dc931 100644 --- a/fluree-db-api/Cargo.toml +++ b/fluree-db-api/Cargo.toml @@ -187,6 +187,10 @@ path = "tests/it_cyclic_bgp_novelty_pred.rs" name = "it_cyclic_bgp_probe" path = "tests/it_cyclic_bgp_probe.rs" +[[test]] +name = "it_multi_graph_property_path" +path = "tests/it_multi_graph_property_path.rs" + [[test]] name = "it_query_explain" path = "tests/it_query_explain.rs" diff --git a/fluree-db-api/tests/it_multi_graph_property_path.rs b/fluree-db-api/tests/it_multi_graph_property_path.rs new file mode 100644 index 0000000000..388d665ed4 --- /dev/null +++ b/fluree-db-api/tests/it_multi_graph_property_path.rs @@ -0,0 +1,704 @@ +//! Multi-graph property-path traversal over a cross-ledger dataset (INDEXED). +//! +//! Mirrors the server's real conditions: data is pushed into the **binary +//! index** (not just novelty), then a cross-ledger dataset query is run through +//! the connection path. This is what triggers the multi-graph GRAPH + property +//! path failures that the novelty-only path does not. +//! +//! See the fluree/db issue "Property paths can't be combined with cross-ledger +//! queries". `q1`/`q2` characterize current (correct/guarded) behavior; `q3a`/ +//! `q3b` assert the *desired* behavior of the `use GRAPH` escape hatch. + +#![cfg(feature = "native")] + +mod support; + +use fluree_db_api::{FlureeBuilder, IndexConfig, LedgerManagerConfig}; +use fluree_db_transact::{CommitOpts, TxnOpts}; +use serde_json::{json, Value as JsonValue}; +use support::{ + genesis_ledger_for_fluree, start_background_indexer_local, trigger_index_and_wait_outcome, +}; + +type MemoryFluree = fluree_db_api::Fluree; + +async fn insert_indexed( + fluree: &MemoryFluree, + handle: &fluree_db_indexer::IndexerHandle, + ledger_id: &str, + doc: &JsonValue, +) { + let index_cfg = IndexConfig { + reindex_min_bytes: 0, + reindex_max_bytes: 10_000_000, + }; + let ledger = genesis_ledger_for_fluree(fluree, ledger_id); + let result = fluree + .insert_with_opts( + ledger, + doc, + TxnOpts::default(), + CommitOpts::default(), + &index_cfg, + ) + .await + .expect("insert"); + let _ = trigger_index_and_wait_outcome(handle, ledger_id, result.ledger.t()).await; +} + +async fn seed(fluree: &MemoryFluree, handle: &fluree_db_indexer::IndexerHandle) { + insert_indexed( + fluree, + handle, + "taxonomy:main", + &json!({ + "@context": {"ex": "https://example.org/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}, + "@graph": [ + {"@id": "ex:top", "rdfs:label": "Top"}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}, "rdfs:label": "Mid"}, + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}, "rdfs:label": "Narrow"} + ] + }), + ) + .await; + insert_indexed( + fluree, + handle, + "catalog:main", + &json!({ + "@context": {"ex": "https://example.org/"}, + "@graph": [ {"@id": "ex:thing", "ex:category": {"@id": "ex:narrow"}} ] + }), + ) + .await; +} + +fn fluree_with_indexer() -> ( + MemoryFluree, + tokio::task::LocalSet, + fluree_db_indexer::IndexerHandle, +) { + let fluree = FlureeBuilder::memory() + .with_ledger_cache_config(LedgerManagerConfig::default()) + .build_memory(); + let (local, handle) = start_background_indexer_local( + fluree.backend().clone(), + fluree + .nameservice_mode() + .as_arc_indexing_nameservice() + .expect("test fluree has writable nameservice"), + fluree_db_indexer::IndexerConfig::small(), + ); + (fluree, local, handle) +} + +/// Q1 (control) — property path over a SINGLE-graph `FROM`, indexed. +#[tokio::test] +async fn q1_single_graph_property_path_works() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + seed(&fluree, &handle).await; + let sparql = r" +PREFIX ex: +SELECT ?anc FROM +WHERE { ex:narrow ex:broader* ?anc }"; + let result = fluree + .query_connection_sparql(sparql) + .await + .expect("single-graph property path should execute"); + let tax = fluree.ledger("taxonomy:main").await.expect("load"); + let s = result + .to_jsonld(&tax.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("ex:narrow") && s.contains("ex:mid") && s.contains("ex:top"), + "{s}" + ); + }) + .await; +} + +/// Q2 (characterization) — property path over MULTI-graph `FROM` is guarded. +#[tokio::test] +async fn q2_multi_graph_property_path_is_guarded() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + seed(&fluree, &handle).await; + let sparql = r" +PREFIX ex: +SELECT DISTINCT ?thing FROM FROM +WHERE { ?thing ex:category ?c . ?c ex:broader* ex:top }"; + let err = fluree + .query_connection_sparql(sparql) + .await + .expect_err("multi-graph property path should be rejected"); + assert!( + err.to_string() + .contains("Property paths over multi-graph datasets are not supported"), + "unexpected error: {err}" + ); + }) + .await; +} + +/// Q3a (BUG) — GRAPH-scoped property path over an INDEXED multi-ledger dataset +/// should join back to the default-graph instance. Expected: `ex:thing`. +#[tokio::test] +async fn q3a_graph_scoped_path_over_multiledger_should_join() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + seed(&fluree, &handle).await; + let sparql = r" +PREFIX ex: +SELECT DISTINCT ?thing FROM FROM NAMED +WHERE { ?thing ex:category ?c . GRAPH { ?c ex:broader* ex:top } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!( + result.is_ok(), + "GRAPH-scoped property path over a multi-ledger dataset should execute, got: {:?}", + result.err() + ); + let cat = fluree.ledger("catalog:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); + assert!(s.contains("ex:thing"), "expected ex:thing: {s}"); + }) + .await; +} + +/// Q3c (BUG?) — same cross-graph join, but with **namespace-code divergence**: +/// the join-key namespace (`https://example.org/`) is registered first in +/// `taxonomy` but only later (via a ref) in `catalog2`, so it gets a different +/// code in each ledger. If the GRAPH-boundary join compares raw SIDs without +/// re-encoding (the #1295 family), `?c` won't match and this returns empty. +/// Expected: `cat:thing`. +#[tokio::test] +async fn q3c_cross_graph_join_divergent_ns_should_return_rows() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + // taxonomy: example.org registered FIRST (low code). + insert_indexed( + &fluree, &handle, "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + ]}), + ).await; + // catalog2: catalog.example registered first, example.org only via the + // ref to ex:narrow → example.org gets a *different* code here. + insert_indexed( + &fluree, &handle, "catalog2:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [ {"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}} ]}), + ).await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +SELECT DISTINCT ?thing FROM NAMED FROM NAMED +WHERE { GRAPH { ?thing cat:category ?c } + GRAPH { ?c ex:broader ex:mid } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let cat = fluree.ledger("catalog2:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + assert!(s.contains("cat:thing"), "divergent-ns cross-graph join returned no rows: {s}"); + }) + .await; +} + +/// Q3d (MITIGATION) — same divergent setup as q3c, but `catalog3` is seeded +/// with a deterministic *vocabulary warm-up*: it touches the shared +/// `https://example.org/` namespace FIRST (before its own `cat:` namespace), +/// so example.org gets the SAME code as in `taxonomy`. If aligning codes +/// sidesteps the cross-graph re-encoding gap, this should return `cat:thing`. +#[tokio::test] +async fn q3d_namespace_warmup_aligns_codes_and_join_works() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, &handle, "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + ]}), + ).await; + // WARM-UP: register example.org FIRST via a throwaway vocab node, + // THEN the catalog-specific (cat:) data. example.org now aligns. + insert_indexed( + &fluree, &handle, "catalog3:main", + &json!({"@context": {"ex": "https://example.org/", "cat": "https://catalog.example/"}, + "@graph": [ + {"@id": "ex:_vocab", "ex:_seed": "1"}, + {"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}} + ]}), + ).await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +SELECT DISTINCT ?thing FROM NAMED FROM NAMED +WHERE { GRAPH { ?thing cat:category ?c } + GRAPH { ?c ex:broader ex:mid } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let cat = fluree.ledger("catalog3:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + assert!(s.contains("cat:thing"), "warm-up did NOT align codes: {s}"); + }) + .await; +} + +/// Q3b (BUG) — a variable bound inside a GRAPH block should join across the +/// boundary over an INDEXED multi-ledger dataset. Expected: `ex:thing`. +#[tokio::test] +async fn q3b_cross_graph_join_should_return_rows() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + seed(&fluree, &handle).await; + let sparql = r" +PREFIX ex: +SELECT DISTINCT ?thing FROM NAMED FROM NAMED +WHERE { GRAPH { ?thing ex:category ?c } + GRAPH { ?c ex:broader ex:mid } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!( + result.is_ok(), + "cross-graph join should execute, got: {:?}", + result.err() + ); + let cat = fluree.ledger("catalog:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("ex:thing"), + "cross-graph join returned no rows: {s}" + ); + }) + .await; +} + +// ============================================================================= +// Usage-pattern matrix (issue #1405, bugs 2+3). Each new case is INDEXED and +// uses DIVERGENT namespace codes (the ledger registers its own namespace first, +// the shared one only via a ref — so the shared namespace gets a different code +// per ledger), unless noted. These pin behaviors the q1–q3d repro does not. +// ============================================================================= + +/// A1 (P2 — join independent datasets) — join key bound as SUBJECT in graph 1, +/// used as OBJECT in graph 2, under namespace divergence. Exercises the +/// object-position substitution arm the subject-position repro (q3c) does not. +/// Expected: `org:acme`. +#[tokio::test] +async fn a1_object_position_cross_graph_join_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + // staff: example.org registered FIRST → low code; ex:alice is a subject. + insert_indexed( + &fluree, + &handle, + "staff:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [{"@id": "ex:alice", "@type": "ex:Engineer"}]}), + ) + .await; + // orgs: org.example registered first; example.org only via the employs + // ref → a different code. ex:alice appears as the OBJECT of org:employs. + insert_indexed( + &fluree, + &handle, + "orgs:main", + &json!({"@context": {"org": "https://org.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "org:acme", "org:employs": {"@id": "ex:alice"}}]}), + ) + .await; + + let sparql = r" +PREFIX ex: +PREFIX org: +SELECT DISTINCT ?org FROM NAMED FROM NAMED +WHERE { GRAPH { ?p a ex:Engineer } + GRAPH { ?org org:employs ?p } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let orgs = fluree.ledger("orgs:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&orgs.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("org:acme"), + "object-position divergent-ns join returned no rows: {s}" + ); + }) + .await; +} + +/// A2 (P4 — completeness) — an instance in TWO matching categories under +/// divergence: BOTH must come back (per-value re-encode, nothing dropped). +/// Expected: labels `Narrow` AND `Mid`. +#[tokio::test] +async fn a2_multi_value_cross_graph_join_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}, + "@graph": [ + {"@id": "ex:mid", "rdfs:label": "Mid"}, + {"@id": "ex:narrow", "rdfs:label": "Narrow"} + ]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "catm:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "cat:thing", + "cat:category": [{"@id": "ex:narrow"}, {"@id": "ex:mid"}]}]}), + ) + .await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +PREFIX rdfs: +SELECT DISTINCT ?label FROM NAMED FROM NAMED +WHERE { GRAPH { cat:thing cat:category ?c } + GRAPH { ?c rdfs:label ?label } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let cat = fluree.ledger("catm:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + assert!( + s.contains("Narrow") && s.contains("Mid"), + "multi-value divergent-ns join dropped a value (want both Narrow+Mid): {s}" + ); + }) + .await; +} + +/// A3 (P4 — precision) — of two categories, only `ex:mid` has `ex:broader ex:top` +/// (`ex:narrow`'s broader is `ex:mid`). The divergent-ns join must return EXACTLY +/// `ex:mid` and must NOT falsely match `ex:narrow`. Guards against a re-encode so +/// loose it over-matches. +#[tokio::test] +async fn a3_cross_graph_join_precision_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}}, + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}} + ]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "catp:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "cat:thing", + "cat:category": [{"@id": "ex:narrow"}, {"@id": "ex:mid"}]}]}), + ) + .await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +SELECT DISTINCT ?c FROM NAMED FROM NAMED +WHERE { GRAPH { cat:thing cat:category ?c } + GRAPH { ?c ex:broader ex:top } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let tax = fluree.ledger("taxonomy:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&tax.snapshot).expect("to_jsonld").to_string(); + assert!( + s.contains("ex:mid"), + "precision join missed the true match ex:mid: {s}" + ); + assert!( + !s.contains("ex:narrow"), + "precision join falsely matched ex:narrow (over-match): {s}" + ); + }) + .await; +} + +/// A4 (P1 — taxonomy + instances) — `p+` (strict "proper ancestors") scoped path +/// joined to a default-graph instance, under divergence. Exercises bug 2 (path in +/// GRAPH, indexed) AND bug 3 (divergent join key) together. Expected: `cat:thing`. +#[tokio::test] +async fn a4_strict_path_plus_join_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + ]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "cata:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}}]}), + ) + .await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +SELECT DISTINCT ?thing FROM FROM NAMED +WHERE { ?thing cat:category ?c . GRAPH { ?c ex:broader+ ex:top } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let cat = fluree.ledger("cata:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + assert!( + s.contains("cat:thing"), + "strict-path + divergent-ns join returned no rows: {s}" + ); + }) + .await; +} + +/// A5 (P3 — chained hop) — a join key crossing TWO ledger boundaries +/// (app → catalog → upper), all with divergent codes on the shared `sh:` +/// namespace. Pins that re-encoding COMPOSES across more than one boundary. +/// Expected: `Upper A`. +#[tokio::test] +async fn a5_three_ledger_chain_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + // Each ledger registers its own local namespace FIRST so the shared + // `sh:` namespace gets a different code in each. + insert_indexed( + &fluree, + &handle, + "appl:main", + &json!({"@context": {"l1": "https://l1.example/", "sh": "https://shared.example/"}, + "@graph": [ + {"@id": "l1:_seed", "l1:x": "1"}, + {"@id": "sh:item1", "sh:inCategory": {"@id": "sh:catX"}} + ]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "catl:main", + &json!({"@context": {"l2": "https://l2.example/", "sh": "https://shared.example/"}, + "@graph": [ + {"@id": "l2:_seed", "l2:x": "1"}, + {"@id": "sh:catX", "sh:mapsTo": {"@id": "sh:upA"}} + ]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "upl:main", + &json!({"@context": {"l3": "https://l3.example/", "sh": "https://shared.example/"}, + "@graph": [ + {"@id": "l3:_seed", "l3:x": "1"}, + {"@id": "sh:upA", "sh:label": "Upper A"} + ]}), + ) + .await; + + let sparql = r" +PREFIX sh: +SELECT DISTINCT ?l FROM NAMED FROM NAMED FROM NAMED +WHERE { GRAPH { sh:item1 sh:inCategory ?c } + GRAPH { ?c sh:mapsTo ?u } + GRAPH { ?u sh:label ?l } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let up = fluree.ledger("upl:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&up.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("Upper A"), + "three-ledger chained divergent-ns join returned no rows: {s}" + ); + }) + .await; +} + +/// A6 (regression) — a SINGLE-ledger GRAPH-scoped path is unaffected by the +/// multi-ledger binary-store gating (Fix 1 fires only when stamping is needed). +/// Green before and after. Expected: `ex:narrow`, `ex:mid`, `ex:top`. +#[tokio::test] +async fn a6_single_ledger_graph_path_unaffected() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + ]}), + ) + .await; + + let sparql = r" +PREFIX ex: +SELECT DISTINCT ?anc FROM NAMED +WHERE { GRAPH { ex:narrow ex:broader* ?anc } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!( + result.is_ok(), + "single-ledger GRAPH path should execute, got: {:?}", + result.err() + ); + let tax = fluree.ledger("taxonomy:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&tax.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("ex:narrow") && s.contains("ex:mid") && s.contains("ex:top"), + "single-ledger GRAPH path did not return the full chain: {s}" + ); + }) + .await; +} + +/// A7 (characterization) — `FILTER EXISTS` across a GRAPH boundary (semi-join). +/// This path is `SeedOperator`-based, distinct from the nested-loop join, so the +/// bug-2/bug-3 fixes may NOT cover it. Asserts the DESIRED behavior (the instance +/// whose category has `ex:broader ex:mid` is kept); if it fails after the fixes, +/// mark `#[ignore]` and file a semi-join follow-up. Divergent codes. +#[tokio::test] +async fn a7_filter_exists_cross_graph_divergent_ns() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [{"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "catx:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}}]}), + ) + .await; + + let sparql = r" +PREFIX ex: +PREFIX cat: +SELECT DISTINCT ?thing FROM NAMED FROM NAMED +WHERE { GRAPH { ?thing cat:category ?c } + FILTER EXISTS { GRAPH { ?c ex:broader ex:mid } } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let cat = fluree.ledger("catx:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + assert!( + s.contains("cat:thing"), + "FILTER EXISTS across a GRAPH boundary (divergent ns) dropped the row: {s}" + ); + }) + .await; +} + +/// A8 (P1 — taxonomy crawl) — a BOTH-endpoints-unbound closure (`?s ex:broader+ +/// ?o`) inside a GRAPH block, where the query's primary ledger differs from the +/// path's graph so the path predicate `ex:broader` has a divergent code. Pins +/// that the closure/adjacency read path (not just the bounded read_step) also +/// re-encodes the traversal predicate. Expected: ancestor pairs incl. ex:top. +#[tokio::test] +async fn a8_unbounded_closure_in_graph_divergent_pred() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + // primary (first FROM NAMED): registers cat: first, ex: only via a + // ref → ex: gets a divergent code vs taxonomy. + insert_indexed( + &fluree, + &handle, + "prim:main", + &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, + "@graph": [{"@id": "cat:x", "cat:ref": {"@id": "ex:narrow"}}]}), + ) + .await; + insert_indexed( + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"ex": "https://example.org/"}, + "@graph": [ + {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, + {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + ]}), + ) + .await; + + let sparql = r" +PREFIX ex: +SELECT DISTINCT ?s ?o FROM NAMED FROM NAMED +WHERE { GRAPH { ?s ex:broader+ ?o } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!(result.is_ok(), "should execute, got: {:?}", result.err()); + let tax = fluree.ledger("taxonomy:main").await.expect("load"); + let s = result.unwrap().to_jsonld(&tax.snapshot).expect("to_jsonld").to_string(); + // narrow→mid→top: closure must include the deep pair reaching ex:top. + assert!( + s.contains("ex:top") && s.contains("ex:narrow"), + "unbounded closure with a divergent-code predicate found no edges: {s}" + ); + }) + .await; +} diff --git a/fluree-db-query/src/dataset_operator.rs b/fluree-db-query/src/dataset_operator.rs index c19f00e05d..46c924eec1 100644 --- a/fluree-db-query/src/dataset_operator.rs +++ b/fluree-db-query/src/dataset_operator.rs @@ -370,7 +370,19 @@ impl Operator for DatasetOperator { // *before* opening any operators so we can consistently // disable binary stores for all graphs when provenance // stamping is needed. - let multi_ledger = graphs.windows(2).any(|w| w[0].ledger_id != w[1].ledger_id); + // A single active graph can still belong to a multi-ledger + // dataset (e.g. the default graph alongside named graphs from + // other ledgers). Its bindings may cross a graph boundary — + // seed a GRAPH block or a cross-graph join — and be stamped, so + // they must materialize to `Binding::Sid` rather than late + // `Binding::EncodedSid` (which `stamp_provenance` cannot decode + // without the store), exactly as when the active graphs + // themselves span ledgers (issue #1405). + let multi_ledger = graphs.windows(2).any(|w| w[0].ledger_id != w[1].ledger_id) + || ctx + .dataset + .as_ref() + .is_some_and(|d| d.spans_multiple_ledgers()); self.needs_provenance = multi_ledger; for graph in &graphs { diff --git a/fluree-db-query/src/property_path.rs b/fluree-db-query/src/property_path.rs index f11ec8bd1c..3f29fc5c90 100644 --- a/fluree-db-query/src/property_path.rs +++ b/fluree-db-query/src/property_path.rs @@ -57,6 +57,24 @@ fn is_reserved_edge_predicate(p: &Sid) -> bool { fluree_db_core::is_rdf_type(p) || fluree_db_core::is_reserved_reifies_predicate(p) } +/// Re-encode a pattern-constant predicate `Sid` into the active graph's +/// namespace table. +/// +/// Path pattern predicates are encoded against the primary/lowering snapshot at +/// plan time, but a path executes against a per-graph (`GRAPH `) snapshot +/// that may assign the same IRI a different namespace code. Without re-encoding, +/// a divergent-namespace predicate (e.g. `ex:broader`) reads the wrong SID and +/// the traversal silently finds no edges (issue #1405). Decodes against the +/// original snapshot (where the SID was encoded) and re-encodes against the +/// active graph; single-graph queries round-trip to the same SID (no change). +#[inline] +fn reencode_pred(ctx: &ExecutionContext<'_>, db: &fluree_db_core::LedgerSnapshot, p: &Sid) -> Sid { + ctx.original_snapshot + .decode_sid(p) + .and_then(|iri| db.encode_iri(&iri)) + .unwrap_or_else(|| p.clone()) +} + /// Property path operator - transitive graph traversal /// /// Supports two execution modes: @@ -280,10 +298,13 @@ impl PropertyPathOperator { use_post: bool, ) -> Result> { let (db, overlay, to_t) = ctx.require_single_graph()?; + // Re-encode the traversal predicates into the active graph's dict — see + // `reencode_pred` (issue #1405). + let preds: Vec = preds.iter().map(|p| reencode_pred(ctx, db, p)).collect(); let mut out = Vec::new(); let mut seen: HashSet = HashSet::new(); for node in nodes { - for pred in preds { + for pred in &preds { let (index, range_match) = if use_post { ( IndexType::Post, @@ -642,7 +663,8 @@ impl PropertyPathOperator { } } else { for pred in &self.pattern.predicates { - let range_match = RangeMatch::predicate(pred.clone()); + // Re-encode into the active graph's dict — see `reencode_pred`. + let range_match = RangeMatch::predicate(reencode_pred(ctx, db, pred)); let flakes = range_with_overlay( db, ctx.binary_g_id, @@ -785,7 +807,8 @@ impl PropertyPathOperator { let mut seen: HashSet = HashSet::new(); let mut out = Vec::new(); for pred in &self.pattern.predicates { - let range_match = RangeMatch::predicate(pred.clone()); + // Re-encode into the active graph's dict — see `reencode_pred`. + let range_match = RangeMatch::predicate(reencode_pred(ctx, db, pred)); let flakes = range_with_overlay( db, ctx.binary_g_id, @@ -830,7 +853,8 @@ impl PropertyPathOperator { .flat_map(|s| s.predicates.iter()), ); for pred in all_preds { - let range_match = RangeMatch::predicate(pred.clone()); + // Re-encode into the active graph's dict — see `reencode_pred`. + let range_match = RangeMatch::predicate(reencode_pred(ctx, db, pred)); let flakes = range_with_overlay( db, ctx.binary_g_id, @@ -1011,7 +1035,19 @@ impl PropertyPathOperator { let binary_store = ctx.binary_store.as_ref(); let resolve_sid = |term: &Ref, binding: Option<&Binding>| -> Option { match term { - Ref::Sid(s) => Some(s.clone()), + // A pattern-constant SID is encoded against the primary/lowering + // snapshot at plan time; re-encode it into the active graph's + // namespace table (matching the `Ref::Iri` arm) so a + // divergent-namespace path endpoint — e.g. `?c broader+ ex:top` + // where `ex:top`'s code differs across ledgers — is matched + // against the right code instead of silently finding nothing + // (issue #1405). Falls back to the raw SID when it can't be + // decoded (single-graph round-trips to the same SID). + Ref::Sid(s) => ctx + .original_snapshot + .decode_sid(s) + .and_then(|iri| db_for_encode.encode_iri(&iri)) + .or_else(|| Some(s.clone())), Ref::Iri(iri) => db_for_encode.encode_iri(iri), Ref::Var(_) => binding.and_then(|b| match b { Binding::Sid { sid: s, .. } => Some(s.clone()), From 7e175d091fd315befb885636a08c7c1d353e73c1 Mon Sep 17 00:00:00 2001 From: Jack White Date: Fri, 3 Jul 2026 15:38:40 -0400 Subject: [PATCH 2/2] refactor: tighten #1405 comments; concrete library/subjects test data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback: - Trim the engine-side comments to the load-bearing invariant and drop #1405 references from implementation code (db engineers know the mechanics; issue refs belong in the regression tests, which keep them). - Rewrite the test data from generic `ex:thing`/`ex:category`/`narrow`/`mid`/ `top` into a concrete library/subjects domain with distinct per-ledger prefixes, matching the house style of the sibling cross-ledger tests: `lib:book1` (library.example) references a `subj:` subject taxonomy (jazz ⊂ music ⊂ arts via `subj:broader`, subject.example). The own-prefix-first / shared-via-ref seeding that produces namespace-code divergence is preserved, so the aligned (q3d warm-up) vs divergent contrast still holds. No behavior change; 14/14 in the suite, fmt + clippy clean. Co-Authored-By: Claude Opus 4.8 --- .../tests/it_multi_graph_property_path.rs | 540 ++++++++++-------- fluree-db-query/src/dataset_operator.rs | 11 +- fluree-db-query/src/property_path.rs | 29 +- 3 files changed, 310 insertions(+), 270 deletions(-) diff --git a/fluree-db-api/tests/it_multi_graph_property_path.rs b/fluree-db-api/tests/it_multi_graph_property_path.rs index 388d665ed4..645e4239db 100644 --- a/fluree-db-api/tests/it_multi_graph_property_path.rs +++ b/fluree-db-api/tests/it_multi_graph_property_path.rs @@ -5,9 +5,13 @@ //! the connection path. This is what triggers the multi-graph GRAPH + property //! path failures that the novelty-only path does not. //! -//! See the fluree/db issue "Property paths can't be combined with cross-ledger -//! queries". `q1`/`q2` characterize current (correct/guarded) behavior; `q3a`/ -//! `q3b` assert the *desired* behavior of the `use GRAPH` escape hatch. +//! Domain: a **library** ledger (`lib:` books) references a **subject taxonomy** +//! ledger (`subj:` topics linked by `subj:broader`, e.g. jazz ⊂ music ⊂ arts). +//! Most cases use *divergent* namespace codes — a ledger registers its own +//! prefix first and the shared `subj:` prefix only via a ref, so `subj:` gets a +//! different code per ledger (the real cross-ledger condition). +//! +//! See GitHub issue #1405 (property paths + multi-ledger datasets). #![cfg(feature = "native")] @@ -46,18 +50,23 @@ async fn insert_indexed( let _ = trigger_index_and_wait_outcome(handle, ledger_id, result.ledger.t()).await; } +/// Seed a `subject.example` taxonomy (jazz ⊂ music ⊂ arts) and a +/// `library.example` book that references the deepest subject. `subj:` is +/// registered first in the taxonomy but only via a ref in the catalog (whose +/// own `lib:` prefix registers first), so the two ledgers assign `subj:` +/// different namespace codes. async fn seed(fluree: &MemoryFluree, handle: &fluree_db_indexer::IndexerHandle) { insert_indexed( fluree, handle, "taxonomy:main", &json!({ - "@context": {"ex": "https://example.org/", + "@context": {"subj": "http://subject.example/", "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}, "@graph": [ - {"@id": "ex:top", "rdfs:label": "Top"}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}, "rdfs:label": "Mid"}, - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}, "rdfs:label": "Narrow"} + {"@id": "subj:arts", "rdfs:label": "Arts"}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}, "rdfs:label": "Music"}, + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}, "rdfs:label": "Jazz"} ] }), ) @@ -67,8 +76,8 @@ async fn seed(fluree: &MemoryFluree, handle: &fluree_db_indexer::IndexerHandle) handle, "catalog:main", &json!({ - "@context": {"ex": "https://example.org/"}, - "@graph": [ {"@id": "ex:thing", "ex:category": {"@id": "ex:narrow"}} ] + "@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [ {"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}} ] }), ) .await; @@ -101,9 +110,9 @@ async fn q1_single_graph_property_path_works() { .run_until(async move { seed(&fluree, &handle).await; let sparql = r" -PREFIX ex: +PREFIX subj: SELECT ?anc FROM -WHERE { ex:narrow ex:broader* ?anc }"; +WHERE { subj:jazz subj:broader* ?anc }"; let result = fluree .query_connection_sparql(sparql) .await @@ -114,7 +123,7 @@ WHERE { ex:narrow ex:broader* ?anc }"; .expect("to_jsonld") .to_string(); assert!( - s.contains("ex:narrow") && s.contains("ex:mid") && s.contains("ex:top"), + s.contains("subj:jazz") && s.contains("subj:music") && s.contains("subj:arts"), "{s}" ); }) @@ -122,6 +131,8 @@ WHERE { ex:narrow ex:broader* ?anc }"; } /// Q2 (characterization) — property path over MULTI-graph `FROM` is guarded. +/// This is failure 1 in #1405, intentionally left in place (cross-snapshot BFS +/// is a follow-up); the test asserts the guard still fires. #[tokio::test] async fn q2_multi_graph_property_path_is_guarded() { let (fluree, local, handle) = fluree_with_indexer(); @@ -129,9 +140,10 @@ async fn q2_multi_graph_property_path_is_guarded() { .run_until(async move { seed(&fluree, &handle).await; let sparql = r" -PREFIX ex: -SELECT DISTINCT ?thing FROM FROM -WHERE { ?thing ex:category ?c . ?c ex:broader* ex:top }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM FROM +WHERE { ?book lib:subject ?c . ?c subj:broader* subj:arts }"; let err = fluree .query_connection_sparql(sparql) .await @@ -145,8 +157,9 @@ WHERE { ?thing ex:category ?c . ?c ex:broader* ex:top }"; .await; } -/// Q3a (BUG) — GRAPH-scoped property path over an INDEXED multi-ledger dataset -/// should join back to the default-graph instance. Expected: `ex:thing`. +/// Q3a — a GRAPH-scoped property path over an INDEXED multi-ledger dataset, +/// joined to a default-graph instance. Pre-fix this hit an internal invariant +/// (`EncodedSid reached stamp_provenance`). Expected: `lib:book1`. #[tokio::test] async fn q3a_graph_scoped_path_over_multiledger_should_join() { let (fluree, local, handle) = fluree_with_indexer(); @@ -154,9 +167,10 @@ async fn q3a_graph_scoped_path_over_multiledger_should_join() { .run_until(async move { seed(&fluree, &handle).await; let sparql = r" -PREFIX ex: -SELECT DISTINCT ?thing FROM FROM NAMED -WHERE { ?thing ex:category ?c . GRAPH { ?c ex:broader* ex:top } }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM FROM NAMED +WHERE { ?book lib:subject ?c . GRAPH { ?c subj:broader* subj:arts } }"; let result = fluree.query_connection_sparql(sparql).await; assert!( result.is_ok(), @@ -169,192 +183,212 @@ WHERE { ?thing ex:category ?c . GRAPH { ?c ex:broader* ex:top } .to_jsonld(&cat.snapshot) .expect("to_jsonld") .to_string(); - assert!(s.contains("ex:thing"), "expected ex:thing: {s}"); + assert!(s.contains("lib:book1"), "expected lib:book1: {s}"); + }) + .await; +} + +/// Q3b — a plain cross-`GRAPH` variable join over an indexed multi-ledger +/// dataset. Expected: `lib:book1`. +#[tokio::test] +async fn q3b_cross_graph_join_should_return_rows() { + let (fluree, local, handle) = fluree_with_indexer(); + local + .run_until(async move { + seed(&fluree, &handle).await; + let sparql = r" +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM NAMED FROM NAMED +WHERE { GRAPH { ?book lib:subject ?c } + GRAPH { ?c subj:broader subj:music } }"; + let result = fluree.query_connection_sparql(sparql).await; + assert!( + result.is_ok(), + "cross-graph join should execute, got: {:?}", + result.err() + ); + let cat = fluree.ledger("catalog:main").await.expect("load"); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); + assert!( + s.contains("lib:book1"), + "cross-graph join returned no rows: {s}" + ); }) .await; } -/// Q3c (BUG?) — same cross-graph join, but with **namespace-code divergence**: -/// the join-key namespace (`https://example.org/`) is registered first in -/// `taxonomy` but only later (via a ref) in `catalog2`, so it gets a different -/// code in each ledger. If the GRAPH-boundary join compares raw SIDs without -/// re-encoding (the #1295 family), `?c` won't match and this returns empty. -/// Expected: `cat:thing`. +/// Q3c — the cross-graph join under explicit **namespace-code divergence**: +/// `subj:` is registered first in `taxonomy` but only via a ref in `catalog2` +/// (whose own `lib:` prefix registers first), so `?c` has a different code in +/// each ledger. Pre-fix this returned []. Expected: `lib:book1`. #[tokio::test] async fn q3c_cross_graph_join_divergent_ns_should_return_rows() { let (fluree, local, handle) = fluree_with_indexer(); local .run_until(async move { - // taxonomy: example.org registered FIRST (low code). + // taxonomy: subject.example registered FIRST (low code). insert_indexed( - &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}} ]}), - ).await; - // catalog2: catalog.example registered first, example.org only via the - // ref to ex:narrow → example.org gets a *different* code here. + ) + .await; + // catalog2: library.example registered first, subject.example only via + // the ref to subj:jazz → subject.example gets a *different* code here. insert_indexed( - &fluree, &handle, "catalog2:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [ {"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}} ]}), - ).await; + &fluree, + &handle, + "catalog2:main", + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [ {"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}} ]}), + ) + .await; let sparql = r" -PREFIX ex: -PREFIX cat: -SELECT DISTINCT ?thing FROM NAMED FROM NAMED -WHERE { GRAPH { ?thing cat:category ?c } - GRAPH { ?c ex:broader ex:mid } }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM NAMED FROM NAMED +WHERE { GRAPH { ?book lib:subject ?c } + GRAPH { ?c subj:broader subj:music } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let cat = fluree.ledger("catalog2:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); - assert!(s.contains("cat:thing"), "divergent-ns cross-graph join returned no rows: {s}"); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); + assert!(s.contains("lib:book1"), "divergent-ns cross-graph join returned no rows: {s}"); }) .await; } -/// Q3d (MITIGATION) — same divergent setup as q3c, but `catalog3` is seeded -/// with a deterministic *vocabulary warm-up*: it touches the shared -/// `https://example.org/` namespace FIRST (before its own `cat:` namespace), -/// so example.org gets the SAME code as in `taxonomy`. If aligning codes -/// sidesteps the cross-graph re-encoding gap, this should return `cat:thing`. +/// Q3d (mitigation) — same divergent shape as Q3c, but `catalog3` is seeded with +/// a deterministic *vocabulary warm-up*: it touches the shared `subject.example` +/// namespace FIRST (before its own `lib:` prefix), so `subj:` gets the SAME code +/// as in `taxonomy`. Aligning the codes sidesteps the re-encoding gap, so this +/// returns `lib:book1` even before the fix. #[tokio::test] async fn q3d_namespace_warmup_aligns_codes_and_join_works() { let (fluree, local, handle) = fluree_with_indexer(); local .run_until(async move { insert_indexed( - &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &fluree, + &handle, + "taxonomy:main", + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}} ]}), - ).await; - // WARM-UP: register example.org FIRST via a throwaway vocab node, - // THEN the catalog-specific (cat:) data. example.org now aligns. + ) + .await; + // WARM-UP: register subject.example FIRST via a throwaway node, THEN + // the library-specific (lib:) data. subject.example now aligns. insert_indexed( - &fluree, &handle, "catalog3:main", - &json!({"@context": {"ex": "https://example.org/", "cat": "https://catalog.example/"}, + &fluree, + &handle, + "catalog3:main", + &json!({"@context": {"subj": "http://subject.example/", "lib": "http://library.example/"}, "@graph": [ - {"@id": "ex:_vocab", "ex:_seed": "1"}, - {"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}} + {"@id": "subj:_vocab", "subj:_seed": "1"}, + {"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}} ]}), - ).await; + ) + .await; let sparql = r" -PREFIX ex: -PREFIX cat: -SELECT DISTINCT ?thing FROM NAMED FROM NAMED -WHERE { GRAPH { ?thing cat:category ?c } - GRAPH { ?c ex:broader ex:mid } }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM NAMED FROM NAMED +WHERE { GRAPH { ?book lib:subject ?c } + GRAPH { ?c subj:broader subj:music } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let cat = fluree.ledger("catalog3:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); - assert!(s.contains("cat:thing"), "warm-up did NOT align codes: {s}"); - }) - .await; -} - -/// Q3b (BUG) — a variable bound inside a GRAPH block should join across the -/// boundary over an INDEXED multi-ledger dataset. Expected: `ex:thing`. -#[tokio::test] -async fn q3b_cross_graph_join_should_return_rows() { - let (fluree, local, handle) = fluree_with_indexer(); - local - .run_until(async move { - seed(&fluree, &handle).await; - let sparql = r" -PREFIX ex: -SELECT DISTINCT ?thing FROM NAMED FROM NAMED -WHERE { GRAPH { ?thing ex:category ?c } - GRAPH { ?c ex:broader ex:mid } }"; - let result = fluree.query_connection_sparql(sparql).await; - assert!( - result.is_ok(), - "cross-graph join should execute, got: {:?}", - result.err() - ); - let cat = fluree.ledger("catalog:main").await.expect("load"); let s = result .unwrap() .to_jsonld(&cat.snapshot) .expect("to_jsonld") .to_string(); - assert!( - s.contains("ex:thing"), - "cross-graph join returned no rows: {s}" - ); + assert!(s.contains("lib:book1"), "warm-up did NOT align codes: {s}"); }) .await; } // ============================================================================= // Usage-pattern matrix (issue #1405, bugs 2+3). Each new case is INDEXED and -// uses DIVERGENT namespace codes (the ledger registers its own namespace first, -// the shared one only via a ref — so the shared namespace gets a different code -// per ledger), unless noted. These pin behaviors the q1–q3d repro does not. +// uses DIVERGENT namespace codes (a ledger registers its own prefix first, the +// shared one only via a ref), unless noted. These pin behaviors the q1–q3d +// repro does not. // ============================================================================= -/// A1 (P2 — join independent datasets) — join key bound as SUBJECT in graph 1, -/// used as OBJECT in graph 2, under namespace divergence. Exercises the -/// object-position substitution arm the subject-position repro (q3c) does not. -/// Expected: `org:acme`. +/// A1 (join independent datasets) — a join key bound as SUBJECT in graph 1 and +/// used as OBJECT in graph 2, under namespace divergence. `lib:book1` is a +/// `lib:Book` on the shelf and appears as the object of `list:includes` in a +/// reading list. Exercises the object-position substitution the subject-position +/// repro (Q3c) does not. Expected: `list:reading1`. #[tokio::test] async fn a1_object_position_cross_graph_join_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); local .run_until(async move { - // staff: example.org registered FIRST → low code; ex:alice is a subject. + // shelf: library.example registered FIRST; lib:book1 is a subject. insert_indexed( &fluree, &handle, - "staff:main", - &json!({"@context": {"ex": "https://example.org/"}, - "@graph": [{"@id": "ex:alice", "@type": "ex:Engineer"}]}), + "shelf:main", + &json!({"@context": {"lib": "http://library.example/"}, + "@graph": [{"@id": "lib:book1", "@type": "lib:Book"}]}), ) .await; - // orgs: org.example registered first; example.org only via the employs - // ref → a different code. ex:alice appears as the OBJECT of org:employs. + // lists: list.example registered first; library.example only via the + // ref → a different code. lib:book1 is the OBJECT of list:includes. insert_indexed( &fluree, &handle, - "orgs:main", - &json!({"@context": {"org": "https://org.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "org:acme", "org:employs": {"@id": "ex:alice"}}]}), + "lists:main", + &json!({"@context": {"list": "http://list.example/", "lib": "http://library.example/"}, + "@graph": [{"@id": "list:reading1", "list:includes": {"@id": "lib:book1"}}]}), ) .await; let sparql = r" -PREFIX ex: -PREFIX org: -SELECT DISTINCT ?org FROM NAMED FROM NAMED -WHERE { GRAPH { ?p a ex:Engineer } - GRAPH { ?org org:employs ?p } }"; +PREFIX lib: +PREFIX list: +SELECT DISTINCT ?list FROM NAMED FROM NAMED +WHERE { GRAPH { ?book a lib:Book } + GRAPH { ?list list:includes ?book } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); - let orgs = fluree.ledger("orgs:main").await.expect("load"); + let lists = fluree.ledger("lists:main").await.expect("load"); let s = result .unwrap() - .to_jsonld(&orgs.snapshot) + .to_jsonld(&lists.snapshot) .expect("to_jsonld") .to_string(); assert!( - s.contains("org:acme"), + s.contains("list:reading1"), "object-position divergent-ns join returned no rows: {s}" ); }) .await; } -/// A2 (P4 — completeness) — an instance in TWO matching categories under -/// divergence: BOTH must come back (per-value re-encode, nothing dropped). -/// Expected: labels `Narrow` AND `Mid`. +/// A2 (completeness) — a book with TWO subjects under divergence: BOTH must come +/// back (per-value re-encode, nothing dropped). Expected: labels `Jazz` AND +/// `Blues`. #[tokio::test] async fn a2_multi_value_cross_graph_join_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); @@ -364,11 +398,11 @@ async fn a2_multi_value_cross_graph_join_divergent_ns() { &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/", + &json!({"@context": {"subj": "http://subject.example/", "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}, "@graph": [ - {"@id": "ex:mid", "rdfs:label": "Mid"}, - {"@id": "ex:narrow", "rdfs:label": "Narrow"} + {"@id": "subj:jazz", "rdfs:label": "Jazz"}, + {"@id": "subj:blues", "rdfs:label": "Blues"} ]}), ) .await; @@ -376,35 +410,39 @@ async fn a2_multi_value_cross_graph_join_divergent_ns() { &fluree, &handle, "catm:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "cat:thing", - "cat:category": [{"@id": "ex:narrow"}, {"@id": "ex:mid"}]}]}), + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", + "lib:subject": [{"@id": "subj:jazz"}, {"@id": "subj:blues"}]}]}), ) .await; let sparql = r" -PREFIX ex: -PREFIX cat: +PREFIX subj: +PREFIX lib: PREFIX rdfs: SELECT DISTINCT ?label FROM NAMED FROM NAMED -WHERE { GRAPH { cat:thing cat:category ?c } +WHERE { GRAPH { lib:book1 lib:subject ?c } GRAPH { ?c rdfs:label ?label } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let cat = fluree.ledger("catm:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); assert!( - s.contains("Narrow") && s.contains("Mid"), - "multi-value divergent-ns join dropped a value (want both Narrow+Mid): {s}" + s.contains("Jazz") && s.contains("Blues"), + "multi-value divergent-ns join dropped a value (want both Jazz+Blues): {s}" ); }) .await; } -/// A3 (P4 — precision) — of two categories, only `ex:mid` has `ex:broader ex:top` -/// (`ex:narrow`'s broader is `ex:mid`). The divergent-ns join must return EXACTLY -/// `ex:mid` and must NOT falsely match `ex:narrow`. Guards against a re-encode so -/// loose it over-matches. +/// A3 (precision) — a book with two subjects (jazz, music); only `subj:music` +/// has `subj:broader subj:arts` (`subj:jazz`'s broader is `subj:music`). The +/// divergent-ns join must return EXACTLY `subj:music` and must NOT falsely match +/// `subj:jazz`. Guards against a re-encode so loose it over-matches. #[tokio::test] async fn a3_cross_graph_join_precision_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); @@ -414,10 +452,10 @@ async fn a3_cross_graph_join_precision_divergent_ns() { &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}}, - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}} + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}}, + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}} ]}), ) .await; @@ -425,37 +463,39 @@ async fn a3_cross_graph_join_precision_divergent_ns() { &fluree, &handle, "catp:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "cat:thing", - "cat:category": [{"@id": "ex:narrow"}, {"@id": "ex:mid"}]}]}), + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", + "lib:subject": [{"@id": "subj:jazz"}, {"@id": "subj:music"}]}]}), ) .await; let sparql = r" -PREFIX ex: -PREFIX cat: +PREFIX subj: +PREFIX lib: SELECT DISTINCT ?c FROM NAMED FROM NAMED -WHERE { GRAPH { cat:thing cat:category ?c } - GRAPH { ?c ex:broader ex:top } }"; +WHERE { GRAPH { lib:book1 lib:subject ?c } + GRAPH { ?c subj:broader subj:arts } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let tax = fluree.ledger("taxonomy:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&tax.snapshot).expect("to_jsonld").to_string(); - assert!( - s.contains("ex:mid"), - "precision join missed the true match ex:mid: {s}" - ); + let s = result + .unwrap() + .to_jsonld(&tax.snapshot) + .expect("to_jsonld") + .to_string(); + assert!(s.contains("subj:music"), "precision join missed the true match subj:music: {s}"); assert!( - !s.contains("ex:narrow"), - "precision join falsely matched ex:narrow (over-match): {s}" + !s.contains("subj:jazz"), + "precision join falsely matched subj:jazz (over-match): {s}" ); }) .await; } -/// A4 (P1 — taxonomy + instances) — `p+` (strict "proper ancestors") scoped path -/// joined to a default-graph instance, under divergence. Exercises bug 2 (path in -/// GRAPH, indexed) AND bug 3 (divergent join key) together. Expected: `cat:thing`. +/// A4 (taxonomy + instances) — `subj:broader+` (strict "proper ancestors") +/// scoped path joined to a default-graph book, under divergence. Exercises the +/// indexed-GRAPH-path materialization AND the divergent join key together. +/// Expected: `lib:book1`. #[tokio::test] async fn a4_strict_path_plus_join_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); @@ -465,10 +505,10 @@ async fn a4_strict_path_plus_join_divergent_ns() { &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}} ]}), ) .await; @@ -476,89 +516,93 @@ async fn a4_strict_path_plus_join_divergent_ns() { &fluree, &handle, "cata:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}}]}), + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}}]}), ) .await; let sparql = r" -PREFIX ex: -PREFIX cat: -SELECT DISTINCT ?thing FROM FROM NAMED -WHERE { ?thing cat:category ?c . GRAPH { ?c ex:broader+ ex:top } }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM FROM NAMED +WHERE { ?book lib:subject ?c . GRAPH { ?c subj:broader+ subj:arts } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let cat = fluree.ledger("cata:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); assert!( - s.contains("cat:thing"), + s.contains("lib:book1"), "strict-path + divergent-ns join returned no rows: {s}" ); }) .await; } -/// A5 (P3 — chained hop) — a join key crossing TWO ledger boundaries -/// (app → catalog → upper), all with divergent codes on the shared `sh:` -/// namespace. Pins that re-encoding COMPOSES across more than one boundary. -/// Expected: `Upper A`. +/// A5 (chained hop) — a join key crossing TWO ledger boundaries +/// (catalog → thesaurus → labels), all with divergent codes on the shared +/// `subj:` namespace. Pins that re-encoding COMPOSES across more than one +/// boundary. Expected: `Music`. #[tokio::test] async fn a5_three_ledger_chain_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); local .run_until(async move { - // Each ledger registers its own local namespace FIRST so the shared - // `sh:` namespace gets a different code in each. + // Each ledger registers its own local prefix FIRST so the shared + // `subj:` namespace gets a different code in each. insert_indexed( &fluree, &handle, - "appl:main", - &json!({"@context": {"l1": "https://l1.example/", "sh": "https://shared.example/"}, - "@graph": [ - {"@id": "l1:_seed", "l1:x": "1"}, - {"@id": "sh:item1", "sh:inCategory": {"@id": "sh:catX"}} - ]}), + "catalog:main", + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}}]}), ) .await; insert_indexed( &fluree, &handle, - "catl:main", - &json!({"@context": {"l2": "https://l2.example/", "sh": "https://shared.example/"}, - "@graph": [ - {"@id": "l2:_seed", "l2:x": "1"}, - {"@id": "sh:catX", "sh:mapsTo": {"@id": "sh:upA"}} - ]}), + "thesaurus:main", + &json!({"@context": {"th": "http://thesaurus.example/", "subj": "http://subject.example/"}, + "@graph": [ + {"@id": "th:_seed", "th:_x": "1"}, + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}} + ]}), ) .await; insert_indexed( &fluree, &handle, - "upl:main", - &json!({"@context": {"l3": "https://l3.example/", "sh": "https://shared.example/"}, - "@graph": [ - {"@id": "l3:_seed", "l3:x": "1"}, - {"@id": "sh:upA", "sh:label": "Upper A"} - ]}), + "labels:main", + &json!({"@context": {"lbl": "http://labels.example/", "subj": "http://subject.example/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}, + "@graph": [ + {"@id": "lbl:_seed", "lbl:_x": "1"}, + {"@id": "subj:music", "rdfs:label": "Music"} + ]}), ) .await; let sparql = r" -PREFIX sh: -SELECT DISTINCT ?l FROM NAMED FROM NAMED FROM NAMED -WHERE { GRAPH { sh:item1 sh:inCategory ?c } - GRAPH { ?c sh:mapsTo ?u } - GRAPH { ?u sh:label ?l } }"; +PREFIX subj: +PREFIX lib: +PREFIX rdfs: +SELECT DISTINCT ?l FROM NAMED FROM NAMED FROM NAMED +WHERE { GRAPH { lib:book1 lib:subject ?c } + GRAPH { ?c subj:broader ?d } + GRAPH { ?d rdfs:label ?l } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); - let up = fluree.ledger("upl:main").await.expect("load"); + let labels = fluree.ledger("labels:main").await.expect("load"); let s = result .unwrap() - .to_jsonld(&up.snapshot) + .to_jsonld(&labels.snapshot) .expect("to_jsonld") .to_string(); assert!( - s.contains("Upper A"), + s.contains("Music"), "three-ledger chained divergent-ns join returned no rows: {s}" ); }) @@ -566,8 +610,9 @@ WHERE { GRAPH { sh:item1 sh:inCategory ?c } } /// A6 (regression) — a SINGLE-ledger GRAPH-scoped path is unaffected by the -/// multi-ledger binary-store gating (Fix 1 fires only when stamping is needed). -/// Green before and after. Expected: `ex:narrow`, `ex:mid`, `ex:top`. +/// multi-ledger materialization gating (which fires only when stamping is +/// needed). Green before and after. Expected: `subj:jazz`, `subj:music`, +/// `subj:arts`. #[tokio::test] async fn a6_single_ledger_graph_path_unaffected() { let (fluree, local, handle) = fluree_with_indexer(); @@ -577,18 +622,18 @@ async fn a6_single_ledger_graph_path_unaffected() { &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}} ]}), ) .await; let sparql = r" -PREFIX ex: +PREFIX subj: SELECT DISTINCT ?anc FROM NAMED -WHERE { GRAPH { ex:narrow ex:broader* ?anc } }"; +WHERE { GRAPH { subj:jazz subj:broader* ?anc } }"; let result = fluree.query_connection_sparql(sparql).await; assert!( result.is_ok(), @@ -602,18 +647,17 @@ WHERE { GRAPH { ex:narrow ex:broader* ?anc } }"; .expect("to_jsonld") .to_string(); assert!( - s.contains("ex:narrow") && s.contains("ex:mid") && s.contains("ex:top"), + s.contains("subj:jazz") && s.contains("subj:music") && s.contains("subj:arts"), "single-ledger GRAPH path did not return the full chain: {s}" ); }) .await; } -/// A7 (characterization) — `FILTER EXISTS` across a GRAPH boundary (semi-join). -/// This path is `SeedOperator`-based, distinct from the nested-loop join, so the -/// bug-2/bug-3 fixes may NOT cover it. Asserts the DESIRED behavior (the instance -/// whose category has `ex:broader ex:mid` is kept); if it fails after the fixes, -/// mark `#[ignore]` and file a semi-join follow-up. Divergent codes. +/// A7 (semi-join) — `FILTER EXISTS` across a GRAPH boundary. This path is +/// `SeedOperator`-based (distinct from the nested-loop join), so it's a separate +/// code path — but the root-cause materialization fix covers it too. Keeps the +/// book whose subject has `subj:broader subj:music`. Expected: `lib:book1`. #[tokio::test] async fn a7_filter_exists_cross_graph_divergent_ns() { let (fluree, local, handle) = fluree_with_indexer(); @@ -623,80 +667,88 @@ async fn a7_filter_exists_cross_graph_divergent_ns() { &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, - "@graph": [{"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}]}), + &json!({"@context": {"subj": "http://subject.example/"}, + "@graph": [{"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}]}), ) .await; insert_indexed( &fluree, &handle, "catx:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "cat:thing", "cat:category": {"@id": "ex:narrow"}}]}), + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}}]}), ) .await; let sparql = r" -PREFIX ex: -PREFIX cat: -SELECT DISTINCT ?thing FROM NAMED FROM NAMED -WHERE { GRAPH { ?thing cat:category ?c } - FILTER EXISTS { GRAPH { ?c ex:broader ex:mid } } }"; +PREFIX subj: +PREFIX lib: +SELECT DISTINCT ?book FROM NAMED FROM NAMED +WHERE { GRAPH { ?book lib:subject ?c } + FILTER EXISTS { GRAPH { ?c subj:broader subj:music } } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let cat = fluree.ledger("catx:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&cat.snapshot).expect("to_jsonld").to_string(); + let s = result + .unwrap() + .to_jsonld(&cat.snapshot) + .expect("to_jsonld") + .to_string(); assert!( - s.contains("cat:thing"), + s.contains("lib:book1"), "FILTER EXISTS across a GRAPH boundary (divergent ns) dropped the row: {s}" ); }) .await; } -/// A8 (P1 — taxonomy crawl) — a BOTH-endpoints-unbound closure (`?s ex:broader+ +/// A8 (taxonomy crawl) — a BOTH-endpoints-unbound closure (`?s subj:broader+ /// ?o`) inside a GRAPH block, where the query's primary ledger differs from the -/// path's graph so the path predicate `ex:broader` has a divergent code. Pins -/// that the closure/adjacency read path (not just the bounded read_step) also -/// re-encodes the traversal predicate. Expected: ancestor pairs incl. ex:top. +/// path's graph so the path predicate `subj:broader` has a divergent code. Pins +/// that the closure/adjacency read (not just the bounded read_step) also +/// re-encodes the traversal predicate. Expected: pairs incl. `subj:arts`. #[tokio::test] async fn a8_unbounded_closure_in_graph_divergent_pred() { let (fluree, local, handle) = fluree_with_indexer(); local .run_until(async move { - // primary (first FROM NAMED): registers cat: first, ex: only via a - // ref → ex: gets a divergent code vs taxonomy. + // primary (first FROM NAMED): registers lib: first, subj: only via a + // ref → subj: gets a divergent code vs taxonomy. insert_indexed( &fluree, &handle, "prim:main", - &json!({"@context": {"cat": "https://catalog.example/", "ex": "https://example.org/"}, - "@graph": [{"@id": "cat:x", "cat:ref": {"@id": "ex:narrow"}}]}), + &json!({"@context": {"lib": "http://library.example/", "subj": "http://subject.example/"}, + "@graph": [{"@id": "lib:book1", "lib:subject": {"@id": "subj:jazz"}}]}), ) .await; insert_indexed( &fluree, &handle, "taxonomy:main", - &json!({"@context": {"ex": "https://example.org/"}, + &json!({"@context": {"subj": "http://subject.example/"}, "@graph": [ - {"@id": "ex:narrow", "ex:broader": {"@id": "ex:mid"}}, - {"@id": "ex:mid", "ex:broader": {"@id": "ex:top"}} + {"@id": "subj:jazz", "subj:broader": {"@id": "subj:music"}}, + {"@id": "subj:music", "subj:broader": {"@id": "subj:arts"}} ]}), ) .await; let sparql = r" -PREFIX ex: +PREFIX subj: SELECT DISTINCT ?s ?o FROM NAMED FROM NAMED -WHERE { GRAPH { ?s ex:broader+ ?o } }"; +WHERE { GRAPH { ?s subj:broader+ ?o } }"; let result = fluree.query_connection_sparql(sparql).await; assert!(result.is_ok(), "should execute, got: {:?}", result.err()); let tax = fluree.ledger("taxonomy:main").await.expect("load"); - let s = result.unwrap().to_jsonld(&tax.snapshot).expect("to_jsonld").to_string(); - // narrow→mid→top: closure must include the deep pair reaching ex:top. + let s = result + .unwrap() + .to_jsonld(&tax.snapshot) + .expect("to_jsonld") + .to_string(); + // jazz→music→arts: closure must include the deep pair reaching subj:arts. assert!( - s.contains("ex:top") && s.contains("ex:narrow"), + s.contains("subj:arts") && s.contains("subj:jazz"), "unbounded closure with a divergent-code predicate found no edges: {s}" ); }) diff --git a/fluree-db-query/src/dataset_operator.rs b/fluree-db-query/src/dataset_operator.rs index 46c924eec1..2f0e26ca1a 100644 --- a/fluree-db-query/src/dataset_operator.rs +++ b/fluree-db-query/src/dataset_operator.rs @@ -371,13 +371,10 @@ impl Operator for DatasetOperator { // disable binary stores for all graphs when provenance // stamping is needed. // A single active graph can still belong to a multi-ledger - // dataset (e.g. the default graph alongside named graphs from - // other ledgers). Its bindings may cross a graph boundary — - // seed a GRAPH block or a cross-graph join — and be stamped, so - // they must materialize to `Binding::Sid` rather than late - // `Binding::EncodedSid` (which `stamp_provenance` cannot decode - // without the store), exactly as when the active graphs - // themselves span ledgers (issue #1405). + // dataset (a default graph alongside named graphs from other + // ledgers); its bindings may cross a boundary and be stamped, so + // force materialization here too — not only when the active + // graphs themselves span ledgers. let multi_ledger = graphs.windows(2).any(|w| w[0].ledger_id != w[1].ledger_id) || ctx .dataset diff --git a/fluree-db-query/src/property_path.rs b/fluree-db-query/src/property_path.rs index 3f29fc5c90..1c6641612c 100644 --- a/fluree-db-query/src/property_path.rs +++ b/fluree-db-query/src/property_path.rs @@ -57,16 +57,12 @@ fn is_reserved_edge_predicate(p: &Sid) -> bool { fluree_db_core::is_rdf_type(p) || fluree_db_core::is_reserved_reifies_predicate(p) } -/// Re-encode a pattern-constant predicate `Sid` into the active graph's -/// namespace table. +/// Re-encode a pattern-constant predicate `Sid` into the active graph's dict. /// -/// Path pattern predicates are encoded against the primary/lowering snapshot at -/// plan time, but a path executes against a per-graph (`GRAPH `) snapshot -/// that may assign the same IRI a different namespace code. Without re-encoding, -/// a divergent-namespace predicate (e.g. `ex:broader`) reads the wrong SID and -/// the traversal silently finds no edges (issue #1405). Decodes against the -/// original snapshot (where the SID was encoded) and re-encodes against the -/// active graph; single-graph queries round-trip to the same SID (no change). +/// Path predicates are encoded against the primary/lowering snapshot at plan +/// time, but a path runs against a per-`GRAPH` snapshot that may code the same +/// IRI differently; without this the traversal reads the wrong SID and finds no +/// edges. Single-graph round-trips to the same SID. #[inline] fn reencode_pred(ctx: &ExecutionContext<'_>, db: &fluree_db_core::LedgerSnapshot, p: &Sid) -> Sid { ctx.original_snapshot @@ -298,8 +294,8 @@ impl PropertyPathOperator { use_post: bool, ) -> Result> { let (db, overlay, to_t) = ctx.require_single_graph()?; - // Re-encode the traversal predicates into the active graph's dict — see - // `reencode_pred` (issue #1405). + // Re-encode traversal predicates into the active graph's dict (see + // `reencode_pred`). let preds: Vec = preds.iter().map(|p| reencode_pred(ctx, db, p)).collect(); let mut out = Vec::new(); let mut seen: HashSet = HashSet::new(); @@ -1035,14 +1031,9 @@ impl PropertyPathOperator { let binary_store = ctx.binary_store.as_ref(); let resolve_sid = |term: &Ref, binding: Option<&Binding>| -> Option { match term { - // A pattern-constant SID is encoded against the primary/lowering - // snapshot at plan time; re-encode it into the active graph's - // namespace table (matching the `Ref::Iri` arm) so a - // divergent-namespace path endpoint — e.g. `?c broader+ ex:top` - // where `ex:top`'s code differs across ledgers — is matched - // against the right code instead of silently finding nothing - // (issue #1405). Falls back to the raw SID when it can't be - // decoded (single-graph round-trips to the same SID). + // Re-encode a pattern-constant endpoint into the active graph + // (like the `Ref::Iri` arm) so a divergent-namespace endpoint + // matches; falls back to the raw SID when undecodable. Ref::Sid(s) => ctx .original_snapshot .decode_sid(s)