From 9a44e3bac8164cd1ac74526ae589faaa321f351b Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 4 Jul 2026 11:56:36 -0400 Subject: [PATCH 1/6] feat(transact): stable _:fdb blank-node ids addressable in queries and updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fluree skolemizes every blank node into the reserved _:fdb-... label space at insert time, and queries already resolved those labels to the stored Sid (canonical_split routes any _: string to the blank-node namespace). Writes, however, re-skolemized them: round-tripping an _:fdb-... id from a query silently minted a fresh node, and a SPARQL DELETE naming one retracted nothing. Blank-node-rooted structures (OWL restrictions, address objects) could only be edited by retracting and re-asserting the whole subtree. Labels with the reserved fdb- prefix now denote the existing stored node everywhere (RDF 1.1 §3.5 skolemization kept in blank-node syntax): - JSON-LD templates: parse_expanded_id_with_ctx and VALUES @id resolve stable ids to constant Sids - SPARQL SELECT WHERE: stable labels lower to constants; other labels stay non-distinguished variables per spec - SPARQL UPDATE: INSERT/DELETE templates, DELETE DATA/INSERT DATA, and DELETE WHERE (both pattern and template sides) resolve stable ids - Turtle: FlakeSink::term_blank passes stable labels through - Edge annotations: delete-by-@id now accepts stable ids (they are addressable), still rejects client-authored blank labels Ordinary labels (_:b0) keep standard semantics: fresh node per transaction on write, existential variable in SPARQL WHERE. Clients cannot collide with the reserved space because transaction skolemization wraps client labels with a txn id before prefixing fdb-. All checks happen at parse/lowering time (once per term, inside the existing _:-prefixed branches), never in per-solution template instantiation, so transactions that don't use stable ids pay nothing. 
W3C SPARQL eval: 283/327 before and after (no regression). --- docs/concepts/iri-and-context.md | 10 +- docs/transactions/insert.md | 5 +- .../update-where-delete-insert.md | 59 +++ fluree-db-api/tests/grp_transact.rs | 2 + fluree-db-api/tests/it_stable_blank_nodes.rs | 417 ++++++++++++++++++ fluree-db-core/src/ns_encoding.rs | 36 ++ fluree-db-sparql/src/lower/term.rs | 20 + fluree-db-transact/src/flake_sink.rs | 5 +- fluree-db-transact/src/lower_sparql_update.rs | 77 +++- fluree-db-transact/src/namespace.rs | 48 ++ .../src/parse/edge_annotations.rs | 19 +- fluree-db-transact/src/parse/jsonld.rs | 11 +- 12 files changed, 682 insertions(+), 27 deletions(-) create mode 100644 fluree-db-api/tests/it_stable_blank_nodes.rs diff --git a/docs/concepts/iri-and-context.md b/docs/concepts/iri-and-context.md index b0111ac7c1..d0d40330f7 100644 --- a/docs/concepts/iri-and-context.md +++ b/docs/concepts/iri-and-context.md @@ -256,9 +256,13 @@ For programmatic use from Rust, transactions can also set `TxnOpts.strict_compac ``` Blank nodes are: -- Local to a single transaction -- Cannot be referenced across transactions -- Useful for temporary or anonymous data +- Skolemized at insert time into the reserved `_:fdb-...` label space, which + queries return as the node's `@id` +- Addressable afterwards via that stable `_:fdb-...` id in queries and + transactions (see + [Editing blank-node structures](../transactions/update-where-delete-insert.md#editing-blank-node-structures-stable-_fdb--ids)) +- Client-authored labels (`_:b0`) stay local to a single transaction — reusing + the same label later mints a new node ## Best Practices diff --git a/docs/transactions/insert.md b/docs/transactions/insert.md index 59d0ccea4d..240e7e20f8 100644 --- a/docs/transactions/insert.md +++ b/docs/transactions/insert.md @@ -280,7 +280,10 @@ Create entities without explicit IRIs: } ``` -Fluree generates a unique IRI for the blank node address. 
+Fluree generates a unique IRI for the blank node address, in the reserved +`_:fdb-...` label space. Queries return that id, and it can be used later to +address the node directly — see +[Editing blank-node structures](update-where-delete-insert.md#editing-blank-node-structures-stable-_fdb--ids). ## Adding to Existing Entities diff --git a/docs/transactions/update-where-delete-insert.md b/docs/transactions/update-where-delete-insert.md index 75dba38ccd..da6a02adf6 100644 --- a/docs/transactions/update-where-delete-insert.md +++ b/docs/transactions/update-where-delete-insert.md @@ -556,6 +556,65 @@ Calculate new values based on old: } ``` +## Editing Blank-Node Structures (Stable `_:fdb-` Ids) + +Fluree skolemizes every blank node at insert time into the reserved +`_:fdb-...` label space, and queries return those labels as the node's `@id`. +These ids are **stable**: referencing an `_:fdb-...` id in a later query or +transaction denotes the existing stored node rather than minting a fresh one +(the blank-node-syntax equivalent of RDF 1.1 §3.5 skolem IRIs). This makes +blank-node-rooted structures — OWL restrictions, address objects, RDF lists — +editable in place, without retracting and re-asserting the whole subtree. + +Workflow: query for the node's id, then use it as an ordinary `@id`: + +```json +{ + "select": "?r", + "where": { "@id": "ex:ClassA", "ex:restriction": "?r" } +} +``` + +returns e.g. `"_:fdb-1751612345678901234-0-b0"`, which can then be edited +directly: + +```json +{ + "where": { "@id": "_:fdb-1751612345678901234-0-b0", "owl:someValuesFrom": "?old" }, + "delete": { "@id": "_:fdb-1751612345678901234-0-b0", "owl:someValuesFrom": "?old" }, + "insert": { "@id": "_:fdb-1751612345678901234-0-b0", "owl:someValuesFrom": { "@id": "ex:Gadget" } } +} +``` + +The parent's reference to the node is untouched and the node keeps its +identity across the edit. 
+ +The same ids work in SPARQL, in all of SELECT patterns, `DELETE`/`INSERT` +templates, `DELETE DATA`, `INSERT DATA`, and `DELETE WHERE`: + +```sparql +DELETE { _:fdb-1751612345678901234-0-b0 owl:someValuesFrom ?old } +INSERT { _:fdb-1751612345678901234-0-b0 owl:someValuesFrom ex:Gadget } +WHERE { _:fdb-1751612345678901234-0-b0 owl:someValuesFrom ?old } +``` + +Notes: + +- Only labels beginning with the reserved `_:fdb-` prefix behave this way. + Ordinary client-authored labels (`_:b0`) keep standard RDF semantics: a + fresh node per transaction on the write side, and an existential variable + in SPARQL WHERE patterns. Clients cannot accidentally collide with the + reserved space — transaction skolemization wraps client labels with a + transaction id before prefixing `fdb-`. +- Strictly per spec, SPARQL forbids blank nodes in `DELETE` templates and + treats WHERE-pattern labels as variables; accepting `_:fdb-` ids as + constants is a deliberate Fluree extension (the same one Virtuoso's + `nodeID://` refs and Jena's `<_:label>` syntax provide). +- Some stable ids minted by bulk import embed `:` characters (e.g. + `_:fdb-lubm:main-1-genid10`). These are addressable from JSON-LD, but the + SPARQL grammar does not allow `:` inside blank-node labels, so such ids + cannot be written in SPARQL syntax. 
+ ## Error Handling ### No Match diff --git a/fluree-db-api/tests/grp_transact.rs b/fluree-db-api/tests/grp_transact.rs index ba4888e0b9..2a451e44a1 100644 --- a/fluree-db-api/tests/grp_transact.rs +++ b/fluree-db-api/tests/grp_transact.rs @@ -7,6 +7,8 @@ mod it_concurrent_update_reconcile; mod it_enforce_unique_upsert_indexed; #[path = "it_raw_txn_parallel_upload.rs"] mod it_raw_txn_parallel_upload; +#[path = "it_stable_blank_nodes.rs"] +mod it_stable_blank_nodes; #[path = "it_transact.rs"] mod it_transact; #[path = "it_transact_conditional.rs"] diff --git a/fluree-db-api/tests/it_stable_blank_nodes.rs b/fluree-db-api/tests/it_stable_blank_nodes.rs new file mode 100644 index 0000000000..bb6224c754 --- /dev/null +++ b/fluree-db-api/tests/it_stable_blank_nodes.rs @@ -0,0 +1,417 @@ +//! Stable Fluree blank-node identifier tests. +//! +//! Fluree skolemizes every blank node into the reserved `_:fdb-...` label +//! space at insert time. These ids are returned by queries and — as pinned +//! here — are *stable*: when a later query or transaction references an +//! `_:fdb-...` label, it denotes the existing stored node instead of minting +//! a fresh one (RDF 1.1 §3.5 skolemization, kept in blank-node syntax). This +//! makes blank-node-rooted structures (e.g. OWL restrictions) editable in +//! place, without retracting and re-asserting the whole subtree. +//! +//! Ordinary client-authored labels (`_:b0`) keep standard semantics: fresh +//! node per transaction on the write side, existential variable in SPARQL +//! WHERE patterns. + +use crate::support; +use fluree_db_api::{FlureeBuilder, LedgerState, Novelty}; +use fluree_db_core::LedgerSnapshot; +use serde_json::{json, Value as JsonValue}; + +fn ctx() -> JsonValue { + json!({ + "ex": "http://example.org/", + "owl": "http://www.w3.org/2002/07/owl#" + }) +} + +/// Seed a class with a single OWL-restriction-like structure rooted at an +/// anonymous blank node and return the fluree handle + ledger. 
+async fn seed_restriction(ledger_id: &str) -> (fluree_db_api::Fluree, LedgerState) { + let fluree = FlureeBuilder::memory().build_memory(); + let db0 = LedgerSnapshot::genesis(ledger_id); + let ledger0 = LedgerState::new(db0, Novelty::new(0)); + + let seeded = fluree + .update( + ledger0, + &json!({ + "@context": ctx(), + "insert": { + "@id": "ex:ClassA", + "ex:restriction": { + "owl:onProperty": {"@id": "ex:hasPart"}, + "owl:someValuesFrom": {"@id": "ex:Widget"} + } + } + }), + ) + .await + .expect("seed insert"); + (fluree, seeded.ledger) +} + +async fn select_strings( + fluree: &fluree_db_api::Fluree, + ledger: &LedgerState, + query: &JsonValue, +) -> Vec<String> { + let result = support::query_jsonld(fluree, ledger, query) + .await + .expect("query"); + let v = result.to_jsonld(&ledger.snapshot).expect("to_jsonld"); + let mut out: Vec<String> = v + .as_array() + .expect("array result") + .iter() + .map(|x| x.as_str().expect("string binding").to_string()) + .collect(); + out.sort(); + out +} + +/// The `_:fdb-...` id of ex:ClassA's restriction node. 
+async fn restriction_id(fluree: &fluree_db_api::Fluree, ledger: &LedgerState) -> String { + let ids = select_strings( + fluree, + ledger, + &json!({ + "@context": ctx(), + "select": "?r", + "where": {"@id": "ex:ClassA", "ex:restriction": "?r"} + }), + ) + .await; + assert_eq!(ids.len(), 1, "exactly one restriction node: {ids:?}"); + let id = ids.into_iter().next().unwrap(); + assert!( + id.starts_with("_:fdb-"), + "restriction id should be a stable Fluree blank-node id, got {id}" + ); + id +} + +async fn run_sparql_update( + fluree: &fluree_db_api::Fluree, + ledger: LedgerState, + sparql: &str, +) -> fluree_db_api::TransactResult { + let parsed = fluree_db_sparql::parse_sparql(sparql); + assert!( + !parsed.has_errors(), + "SPARQL parse errors: {:?}", + parsed.diagnostics + ); + let ast = parsed.ast.expect("SPARQL AST"); + let mut ns = fluree_db_transact::NamespaceRegistry::from_db(&ledger.snapshot); + let txn = fluree_db_transact::lower_sparql_update_ast( + &ast, + &mut ns, + fluree_db_transact::TxnOpts::default(), + ) + .expect("lower SPARQL UPDATE"); + fluree + .stage_owned(ledger) + .txn(txn) + .execute() + .await + .expect("stage SPARQL UPDATE") +} + +// ============================================================================ +// JSON-LD transactions +// ============================================================================ + +/// Inserting with a stable id must extend the existing node, not mint a new +/// one. +#[tokio::test] +async fn jsonld_insert_extends_existing_blank_node() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:jsonld-insert").await; + let bnode = restriction_id(&fluree, &ledger).await; + + let ledger = fluree + .update( + ledger, + &json!({ + "@context": ctx(), + "insert": {"@id": bnode, "ex:note": "edited"} + }), + ) + .await + .expect("insert on stable id") + .ledger; + + // The note is reachable through the parent's ref — proof the triple + // landed on the same node. 
+ let notes = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?note", + "where": {"@id": "ex:ClassA", "ex:restriction": {"ex:note": "?note"}} + }), + ) + .await; + assert_eq!(notes, vec!["edited"]); + + // Still exactly one restriction-shaped node in the ledger. + let restrictions = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?r", + "where": {"@id": "?r", "owl:onProperty": {"@id": "ex:hasPart"}} + }), + ) + .await; + assert_eq!(restrictions.len(), 1); +} + +/// where/delete/insert against a stable id edits the node in place: the +/// parent's reference is untouched and the node id survives the edit. +#[tokio::test] +async fn jsonld_delete_insert_edits_blank_node_in_place() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:jsonld-edit").await; + let bnode = restriction_id(&fluree, &ledger).await; + + let ledger = fluree + .update( + ledger, + &json!({ + "@context": ctx(), + "where": {"@id": bnode, "owl:someValuesFrom": "?old"}, + "delete": {"@id": bnode, "owl:someValuesFrom": "?old"}, + "insert": {"@id": bnode, "owl:someValuesFrom": {"@id": "ex:Gadget"}} + }), + ) + .await + .expect("edit restriction in place") + .ledger; + + let values = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?v", + "where": {"@id": "ex:ClassA", "ex:restriction": {"owl:someValuesFrom": "?v"}} + }), + ) + .await; + assert_eq!(values, vec!["ex:Gadget"]); + + // Node identity is stable across the edit. + assert_eq!(restriction_id(&fluree, &ledger).await, bnode); +} + +/// Ordinary blank-node labels keep fresh-mint semantics: the same label in +/// two transactions produces two distinct nodes. 
+#[tokio::test] +async fn jsonld_plain_blank_label_still_mints_fresh() { + let fluree = FlureeBuilder::memory().build_memory(); + let db0 = LedgerSnapshot::genesis("it/stable-bnode:fresh-mint"); + let mut ledger = LedgerState::new(db0, Novelty::new(0)); + + for tag in ["one", "two"] { + ledger = fluree + .update( + ledger, + &json!({ + "@context": ctx(), + "insert": {"@id": "_:b0", "ex:tag": tag} + }), + ) + .await + .expect("insert") + .ledger; + } + + let subjects = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?s", + "where": {"@id": "?s", "ex:tag": "?t"} + }), + ) + .await; + assert_eq!( + subjects.len(), + 2, + "same client label across transactions must mint distinct nodes: {subjects:?}" + ); +} + +// ============================================================================ +// SPARQL +// ============================================================================ + +/// A stable id in a SPARQL WHERE pattern is a constant pinned to the stored +/// node, while an ordinary label stays an existential variable. +#[tokio::test] +async fn sparql_select_stable_blank_node_is_constant() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:sparql-select").await; + + // Add a second restriction so a wildcard match would return two rows. + let ledger = fluree + .update( + ledger, + &json!({ + "@context": ctx(), + "insert": { + "@id": "ex:ClassB", + "ex:restriction": { + "owl:onProperty": {"@id": "ex:hasPart"}, + "owl:someValuesFrom": {"@id": "ex:Sprocket"} + } + } + }), + ) + .await + .expect("insert ClassB") + .ledger; + let bnode = restriction_id(&fluree, &ledger).await; + + // Constant: only the addressed node's value comes back. 
+ let sparql = format!( + "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\ + SELECT ?v WHERE {{ {bnode} owl:someValuesFrom ?v }}" + ); + let result = support::query_sparql(&fluree, &ledger, &sparql) + .await + .expect("sparql select"); + let v = result.to_jsonld(&ledger.snapshot).expect("to_jsonld"); + let rows = v.as_array().expect("array"); + assert_eq!(rows.len(), 1, "stable id must pin one node: {rows:?}"); + + // Ordinary label: existential variable, matches both restrictions. + let sparql = "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\ + SELECT ?v WHERE { _:b0 owl:someValuesFrom ?v }"; + let result = support::query_sparql(&fluree, &ledger, sparql) + .await + .expect("sparql select wildcard"); + let v = result.to_jsonld(&ledger.snapshot).expect("to_jsonld"); + assert_eq!( + v.as_array().expect("array").len(), + 2, + "plain blank label must stay a variable" + ); +} + +/// SPARQL DELETE/INSERT WHERE addressing a stable id edits the node in place. +#[tokio::test] +async fn sparql_delete_insert_edits_blank_node() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:sparql-edit").await; + let bnode = restriction_id(&fluree, &ledger).await; + + let sparql = format!( + "PREFIX ex: <http://example.org/>\n\ + PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\ + DELETE {{ {bnode} owl:someValuesFrom ?old }}\n\ + INSERT {{ {bnode} owl:someValuesFrom ex:Gadget }}\n\ + WHERE {{ {bnode} owl:someValuesFrom ?old }}" + ); + let ledger = run_sparql_update(&fluree, ledger, &sparql).await.ledger; + + let values = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?v", + "where": {"@id": "ex:ClassA", "ex:restriction": {"owl:someValuesFrom": "?v"}} + }), + ) + .await; + assert_eq!(values, vec!["ex:Gadget"]); + assert_eq!(restriction_id(&fluree, &ledger).await, bnode); +} + +/// SPARQL DELETE DATA / INSERT DATA with a stable id retract and assert +/// exact triples on the stored node. 
+#[tokio::test] +async fn sparql_delete_data_and_insert_data_stable_blank_node() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:sparql-data").await; + let bnode = restriction_id(&fluree, &ledger).await; + + let sparql = format!( + "PREFIX ex: <http://example.org/>\n\ + PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\ + DELETE DATA {{ {bnode} owl:someValuesFrom ex:Widget }}" + ); + let ledger = run_sparql_update(&fluree, ledger, &sparql).await.ledger; + + let values = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?v", + "where": {"@id": "ex:ClassA", "ex:restriction": {"owl:someValuesFrom": "?v"}} + }), + ) + .await; + assert!(values.is_empty(), "DELETE DATA must retract: {values:?}"); + + let sparql = format!( + "PREFIX ex: <http://example.org/>\n\ + PREFIX owl: <http://www.w3.org/2002/07/owl#>\n\ + INSERT DATA {{ {bnode} owl:someValuesFrom ex:Gadget }}" + ); + let ledger = run_sparql_update(&fluree, ledger, &sparql).await.ledger; + + let values = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?v", + "where": {"@id": "ex:ClassA", "ex:restriction": {"owl:someValuesFrom": "?v"}} + }), + ) + .await; + assert_eq!( + values, + vec!["ex:Gadget"], + "INSERT DATA must extend the existing node" + ); +} + +/// DELETE WHERE with a stable-id subject retracts that node's matching +/// triples only. +#[tokio::test] +async fn sparql_delete_where_stable_blank_node() { + let (fluree, ledger) = seed_restriction("it/stable-bnode:sparql-delete-where").await; + let bnode = restriction_id(&fluree, &ledger).await; + + let sparql = format!("DELETE WHERE {{ {bnode} ?p ?o }}"); + let ledger = run_sparql_update(&fluree, ledger, &sparql).await.ledger; + + let props = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?p", + "where": {"@id": bnode, "?p": "?o"} + }), + ) + .await; + assert!(props.is_empty(), "node must be emptied: {props:?}"); + + // The parent's ref to the (now-empty) node is a separate triple and + // survives — retract it explicitly if the whole subtree should go. 
+ let refs = select_strings( + &fluree, + &ledger, + &json!({ + "@context": ctx(), + "select": "?r", + "where": {"@id": "ex:ClassA", "ex:restriction": "?r"} + }), + ) + .await; + assert_eq!(refs, vec![bnode]); +} diff --git a/fluree-db-core/src/ns_encoding.rs b/fluree-db-core/src/ns_encoding.rs index 618d9c8402..eb39722ff7 100644 --- a/fluree-db-core/src/ns_encoding.rs +++ b/fluree-db-core/src/ns_encoding.rs @@ -121,6 +121,27 @@ pub fn builtin_prefix_trie() -> &'static PrefixTrie { /// split at this boundary unconditionally, before any other splitting logic. pub const BLANK_NODE_PREFIX: &str = "_:"; +/// Label prefix reserved for Fluree-minted stable blank-node identifiers. +/// +/// Every skolemized blank node the system mints has a local name beginning +/// with `fdb-` (e.g. `fdb-`, `fdb--