From 6fc5dab1d8d94da8c680c48e25b37822d785eb59 Mon Sep 17 00:00:00 2001 From: bplatz Date: Wed, 1 Jul 2026 22:56:52 -0400 Subject: [PATCH 01/23] fix(policy): enforce f:policySource and config policy defaults on writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit f:policySource was honored on the read/query path but ignored on every write/modify path, which hardcoded the default graph (g_id = 0): a policy relocated into a named graph — or sourced cross-ledger from a model ledger — was enforced on queries but silently not enforced on transactions. Config-declared policy defaults (f:policyClass, f:defaultAllow) also never merged into write-time governance, so requests without policy inputs ran as root even on configured ledgers. Fixes #1416. Changes: - New `build_transact_policy_context` (policy_view.rs): the write-side counterpart of `wrap_policy`. Resolves the ledger config at to_t, merges config policy defaults via merge_policy_opts, and dispatches f:policySource — same-ledger selectors through resolve_policy_source_g_ids (fail-closed on unknown graphs), cross-ledger f:ledger references through the ArtifactKind::PolicyRules resolver with restrictions interned into the data ledger's term space. Returns None (root) only when neither the request nor the config supplies any policy input; a cross-ledger source always builds a context, mirroring the read path. - Extracted `resolve_cross_ledger_policy_restrictions` from wrap_policy's inline cross-ledger block and shared it between the read and write paths, so the Phase 1a identity-mode rejection and the f:policyClass intersection filter (default {f:AccessPolicy}) cannot drift between them. 
- Rewired the write-path call sites onto the new builder: - consensus transact (LocalCommitter + Raft commit worker) via crate::local::build_policy_context, which now takes &Fluree and no longer short-circuits on empty request governance - credential_transact (verified-identity transactions) - push replication (build_policy_ctx_for_push; stage_commit_flakes now takes Option<&PolicyContext>) - commit-detail fetch (graph_commit_builder, keeping its per-request identity/policy_class opt-in gate) - CLI local insert/upsert/update (flags-only gate preserved) - block_fetch has no Fluree handle, so it resolves same-ledger f:policySource via resolve_policy_graphs_from_config and fails closed on cross-ledger configs instead of silently reading the default graph - Tests (it_policy_write_path.rs): config defaults enforced on writes with no request inputs; same-ledger named-graph f:policySource modify-deny; cross-ledger modify-deny resolved live against the model ledger (violating write rejected, untargeted write allowed); identity + cross-ledger fails closed; no-config/no-inputs still runs root. All verified to fail against the previous hardcoded-[0] behavior. - Docs: policy-in-transactions.md (new "Config-driven write enforcement" section), cross-ledger-policy.md (transactions engage automatically; queries keep the header gate), setting-groups.md, programmatic-policy.md (build_transact_policy_context as the recommended transaction entry point), and the cross-ledger design doc's scope section. 
--- docs/design/cross-ledger-model-enforcement.md | 9 +- docs/ledger-config/setting-groups.md | 2 + docs/security/cross-ledger-policy.md | 37 +- docs/security/policy-in-transactions.md | 12 + docs/security/programmatic-policy.md | 24 +- fluree-db-api/src/block_fetch.rs | 20 +- fluree-db-api/src/commit_transfer.rs | 22 +- fluree-db-api/src/graph_commit_builder.rs | 15 +- fluree-db-api/src/lib.rs | 4 +- fluree-db-api/src/policy_view.rs | 160 +++++- fluree-db-api/src/tx.rs | 21 +- fluree-db-api/src/view/fluree_ext.rs | 67 +-- fluree-db-api/tests/grp_policy.rs | 2 + fluree-db-api/tests/it_policy_write_path.rs | 474 ++++++++++++++++++ fluree-db-cli/src/commands/insert.rs | 8 +- fluree-db-consensus/src/local.rs | 36 +- fluree-db-consensus/src/raft/commit_worker.rs | 2 +- 17 files changed, 799 insertions(+), 116 deletions(-) create mode 100644 fluree-db-api/tests/it_policy_write_path.rs diff --git a/docs/design/cross-ledger-model-enforcement.md b/docs/design/cross-ledger-model-enforcement.md index 4555dc80ca..053197e180 100644 --- a/docs/design/cross-ledger-model-enforcement.md +++ b/docs/design/cross-ledger-model-enforcement.md @@ -454,7 +454,14 @@ mode can be added without rewriting the failure taxonomy. - `f:policySource` cross-ledger via `resolve_graph_ref`. The policy IR carries definitional/contextual term references separately so the model ledger contributes rules while the - data ledger contributes identity binding. + data ledger contributes identity binding. Enforced on both + the read path (`wrap_policy`) and the write path + (`build_transact_policy_context`, used by the consensus + transact pipeline, push replication, credentialed + transactions, and the CLI); the two share one restriction + resolver (`resolve_cross_ledger_policy_restrictions`) so the + identity-mode rejection and `f:policyClass` filter cannot + drift. - `f:constraintsSource` cross-ledger via the same shared resolver. 
M's `f:enforceUnique true` annotations on properties apply to D's transactions; a tx that would diff --git a/docs/ledger-config/setting-groups.md b/docs/ledger-config/setting-groups.md index bf7e2d0803..6e6e6207c7 100644 --- a/docs/ledger-config/setting-groups.md +++ b/docs/ledger-config/setting-groups.md @@ -38,6 +38,8 @@ Controls default policy enforcement behavior. When `f:policySource` is set, the policy loader scans the specified graph for policy rules instead of the default graph. This keeps policy rules separate from end-user data. If `f:policySource` is not set, policies are loaded from the default graph (backward compatible). +`f:policySource` and the policy defaults are honored on **both reads and writes**: queries load view rules from the configured graph, and transactions load `f:modify` rules from the same graph before staging. Config-declared `f:policyClass` / `f:defaultAllow` defaults apply to transactions even when the request itself carries no policy inputs — an operator who relocates policy into a named graph (or a model ledger) gets the same enforcement on writes as on reads. + **Cross-ledger references are supported on `f:policySource`.** The graph source can name another ledger via `f:ledger`, so a single model ledger can hold policy rules that govern many data ledgers. See [Cross-ledger policy](../security/cross-ledger-policy.md) for the configuration pattern and the contract on `f:policyClass` filtering, baseline `f:AccessPolicy` semantics, and the failure modes. **Not yet honored on `f:policySource`** (parsed by the config layer but rejected at request time with a clear error): `f:atT` temporal pinning, `f:trustPolicy` verification, `f:rollbackGuard` freshness constraints. Cross-ledger references are also supported on `f:constraintsSource`, `f:schemaSource` (single graph only — transitive `owl:imports` recursion across ledgers is not yet supported), `f:shapesSource`, and `f:rulesSource`. 
See [Cross-ledger policy](../security/cross-ledger-policy.md) for the end-to-end configuration patterns and failure modes shared by all five subsystems. diff --git a/docs/security/cross-ledger-policy.md b/docs/security/cross-ledger-policy.md index 79efc2a917..6dc592b158 100644 --- a/docs/security/cross-ledger-policy.md +++ b/docs/security/cross-ledger-policy.md @@ -11,7 +11,8 @@ All five `f:GraphRef`-shaped governance predicates support cross-ledger references today: - **Cross-ledger policy** (`f:policySource` with `f:ledger`) — - M's policy rule set is applied to queries against D. + M's policy rule set is applied to queries (`f:view`) and + transactions (`f:modify`) against D. - **Cross-ledger constraints** (`f:constraintsSource` with `f:ledger`) — M's `f:enforceUnique` annotations are applied to transactions against D. @@ -154,16 +155,26 @@ rules are opt-in — operators name the class to enroll them. ## Engaging policy enforcement -There's a subtlety in how the server's JSON-LD query route -chooses whether to invoke policy enforcement at all. Requests -without an `fluree-policy-class`, `fluree-identity`, or inline -`opts.policy` go through a no-policy fast path that bypasses the -cross-ledger dispatch. A configured `f:policySource` in `#config` -is **not** enough on its own to force enforcement at the HTTP -layer today. - -To engage cross-ledger policy via HTTP, send a request with at -least one of: +**Transactions engage cross-ledger policy automatically.** The +transact path (JSON-LD / SPARQL UPDATE / Turtle / TriG through +the server, push replication, credentialed transactions, and the +CLI's local mode with policy flags) resolves D's config before +staging: a cross-ledger `f:policySource` always builds a policy +context, and M's `f:modify` rules are enforced on the staged +flakes even when the request carries no policy inputs at all. +Config `f:defaultAllow` / `f:policyClass` defaults merge in the +same way they do for reads. 
+ +For **queries**, there's a subtlety in how the server's JSON-LD +query route chooses whether to invoke policy enforcement at all. +Requests without a `fluree-policy-class`, `fluree-identity`, or +inline `opts.policy` go through a no-policy fast path that +bypasses the cross-ledger dispatch. A configured `f:policySource` +in `#config` is **not** enough on its own to force enforcement at +the HTTP query layer today. + +To engage cross-ledger policy on an HTTP query, send a request +with at least one of: - `fluree-policy-class: ` — the policy class header (the cleanest way to declare "use the configured policy"). Matching @@ -177,7 +188,9 @@ least one of: When using the in-process Rust API, calling `fluree.db_with_policy(ledger_id, &opts)` always engages the policy path, even with empty opts. Programmatic users don't see -this gating. +this gating. The write-side equivalent is +`build_transact_policy_context` — see +[Programmatic policy API (Rust)](programmatic-policy.md). ## Cross-ledger uniqueness constraints diff --git a/docs/security/policy-in-transactions.md b/docs/security/policy-in-transactions.md index 033abfd5a6..5f3460a673 100644 --- a/docs/security/policy-in-transactions.md +++ b/docs/security/policy-in-transactions.md @@ -83,6 +83,18 @@ Crucially, the policy is checked against the **flakes**, not the operation type. Enforcement is also independent of the **wire format**: the check runs on the staged flakes, so JSON-LD, SPARQL UPDATE, and Turtle / TriG / N-Triples writes are all governed by the same `f:modify` policy. Sending data as Turtle is not a way to bypass write policy. 
+## Config-driven write enforcement + +The ledger's `#config` graph governs writes the same way it governs reads: + +- **Policy defaults apply without request inputs.** When `f:policyDefaults` declares `f:policyClass` (and optionally `f:defaultAllow`), transactions build a policy context from those defaults even when the request carries no `fluree-identity` / `fluree-policy-class` headers or inline `opts.policy`. A ledger configured with a modify-deny rule rejects violating writes from anonymous requests, matching read-side behavior. +- **`f:policySource` redirects the rule lookup.** Policy rules relocated into a named graph (or a cross-ledger model ledger via `f:ledger`) are loaded from the configured source at transaction time — never silently from the default graph. Unknown graph selectors fail closed. +- **Cross-ledger sources always engage.** A cross-ledger `f:policySource` builds a policy context unconditionally (mirroring the read path): the model ledger's `f:modify` rules apply to every transaction against the data ledger. See [Cross-ledger policy](cross-ledger-policy.md). +- **Identity-mode + cross-ledger fails closed.** A request that sets an identity against a ledger with a cross-ledger `f:policySource` is rejected with a config error, the same Phase 1a contract the read path enforces. Use `f:policyClass` with cross-ledger configs. +- **Override control gates request-time overrides.** A request that supplies its own policy inputs replaces the config defaults only when the config's `f:overrideControl` permits it — see [Override control](../ledger-config/override-control.md). + +This applies uniformly across the server transact routes (local and Raft consensus), push replication, credentialed transactions, and the CLI's local mode with policy flags. 
+ ## Targeting patterns ### Whitelist a property to a role diff --git a/docs/security/programmatic-policy.md b/docs/security/programmatic-policy.md index cad38aed41..32285d59c1 100644 --- a/docs/security/programmatic-policy.md +++ b/docs/security/programmatic-policy.md @@ -274,7 +274,29 @@ When multiple policies match a flake, they are combined using **Deny Overrides** ## Transactions with Policy -Policies can also be applied to transactions using the builder API: +Policies can also be applied to transactions using the builder API. The +recommended entry point is `build_transact_policy_context`, which honors +the ledger's `#config` graph the same way the server transact path does: +it merges config policy defaults (`f:policyClass`, `f:defaultAllow`) into +the supplied options and resolves `f:policySource` — same-ledger named +graphs and cross-ledger model references — before building the context. +It returns `None` when neither the request nor the config supplies any +policy input (run as root): + +```rust +let policy_ctx = fluree_db_api::build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &qc_opts, +).await?; // -> Option<PolicyContext> +``` + +The lower-level `build_policy_context_from_opts` remains available when +you want to control the policy graphs yourself (it does no config +resolution and no defaults merge): ```rust use fluree_db_api::policy_builder; diff --git a/fluree-db-api/src/block_fetch.rs b/fluree-db-api/src/block_fetch.rs index 154004cfd6..c158b83c9f 100644 --- a/fluree-db-api/src/block_fetch.rs +++ b/fluree-db-api/src/block_fetch.rs @@ -381,11 +381,27 @@ pub async fn apply_policy_filter( let overlay: &dyn OverlayProvider = &NoOverlay; - let policy_ctx = - policy_builder::build_policy_context_from_opts(snapshot, overlay, None, to_t, &opts, &[0]) + // Config-aware: a configured `f:policySource` redirects the policy-rule + // lookup to the declared graph. 
No `Fluree` handle is available on this + // path, so a cross-ledger `f:policySource` fails closed here (the + // resolver rejects `f:ledger`) rather than silently falling back to the + // default graph. + let policy_graphs = + crate::policy_view::resolve_policy_graphs_from_config(snapshot, overlay, to_t) .await .map_err(|e| BlockFetchError::PolicyBuild(e.to_string()))?; + let policy_ctx = policy_builder::build_policy_context_from_opts( + snapshot, + overlay, + None, + to_t, + &opts, + &policy_graphs, + ) + .await + .map_err(|e| BlockFetchError::PolicyBuild(e.to_string()))?; + if policy_ctx.wrapper().is_root() { return Ok((flakes, false)); } diff --git a/fluree-db-api/src/commit_transfer.rs b/fluree-db-api/src/commit_transfer.rs index 4f89070337..888a553faf 100644 --- a/fluree-db-api/src/commit_transfer.rs +++ b/fluree-db-api/src/commit_transfer.rs @@ -21,7 +21,6 @@ use crate::dataset::GovernanceOptions; use crate::error::{ApiError, Result}; use crate::ledger_manager::LedgerWriteGuard; -use crate::policy_builder::build_policy_context_from_opts; use crate::tx::{IndexingMode, IndexingStatus}; use crate::{Fluree, IndexConfig, LedgerHandle}; use base64::Engine as _; @@ -421,7 +420,8 @@ impl Fluree { // 4.2 Policy enforcement: build policy context from opts against current state. let policy_ctx = - build_policy_ctx_for_push(&base_state, &evolving_novelty, current_t, opts).await?; + build_policy_ctx_for_push(self, &base_state, &evolving_novelty, current_t, opts) + .await?; // 4.3 Stage flakes (policy/backpressure). No WHERE/cancellation; flakes are prebuilt. 
let evolving_state = base_state.clone_with_novelty(Arc::new(evolving_novelty.clone())); @@ -429,7 +429,7 @@ impl Fluree { evolving_state, &c.commit.flakes, index_config, - &policy_ctx, + policy_ctx.as_ref(), &routing.graph_sids, ) .await @@ -885,19 +885,23 @@ fn validate_required_blobs( } async fn build_policy_ctx_for_push( + fluree: &Fluree, base: &LedgerState, evolving: &Novelty, current_t: i64, opts: &GovernanceOptions, -) -> Result { - // Build policy context from opts against current state (db + evolving novelty). - build_policy_context_from_opts( +) -> Result> { + // Build policy context from opts merged with the ledger's #config policy + // defaults, against current state (db + evolving novelty). Config-aware: + // honors f:policySource (same-ledger named graphs and cross-ledger model + // references) instead of assuming policy rules live in the default graph. + crate::policy_view::build_transact_policy_context( + fluree, &base.snapshot, evolving, Some(evolving), current_t, opts, - &[0], ) .await } @@ -906,13 +910,13 @@ async fn stage_commit_flakes( ledger: LedgerState, flakes: &[Flake], index_config: &IndexConfig, - policy_ctx: &PolicyContext, + policy_ctx: Option<&PolicyContext>, graph_sids: &HashMap, ) -> std::result::Result { let mut options = fluree_db_transact::StageOptions::new() .with_index_config(index_config) .with_graph_sids(graph_sids); - if !policy_ctx.wrapper().is_root() { + if let Some(policy_ctx) = policy_ctx.filter(|p| !p.wrapper().is_root()) { options = options.with_policy(policy_ctx); } fluree_db_transact::stage_flakes(ledger, flakes.to_vec(), options) diff --git a/fluree-db-api/src/graph_commit_builder.rs b/fluree-db-api/src/graph_commit_builder.rs index ba8f268854..669f364181 100644 --- a/fluree-db-api/src/graph_commit_builder.rs +++ b/fluree-db-api/src/graph_commit_builder.rs @@ -23,7 +23,7 @@ use crate::dataset::GovernanceOptions; use crate::format::iri::IriCompactor; use crate::graph::Graph; use crate::ledger_view::CommitRef; -use 
crate::{policy_builder, ApiError, Result}; +use crate::{ApiError, Result}; use fluree_db_core::commit::codec::read_commit; use fluree_db_core::{ContentId, ContentStore, FlakeValue, OverlayProvider, Tracker}; use fluree_db_novelty::Commit; @@ -282,19 +282,24 @@ impl<'a, 'g> CommitBuilder<'a, 'g> { ..Default::default() }; // Use the novelty overlay so policy rules in uncommitted - // transactions are visible to the policy builder. + // transactions are visible to the policy builder. Config-aware: + // a configured `f:policySource` (same-ledger named graph or + // cross-ledger model reference) redirects the policy-rule + // lookup instead of assuming the default graph. The + // identity/policy_class gate above is unchanged — commit-detail + // filtering stays opt-in per request. let overlay: &dyn OverlayProvider = snapshot.novelty.as_ref(); - let policy_ctx = policy_builder::build_policy_context_from_opts( + let policy_ctx = crate::policy_view::build_transact_policy_context( + self.graph.fluree, &snapshot.snapshot, overlay, Some(snapshot.novelty.as_ref()), commit.t, &opts, - &[0], ) .await?; - if !policy_ctx.wrapper().is_root() { + if let Some(policy_ctx) = policy_ctx.filter(|p| !p.wrapper().is_root()) { let enforcer = QueryPolicyEnforcer::new(Arc::new(policy_ctx)); let tracker = Tracker::disabled(); diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index 6510b4a506..9cd1cb274c 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -164,8 +164,8 @@ pub use pack::{ }; pub use policy_builder::identity_has_no_policies; pub use policy_view::{ - build_policy_context, wrap_identity_policy_view, wrap_policy_view, wrap_policy_view_historical, - PolicyWrappedView, + build_policy_context, build_transact_policy_context, wrap_identity_policy_view, + wrap_policy_view, wrap_policy_view_historical, PolicyWrappedView, }; pub use query::builder::{ DatasetQueryBuilder, FromQueryBuilder, GraphSourceMode, ViewQueryBuilder, diff --git 
a/fluree-db-api/src/policy_view.rs b/fluree-db-api/src/policy_view.rs index 18c5eae381..04fdc86999 100644 --- a/fluree-db-api/src/policy_view.rs +++ b/fluree-db-api/src/policy_view.rs @@ -205,6 +205,11 @@ pub async fn wrap_policy_view_historical<'a>( /// don't go through `wrap_policy` / `GraphDb` (e.g., server transact handlers, /// CLI insert) use this function and still get config-driven policy graphs. /// +/// Same-ledger only: a cross-ledger `f:policySource` (with `f:ledger`) fails +/// closed here. Callers with a `Fluree` handle should use +/// [`build_transact_policy_context`], which also merges config policy +/// defaults and resolves cross-ledger sources. +/// /// # Arguments /// /// * `snapshot` - The database snapshot to query against @@ -232,6 +237,159 @@ pub async fn build_policy_context( .await } +/// Resolve a cross-ledger `f:policySource` into policy restrictions +/// interned against the data ledger's term space. +/// +/// Shared between `wrap_policy` (read path) and +/// [`build_transact_policy_context`] (write path) so both sides apply +/// identical semantics: identity-mode rejection, `ArtifactKind::PolicyRules` +/// dispatch, and the policy-class intersection filter. +/// +/// The filter contract: the data ledger's configured `policy_class` set is +/// applied as an exact-IRI intersection on the wire's restrictions, OR +/// `{f:AccessPolicy}` when no policy_class is set. `f:AccessPolicy` is the +/// canonical / baseline policy class — declaring `f:policySource` +/// cross-ledger pulls those rules in automatically; custom-typed rules +/// require an explicit `f:policyClass` in D's config to be enforced. This is +/// the safer default than "load every structurally-policy-looking subject +/// from M," which would silently include rules the operator never opted into. 
+pub(crate) async fn resolve_cross_ledger_policy_restrictions( + snapshot: &LedgerSnapshot, + effective_opts: &GovernanceOptions, + source: &fluree_db_core::ledger_config::GraphSourceRef, + ctx: &mut crate::cross_ledger::ResolveCtx<'_>, +) -> Result> { + // Phase 1a: cross-ledger + identity-mode is not supported. The model + // ledger contributes policy rules; the data ledger contributes identity + // binding. Mixing them ambiguously is a fail-closed config error. + if effective_opts.identity.is_some() { + return Err(crate::error::ApiError::config( + "cross-ledger f:policySource cannot be combined with opts.identity \ + in Phase 1a; use opts.policy_class with the cross-ledger config", + )); + } + + let resolved = crate::cross_ledger::resolve_graph_ref( + source, + crate::cross_ledger::ArtifactKind::PolicyRules, + ctx, + ) + .await?; + let crate::cross_ledger::GovernanceArtifact::PolicyRules(wire) = &resolved.artifact else { + // resolve_graph_ref dispatches on ArtifactKind, so requesting + // PolicyRules must yield PolicyRules. Surfacing this as + // TranslationFailed rather than panicking keeps the failure path + // uniform for operators reading the response body. 
+ return Err(crate::error::ApiError::CrossLedger( + crate::cross_ledger::CrossLedgerError::TranslationFailed { + ledger_id: resolved.model_ledger_id.clone(), + graph_iri: resolved.graph_iri.clone(), + detail: "resolver returned a non-PolicyRules artifact for an \ + ArtifactKind::PolicyRules request; this is a bug in \ + the resolver dispatch" + .into(), + }, + )); + }; + + const DEFAULT_POLICY_CLASS_IRI: &str = fluree_vocab::policy_iris::ACCESS_POLICY; + let filter: std::collections::HashSet = effective_opts + .policy_class + .as_ref() + .filter(|v| !v.is_empty()) + .map(|v| v.iter().cloned().collect()) + .unwrap_or_else(|| [DEFAULT_POLICY_CLASS_IRI.to_string()].into_iter().collect()); + + fluree_db_policy::wire_to_restrictions(wire, |iri| snapshot.encode_iri(iri), Some(&filter)) + .map_err(crate::error::ApiError::from) +} + +/// Build the policy context for a write (or other non-view enforcement +/// point), honoring the ledger's `#config` graph the same way `wrap_policy` +/// does on the read path. +/// +/// This is the write-side counterpart of `Fluree::wrap_policy`: +/// +/// 1. Resolves the ledger config at `to_t` and merges config policy defaults +/// (`f:policyClass`, `f:defaultAllow`, override control) into `opts` via +/// `merge_policy_opts` — so config-declared policy governs writes even +/// when the request itself carries no policy inputs. +/// 2. A cross-ledger `f:policySource` (with `f:ledger`) is resolved live +/// against the model ledger (`ArtifactKind::PolicyRules`, latest committed +/// M) and its restrictions are interned into this ledger's term space. +/// 3. A same-ledger `f:policySource` resolves to concrete graph IDs via +/// `resolve_policy_source_g_ids` (fail-closed on unknown selectors). +/// +/// Returns `Ok(None)` when neither the request nor the config supplies any +/// policy input — the transaction runs under root, matching the previous +/// behavior for unconfigured ledgers. 
A cross-ledger source always builds a +/// context (mirroring the read path, where the model ledger's rules apply +/// regardless of request inputs). +pub async fn build_transact_policy_context( + fluree: &crate::Fluree, + snapshot: &LedgerSnapshot, + overlay: &dyn OverlayProvider, + novelty_for_stats: Option<&Novelty>, + to_t: i64, + opts: &GovernanceOptions, +) -> Result> { + let resolved = + match crate::config_resolver::resolve_ledger_config(snapshot, overlay, to_t).await { + Ok(Some(c)) => Some(crate::config_resolver::resolve_effective_config(&c, None)), + Ok(None) => None, + Err(e) => { + return Err(crate::error::ApiError::config(format!( + "Failed to load ledger config while resolving transaction policy: {e}" + ))); + } + }; + + let effective_opts = match &resolved { + Some(r) => crate::config_resolver::merge_policy_opts(r, opts, None), + None => opts.clone(), + }; + + let source = resolved + .as_ref() + .and_then(|r| r.policy.as_ref()) + .and_then(|p| p.policy_source.as_ref()); + + if let Some(source) = source.filter(|s| s.ledger.is_some()) { + let ledger_id: String = snapshot.ledger_id.to_string(); + let mut ctx = crate::cross_ledger::ResolveCtx::new(&ledger_id, fluree); + let restrictions = + resolve_cross_ledger_policy_restrictions(snapshot, &effective_opts, source, &mut ctx) + .await?; + let policy_ctx = policy_builder::build_policy_context_from_opts_with_cross_ledger( + snapshot, + overlay, + novelty_for_stats, + to_t, + &effective_opts, + &[0], // identity-mode uses [0]; unused under cross-ledger + restrictions, + ) + .await?; + return Ok(Some(policy_ctx)); + } + + if !effective_opts.has_any_policy_inputs() { + return Ok(None); + } + + let policy_graphs = policy_builder::resolve_policy_source_g_ids(source, snapshot)?; + let policy_ctx = policy_builder::build_policy_context_from_opts( + snapshot, + overlay, + novelty_for_stats, + to_t, + &effective_opts, + &policy_graphs, + ) + .await?; + Ok(Some(policy_ctx)) +} + /// Wrap a ledger with identity-based 
policy via `f:policyClass` lookup. /// /// Convenience wrapper for identity-based policy wrapping. @@ -271,7 +429,7 @@ pub async fn wrap_identity_policy_view<'a>( /// Returns `[0]` (default graph) only when no config has been written to the /// ledger yet (`Ok(None)`) or no `f:policySource` is configured — in both /// cases the caller's policy rules, if any, live in the default graph. -async fn resolve_policy_graphs_from_config( +pub(crate) async fn resolve_policy_graphs_from_config( snapshot: &LedgerSnapshot, overlay: &dyn OverlayProvider, to_t: i64, diff --git a/fluree-db-api/src/tx.rs b/fluree-db-api/src/tx.rs index a2594df086..3040150431 100644 --- a/fluree-db-api/src/tx.rs +++ b/fluree-db-api/src/tx.rs @@ -2620,20 +2620,33 @@ impl crate::Fluree { let verified = crate::credential::verify_credential(credential)?; - // Build policy context with verified identity + // Build policy context with verified identity. Config-aware: a + // configured `f:policySource` redirects the policy-rule lookup to + // the declared graph, and config policy defaults merge in — same + // semantics as the consensus transact path. (A cross-ledger + // f:policySource fails closed here: identity-mode + cross-ledger is + // rejected in Phase 1a.) let opts = crate::GovernanceOptions { identity: Some(verified.did.clone()), ..Default::default() }; - let policy_ctx = crate::policy_builder::build_policy_context_from_opts( + let policy_ctx = crate::policy_view::build_transact_policy_context( + self, &ledger.snapshot, ledger.novelty.as_ref(), Some(ledger.novelty.as_ref()), ledger.t(), &opts, - &[0], ) - .await?; + .await? + .ok_or_else(|| { + // opts.identity is always set above, so a same-ledger build + // always yields a context; None would mean the gate logic + // changed underneath us. 
+ ApiError::internal( + "credential transact expected a policy context for a verified identity", + ) + })?; // Context propagation: inject parent context if subject doesn't have one let mut txn_json = verified.subject.clone(); diff --git a/fluree-db-api/src/view/fluree_ext.rs b/fluree-db-api/src/view/fluree_ext.rs index 10fcc211a3..027f661584 100644 --- a/fluree-db-api/src/view/fluree_ext.rs +++ b/fluree-db-api/src/view/fluree_ext.rs @@ -681,17 +681,6 @@ impl Fluree { let is_cross_ledger = source.is_some_and(|s| s.ledger.is_some()); if is_cross_ledger { - // Phase 1a: cross-ledger + identity-mode is not supported. - // The model ledger contributes policy rules; the data - // ledger contributes identity binding. Mixing them - // ambiguously is a fail-closed config error. - if effective_opts.identity.is_some() { - return Err(crate::error::ApiError::config( - "cross-ledger f:policySource cannot be combined with opts.identity \ - in Phase 1a; use opts.policy_class with the cross-ledger config", - )); - } - let source = source.expect("checked above"); // Seed from any prior governance-context capture stored // on the view (e.g., an earlier `wrap_policy` in the @@ -707,60 +696,16 @@ impl Fluree { self, (**view.cross_ledger_resolved_ts()).clone(), ); - let resolved = crate::cross_ledger::resolve_graph_ref( + // Identity-mode rejection (Phase 1a), PolicyRules dispatch, and + // the policy-class intersection filter all live in the shared + // helper so read and write paths can't drift. + let restrictions = crate::policy_view::resolve_cross_ledger_policy_restrictions( + &view.snapshot, + &effective_opts, source, - crate::cross_ledger::ArtifactKind::PolicyRules, &mut ctx, ) .await?; - let crate::cross_ledger::GovernanceArtifact::PolicyRules(wire) = &resolved.artifact - else { - // resolve_graph_ref dispatches on ArtifactKind, so - // requesting PolicyRules must yield PolicyRules. 
- // Surfacing this as TranslationFailed rather than - // panicking keeps the failure path uniform for - // operators reading the response body. - return Err(crate::error::ApiError::CrossLedger( - crate::cross_ledger::CrossLedgerError::TranslationFailed { - ledger_id: resolved.model_ledger_id.clone(), - graph_iri: resolved.graph_iri.clone(), - detail: "resolver returned a non-PolicyRules artifact for an \ - ArtifactKind::PolicyRules request; this is a bug in \ - the resolver dispatch" - .into(), - }, - )); - }; - - // Apply the data ledger's configured policy_class set as - // an exact-IRI intersection filter on the wire's - // restrictions. The contract is: - // - // filter = effective_opts.policy_class, OR - // {f:AccessPolicy} when no policy_class is set. - // - // f:AccessPolicy is the canonical / baseline policy class - // — declaring `f:policySource` cross-ledger pulls those - // rules in automatically. Custom-typed rules require - // an explicit `f:policyClass` in D's config to be - // enforced. This is the safer default than "load every - // structurally-policy-looking subject from M," which - // would silently include rules the operator never opted - // into. 
- const DEFAULT_POLICY_CLASS_IRI: &str = fluree_vocab::policy_iris::ACCESS_POLICY; - let filter: std::collections::HashSet = effective_opts - .policy_class - .as_ref() - .filter(|v| !v.is_empty()) - .map(|v| v.iter().cloned().collect()) - .unwrap_or_else(|| [DEFAULT_POLICY_CLASS_IRI.to_string()].into_iter().collect()); - let snapshot_ref = &view.snapshot; - let restrictions = fluree_db_policy::wire_to_restrictions( - wire, - |iri| snapshot_ref.encode_iri(iri), - Some(&filter), - ) - .map_err(crate::error::ApiError::from)?; let policy_ctx = crate::policy_builder::build_policy_context_from_opts_with_cross_ledger( diff --git a/fluree-db-api/tests/grp_policy.rs b/fluree-db-api/tests/grp_policy.rs index efa68c95b5..9fcd707d9a 100644 --- a/fluree-db-api/tests/grp_policy.rs +++ b/fluree-db-api/tests/grp_policy.rs @@ -27,3 +27,5 @@ mod it_policy_time_travel; mod it_policy_tracking; #[path = "it_policy_tx.rs"] mod it_policy_tx; +#[path = "it_policy_write_path.rs"] +mod it_policy_write_path; diff --git a/fluree-db-api/tests/it_policy_write_path.rs b/fluree-db-api/tests/it_policy_write_path.rs new file mode 100644 index 0000000000..4e51970816 --- /dev/null +++ b/fluree-db-api/tests/it_policy_write_path.rs @@ -0,0 +1,474 @@ +//! Write-path policy enforcement driven by the ledger's `#config` graph. +//! +//! `build_transact_policy_context` is the write-side counterpart of +//! `wrap_policy`: it merges config policy defaults (`f:policyClass`, +//! `f:defaultAllow`) into the request's governance options and resolves +//! `f:policySource` — same-ledger named graphs AND cross-ledger model +//! references — before building the `PolicyContext` a transaction stages +//! under. The consensus transact path (local + Raft), credential transact, +//! push, and the CLI all route through it. +//! +//! Before this existed, writes built policy exclusively from request +//! inputs against the default graph: config-declared policy was enforced +//! 
on reads but silently ignored on writes (issue #1416). + +#![cfg(feature = "native")] + +use crate::support::{assert_index_defaults, genesis_ledger}; +use fluree_db_api::{ + build_transact_policy_context, CommitOpts, FlureeBuilder, GovernanceOptions, IndexConfig, + TxnOpts, +}; +use serde_json::json; + +fn config_graph_iri(ledger_id: &str) -> String { + format!("urn:fluree:{ledger_id}#config") +} + +fn test_index_config() -> IndexConfig { + IndexConfig { + reindex_min_bytes: 100_000, + reindex_max_bytes: 1_000_000_000, + } +} + +/// No config, no request inputs → the transaction runs under root +/// (`None`), preserving the pre-existing behavior for unconfigured +/// ledgers. +#[tokio::test] +async fn no_config_no_inputs_yields_root() { + let fluree = FlureeBuilder::memory().build_memory(); + let ledger = genesis_ledger(&fluree, "policy/write-root:main"); + + let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &GovernanceOptions::default(), + ) + .await + .expect("build"); + assert!( + ctx.is_none(), + "no config + no request inputs must run under root" + ); +} + +/// Config-declared policy defaults (`f:policyClass` + `f:defaultAllow`) +/// govern writes even when the request carries NO policy inputs. The +/// policy rules live in the default graph; only the defaults-merge is +/// under test here. +#[tokio::test] +async fn config_policy_class_defaults_enforced_on_writes() { + assert_index_defaults(); + let fluree = FlureeBuilder::memory().build_memory(); + let ledger_id = "policy/write-config-defaults:main"; + let ledger0 = genesis_ledger(&fluree, ledger_id); + + // Seed data (registers ex: namespace) plus the write policy itself in + // the default graph: deny modifying ex:ssn, typed ex:WritePolicy. 
+ let r1 = fluree + .insert( + ledger0, + &json!({ + "@context": {"ex": "http://example.org/ns/", "f": "https://ns.flur.ee/db#"}, + "@graph": [ + {"@id": "ex:alice", "@type": "ex:User", "ex:ssn": "111-11-1111"}, + { + "@id": "ex:noSsnWrite", + "@type": "ex:WritePolicy", + "f:required": true, + "f:onProperty": {"@id": "ex:ssn"}, + "f:action": {"@id": "f:modify"}, + "f:allow": false + } + ] + }), + ) + .await + .expect("seed data + policy"); + + // Config: defaultAllow=true so ONLY the ex:ssn rule blocks anything; + // policyClass opts the ex:WritePolicy-typed rule in. + let config_iri = config_graph_iri(ledger_id); + let r2 = fluree + .stage_owned(r1.ledger) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policyClass ex:WritePolicy . + }} + " + )) + .execute() + .await + .expect("seed config"); + let ledger = r2.ledger; + + // Empty request opts: everything comes from config. 
+ let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &GovernanceOptions::default(), + ) + .await + .expect("build") + .expect("config policyClass must produce a policy context for writes"); + + let cfg = test_index_config(); + let denied_turtle = "@prefix ex: .\nex:bob ex:ssn \"999-99-9999\" .\n"; + let denied = fluree + .insert_turtle_with_opts( + ledger.clone(), + denied_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + denied.is_err(), + "config-declared modify-deny on ex:ssn must reject the write, got: {denied:?}" + ); + + let allowed_turtle = "@prefix ex: .\nex:bob ex:name \"Bob\" .\n"; + let allowed = fluree + .insert_turtle_with_opts( + ledger, + allowed_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + allowed.is_ok(), + "defaultAllow=true must let unrelated writes through, got: {:?}", + allowed.err() + ); +} + +/// A same-ledger `f:policySource` pointing at a named graph: the write +/// path must load policy rules from THAT graph (previously it hardcoded +/// the default graph, where no rules exist, and allowed everything). +#[tokio::test] +async fn config_policy_source_named_graph_enforced_on_writes() { + assert_index_defaults(); + let fluree = FlureeBuilder::memory().build_memory(); + let ledger_id = "policy/write-named-graph-source:main"; + let ledger0 = genesis_ledger(&fluree, ledger_id); + + // Seed data in the default graph (no policy rules there). + let r1 = fluree + .insert( + ledger0, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "@id": "ex:alice", + "@type": "ex:User", + "ex:ssn": "111-11-1111" + }), + ) + .await + .expect("seed data"); + + // Policy rules live exclusively in a named graph; config redirects + // the policy-rule lookup there via f:policySource. 
+ let policy_graph_iri = "http://example.org/d-policies"; + let config_iri = config_graph_iri(ledger_id); + let r2 = fluree + .stage_owned(r1.ledger) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{policy_graph_iri}> {{ + ex:noSsnWrite + rdf:type ex:WritePolicy ; + f:required true ; + f:onProperty ex:ssn ; + f:action f:modify ; + f:allow false . + }} + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policyClass ex:WritePolicy . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:graphSelector <{policy_graph_iri}> . + }} + " + )) + .execute() + .await + .expect("seed policy graph + config"); + let ledger = r2.ledger; + + let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &GovernanceOptions::default(), + ) + .await + .expect("build") + .expect("config with f:policySource must produce a policy context"); + + let cfg = test_index_config(); + let denied_turtle = "@prefix ex: .\nex:bob ex:ssn \"999-99-9999\" .\n"; + let denied = fluree + .insert_turtle_with_opts( + ledger.clone(), + denied_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + denied.is_err(), + "modify-deny loaded from the f:policySource graph must reject the write, got: {denied:?}" + ); + + // Root control: the identical write with no policy context succeeds, + // proving the rejection above came from the named-graph rules. + let ok = fluree + .insert_turtle_with_opts( + ledger, + denied_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + None, + ) + .await; + assert!( + ok.is_ok(), + "root write must succeed without the policy context, got: {:?}", + ok.err() + ); +} + +/// Cross-ledger `f:policySource`: model ledger M holds the policy rules; +/// data ledger D's config points at M. 
A write to D that violates M's +/// modify rules must be rejected — with NO policy inputs on the request. +/// This is the write-side counterpart of the read-path enforcement in +/// `it_policy_cross_ledger.rs`. +#[tokio::test] +async fn cross_ledger_policy_source_enforced_on_writes() { + assert_index_defaults(); + let fluree = FlureeBuilder::memory().build_memory(); + + // --- model ledger M: modify-deny on ex:ssn in a named policy graph + let model_id = "policy/write-xledger/model:main"; + let model = genesis_ledger(&fluree, model_id); + let policy_graph_iri = "http://example.org/m-policies"; + fluree + .stage_owned(model) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{policy_graph_iri}> {{ + ex:noSsnWrite + rdf:type f:AccessPolicy ; + f:required true ; + f:onProperty ex:ssn ; + f:action f:modify ; + f:allow false . + }} + " + )) + .execute() + .await + .expect("seed M policy graph"); + + // --- data ledger D: data + cross-ledger config, no policy IRIs in D + let data_id = "policy/write-xledger/data:main"; + let data = genesis_ledger(&fluree, data_id); + let r1 = fluree + .insert( + data, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "@id": "ex:alice", + "@type": "ex:User", + "ex:ssn": "111-11-1111" + }), + ) + .await + .expect("seed D data"); + + let config_iri = config_graph_iri(data_id); + let r2 = fluree + .stage_owned(r1.ledger) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{policy_graph_iri}> . 
+ }} + " + )) + .execute() + .await + .expect("seed D cross-ledger config"); + let ledger = r2.ledger; + + // Empty request opts: a cross-ledger source must still build a + // context — M's rules govern D regardless of request inputs (the + // default f:AccessPolicy class filter applies). + let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &GovernanceOptions::default(), + ) + .await + .expect("build") + .expect("cross-ledger f:policySource must produce a policy context for writes"); + + let cfg = test_index_config(); + let denied_turtle = "@prefix ex: .\nex:bob ex:ssn \"999-99-9999\" .\n"; + let denied = fluree + .insert_turtle_with_opts( + ledger.clone(), + denied_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + denied.is_err(), + "M's modify-deny on ex:ssn must reject the write to D, got: {denied:?}" + ); + + let allowed_turtle = "@prefix ex: .\nex:bob ex:name \"Bob\" .\n"; + let allowed = fluree + .insert_turtle_with_opts( + ledger, + allowed_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + allowed.is_ok(), + "defaultAllow=true must let writes M's rules don't target through, got: {:?}", + allowed.err() + ); +} + +/// Identity-mode + cross-ledger `f:policySource` fails closed on the +/// write builder, matching the read-path Phase 1a contract. +#[tokio::test] +async fn cross_ledger_plus_identity_fails_closed_on_writes() { + let fluree = FlureeBuilder::memory().build_memory(); + + let model_id = "policy/write-xledger-id/model:main"; + let model = genesis_ledger(&fluree, model_id); + let policy_graph_iri = "http://example.org/m-policies"; + fluree + .stage_owned(model) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{policy_graph_iri}> {{ + ex:rule1 rdf:type f:AccessPolicy ; f:action f:modify ; f:allow true . 
+ }} + " + )) + .execute() + .await + .expect("seed M"); + + let data_id = "policy/write-xledger-id/data:main"; + let data = genesis_ledger(&fluree, data_id); + let config_iri = config_graph_iri(data_id); + let r1 = fluree + .stage_owned(data) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{policy_graph_iri}> . + }} + " + )) + .execute() + .await + .expect("seed D config"); + let ledger = r1.ledger; + + let opts = GovernanceOptions { + identity: Some("http://example.org/users/alice".into()), + ..Default::default() + }; + let err = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &opts, + ) + .await + .expect_err("identity + cross-ledger must fail closed on the write builder"); + + let msg = err.to_string(); + assert!( + msg.contains("identity") && msg.contains("cross-ledger"), + "expected fail-closed diagnostic mentioning both, got: {msg}" + ); +} diff --git a/fluree-db-cli/src/commands/insert.rs b/fluree-db-cli/src/commands/insert.rs index d605a9e49f..ae9db65d3d 100644 --- a/fluree-db-cli/src/commands/insert.rs +++ b/fluree-db-cli/src/commands/insert.rs @@ -257,7 +257,11 @@ pub async fn build_policy_ctx( } let ledger = fluree.ledger(alias).await?; let opts = policy.to_options().map_err(CliError::Usage)?; - let ctx = fluree_db_api::build_policy_context( + // Config-aware: honors f:policySource (same-ledger named graphs and + // cross-ledger model references) and merges config policy defaults, + // matching server-side transact enforcement. 
+ let ctx = fluree_db_api::build_transact_policy_context( + fluree, &ledger.snapshot, ledger.novelty.as_ref(), Some(ledger.novelty.as_ref()), @@ -265,7 +269,7 @@ pub async fn build_policy_ctx( &opts, ) .await?; - Ok(Some(ctx)) + Ok(ctx) } /// Print transaction result from remote server JSON response. diff --git a/fluree-db-consensus/src/local.rs b/fluree-db-consensus/src/local.rs index e4c843bbbc..8bb1510d16 100644 --- a/fluree-db-consensus/src/local.rs +++ b/fluree-db-consensus/src/local.rs @@ -113,7 +113,8 @@ impl Committer for LocalCommitter { // `CommitOpts` / `TrackingOptions`. let mut last_error: Option = None; for attempt in 1..=MAX_TXN_RETRIES { - let policy_ctx = build_policy_context(&ledger_handle, &governance).await?; + let policy_ctx = + build_policy_context(&self.fluree, &ledger_handle, &governance).await?; // Cypher lowers to a `Txn` here — under the write lock and re-resolved // each retry attempt — rather than pre-lock in the route. A conditional @@ -362,25 +363,31 @@ pub(crate) fn execution_failure(err: ApiError) -> SubmissionError { } } -/// Build a [`PolicyContext`] from the request's policy inputs. +/// Build a [`PolicyContext`] from the request's policy inputs merged with +/// the ledger's `#config` policy defaults. /// -/// Returns `Ok(None)` when there are no policy inputs — the transaction -/// runs under root. The context is built from a snapshot of the ledger -/// this node is about to stage against, so policy enforcement reflects -/// the same state the transaction commits onto. Building it here, rather -/// than having the caller pre-build and pass a context, keeps the policy -/// context bound to the executing node's state — the shape a replicated -/// implementation needs. +/// Returns `Ok(None)` when neither the request nor the ledger config +/// supplies any policy input — the transaction runs under root. 
The +/// context is built from a snapshot of the ledger this node is about to +/// stage against, so policy enforcement reflects the same state the +/// transaction commits onto. Building it here, rather than having the +/// caller pre-build and pass a context, keeps the policy context bound to +/// the executing node's state — the shape a replicated implementation +/// needs. +/// +/// Delegates to `fluree_db_api::build_transact_policy_context`, which +/// resolves `f:policySource` (same-ledger named graphs AND cross-ledger +/// model references) and applies config `f:policyClass` / `f:defaultAllow` +/// defaults — so writes are governed by the same config the read path +/// enforces via `wrap_policy`. pub(crate) async fn build_policy_context( + fluree: &Fluree, ledger_handle: &LedgerHandle, governance: &GovernanceOptions, ) -> Result<Option<PolicyContext>, SubmissionError> { - if !governance.has_any_policy_inputs() { - return Ok(None); - } - let snap = ledger_handle.snapshot().await; - fluree_db_api::build_policy_context( + fluree_db_api::build_transact_policy_context( + fluree, &snap.snapshot, snap.novelty.as_ref(), Some(snap.novelty.as_ref()), @@ -388,7 +395,6 @@ pub(crate) async fn build_policy_context( governance, ) .await - .map(Some) .map_err(execution_failure) } diff --git a/fluree-db-consensus/src/raft/commit_worker.rs b/fluree-db-consensus/src/raft/commit_worker.rs index 3f5e380053..6275316c6f 100644 --- a/fluree-db-consensus/src/raft/commit_worker.rs +++ b/fluree-db-consensus/src/raft/commit_worker.rs @@ -517,7 +517,7 @@ impl Worker { .await .map_err(|e| stage_failure(&format!("ledger load failed: {e}")))?; - let policy_ctx = build_policy_context(&ledger_handle, &governance) + let policy_ctx = build_policy_context(&self.staging.fluree, &ledger_handle, &governance) .await .map_err(submission_to_stage)?; From cd2426ce0607519bf66c8e12a245bdd93fee0dbd Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 06:48:21 -0400 Subject: [PATCH 02/23] fix(policy): identity binds 
?$identity under cross-ledger f:policySource instead of failing closed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-ledger guard rejected any request carrying an identity, even when a f:policyClass was available (from D's config or the request) to drive rule loading — the exact case the design intends: rules load by policy class, the identity binds ?$identity contextually. Since authenticated deployments attach an identity to virtually every request, cross-ledger policy governance was unusable outside anonymous / policy-class-only requests, and an identity-carrying write to a cross-ledger-governed ledger returned a hard config error. The naive fix (relax the guard alone) would have been worse than the bug: build_policy_context_from_opts_inner gave the identity branch unconditional priority, and that branch ignores cross_ledger_restrictions entirely — a relaxed guard would have routed identity-carrying requests into same-ledger identity-mode, silently dropping M's rules (fail-open). Changes: - resolve_cross_ledger_policy_restrictions: the class filter is now an explicit chain — request policy_class → config f:policyClass → {f:AccessPolicy} (anonymous requests only). The config's class is passed separately because merge_policy_opts returns request opts unchanged when the request carries any policy input and override is permitted, so an identity-only request never sees the config's class through the merge. An identity-carrying request with no class anywhere still fails closed: the identity is bind-only and can't select rules, so the operator must name what governs. - build_policy_context_from_opts_inner: the cross-ledger branch now takes priority over identity-mode. 
Under cross-ledger, the identity is bind-only — new resolve_identity_binding_sid resolves it against D (strict encode + subject-existence check, mirroring identity-mode's three-state binding contract) and populates ?$identity for f:query rules, without consulting the identity's D-local f:policyClass triples (a cross-ledger f:policySource declares M the policy authority). - Both read (wrap_policy) and write (build_transact_policy_context) builders pass the config policy class through the shared helper, so the contract cannot drift between paths. Credentialed transactions (inherently identity-carrying) now work against cross-ledger-governed ledgers whenever D's config declares a f:policyClass. Tests: - cross_ledger_identity_with_config_policy_class_enforced_on_writes — the reported scenario: identity + config policyClass builds a context (no error), M's modify-deny rejects the violating write, untargeted writes pass. - cross_ledger_identity_binding_drives_fquery_modify_rule — an owner-only f:query rule in M allows the identity to write its own user's email and rejects writes to another user's, proving ?$identity binds live. - identity_with_policy_class_engages_cross_ledger_rules (read) — the same owner-only rule filters another user's email from query results while the identity's own stays visible. - Existing identity-without-policy-class fail-closed tests unchanged. - All three new tests verified to fail under BOTH temp-reverts: the old blanket guard (fail-closed regression) and an identity-branch-first stub (the silent fail-open bypass). Docs: cross-ledger-policy.md gains an "Identity binding under cross-ledger policy" section (bind-only contract, class-filter chain, the defaultAllow/override-control merge subtlety); limitations table, policy-in-transactions.md, and the design doc scope updated to match. 
--- docs/design/cross-ledger-model-enforcement.md | 13 +- docs/security/cross-ledger-policy.md | 49 +++- docs/security/policy-in-transactions.md | 2 +- fluree-db-api/src/policy_builder.rs | 141 +++++++-- fluree-db-api/src/policy_view.rs | 87 ++++-- fluree-db-api/src/tx.rs | 7 +- fluree-db-api/src/view/fluree_ext.rs | 16 +- fluree-db-api/tests/it_policy_cross_ledger.rs | 123 +++++++- fluree-db-api/tests/it_policy_write_path.rs | 277 ++++++++++++++++++ 9 files changed, 636 insertions(+), 79 deletions(-) diff --git a/docs/design/cross-ledger-model-enforcement.md b/docs/design/cross-ledger-model-enforcement.md index 053197e180..f58d61bf1b 100644 --- a/docs/design/cross-ledger-model-enforcement.md +++ b/docs/design/cross-ledger-model-enforcement.md @@ -484,10 +484,15 @@ mode can be added without rewriting the failure taxonomy. - Reserved-feature rejection: `f:atT`, `f:trustPolicy`, and `f:rollbackGuard` are surfaced as `UnsupportedFeature` rather than silently ignored. -- Identity-mode + cross-ledger policy combination fails closed - with a config error — the design's "M contributes rules, D - contributes identity" boundary is enforced at the request - surface. +- Identity binding under cross-ledger policy: a request identity + is bind-only — it resolves against D to populate `?$identity` + for M's `f:query` rules and never selects rules (the design's + "M contributes rules, D contributes identity binding" + boundary). Rule selection is exclusively the policy-class + filter chain (request → config → `{f:AccessPolicy}` for + anonymous requests); an identity-carrying request with no + policy class anywhere fails closed with a config error rather + than silently defaulting. 
### Reserved diff --git a/docs/security/cross-ledger-policy.md b/docs/security/cross-ledger-policy.md index 6dc592b158..b25d818655 100644 --- a/docs/security/cross-ledger-policy.md +++ b/docs/security/cross-ledger-policy.md @@ -180,8 +180,9 @@ with at least one of: cleanest way to declare "use the configured policy"). Matching the class in D's config (e.g., `f:AccessPolicy`) is the natural choice. -- `fluree-identity: ` — an identity header. Identity-mode - has a different contract; see below. +- `fluree-identity: ` — an identity header. Under + cross-ledger the identity is bind-only; see + [Identity binding](#identity-binding-under-cross-ledger-policy). - `opts.policy` in the body — inline JSON-LD policy. This still merges with cross-ledger rules. @@ -192,6 +193,48 @@ this gating. The write-side equivalent is `build_transact_policy_context` — see [Programmatic policy API (Rust)](programmatic-policy.md). +## Identity binding under cross-ledger policy + +An identity on the request (`fluree-identity` header, +`opts.identity`, or a verified credential's DID) is **bind-only** +under a cross-ledger `f:policySource`: + +- The identity resolves against **D** (identities are a + data-ledger concept — M never contributes identity records) and + populates `?$identity` for any `f:query` rules in M's policy + set. An owner-only rule authored in M therefore works across + every governed data ledger, with each D binding its own + identities. +- The identity **never selects rules**. Same-ledger identity-mode + loads policies via the identity's `f:policyClass` triples; + under cross-ledger those D-local triples are intentionally not + consulted — declaring a cross-ledger `f:policySource` makes M + the policy authority, and rule selection is exclusively the + policy-class filter chain: + + 1. the request's `policy_class` (when present), + 2. else the config's `f:policyClass`, + 3. else — for anonymous requests only — `{f:AccessPolicy}`. 
+ +- Because the identity can't select rules, an identity-carrying + request with **no policy class anywhere** (request or config) + fails closed: the operator must name which classes govern. + In practice, setting `f:policyClass` in D's config (as in the + configuration example above) makes authenticated requests work + with no per-request changes. +- An identity IRI with no subject node in D yields an unbound + `?$identity`: `f:query` rules referencing it match nothing, so + `f:required` rules deny — the same contract as same-ledger + identity-mode's unknown-identity case. + +One merge subtlety: an identity counts as a request policy input, +so under the default `f:overrideControl` (`f:OverrideAll`) the +request's options take precedence and the config's +`f:defaultAllow` is **not** merged for identity-carrying requests +(same long-standing contract as same-ledger reads). Send the +`fluree-default-allow` header explicitly, or set a stricter +override control if the config should always win. + ## Cross-ledger uniqueness constraints Same two-ledger pattern, different subsystem. M holds an @@ -464,7 +507,7 @@ closed when configured: | `f:atT` (temporal pinning of M) | Request fails with `UnsupportedFeature { feature: "f:atT", phase: "Phase 3" }`. | | `f:trustPolicy` (commit-signer allowlist) | Request fails with `UnsupportedFeature`. | | `f:rollbackGuard` (freshness constraints) | Request fails with `UnsupportedFeature`. | -| `opts.identity` + cross-ledger `f:policySource` | Request fails with a config error. Identity-mode loads policies via the identity's `f:policyClass` triples, which would have to resolve in D (the identity isn't an M concept); combining the two modes ambiguously is rejected rather than silently choosing one. Use `opts.policy_class` with cross-ledger configs. | +| `opts.identity` + cross-ledger `f:policySource` **with no policy class anywhere** | Request fails with a config error. 
The identity is bind-only under cross-ledger (see [Identity binding](#identity-binding-under-cross-ledger-policy)) and can't select rules, so a policy class must be named on the request or in D's config. With a class available, identity-carrying requests work normally. | | `f:policySource` with `f:graphSelector` naming M's `#config` or `#txn-meta` | Request fails with `ReservedGraphSelected` before any storage read on M. | | Transitive `owl:imports` across model ledgers (`f:schemaSource` recursion) | Not yet honored. Imports inside M's schema graph are projected but the resolver doesn't follow them across ledger boundaries. | diff --git a/docs/security/policy-in-transactions.md b/docs/security/policy-in-transactions.md index 5f3460a673..53c2181377 100644 --- a/docs/security/policy-in-transactions.md +++ b/docs/security/policy-in-transactions.md @@ -90,7 +90,7 @@ The ledger's `#config` graph governs writes the same way it governs reads: - **Policy defaults apply without request inputs.** When `f:policyDefaults` declares `f:policyClass` (and optionally `f:defaultAllow`), transactions build a policy context from those defaults even when the request carries no `fluree-identity` / `fluree-policy-class` headers or inline `opts.policy`. A ledger configured with a modify-deny rule rejects violating writes from anonymous requests, matching read-side behavior. - **`f:policySource` redirects the rule lookup.** Policy rules relocated into a named graph (or a cross-ledger model ledger via `f:ledger`) are loaded from the configured source at transaction time — never silently from the default graph. Unknown graph selectors fail closed. - **Cross-ledger sources always engage.** A cross-ledger `f:policySource` builds a policy context unconditionally (mirroring the read path): the model ledger's `f:modify` rules apply to every transaction against the data ledger. See [Cross-ledger policy](cross-ledger-policy.md). 
-- **Identity-mode + cross-ledger fails closed.** A request that sets an identity against a ledger with a cross-ledger `f:policySource` is rejected with a config error, the same Phase 1a contract the read path enforces. Use `f:policyClass` with cross-ledger configs. +- **Identities are bind-only under cross-ledger.** A request identity (header, `opts.identity`, or a verified credential's DID) resolves against the data ledger and populates `?$identity` for the model ledger's `f:query` rules — it never selects rules the way same-ledger identity-mode does. Rule selection is the policy-class chain (request `policy_class` → config `f:policyClass` → `{f:AccessPolicy}` for anonymous requests). An identity-carrying request with no policy class anywhere fails closed; declaring `f:policyClass` in the config makes authenticated writes work with no per-request changes. See [Cross-ledger policy → Identity binding](cross-ledger-policy.md#identity-binding-under-cross-ledger-policy). - **Override control gates request-time overrides.** A request that supplies its own policy inputs replaces the config defaults only when the config's `f:overrideControl` permits it — see [Override control](../ledger-config/override-control.md). This applies uniformly across the server transact routes (local and Raft consensus), push replication, credentialed transactions, and the CLI's local mode with policy flags. diff --git a/fluree-db-api/src/policy_builder.rs b/fluree-db-api/src/policy_builder.rs index a3616d0697..4688fe06ae 100644 --- a/fluree-db-api/src/policy_builder.rs +++ b/fluree-db-api/src/policy_builder.rs @@ -134,16 +134,22 @@ pub async fn build_policy_context_from_opts( /// `cross_ledger_restrictions` is a pre-materialized list produced /// against a model ledger by the cross-ledger resolver and /// translated into D's term space via -/// `fluree_db_policy::wire_to_restrictions` (with D's configured -/// `policy_class` set already applied as a filter). 
When supplied, -/// the local same-ledger policy load (`load_policies_by_class` / -/// `parse_inline_policy`) is bypassed for the class / inline-policy -/// branch — those restrictions are used as-is. Identity loading and -/// `?$identity` binding still run locally against D per the -/// identity contract in the design doc. +/// `fluree_db_policy::wire_to_restrictions` (with the policy-class +/// filter chain already applied). When supplied, the local +/// same-ledger policy load (`load_policies_by_identity` / +/// `load_policies_by_class` / `parse_inline_policy`) is bypassed +/// for rule selection — those restrictions are used as-is, plus any +/// inline `opts.policy` merge. /// -/// `policy_graphs` is still consulted for the identity-mode path -/// (`opts.identity` set) because identity binding always resolves +/// Identity contract: `opts.identity` is **bind-only** under +/// cross-ledger. It resolves against D to populate `?$identity` for +/// f:query rules; it never selects rules (same-ledger identity-mode +/// consults the identity's D-local `f:policyClass` triples — those +/// are intentionally ignored here because a cross-ledger +/// `f:policySource` declares M the policy authority). +/// +/// `policy_graphs` is still consulted for the identity binding's +/// subject-existence check because identity binding always resolves /// against the data ledger; cross-ledger never contributes identity /// records. pub async fn build_policy_context_from_opts_with_cross_ledger( @@ -204,14 +210,60 @@ async fn build_policy_context_from_opts_inner( // Load policies and resolve identity SID. // - // When opts.identity is set, load_policies_by_identity returns a three-state enum - // distinguishing identity-not-in-ledger, identity-exists-with-no-policies, and - // identity-exists-with-policies. 
The distinction matters for binding `?$identity` - // in policy_values (only possible when we have a concrete SID), not for gating - // access — `opts.default_allow` governs in all three cases. + // When opts.identity is set (same-ledger), load_policies_by_identity returns a + // three-state enum distinguishing identity-not-in-ledger, + // identity-exists-with-no-policies, and identity-exists-with-policies. The + // distinction matters for binding `?$identity` in policy_values (only possible + // when we have a concrete SID), not for gating access — `opts.default_allow` + // governs in all three cases. // - // Priority: identity > policy_class > policy > policy_values["?$identity"] - let (identity_sid, restrictions) = if let Some(identity_iri) = &opts.identity { + // Priority: cross-ledger restrictions > identity > policy_class > policy > + // policy_values["?$identity"] + let (identity_sid, restrictions) = if let Some(mut merged) = cross_ledger_restrictions { + // Cross-ledger short-circuit: the resolver already materialized + // restrictions from the model ledger, filtered by the policy-class + // chain. Rule selection is complete before this function runs. + // + // Identity contract: an identity on the request is BIND-ONLY here. + // It resolves against the data ledger to populate `?$identity` for + // f:query rules — it never selects rules the way same-ledger + // identity-mode does (via the identity's f:policyClass triples in + // D). Those D-local triples are intentionally not consulted: a + // cross-ledger f:policySource declares M the policy authority. + // An identity with no subject node in D yields an unbound + // `?$identity` (f:query rules referencing it won't match), same as + // identity-mode's NotFound. + // + // opts.policy (inline JSON-LD) still applies and gets merged below. 
+ // Moving — not cloning — the owned input keeps model-ledger policy + // sets (which can be large: each `PolicyRestriction` carries + // strings + hash sets) from paying a per-request copy. + let identity_sid = if let Some(identity_iri) = &opts.identity { + let resolved = + resolve_identity_binding_sid(snapshot, overlay, to_t, identity_iri, policy_graphs) + .await?; + if let Some(sid) = &resolved { + policy_values.insert("?$identity".to_string(), sid.clone()); + } + resolved + } else if let Some(sid) = policy_values.get("?$identity") { + Some(sid.clone()) + } else if let Some(pv) = &opts.policy_values { + if pv.contains_key("?$identity") { + return Err(ApiError::query( + "?$identity provided in policy-values but could not be encoded", + )); + } + None + } else { + None + }; + + if let Some(policy_json) = &opts.policy { + merged.extend(parse_inline_policy(snapshot, policy_json)?); + } + (identity_sid, merged) + } else if let Some(identity_iri) = &opts.identity { match load_policies_by_identity(snapshot, overlay, to_t, identity_iri, policy_graphs) .await? { @@ -248,22 +300,7 @@ async fn build_policy_context_from_opts_inner( None }; - let restrictions = if let Some(mut merged) = cross_ledger_restrictions { - // Cross-ledger short-circuit: the resolver already - // materialized restrictions from the model ledger and - // (per the identity contract) the wire artifact has been - // filtered by opts.policy_class. opts.policy (inline - // JSON-LD) still applies and gets merged below. - // - // Moving — not cloning — the owned input keeps - // model-ledger policy sets (which can be large: each - // `PolicyRestriction` carries strings + hash sets) from - // paying a per-request copy. 
- if let Some(policy_json) = &opts.policy { - merged.extend(parse_inline_policy(snapshot, policy_json)?); - } - merged - } else if let Some(classes) = &opts.policy_class { + let restrictions = if let Some(classes) = &opts.policy_class { load_policies_by_class(snapshot, overlay, to_t, classes, policy_graphs).await? } else if let Some(policy_json) = &opts.policy { parse_inline_policy(snapshot, policy_json)? @@ -395,6 +432,46 @@ enum IdentityLookupResult { }, } +/// Resolve an identity IRI to a bindable SID **without loading its policies**. +/// +/// Used under cross-ledger `f:policySource`, where rule selection is +/// exclusively the wire's policy-class filter and the identity contributes +/// only the `?$identity` binding. Mirrors identity-mode's three-state +/// contract for the binding decision: `None` when the IRI is unresolvable or +/// has no subject node in the searched graphs (identity-mode's `NotFound` — +/// no binding), `Some(sid)` when the subject exists (with or without +/// D-local policies, which are intentionally not consulted here). +async fn resolve_identity_binding_sid( + snapshot: &LedgerSnapshot, + overlay: &dyn fluree_db_core::OverlayProvider, + to_t: i64, + identity_iri: &str, + graphs: &[fluree_db_core::GraphId], +) -> Result> { + let identity_sid = match resolve_identity_iri_to_sid(snapshot, identity_iri) { + Ok(sid) => sid, + Err(_) => return Ok(None), + }; + + let range_opts = RangeOptions::default().with_flake_limit(1); + for &g_id in graphs { + let db = GraphDbRef::new(snapshot, g_id, overlay, to_t); + let exists = db + .range_with_opts( + IndexType::Spot, + RangeTest::Eq, + RangeMatch::subject(identity_sid.clone()), + range_opts.clone(), + ) + .await + .map_err(|e| ApiError::internal(format!("identity existence check failed: {e}")))?; + if !exists.is_empty() { + return Ok(Some(identity_sid)); + } + } + Ok(None) +} + /// Look up the policies for `identity_iri` via its `f:policyClass` property. 
/// /// Returns an [`IdentityLookupResult`] that distinguishes whether the identity diff --git a/fluree-db-api/src/policy_view.rs b/fluree-db-api/src/policy_view.rs index 04fdc86999..3379e5ef48 100644 --- a/fluree-db-api/src/policy_view.rs +++ b/fluree-db-api/src/policy_view.rs @@ -242,32 +242,60 @@ pub async fn build_policy_context( /// /// Shared between `wrap_policy` (read path) and /// [`build_transact_policy_context`] (write path) so both sides apply -/// identical semantics: identity-mode rejection, `ArtifactKind::PolicyRules` -/// dispatch, and the policy-class intersection filter. -/// -/// The filter contract: the data ledger's configured `policy_class` set is -/// applied as an exact-IRI intersection on the wire's restrictions, OR -/// `{f:AccessPolicy}` when no policy_class is set. `f:AccessPolicy` is the -/// canonical / baseline policy class — declaring `f:policySource` -/// cross-ledger pulls those rules in automatically; custom-typed rules -/// require an explicit `f:policyClass` in D's config to be enforced. This is -/// the safer default than "load every structurally-policy-looking subject -/// from M," which would silently include rules the operator never opted into. +/// identical semantics: the class-filter chain, the identity contract, and +/// the `ArtifactKind::PolicyRules` dispatch. +/// +/// The filter contract: rules materialized from M are intersected (exact +/// IRI) against the first non-empty entry in the chain +/// +/// `effective_opts.policy_class` → `config_policy_class` → +/// `{f:AccessPolicy}` (anonymous requests only). +/// +/// `config_policy_class` is passed separately because `merge_policy_opts` +/// returns the request opts unchanged when the request carries any policy +/// input and override is permitted — an identity-only request would +/// otherwise never see the config's `f:policyClass`. 
+/// +/// The identity contract: an identity on the request **binds `?$identity` +/// against D and never selects rules from M** — rule selection under +/// cross-ledger is exclusively the class filter (M contributes rules, D +/// contributes identity binding). Because the identity can't select rules, +/// an identity-carrying request with no policy class anywhere fails closed +/// rather than silently falling back to the `{f:AccessPolicy}` default: the +/// operator must name which classes govern. +/// +/// `f:AccessPolicy` is the canonical / baseline policy class — declaring +/// `f:policySource` cross-ledger pulls those rules in automatically for +/// anonymous requests; custom-typed rules require an explicit +/// `f:policyClass` in D's config to be enforced. This is the safer default +/// than "load every structurally-policy-looking subject from M," which +/// would silently include rules the operator never opted into. pub(crate) async fn resolve_cross_ledger_policy_restrictions( snapshot: &LedgerSnapshot, effective_opts: &GovernanceOptions, + config_policy_class: Option<&[String]>, source: &fluree_db_core::ledger_config::GraphSourceRef, ctx: &mut crate::cross_ledger::ResolveCtx<'_>, ) -> Result> { - // Phase 1a: cross-ledger + identity-mode is not supported. The model - // ledger contributes policy rules; the data ledger contributes identity - // binding. Mixing them ambiguously is a fail-closed config error. 
- if effective_opts.identity.is_some() { + const DEFAULT_POLICY_CLASS_IRI: &str = fluree_vocab::policy_iris::ACCESS_POLICY; + let filter: std::collections::HashSet = if let Some(classes) = effective_opts + .policy_class + .as_ref() + .filter(|v| !v.is_empty()) + { + classes.iter().cloned().collect() + } else if let Some(classes) = config_policy_class.filter(|v| !v.is_empty()) { + classes.iter().cloned().collect() + } else if effective_opts.identity.is_none() { + [DEFAULT_POLICY_CLASS_IRI.to_string()].into_iter().collect() + } else { return Err(crate::error::ApiError::config( - "cross-ledger f:policySource cannot be combined with opts.identity \ - in Phase 1a; use opts.policy_class with the cross-ledger config", + "cross-ledger f:policySource with an identity requires an explicit \ + f:policyClass (on the request or in the ledger config) to select \ + which of the model ledger's rules apply; the identity only binds \ + ?$identity and never selects rules", )); - } + }; let resolved = crate::cross_ledger::resolve_graph_ref( source, @@ -292,14 +320,6 @@ pub(crate) async fn resolve_cross_ledger_policy_restrictions( )); }; - const DEFAULT_POLICY_CLASS_IRI: &str = fluree_vocab::policy_iris::ACCESS_POLICY; - let filter: std::collections::HashSet = effective_opts - .policy_class - .as_ref() - .filter(|v| !v.is_empty()) - .map(|v| v.iter().cloned().collect()) - .unwrap_or_else(|| [DEFAULT_POLICY_CLASS_IRI.to_string()].into_iter().collect()); - fluree_db_policy::wire_to_restrictions(wire, |iri| snapshot.encode_iri(iri), Some(&filter)) .map_err(crate::error::ApiError::from) } @@ -357,9 +377,18 @@ pub async fn build_transact_policy_context( if let Some(source) = source.filter(|s| s.ledger.is_some()) { let ledger_id: String = snapshot.ledger_id.to_string(); let mut ctx = crate::cross_ledger::ResolveCtx::new(&ledger_id, fluree); - let restrictions = - resolve_cross_ledger_policy_restrictions(snapshot, &effective_opts, source, &mut ctx) - .await?; + let config_policy_class = 
resolved + .as_ref() + .and_then(|r| r.policy.as_ref()) + .and_then(|p| p.policy_class.as_deref()); + let restrictions = resolve_cross_ledger_policy_restrictions( + snapshot, + &effective_opts, + config_policy_class, + source, + &mut ctx, + ) + .await?; let policy_ctx = policy_builder::build_policy_context_from_opts_with_cross_ledger( snapshot, overlay, diff --git a/fluree-db-api/src/tx.rs b/fluree-db-api/src/tx.rs index 3040150431..14afb6f1e7 100644 --- a/fluree-db-api/src/tx.rs +++ b/fluree-db-api/src/tx.rs @@ -2623,9 +2623,10 @@ impl crate::Fluree { // Build policy context with verified identity. Config-aware: a // configured `f:policySource` redirects the policy-rule lookup to // the declared graph, and config policy defaults merge in — same - // semantics as the consensus transact path. (A cross-ledger - // f:policySource fails closed here: identity-mode + cross-ledger is - // rejected in Phase 1a.) + // semantics as the consensus transact path. Under a cross-ledger + // f:policySource the verified identity is bind-only (?$identity); + // rule selection needs a f:policyClass in the ledger config, else + // this fails closed. let opts = crate::GovernanceOptions { identity: Some(verified.did.clone()), ..Default::default() diff --git a/fluree-db-api/src/view/fluree_ext.rs b/fluree-db-api/src/view/fluree_ext.rs index 027f661584..ec49999a9e 100644 --- a/fluree-db-api/src/view/fluree_ext.rs +++ b/fluree-db-api/src/view/fluree_ext.rs @@ -696,12 +696,22 @@ impl Fluree { self, (**view.cross_ledger_resolved_ts()).clone(), ); - // Identity-mode rejection (Phase 1a), PolicyRules dispatch, and - // the policy-class intersection filter all live in the shared - // helper so read and write paths can't drift. + // The class-filter chain, identity contract (bind-only, never a + // rule selector), and PolicyRules dispatch all live in the shared + // helper so read and write paths can't drift. 
The config's + // policy_class is passed separately: merge_policy_opts returns + // the request opts unchanged when the request carries any policy + // input and override is permitted, so an identity-only request + // would otherwise never see the config's f:policyClass. + let config_policy_class = view + .resolved_config + .as_ref() + .and_then(|c| c.policy.as_ref()) + .and_then(|p| p.policy_class.as_deref()); let restrictions = crate::policy_view::resolve_cross_ledger_policy_restrictions( &view.snapshot, &effective_opts, + config_policy_class, source, &mut ctx, ) diff --git a/fluree-db-api/tests/it_policy_cross_ledger.rs b/fluree-db-api/tests/it_policy_cross_ledger.rs index 4bc828a14f..167002f9ab 100644 --- a/fluree-db-api/tests/it_policy_cross_ledger.rs +++ b/fluree-db-api/tests/it_policy_cross_ledger.rs @@ -616,11 +616,126 @@ async fn omitted_policy_class_defaults_to_access_policy_only() { ); } +/// Identity + cross-ledger with a policy class available (from D's +/// config): rules load from M via the class filter and the identity +/// binds `?$identity` against D, driving M's f:query rules. The +/// owner-only view rule hides bob's email from aliceIdentity while +/// alice's own email stays visible — proving both that the request +/// no longer fails closed and that the binding is live. +#[tokio::test] +async fn identity_with_policy_class_engages_cross_ledger_rules() { + let fluree = FlureeBuilder::memory().build_memory(); + + let model_id = "test/cross-ledger-e2e/id-bind-model:main"; + let model = genesis_ledger(&fluree, model_id); + let policy_graph_iri = "http://example.org/id-bind-policies"; + // Full IRIs inside f:query — it executes against D. + let owner_query = + r#"{"where": {"@id": "?$identity", "http://example.org/ns/user": {"@id": "?$this"}}}"#; + fluree + .stage_owned(model) + .upsert_turtle(&format!( + r#" + @prefix f: . + @prefix rdf: . + @prefix ex: . 
+ + GRAPH <{policy_graph_iri}> {{ + ex:ownerEmailOnly + rdf:type f:AccessPolicy ; + f:required true ; + f:onProperty ex:email ; + f:action f:view ; + f:query """{owner_query}""" . + }} + "# + )) + .execute() + .await + .expect("seed M owner-only view rule"); + + let data_id = "test/cross-ledger-e2e/id-bind-data:main"; + let data = genesis_ledger(&fluree, data_id); + let r1 = fluree + .insert( + data, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "@graph": [ + {"@id": "ex:alice", "@type": "ex:User", "ex:name": "Alice", "ex:email": "alice@flur.ee"}, + {"@id": "ex:bob", "@type": "ex:User", "ex:name": "Bob", "ex:email": "bob@flur.ee"}, + {"@id": "ex:aliceIdentity", "ex:user": {"@id": "ex:alice"}} + ] + }), + ) + .await + .expect("seed D users + identity"); + let data = r1.ledger; + + let config_iri = config_graph_iri(data_id); + fluree + .stage_owned(data) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policyClass f:AccessPolicy . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{policy_graph_iri}> . + }} + " + )) + .execute() + .await + .expect("seed D cross-ledger config with policyClass"); + + // Identity-carrying request — previously a hard config error. 
+ let opts = GovernanceOptions { + identity: Some("http://example.org/ns/aliceIdentity".into()), + ..Default::default() + }; + let wrapped = fluree + .db_with_policy(data_id, &opts) + .await + .expect("identity + config policyClass must not fail closed"); + + let emails = fluree + .query( + &wrapped, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "select": ["?who", "?email"], + "where": {"@id": "?who", "ex:email": "?email"} + }), + ) + .await + .expect("query emails under cross-ledger identity binding"); + let rendered = emails + .to_jsonld(&wrapped.snapshot) + .expect("jsonld") + .to_string(); + assert!( + rendered.contains("alice@flur.ee"), + "aliceIdentity must see its own user's email via ?$identity binding, got {rendered}" + ); + assert!( + !rendered.contains("bob@flur.ee"), + "aliceIdentity must NOT see bob's email — M's owner-only rule must filter it, got {rendered}" + ); +} + /// Combining `opts.identity` with cross-ledger `f:policySource` is -/// a fail-closed config error in Phase 1a: the model ledger -/// contributes policy rules, the data ledger contributes identity -/// binding, and mixing them via identity-mode would attribute -/// policies ambiguously across ledger boundaries. +/// a fail-closed config error when no policy class is available +/// anywhere (request or config): the identity is bind-only and can't +/// select rules, so the operator must name which classes govern. 
#[tokio::test] async fn cross_ledger_plus_identity_mode_fails_closed() { let fluree = FlureeBuilder::memory().build_memory(); diff --git a/fluree-db-api/tests/it_policy_write_path.rs b/fluree-db-api/tests/it_policy_write_path.rs index 4e51970816..6679efd68f 100644 --- a/fluree-db-api/tests/it_policy_write_path.rs +++ b/fluree-db-api/tests/it_policy_write_path.rs @@ -472,3 +472,280 @@ async fn cross_ledger_plus_identity_fails_closed_on_writes() { "expected fail-closed diagnostic mentioning both, got: {msg}" ); } + +/// Identity + cross-ledger `f:policySource` works when a policy class is +/// available (here from D's config): M's rules load via the class filter, +/// the identity is bind-only, and enforcement applies. This is the common +/// authenticated-deployment case — previously it failed closed even though +/// the config named the governing class. +#[tokio::test] +async fn cross_ledger_identity_with_config_policy_class_enforced_on_writes() { + assert_index_defaults(); + let fluree = FlureeBuilder::memory().build_memory(); + + let model_id = "policy/write-xledger-idclass/model:main"; + let model = genesis_ledger(&fluree, model_id); + let policy_graph_iri = "http://example.org/m-policies"; + fluree + .stage_owned(model) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{policy_graph_iri}> {{ + ex:noSsnWrite + rdf:type f:AccessPolicy ; + f:required true ; + f:onProperty ex:ssn ; + f:action f:modify ; + f:allow false . + }} + " + )) + .execute() + .await + .expect("seed M policy graph"); + + let data_id = "policy/write-xledger-idclass/data:main"; + let data = genesis_ledger(&fluree, data_id); + // The identity must exist as a subject in D for ?$identity binding. 
+ let r1 = fluree + .insert( + data, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "@graph": [ + {"@id": "ex:alice", "@type": "ex:User", "ex:ssn": "111-11-1111"}, + {"@id": "ex:aliceIdentity", "ex:user": {"@id": "ex:alice"}} + ] + }), + ) + .await + .expect("seed D data + identity"); + + let config_iri = config_graph_iri(data_id); + let r2 = fluree + .stage_owned(r1.ledger) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policyClass f:AccessPolicy . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{policy_graph_iri}> . + }} + " + )) + .execute() + .await + .expect("seed D cross-ledger config with policyClass"); + let ledger = r2.ledger; + + // Identity-carrying request: must build (not fail closed) because the + // config's f:policyClass selects M's rules; the identity binds only. + // + // default_allow is set on the request: identity counts as a policy + // input, so under the default f:OverrideAll the request's options take + // precedence and the config's f:defaultAllow is NOT merged (same + // long-standing contract as same-ledger reads). Operators who want the + // config to always win set f:overrideControl accordingly. 
+ let opts = GovernanceOptions { + identity: Some("http://example.org/ns/aliceIdentity".into()), + default_allow: true, + ..Default::default() + }; + let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &opts, + ) + .await + .expect("identity + config policyClass must not fail closed") + .expect("cross-ledger source must produce a policy context"); + + let cfg = test_index_config(); + let denied_turtle = "@prefix ex: .\nex:bob ex:ssn \"999-99-9999\" .\n"; + let denied = fluree + .insert_turtle_with_opts( + ledger.clone(), + denied_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + denied.is_err(), + "M's modify-deny on ex:ssn must reject the identity-carrying write, got: {denied:?}" + ); + + let allowed_turtle = "@prefix ex: .\nex:bob ex:name \"Bob\" .\n"; + let allowed = fluree + .insert_turtle_with_opts( + ledger, + allowed_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + allowed.is_ok(), + "defaultAllow=true must let untargeted writes through, got: {:?}", + allowed.err() + ); +} + +/// The `?$identity` binding actually drives f:query rules from M: an +/// owner-only modify rule in the model ledger allows the identity to write +/// its own user's email and rejects writes to anyone else's. This pins the +/// bind-only contract — the identity resolves in D and feeds `?$identity`, +/// while the rule itself lives exclusively in M. 
+#[tokio::test] +async fn cross_ledger_identity_binding_drives_fquery_modify_rule() { + assert_index_defaults(); + let fluree = FlureeBuilder::memory().build_memory(); + + let model_id = "policy/write-xledger-fquery/model:main"; + let model = genesis_ledger(&fluree, model_id); + let policy_graph_iri = "http://example.org/m-policies"; + // Full IRIs inside f:query — it executes against D, where prefixed + // names from M's turtle context wouldn't expand. + let owner_query = + r#"{"where": {"@id": "?$identity", "http://example.org/ns/user": {"@id": "?$this"}}}"#; + fluree + .stage_owned(model) + .upsert_turtle(&format!( + r#" + @prefix f: . + @prefix rdf: . + @prefix ex: . + + GRAPH <{policy_graph_iri}> {{ + ex:ownerEmailOnly + rdf:type f:AccessPolicy ; + f:required true ; + f:onProperty ex:email ; + f:action f:modify ; + f:query """{owner_query}""" . + }} + "# + )) + .execute() + .await + .expect("seed M owner-only f:query rule"); + + let data_id = "policy/write-xledger-fquery/data:main"; + let data = genesis_ledger(&fluree, data_id); + let r1 = fluree + .insert( + data, + &json!({ + "@context": {"ex": "http://example.org/ns/"}, + "@graph": [ + {"@id": "ex:alice", "ex:email": "alice@flur.ee"}, + {"@id": "ex:bob", "ex:email": "bob@flur.ee"}, + {"@id": "ex:aliceIdentity", "ex:user": {"@id": "ex:alice"}} + ] + }), + ) + .await + .expect("seed D users + identity"); + + let config_iri = config_graph_iri(data_id); + let r2 = fluree + .stage_owned(r1.ledger) + .upsert_turtle(&format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:policyDefaults . + f:defaultAllow true . + f:policyClass f:AccessPolicy . + f:policySource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{policy_graph_iri}> . 
+ }} + " + )) + .execute() + .await + .expect("seed D cross-ledger config"); + let ledger = r2.ledger; + + let opts = GovernanceOptions { + identity: Some("http://example.org/ns/aliceIdentity".into()), + ..Default::default() + }; + let ctx = build_transact_policy_context( + &fluree, + &ledger.snapshot, + ledger.novelty.as_ref(), + Some(ledger.novelty.as_ref()), + ledger.t(), + &opts, + ) + .await + .expect("build") + .expect("cross-ledger source must produce a policy context"); + + let cfg = test_index_config(); + // aliceIdentity owns ex:alice → writing alice's email matches the + // ?$identity → ex:user → ?$this chain and is allowed. + let own_turtle = + "@prefix ex: .\nex:alice ex:email \"new-alice@flur.ee\" .\n"; + let own = fluree + .insert_turtle_with_opts( + ledger.clone(), + own_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + own.is_ok(), + "identity must be able to write its own user's email via M's f:query rule, got: {:?}", + own.err() + ); + + // bob is not aliceIdentity's user → the required rule's f:query binds + // nothing → rejected. 
+ let other_turtle = + "@prefix ex: .\nex:bob ex:email \"hacked@flur.ee\" .\n"; + let other = fluree + .insert_turtle_with_opts( + ledger, + other_turtle, + TxnOpts::default(), + CommitOpts::default(), + &cfg, + Some(&ctx), + ) + .await; + assert!( + other.is_err(), + "identity must NOT be able to write another user's email, got: {other:?}" + ); +} From 4c3fde2aa7dc3aa8668e49c2678c5a0a04b0942c Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 18:14:23 -0400 Subject: [PATCH 03/23] fix(reasoning): fail closed on followOwlImports with cross-ledger schemaSource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-ledger schema materializer resolves a single graph and does not walk owl:imports, but combining f:followOwlImports true with a cross-ledger f:schemaSource was silently ignored — reasoning ran over the starting graph alone while the config declared a full import closure. Reject the combination with ApiError::OntologyImport, matching the fail-closed treatment of f:atT / f:trustPolicy / f:rollbackGuard. Add the missing end-to-end coverage for config-driven cross-ledger f:schemaSource (named-graph and f:defaultGraph selectors): D's config points reasoning at M's ontology, and an rdfs query on D entails instances via M's subclass axiom without copying anything into D. Only wire-level materializer tests existed before. Also clarify the local resolver's rejection message (cross-ledger refs are supported for the top-level f:schemaSource on single-ledger queries; f:ontologyImportMap entries and dataset queries remain same-ledger) and update the design docs to match. 
--- docs/design/cross-ledger-model-enforcement.md | 5 +- docs/design/ontology-imports.md | 40 ++-- fluree-db-api/src/ontology_imports.rs | 7 +- fluree-db-api/src/view/query.rs | 19 +- fluree-db-api/tests/grp_reasoning.rs | 2 + fluree-db-api/tests/it_schema_cross_ledger.rs | 202 ++++++++++++++++++ 6 files changed, 259 insertions(+), 16 deletions(-) create mode 100644 fluree-db-api/tests/it_schema_cross_ledger.rs diff --git a/docs/design/cross-ledger-model-enforcement.md b/docs/design/cross-ledger-model-enforcement.md index f58d61bf1b..3cc5d7fcba 100644 --- a/docs/design/cross-ledger-model-enforcement.md +++ b/docs/design/cross-ledger-model-enforcement.md @@ -475,7 +475,10 @@ mode can be added without rewriting the failure taxonomy. imports, and rdf:type for the schema-class set) are projected into a `SchemaBundleFlakes` against D's snapshot and feed D's reasoner. Single-graph only today; transitive `owl:imports` - recursion across multiple model ledgers is reserved. + recursion across multiple model ledgers is reserved, and + `f:followOwlImports true` combined with a cross-ledger + `f:schemaSource` fails closed (`ApiError::OntologyImport`) + rather than silently reasoning over the starting graph alone. - Per-request memo + per-instance governance cache, both keyed on `(ArtifactKind, canonical_model_ledger_id, graph_iri, resolved_t)`. diff --git a/docs/design/ontology-imports.md b/docs/design/ontology-imports.md index 9519099f28..7fa68d31a1 100644 --- a/docs/design/ontology-imports.md +++ b/docs/design/ontology-imports.md @@ -69,13 +69,25 @@ An `OntologyImportBinding` has two fields: - `f:ontologyIri` — the IRI that appears in `owl:imports` statements. - `f:graphRef` — a nested `f:GraphRef` identifying the local graph. -The `GraphRef` shape supported for `f:schemaSource` and -`f:ontologyImportMap.graphRef` is the same-ledger shape: -`f:graphSelector` naming a local named graph, `f:defaultGraph`, or a -registered graph IRI. 
References are resolved at the query's effective -`to_t` — every named graph in a Fluree ledger shares the ledger's -monotonic `t`, so the entire closure is consistent at a single point in -time without per-import bookkeeping. +The `GraphRef` shape supported for `f:ontologyImportMap.graphRef` is the +same-ledger shape: `f:graphSelector` naming a local named graph, +`f:defaultGraph`, or a registered graph IRI. References are resolved at +the query's effective `to_t` — every named graph in a Fluree ledger +shares the ledger's monotonic `t`, so the entire closure is consistent +at a single point in time without per-import bookkeeping. + +`f:schemaSource` additionally accepts the cross-ledger shape (`f:ledger` +naming a model ledger plus an explicit `f:graphSelector`). Cross-ledger +refs bypass this module entirely: `view/query.rs:: +resolve_configured_schema_bundle` dispatches them through the shared +cross-ledger resolver (`ArtifactKind::SchemaClosure`), which projects +the model ledger's whitelisted axioms onto the data ledger's snapshot +at the model ledger's **current head** (as-of-now, matching +`f:policySource` / `f:shapesSource` semantics — not the query's `to_t`). +The cross-ledger materializer is single-graph: combining it with +`f:followOwlImports true` fails closed with `ApiError::OntologyImport` +rather than silently dropping the import closure. See +`cross-ledger-model-enforcement.md`. ## Resolution algorithm @@ -197,11 +209,15 @@ so broken ontology references surface early. Sources of this error: - A resolution that would land on a reserved system graph (config or txn-meta), whether via direct graph-IRI match, mapping table, or `f:schemaSource` selector. -- A `GraphRef` that targets a different ledger, uses `f:atT`, or carries a - `f:trustPolicy` / `f:rollbackGuard`. The bundle is resolved at the - query's single `to_t`, same-ledger scope only, and accepting these - fields silently would create a gap between declared intent and actual - behavior. 
+- An `f:ontologyImportMap.graphRef` that targets a different ledger, or + any `GraphRef` that uses `f:atT` or carries a `f:trustPolicy` / + `f:rollbackGuard`. The local bundle is resolved at the query's single + `to_t`, and accepting these fields silently would create a gap between + declared intent and actual behavior. (A cross-ledger `f:schemaSource` + is legal but never reaches this module — see above.) +- `f:followOwlImports true` combined with a cross-ledger + `f:schemaSource` (raised by `view/query.rs` before dispatch — the + cross-ledger materializer does not walk `owl:imports`). ## Wiring at query time diff --git a/fluree-db-api/src/ontology_imports.rs b/fluree-db-api/src/ontology_imports.rs index e70ea0ff7b..efc6524acc 100644 --- a/fluree-db-api/src/ontology_imports.rs +++ b/fluree-db-api/src/ontology_imports.rs @@ -161,9 +161,12 @@ fn resolve_local_graph_source( if let Some(ledger) = source.ledger.as_deref() { if ledger != snapshot.ledger_id { return Err(ApiError::OntologyImport(format!( - "schema/import sources must resolve within the current \ + "this schema/import source must resolve within the current \ ledger (ref targets ledger '{ledger}', current ledger is \ - '{}'). Move the schema into the current ledger.", + '{}'). Cross-ledger refs are supported only for the \ + top-level `f:schemaSource` of a single-ledger query — not \ + for `f:ontologyImportMap` entries or multi-ledger dataset \ + queries.", snapshot.ledger_id ))); } diff --git a/fluree-db-api/src/view/query.rs b/fluree-db-api/src/view/query.rs index 82a63b7ff2..b75574bc6f 100644 --- a/fluree-db-api/src/view/query.rs +++ b/fluree-db-api/src/view/query.rs @@ -757,7 +757,9 @@ impl Fluree { /// import; the bundle is a reasoning-only concern. /// /// Errors with [`ApiError::OntologyImport`] only when reasoning is - /// actually engaged and an import can't be resolved locally. 
+ /// actually engaged and an import can't be resolved locally, or when + /// `f:followOwlImports` is combined with a cross-ledger + /// `f:schemaSource` (the cross-ledger materializer is single-graph). async fn attach_schema_bundle( &self, db: &GraphDb, @@ -830,6 +832,21 @@ impl Fluree { // the resulting `SchemaArtifactWire` into a SchemaBundleFlakes // against D's snapshot. if schema_source.ledger.is_some() { + // The cross-ledger schema materializer resolves a single + // graph and does not walk `owl:imports`. Fail closed — + // silently ignoring `f:followOwlImports` would let the user + // believe the import closure is part of the reasoning view + // when only the starting graph is. + if reasoning.follow_owl_imports.unwrap_or(false) { + return Err(crate::error::ApiError::OntologyImport( + "`f:followOwlImports` is not supported with a cross-ledger \ + `f:schemaSource` — the cross-ledger resolver materializes \ + the referenced graph only and does not walk `owl:imports`. \ + Consolidate the schema closure into the referenced graph, \ + or remove `f:followOwlImports`." + .to_string(), + )); + } let resolved = crate::cross_ledger::resolve_graph_ref( schema_source, crate::cross_ledger::ArtifactKind::SchemaClosure, diff --git a/fluree-db-api/tests/grp_reasoning.rs b/fluree-db-api/tests/grp_reasoning.rs index e4d456e9c0..5b1c0e5bcd 100644 --- a/fluree-db-api/tests/grp_reasoning.rs +++ b/fluree-db-api/tests/grp_reasoning.rs @@ -13,3 +13,5 @@ mod it_reasoning_join_repro; mod it_rules_cross_ledger; #[path = "it_rules_source.rs"] mod it_rules_source; +#[path = "it_schema_cross_ledger.rs"] +mod it_schema_cross_ledger; diff --git a/fluree-db-api/tests/it_schema_cross_ledger.rs b/fluree-db-api/tests/it_schema_cross_ledger.rs new file mode 100644 index 0000000000..cd122618b8 --- /dev/null +++ b/fluree-db-api/tests/it_schema_cross_ledger.rs @@ -0,0 +1,202 @@ +//! End-to-end cross-ledger `f:schemaSource` reasoning. +//! +//! 
Data ledger D's `#config` declares `f:reasoningDefaults` → +//! `f:schemaSource` with `f:ledger` pointing at model ledger M's +//! ontology graph. At query time, +//! `view/query.rs::resolve_configured_schema_bundle` dispatches the +//! ref through the cross-ledger resolver (`ArtifactKind::SchemaClosure`), +//! M's whitelisted ontology axioms are projected onto D's snapshot as +//! `SchemaBundleFlakes`, and D's reasoner entails over them — nothing +//! is copied into D. +//! +//! The wire-artifact contract is pinned separately in +//! `it_cross_ledger_resolver.rs`; these tests prove the config-driven +//! path end to end, including the fail-closed rejection of +//! `f:followOwlImports` (the cross-ledger materializer is single-graph +//! and does not walk `owl:imports`). + +#![cfg(feature = "native")] + +use crate::support::{genesis_ledger, normalize_rows}; +use fluree_db_api::{ApiError, FlureeBuilder}; +use serde_json::json; + +fn config_iri(ledger_id: &str) -> String { + format!("urn:fluree:{ledger_id}#config") +} + +/// Seed model ledger M with a subclass axiom, optionally inside a +/// named graph. +async fn seed_model(fluree: &fluree_db_api::Fluree, model_id: &str, graph_iri: Option<&str>) { + let model = genesis_ledger(fluree, model_id); + let axiom = "ex:Manager rdfs:subClassOf ex:Employee ."; + let body = match graph_iri { + Some(iri) => format!("GRAPH <{iri}> {{ {axiom} }}"), + None => axiom.to_string(), + }; + let trig = format!( + r" + @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . + @prefix ex: <http://example.org/> . + + {body} + " + ); + fluree + .stage_owned(model) + .upsert_turtle(&trig) + .execute() + .await + .expect("seed M ontology"); +} + +/// Write D's `#config` wiring `f:schemaSource` at M's graph, then +/// insert `ex:anita a ex:Manager` into D's default graph. 
+async fn seed_data( + fluree: &fluree_db_api::Fluree, + data_id: &str, + model_id: &str, + graph_selector: &str, + extra_reasoning_config: &str, +) { + let data = genesis_ledger(fluree, data_id); + let cfg = config_iri(data_id); + let cfg_trig = format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{cfg}> {{ + rdf:type f:LedgerConfig ; + f:reasoningDefaults . + f:schemaSource {extra_reasoning_config}. + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector {graph_selector} . + }} + " + ); + let r = fluree + .stage_owned(data) + .upsert_turtle(&cfg_trig) + .execute() + .await + .expect("seed D config with cross-ledger f:schemaSource"); + + let instances = json!({ + "@context": {"ex": "http://example.org/"}, + "@id": "ex:anita", + "@type": "ex:Manager" + }); + fluree + .insert(r.ledger, &instances) + .await + .expect("insert instance data into D"); +} + +fn employee_query() -> serde_json::Value { + json!({ + "@context": {"ex": "http://example.org/"}, + "select": "?x", + "where": {"@id": "?x", "@type": "ex:Employee"}, + "reasoning": "rdfs" + }) +} + +/// The core scenario: M owns `ex:Manager rdfs:subClassOf ex:Employee` +/// in a named ontology graph; D holds `ex:anita a ex:Manager`. A +/// reasoning query on D for `?x a ex:Employee` must entail anita via +/// M's hierarchy — resolved cross-ledger, nothing copied into D. 
+#[tokio::test] +async fn data_ledger_reasoning_pulls_schema_from_model_ledger() { + let fluree = FlureeBuilder::memory().build_memory(); + let model_id = "test/cross-ledger-schema/model:main"; + let data_id = "test/cross-ledger-schema/data:main"; + let ontology_iri = "http://example.org/ontology/core"; + + seed_model(&fluree, model_id, Some(ontology_iri)).await; + seed_data(&fluree, data_id, model_id, &format!("<{ontology_iri}>"), "").await; + + let view = fluree.db(data_id).await.expect("load D with config"); + let data = fluree.ledger(data_id).await.expect("reload D ledger"); + let rows = fluree + .query(&view, &employee_query()) + .await + .expect("query D with cross-ledger schema") + .to_jsonld(&data.snapshot) + .expect("to_jsonld"); + let results = normalize_rows(&rows); + + assert!( + results.contains(&json!("ex:anita")), + "M's subclass axiom (resolved cross-ledger) must entail anita \ + as an Employee on D; got: {results:?}" + ); +} + +/// Same scenario with `f:graphSelector f:defaultGraph` — the axiom +/// lives in M's default graph. 
+#[tokio::test] +async fn cross_ledger_schema_with_default_graph_selector() { + let fluree = FlureeBuilder::memory().build_memory(); + let model_id = "test/cross-ledger-schema/model-default:main"; + let data_id = "test/cross-ledger-schema/data-default:main"; + + seed_model(&fluree, model_id, None).await; + seed_data(&fluree, data_id, model_id, "f:defaultGraph", "").await; + + let view = fluree.db(data_id).await.expect("load D with config"); + let data = fluree.ledger(data_id).await.expect("reload D ledger"); + let rows = fluree + .query(&view, &employee_query()) + .await + .expect("query D with cross-ledger schema (default graph)") + .to_jsonld(&data.snapshot) + .expect("to_jsonld"); + let results = normalize_rows(&rows); + + assert!( + results.contains(&json!("ex:anita")), + "M's default-graph subclass axiom must entail anita as an \ + Employee on D; got: {results:?}" + ); +} + +/// `f:followOwlImports true` combined with a cross-ledger +/// `f:schemaSource` must fail closed: the cross-ledger materializer +/// resolves a single graph and does not walk `owl:imports`, so +/// accepting the flag would silently drop the import closure from the +/// reasoning view. 
+#[tokio::test] +async fn cross_ledger_schema_with_follow_owl_imports_fails_closed() { + let fluree = FlureeBuilder::memory().build_memory(); + let model_id = "test/cross-ledger-schema/model-follow:main"; + let data_id = "test/cross-ledger-schema/data-follow:main"; + let ontology_iri = "http://example.org/ontology/core"; + + seed_model(&fluree, model_id, Some(ontology_iri)).await; + seed_data( + &fluree, + data_id, + model_id, + &format!("<{ontology_iri}>"), + ";\n f:followOwlImports true ", + ) + .await; + + let view = fluree.db(data_id).await.expect("load D with config"); + let err = fluree + .query(&view, &employee_query()) + .await + .expect_err("followOwlImports + cross-ledger schemaSource must be rejected"); + match err { + ApiError::OntologyImport(msg) => { + assert!( + msg.contains("followOwlImports"), + "error should name the unsupported flag: {msg}" + ); + } + other => panic!("expected OntologyImport, got {other:?}"), + } +} From a5c29743fd4e9d99ac522dad16566b68f20a3fc3 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 19:31:38 -0400 Subject: [PATCH 04/23] feat(reasoning): dataset-path reasoning parity + permissive reasoningModes parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dataset (multi-ledger) queries now engage the full reasoning surface the single-ledger path has. build_executable_for_view's attach block is extracted into a shared apply_reasoning_to_executable choke point, and build_executable_for_dataset routes through it with the dataset's primary view — picking up cross-ledger f:schemaSource, local and cross-ledger f:rulesSource, config-graph datalog restrictions, and inline opts.ontology layering, all of which the dataset path previously lacked. Its same-ledger-only schema resolver is deleted. Dataset execution scans through per-graph GraphRefs rather than the top-level context overlay, so derived facts (datalog / OWL2-RL) were invisible even once rules attached. 
The executor now splices the reasoning overlay into every ref matching the primary execution view (same ledger, graph id, and t) via DataSet::with_overlay_for_graph. f:reasoningModes previously parsed only direct IRI objects (f:reasoningModes f:rdfs); string literals and RDF collections — both shapes the docs themselves show — silently produced no modes, so config-declared reasoning never engaged and queries returned non-entailed results with no error. A dedicated reader now accepts all three shapes (IRI-only parsing is kept for f:policyClass and f:allowedIdentities, where a stray string must not widen policy), and parse_single gains the hyphenated owl2-rl / owl2-ql aliases the docs use. The ontology_imports rejection message no longer names dataset queries as an unsupported context; f:ontologyImportMap entries remain the only same-ledger-only refs. --- docs/design/ontology-imports.md | 21 +-- docs/ledger-config/setting-groups.md | 2 +- fluree-db-api/src/config_resolver.rs | 124 ++++++++++++++++-- fluree-db-api/src/ontology_imports.rs | 5 +- fluree-db-api/src/view/dataset_query.rs | 91 ++----------- fluree-db-api/src/view/query.rs | 36 ++++- fluree-db-api/tests/it_config_graph.rs | 111 ++++++++++++++++ fluree-db-api/tests/it_rules_cross_ledger.rs | 114 ++++++++++++++++ fluree-db-api/tests/it_schema_cross_ledger.rs | 104 +++++++++++++++ fluree-db-query/src/dataset.rs | 41 ++++++ fluree-db-query/src/execute/runner.rs | 16 ++- fluree-db-query/src/ir/reasoning.rs | 4 +- 12 files changed, 564 insertions(+), 105 deletions(-) diff --git a/docs/design/ontology-imports.md b/docs/design/ontology-imports.md index 7fa68d31a1..77acf5bce7 100644 --- a/docs/design/ontology-imports.md +++ b/docs/design/ontology-imports.md @@ -84,10 +84,12 @@ cross-ledger resolver (`ArtifactKind::SchemaClosure`), which projects the model ledger's whitelisted axioms onto the data ledger's snapshot at the model ledger's **current head** (as-of-now, matching `f:policySource` / `f:shapesSource` 
semantics — not the query's `to_t`). -The cross-ledger materializer is single-graph: combining it with -`f:followOwlImports true` fails closed with `ApiError::OntologyImport` -rather than silently dropping the import closure. See -`cross-ledger-model-enforcement.md`. +Both the single-ledger view path and the multi-ledger dataset path +(where reasoning is governed by the dataset's primary view) share this +dispatch via `apply_reasoning_to_executable`. The cross-ledger +materializer is single-graph: combining it with `f:followOwlImports +true` fails closed with `ApiError::OntologyImport` rather than silently +dropping the import closure. See `cross-ledger-model-enforcement.md`. ## Resolution algorithm @@ -214,16 +216,19 @@ so broken ontology references surface early. Sources of this error: `f:rollbackGuard`. The local bundle is resolved at the query's single `to_t`, and accepting these fields silently would create a gap between declared intent and actual behavior. (A cross-ledger `f:schemaSource` - is legal but never reaches this module — see above.) + is legal on both the single-ledger and dataset query paths but never + reaches this module — see above.) - `f:followOwlImports true` combined with a cross-ledger `f:schemaSource` (raised by `view/query.rs` before dispatch — the cross-ledger materializer does not walk `owl:imports`). ## Wiring at query time -`Fluree::query(&db, ...)` (and the dataset-query counterpart) call -`build_executable_for_view` → `attach_schema_bundle` on every query. The -attach step: +`Fluree::query(&db, ...)` calls `build_executable_for_view`, and the +dataset counterpart calls `build_executable_for_dataset` with the +dataset's primary view; both route through the shared +`apply_reasoning_to_executable` → `attach_schema_bundle` on every query. +The attach step: 1. Reads `db.resolved_config().reasoning`. If there is no `f:schemaSource`, returns immediately — the legacy default-graph path applies unchanged. 
diff --git a/docs/ledger-config/setting-groups.md b/docs/ledger-config/setting-groups.md index 6e6e6207c7..bb51ca6cd8 100644 --- a/docs/ledger-config/setting-groups.md +++ b/docs/ledger-config/setting-groups.md @@ -127,7 +127,7 @@ Controls OWL/RDFS reasoning applied at query time. | Field | Type | Default | Description | |-------|------|---------|-------------| -| `f:reasoningModes` | IRI or list | (none) | Reasoning modes: `f:RDFS`, `f:OWL2QL`, `f:OWL2RL`, `f:Datalog` | +| `f:reasoningModes` | IRI, string, or list | (none) | Reasoning modes: `f:RDFS`, `f:OWL2QL`, `f:OWL2RL`, `f:Datalog`. Accepts repeated IRI objects (`f:reasoningModes f:rdfs, f:datalog`), string literals (`"rdfs"`), or an RDF collection of either (`( "rdfs" "datalog" )`); mode names are case-insensitive | | `f:schemaSource` | `f:GraphRef` | (none) | Graph containing schema triples (`rdfs:subClassOf`, etc.) | | `f:reasoningMaxFacts` | integer | 1,000,000 | OWL2-RL materialization budget: max derived facts before the closure is capped | | `f:reasoningMaxSeconds` | integer | 30 | OWL2-RL materialization budget: max wall-clock seconds before the closure is capped | diff --git a/fluree-db-api/src/config_resolver.rs b/fluree-db-api/src/config_resolver.rs index 0a503a3829..b23acd63e7 100644 --- a/fluree-db-api/src/config_resolver.rs +++ b/fluree-db-api/src/config_resolver.rs @@ -847,6 +847,121 @@ async fn read_iri_list_field( } } +/// Read `f:reasoningModes`, accepting every shape users naturally write: +/// +/// - repeated IRI objects — `f:reasoningModes f:rdfs, f:datalog` +/// - repeated string literals — `f:reasoningModes "rdfs"` +/// - an RDF collection of either — `f:reasoningModes ( "rdfs" "datalog" )` +/// +/// `ReasoningModes::from_mode_strings` downstream handles both full IRIs +/// and bare mode names, so all shapes normalize to the same modes. 
+/// +/// This is deliberately more permissive than [`read_iri_list_field`] +/// (which stays IRI-only for `f:policyClass` / `f:allowedIdentities`, +/// where a stray string must not widen policy). Before this reader, +/// string-literal and collection shapes silently produced no modes — +/// config-declared reasoning never engaged and queries returned +/// non-entailed results with no error. +async fn read_reasoning_modes_field( + snapshot: &LedgerSnapshot, + overlay: &dyn OverlayProvider, + to_t: i64, + subject_sid: &Sid, +) -> Result<Option<Vec<String>>> { + let pred_sid = match try_encode(snapshot, config_iris::REASONING_MODES) { + Some(sid) => sid, + None => return Ok(None), + }; + + let bindings = query_config_predicate(snapshot, overlay, to_t, subject_sid, &pred_sid).await?; + let mut values = Vec::new(); + for binding in bindings { + if let Some((fluree_db_core::FlakeValue::String(s), _)) = binding.as_lit() { + values.push(s.to_string()); + continue; + } + let Some(sid) = binding.as_sid() else { + continue; + }; + // An object ref is either a mode IRI or the head of an RDF + // collection. Distinguish by probing `rdf:first`. + match read_rdf_list_values(snapshot, overlay, to_t, sid).await? { + Some(items) => values.extend(items), + None => { + if let Some(iri) = snapshot.decode_sid(sid) { + values.push(iri); + } + } + } + } + + if values.is_empty() { + Ok(None) + } else { + Ok(Some(values)) + } +} + +/// Walk an RDF collection (`rdf:first`/`rdf:rest`.. `rdf:nil`) starting at +/// `head`, returning each element as a string (string literals verbatim, +/// IRI refs decoded). Returns `Ok(None)` when `head` is not a list node +/// (no `rdf:first`), so callers can fall back to treating it as a plain +/// IRI value. 
+async fn read_rdf_list_values( + snapshot: &LedgerSnapshot, + overlay: &dyn OverlayProvider, + to_t: i64, + head: &Sid, +) -> Result<Option<Vec<String>>> { + use fluree_vocab::rdf; + + let (Some(first_sid), Some(rest_sid)) = ( + try_encode(snapshot, rdf::FIRST), + try_encode(snapshot, rdf::REST), + ) else { + return Ok(None); + }; + + let mut node = head.clone(); + let mut values = Vec::new(); + let mut is_list = false; + // Bounded walk: a malformed cyclic list must not spin forever. + for _ in 0..MAX_RDF_LIST_LEN { + let firsts = query_config_predicate(snapshot, overlay, to_t, &node, &first_sid).await?; + if firsts.is_empty() { + return if is_list { Ok(Some(values)) } else { Ok(None) }; + } + is_list = true; + for binding in &firsts { + if let Some((fluree_db_core::FlakeValue::String(s), _)) = binding.as_lit() { + values.push(s.to_string()); + } else if let Some(sid) = binding.as_sid() { + if let Some(iri) = snapshot.decode_sid(sid) { + values.push(iri); + } + } + } + let rests = query_config_predicate(snapshot, overlay, to_t, &node, &rest_sid).await?; + let Some(next) = rests.iter().find_map(|b| b.as_sid().cloned()) else { + return Ok(Some(values)); + }; + if snapshot.decode_sid(&next).as_deref() == Some(rdf::NIL) { + return Ok(Some(values)); + } + node = next; + } + tracing::warn!( + "f:reasoningModes RDF collection exceeded {MAX_RDF_LIST_LEN} entries \ + (malformed or cyclic list?); truncating" + ); + Ok(Some(values)) +} + +/// Upper bound on RDF-collection length when walking `f:reasoningModes` +/// lists; there are only a handful of reasoning modes, so anything near +/// this is a malformed (likely cyclic) list. +const MAX_RDF_LIST_LEN: usize = 64; + /// Read an integer field from a subject at the config graph. 
async fn read_i64_field( snapshot: &LedgerSnapshot, @@ -995,14 +1110,7 @@ async fn read_reasoning_defaults( None => return Ok(None), }; - let modes = read_iri_list_field( - snapshot, - overlay, - to_t, - &group_sid, - config_iris::REASONING_MODES, - ) - .await?; + let modes = read_reasoning_modes_field(snapshot, overlay, to_t, &group_sid).await?; let schema_source = read_graph_source_ref( snapshot, overlay, diff --git a/fluree-db-api/src/ontology_imports.rs b/fluree-db-api/src/ontology_imports.rs index efc6524acc..7846b8bfc2 100644 --- a/fluree-db-api/src/ontology_imports.rs +++ b/fluree-db-api/src/ontology_imports.rs @@ -164,9 +164,8 @@ fn resolve_local_graph_source( "this schema/import source must resolve within the current \ ledger (ref targets ledger '{ledger}', current ledger is \ '{}'). Cross-ledger refs are supported only for the \ - top-level `f:schemaSource` of a single-ledger query — not \ - for `f:ontologyImportMap` entries or multi-ledger dataset \ - queries.", + top-level `f:schemaSource`, not for `f:ontologyImportMap` \ + entries.", snapshot.ledger_id ))); } diff --git a/fluree-db-api/src/view/dataset_query.rs b/fluree-db-api/src/view/dataset_query.rs index 7c3d558ebb..21dc24da46 100644 --- a/fluree-db-api/src/view/dataset_query.rs +++ b/fluree-db-api/src/view/dataset_query.rs @@ -492,9 +492,13 @@ impl Fluree { /// Build an ExecutableQuery for dataset queries. /// - /// Applies reasoning from the primary view if set. When reasoning config - /// on the primary view declares `f:schemaSource`, resolves the schema - /// bundle closure and attaches it to `executable.reasoning.schema_bundle`. + /// Reasoning is governed by the dataset's primary view: the shared + /// `apply_reasoning_to_executable` choke point applies the same surface + /// as the single-ledger path — mode precedence, config budget, datalog + /// restrictions, local and cross-ledger `f:rulesSource`, and the + /// `f:schemaSource` bundle (local, cross-ledger, and inline ontology). 
+ /// The query-time rule policy gate uses `dataset.any_non_root_policy()` + /// so a restricted policy on *any* source strips caller-supplied rules. pub(crate) async fn build_executable_for_dataset( &self, dataset: &DataSetDb, @@ -502,44 +506,14 @@ impl Fluree { ) -> Result<ExecutableQuery> { let mut executable = prepare_for_execution(parsed); - // Apply reasoning from primary view if set if let Some(primary) = dataset.primary() { - if primary.reasoning().is_some() { - let query_has_reasoning = executable.reasoning.modes.has_any_enabled(); - let query_disabled = executable.reasoning.modes.is_disabled(); - - // Mode replacement keeps the query's budget — see - // `build_executable_for_view` for the rationale. - if let Some(effective) = - primary.effective_reasoning(query_has_reasoning, query_disabled) - { - let (max_facts, max_seconds) = ( - executable.reasoning.modes.max_facts, - executable.reasoning.modes.max_seconds, - ); - executable.reasoning.modes = effective.clone(); - executable.reasoning.modes.max_facts = max_facts; - executable.reasoning.modes.max_seconds = max_seconds; - } - } - - // Ledger-config materialization budget — after mode precedence, - // same rationale as `build_executable_for_view`. - if let Some(budget) = primary.config_reasoning_budget() { - budget.apply(&mut executable.reasoning.modes); - } - - // Resolve schema bundle against the primary view's ledger - // (same-ledger only). Mirrors the single-view path in - // `view/query.rs::attach_schema_bundle`; see that method for the - // reasoning-disabled short-circuit rationale. - Self::attach_dataset_schema_bundle(primary, &mut executable).await?; - } - - // Query-time datalog rule injection is admin-only: if any source of the - // dataset carries a non-root view policy, drop caller-supplied rules. - // See `view/query.rs::build_executable_for_view` for the rationale. 
- if dataset.any_non_root_policy() && !executable.reasoning.modes.rules.is_empty() { + self.apply_reasoning_to_executable( + primary, + &mut executable, + dataset.any_non_root_policy(), + ) + .await?; + } else if dataset.any_non_root_policy() && !executable.reasoning.modes.rules.is_empty() { tracing::debug!("stripping query-time datalog rules under non-root view policy"); executable.reasoning.modes.rules.clear(); } @@ -547,43 +521,6 @@ impl Fluree { Ok(executable) } - async fn attach_dataset_schema_bundle( - primary: &crate::view::GraphDb, - executable: &mut ExecutableQuery, - ) -> Result<()> { - if executable.reasoning.modes.is_disabled() { - return Ok(()); - } - let Some(resolved) = primary.resolved_config() else { - return Ok(()); - }; - let Some(reasoning) = resolved.reasoning.as_ref() else { - return Ok(()); - }; - if reasoning.schema_source.is_none() { - return Ok(()); - } - let db_ref = primary.as_graph_db_ref(); - let Some(bundle) = crate::ontology_imports::resolve_schema_bundle( - db_ref.snapshot, - db_ref.overlay, - db_ref.t, - reasoning, - ) - .await? - else { - return Ok(()); - }; - let flakes = crate::ontology_imports::get_or_build_schema_bundle_flakes( - db_ref.snapshot, - db_ref.overlay, - &bundle, - ) - .await?; - executable.reasoning.schema_bundle = Some(flakes); - Ok(()) - } - /// Execute against dataset (multi-ledger). /// /// Calls `prepare_execution` + `execute_prepared` directly so that diff --git a/fluree-db-api/src/view/query.rs b/fluree-db-api/src/view/query.rs index b75574bc6f..b514061ec4 100644 --- a/fluree-db-api/src/view/query.rs +++ b/fluree-db-api/src/view/query.rs @@ -591,6 +591,33 @@ impl Fluree { // Start with the standard executable let mut executable = prepare_for_execution(parsed); + self.apply_reasoning_to_executable(db, &mut executable, !db.is_root()) + .await?; + + Ok(executable) + } + + /// Apply a view's reasoning configuration to an executable. 
+ /// + /// Single choke point shared by the single-ledger path + /// ([`build_executable_for_view`](Self::build_executable_for_view)) and + /// the dataset path (`build_executable_for_dataset`, which passes the + /// dataset's primary view) so both engage the same reasoning surface: + /// mode precedence, the config materialization budget, config-graph + /// datalog restrictions, the query-time rule policy gate, local and + /// cross-ledger `f:rulesSource`, and the `f:schemaSource` bundle + /// (local, cross-ledger, and inline `opts.ontology`). + /// + /// `strip_query_rules` is true when a non-root view policy applies — + /// `!db.is_root()` for a single view, `dataset.any_non_root_policy()` + /// for a dataset. It strips *query-supplied* rules only; config-sourced + /// rules attach afterwards because they're admin-controlled. + pub(crate) async fn apply_reasoning_to_executable( + &self, + db: &GraphDb, + executable: &mut ExecutableQuery, + strip_query_rules: bool, + ) -> Result<()> { // Apply wrapper reasoning if applicable if db.reasoning().is_some() { // Check query's reasoning state @@ -639,7 +666,7 @@ impl Fluree { // not its provenance, and a caller-invented predicate can't be // pre-denied. DB-stored rules and OWL reasoning are admin-controlled and // unaffected. See docs/security/policy-in-queries.md (Reasoning). - if !db.is_root() && !executable.reasoning.modes.rules.is_empty() { + if strip_query_rules && !executable.reasoning.modes.rules.is_empty() { tracing::debug!("stripping query-time datalog rules under non-root view policy"); executable.reasoning.modes.rules.clear(); } @@ -674,14 +701,13 @@ impl Fluree { // `executable.reasoning.rules` so they pass through the // existing query-time rule code path. Same-ledger references // are handled above via `rules_source_g_id`. 
- self.attach_cross_ledger_rules(db, &mut executable, &mut ctx) + self.attach_cross_ledger_rules(db, executable, &mut ctx) .await?; // Resolve `f:schemaSource` + `owl:imports` closure, if configured. - self.attach_schema_bundle(db, &mut executable, &mut ctx) - .await?; + self.attach_schema_bundle(db, executable, &mut ctx).await?; - Ok(executable) + Ok(()) } /// If the resolved datalog config carries a cross-ledger diff --git a/fluree-db-api/tests/it_config_graph.rs b/fluree-db-api/tests/it_config_graph.rs index 497ed513df..0d30782459 100644 --- a/fluree-db-api/tests/it_config_graph.rs +++ b/fluree-db-api/tests/it_config_graph.rs @@ -3061,3 +3061,114 @@ async fn constraints_source_cross_ledger_fails_closed() { "expected cross-ledger rejection, got: {msg}" ); } + +// ============================================================================= +// reasoningModes value shapes: string literal and RDF collection +// ============================================================================= +// +// `f:reasoningModes f:rdfs` (IRI objects) is covered by +// `reasoning_defaults_apply` above. Users also naturally write string +// literals and RDF collections — both previously parsed to *no modes* +// silently, so config-declared reasoning never engaged and queries +// returned non-entailed results with no error. These pin the permissive +// reader. + +/// Shared body: seed subProperty ontology + data, write the given +/// `f:reasoningModes` statement(s) into config, query WITHOUT a +/// "reasoning" key, and expect the RDFS expansion to fire. +/// +/// `modes_stmts` is one or more full Turtle statements on +/// `` (plus any list-node triples they need). 
+async fn assert_reasoning_modes_shape_engages(ledger_id: &str, modes_stmts: &str) { + let fluree = FlureeBuilder::memory().build_memory(); + let ledger = genesis_ledger(&fluree, ledger_id); + + let result = fluree + .insert( + ledger, + &json!({ + "@context": { + "ex": "http://example.org/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#" + }, + "@graph": [ + {"@id": "ex:childName", "rdfs:subPropertyOf": {"@id": "ex:name"}}, + {"@id": "ex:alice", "ex:childName": "Alice"} + ] + }), + ) + .await + .unwrap(); + + let config_iri = config_graph_iri(ledger_id); + let trig = format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{config_iri}> {{ + rdf:type f:LedgerConfig . + f:reasoningDefaults . + {modes_stmts} + }} + " + ); + fluree + .stage_owned(result.ledger) + .upsert_turtle(&trig) + .execute() + .await + .expect("config write"); + + let query = json!({ + "@context": {"ex": "http://example.org/"}, + "from": ledger_id, + "select": "?v", + "where": {"@id": "ex:alice", "ex:name": "?v"} + }); + let result = fluree.query_connection(&query).await.expect("query"); + let ledger_state = fluree.ledger(ledger_id).await.expect("load ledger"); + let jsonld = result.to_jsonld(&ledger_state.snapshot).expect("to_jsonld"); + + assert_eq!( + jsonld, + json!(["Alice"]), + "config `{modes_stmts}` should engage RDFS reasoning" + ); +} + +#[tokio::test] +async fn reasoning_modes_string_literal_engages() { + assert_reasoning_modes_shape_engages( + "it/config-reasoning-modes-string:main", + r#" f:reasoningModes "rdfs" ."#, + ) + .await; +} + +// The two collection tests write the `rdf:first`/`rdf:rest` structure +// explicitly (a JSON-LD `@list` or any list-producing ingest lands the +// same triples) because the Turtle parser does not support the `( .. )` +// collection shorthand. + +#[tokio::test] +async fn reasoning_modes_rdf_collection_engages() { + assert_reasoning_modes_shape_engages( + "it/config-reasoning-modes-collection:main", + r#" f:reasoningModes . 
+ rdf:first "rdfs" ; + rdf:rest rdf:nil ."#, + ) + .await; +} + +#[tokio::test] +async fn reasoning_modes_collection_of_iris_engages() { + assert_reasoning_modes_shape_engages( + "it/config-reasoning-modes-iri-collection:main", + r" f:reasoningModes . + rdf:first f:rdfs ; + rdf:rest rdf:nil .", + ) + .await; +} diff --git a/fluree-db-api/tests/it_rules_cross_ledger.rs b/fluree-db-api/tests/it_rules_cross_ledger.rs index d941ffc540..b31e1a43e1 100644 --- a/fluree-db-api/tests/it_rules_cross_ledger.rs +++ b/fluree-db-api/tests/it_rules_cross_ledger.rs @@ -373,3 +373,117 @@ async fn missing_model_ledger_surfaces_cross_ledger_error() { "expected ApiError::CrossLedger for missing model ledger, got: {err:?}" ); } + +/// Dataset-path parity: cross-ledger `f:rulesSource` must engage when +/// the query takes the multi-ledger dataset path, which previously +/// attached neither `rules_source_g_id` nor cross-ledger rules. The +/// second source is an unrelated ledger — NOT M — so the rule can only +/// arrive via the cross-ledger resolver on D's config. 
+#[tokio::test] +async fn dataset_query_pulls_rules_from_model_ledger() { + let fluree = FlureeBuilder::memory().build_memory(); + + let model_id = "test/cross-ledger-rules/model-ds:main"; + let rules_graph_iri = "http://example.org/governance/rules"; + let model = genesis_ledger(&fluree, model_id); + + let rule_tx = json!({ + "@context": { + "ex": "http://example.org/", + "f": "https://ns.flur.ee/db#" + }, + "insert": [ + ["graph", rules_graph_iri, { + "@id": "ex:grandparentRule", + "f:rule": { + "@type": "@json", + "@value": { + "@context": {"ex": "http://example.org/"}, + "where": {"@id": "?p", "ex:parent": {"ex:parent": "?g"}}, + "insert": {"@id": "?p", "ex:grandparent": {"@id": "?g"}} + } + } + }] + ] + }); + fluree + .update(model, &rule_tx) + .await + .expect("seed M with grandparent rule"); + + let data_id = "test/cross-ledger-rules/data-ds:main"; + let data = genesis_ledger(&fluree, data_id); + + let cfg = config_iri(data_id); + let cfg_trig = format!( + r" + @prefix f: . + @prefix rdf: . + + GRAPH <{cfg}> {{ + rdf:type f:LedgerConfig . + f:datalogDefaults . + f:datalogEnabled true . + f:rulesSource . + rdf:type f:GraphRef ; + f:graphSource . + f:ledger <{model_id}> ; + f:graphSelector <{rules_graph_iri}> . 
+ }} + " + ); + let r = fluree + .stage_owned(data) + .upsert_turtle(&cfg_trig) + .execute() + .await + .expect("seed D config with cross-ledger f:rulesSource"); + let data = r.ledger; + + let family = json!({ + "@context": {"ex": "http://example.org/"}, + "@graph": [ + {"@id": "ex:alice", "ex:parent": {"@id": "ex:bob"}}, + {"@id": "ex:bob", "ex:parent": {"@id": "ex:charlie"}} + ] + }); + fluree + .insert(data, &family) + .await + .expect("insert family data into D"); + + let other_id = "test/cross-ledger-rules/other-ds:main"; + let other = genesis_ledger(&fluree, other_id); + fluree + .insert( + other, + &json!({ + "@context": {"ex": "http://example.org/"}, + "@id": "ex:unrelated", + "ex:note": "no rules here" + }), + ) + .await + .expect("seed unrelated ledger"); + + let q = json!({ + "@context": {"ex": "http://example.org/"}, + "from": [data_id, other_id], + "select": "?grandparent", + "where": {"@id": "ex:alice", "ex:grandparent": "?grandparent"}, + "reasoning": "datalog" + }); + let result = fluree + .query_connection(&q) + .await + .expect("dataset query with cross-ledger rule"); + let data = fluree.ledger(data_id).await.expect("reload D ledger"); + let rows = result.to_jsonld(&data.snapshot).expect("to_jsonld"); + let results = normalize_rows(&rows); + + assert!( + results.contains(&json!("ex:charlie")), + "M's grandparent rule (resolved cross-ledger) must derive \ + charlie on the dataset path; got: {results:?}" + ); +} diff --git a/fluree-db-api/tests/it_schema_cross_ledger.rs b/fluree-db-api/tests/it_schema_cross_ledger.rs index cd122618b8..7514da1aa2 100644 --- a/fluree-db-api/tests/it_schema_cross_ledger.rs +++ b/fluree-db-api/tests/it_schema_cross_ledger.rs @@ -200,3 +200,107 @@ async fn cross_ledger_schema_with_follow_owl_imports_fails_closed() { other => panic!("expected OntologyImport, got {other:?}"), } } + +/// Dataset-path parity: the same cross-ledger entailment must work when +/// the query takes the multi-ledger dataset path (`from` with two 
+/// sources → `DataSetDb` → `build_executable_for_dataset`), which +/// previously resolved `f:schemaSource` same-ledger-only and errored on +/// a cross-ledger ref. The second source is an unrelated ledger — NOT M +/// — so the hierarchy is reachable only through the cross-ledger schema +/// bundle, not via the dataset union. +#[tokio::test] +async fn dataset_query_pulls_schema_from_model_ledger() { + let fluree = FlureeBuilder::memory().build_memory(); + let model_id = "test/cross-ledger-schema/model-ds:main"; + let data_id = "test/cross-ledger-schema/data-ds:main"; + let other_id = "test/cross-ledger-schema/other-ds:main"; + let ontology_iri = "http://example.org/ontology/core"; + + seed_model(&fluree, model_id, Some(ontology_iri)).await; + seed_data(&fluree, data_id, model_id, &format!("<{ontology_iri}>"), "").await; + + let other = genesis_ledger(&fluree, other_id); + fluree + .insert( + other, + &json!({ + "@context": {"ex": "http://example.org/"}, + "@id": "ex:unrelated", + "ex:note": "no ontology here" + }), + ) + .await + .expect("seed unrelated ledger"); + + let q = json!({ + "@context": {"ex": "http://example.org/"}, + "from": [data_id, other_id], + "select": "?x", + "where": {"@id": "?x", "@type": "ex:Employee"}, + "reasoning": "rdfs" + }); + let result = fluree + .query_connection(&q) + .await + .expect("dataset query with cross-ledger schema"); + let data = fluree.ledger(data_id).await.expect("reload D ledger"); + let rows = result.to_jsonld(&data.snapshot).expect("to_jsonld"); + let results = normalize_rows(&rows); + + assert!( + results.contains(&json!("ex:anita")), + "M's subclass axiom must entail anita on the dataset path; \ + got: {results:?}" + ); +} + +/// Dataset-path parity for the fail-closed guard: `f:followOwlImports` +/// + cross-ledger `f:schemaSource` must reject on the dataset path +/// exactly as it does on the single-ledger path (shared choke point). 
+#[tokio::test] +async fn dataset_follow_owl_imports_fails_closed() { + let fluree = FlureeBuilder::memory().build_memory(); + let model_id = "test/cross-ledger-schema/model-ds-follow:main"; + let data_id = "test/cross-ledger-schema/data-ds-follow:main"; + let other_id = "test/cross-ledger-schema/other-ds-follow:main"; + let ontology_iri = "http://example.org/ontology/core"; + + seed_model(&fluree, model_id, Some(ontology_iri)).await; + seed_data( + &fluree, + data_id, + model_id, + &format!("<{ontology_iri}>"), + ";\n f:followOwlImports true ", + ) + .await; + let other = genesis_ledger(&fluree, other_id); + fluree + .insert( + other, + &json!({ + "@context": {"ex": "http://example.org/"}, + "@id": "ex:unrelated", + "ex:note": "no ontology here" + }), + ) + .await + .expect("seed unrelated ledger"); + + let q = json!({ + "@context": {"ex": "http://example.org/"}, + "from": [data_id, other_id], + "select": "?x", + "where": {"@id": "?x", "@type": "ex:Employee"}, + "reasoning": "rdfs" + }); + let err = fluree + .query_connection(&q) + .await + .expect_err("followOwlImports + cross-ledger schemaSource must reject on dataset path"); + let msg = err.to_string(); + assert!( + msg.contains("followOwlImports"), + "error should name the unsupported flag: {msg}" + ); +} diff --git a/fluree-db-query/src/dataset.rs b/fluree-db-query/src/dataset.rs index f58a27f751..dc6a49902f 100644 --- a/fluree-db-query/src/dataset.rs +++ b/fluree-db-query/src/dataset.rs @@ -219,6 +219,47 @@ impl<'a> DataSet<'a> { self.named_graphs.contains_key(iri) } + /// Copy of this dataset where every graph matching the primary + /// execution view — same ledger, graph id, and `to_t` — reads through + /// `overlay` instead of its original overlay reference. 
+ /// + /// Used by the executor to splice the reasoning derived-facts overlay + /// into dataset execution: dataset scans go through per-graph + /// [`GraphRef`]s, not the top-level context overlay, so without this + /// datalog / OWL2-RL derived facts (computed against the primary view) + /// are invisible to dataset queries. + pub fn with_overlay_for_graph<'b>( + &self, + ledger_id: &str, + g_id: GraphId, + to_t: i64, + overlay: &'b dyn OverlayProvider, + ) -> DataSet<'b> + where + 'a: 'b, + { + let patch = |graph: &GraphRef<'a>| -> GraphRef<'b> { + let matches = + graph.ledger_id.as_ref() == ledger_id && graph.g_id == g_id && graph.to_t == to_t; + GraphRef { + snapshot: graph.snapshot, + g_id: graph.g_id, + overlay: if matches { overlay } else { graph.overlay }, + to_t: graph.to_t, + ledger_id: Arc::clone(&graph.ledger_id), + policy_enforcer: graph.policy_enforcer.clone(), + } + }; + DataSet { + default_graphs: self.default_graphs.iter().map(&patch).collect(), + named_graphs: self + .named_graphs + .iter() + .map(|(iri, g)| (Arc::clone(iri), patch(g))) + .collect(), + } + } + /// True when any constituent graph (default or named) enforces a non-root /// view policy. /// diff --git a/fluree-db-query/src/execute/runner.rs b/fluree-db-query/src/execute/runner.rs index 7302b28faa..a398dc6ff1 100644 --- a/fluree-db-query/src/execute/runner.rs +++ b/fluree-db-query/src/execute/runner.rs @@ -740,6 +740,20 @@ async fn execute_prepared_into<'a, S: BatchSink>( .map(|o| o as &dyn fluree_db_core::OverlayProvider) .unwrap_or(db.overlay); + // Dataset execution scans through per-graph `GraphRef`s, not the + // top-level context overlay — splice the derived-facts overlay into + // every ref matching the primary execution view, or datalog / OWL2-RL + // derived facts are invisible to dataset queries. 
+ let patched_dataset = match (config.dataset, reasoning_overlay.as_ref()) { + (Some(ds), Some(_)) => Some(ds.with_overlay_for_graph( + db.snapshot.ledger_id.as_str(), + db.g_id, + db.t, + effective_overlay, + )), + _ => None, + }; + let mut ctx = ExecutionContext::with_time_and_overlay( db.snapshot, vars, @@ -782,7 +796,7 @@ async fn execute_prepared_into<'a, S: BatchSink>( if let Some(enforcer) = config.policy_enforcer { ctx = ctx.with_policy_enforcer(enforcer); } - if let Some(dataset) = config.dataset { + if let Some(dataset) = patched_dataset.as_ref().or(config.dataset) { ctx = ctx.with_dataset(dataset); } if let Some((r2rml_provider, r2rml_table_provider)) = config.r2rml { diff --git a/fluree-db-query/src/ir/reasoning.rs b/fluree-db-query/src/ir/reasoning.rs index 4f0ba74341..08b020e2cf 100644 --- a/fluree-db-query/src/ir/reasoning.rs +++ b/fluree-db-query/src/ir/reasoning.rs @@ -319,7 +319,7 @@ impl ReasoningModes { rdfs: true, ..Default::default() }), - "owl2ql" | "owl-ql" | "owlql" => Ok(Self { + "owl2ql" | "owl-ql" | "owlql" | "owl2-ql" => Ok(Self { rdfs: true, // OWL2-QL implies RDFS for subclass expansion owl2ql: true, ..Default::default() @@ -328,7 +328,7 @@ impl ReasoningModes { datalog: true, ..Default::default() }), - "owl2rl" | "owl-rl" | "owlrl" => Ok(Self { + "owl2rl" | "owl-rl" | "owlrl" | "owl2-rl" => Ok(Self { owl2rl: true, ..Default::default() }), From 8a1b7dadca3596d51d27d37b07461c8c9fa03a4e Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 07:33:02 -0400 Subject: [PATCH 05/23] feat(shacl): compile and evaluate sh:path property path expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh:path previously stored whatever the sh:path object pointed at as a bare predicate Sid. 
Complex paths (sh:inversePath, sequence lists, sh:alternativePath, sh:zeroOrMore/oneOrMore/zeroOrOnePath) arrived as blank-node refs and were scanned as if they were predicates — SPOT found nothing, so minCount fired spuriously and every other constraint passed silently. Add a PropertyPath AST (fluree-db-shacl/src/path.rs): - resolve_sh_path compiles the sh:path object, handling bare predicates, Turtle blank-node path expressions (rdf:first/rdf:rest sequences, sh:alternativePath lists, the transitive *Path predicates) and the JSON-LD @list encoding (ordered flakes via metadata index). - eval_path evaluates a path to its value-node set: forward = SPOT, inverse = OPST, sequence = chained frontier, alternative = union, */+/? = BFS closure (reflexive for * and ?), deduplicated. - validate_property_shape keeps the single-predicate SPOT fast path; sh:resultPath is populated only for single-predicate paths. The inverse of a composite path (^(p1/p2)) and any blank-node path that cannot be resolved are rejected at compile time with ShaclError::InvalidConstraint instead of misbehaving silently. Adds vocab constants for the path predicates, five integration tests (inverse, sequence, alternative, oneOrMore, unsupported-rejection), and documents the feature in docs/contributing/shacl-implementation.md. 
--- docs/contributing/shacl-implementation.md | 13 + fluree-db-api/src/shacl_tests.rs | 265 ++++++++++++++ fluree-db-shacl/src/compile.rs | 65 +++- fluree-db-shacl/src/lib.rs | 12 + fluree-db-shacl/src/path.rs | 423 ++++++++++++++++++++++ fluree-db-shacl/src/validate.rs | 54 ++- fluree-vocab/src/lib.rs | 19 + 7 files changed, 825 insertions(+), 26 deletions(-) create mode 100644 fluree-db-shacl/src/path.rs diff --git a/docs/contributing/shacl-implementation.md b/docs/contributing/shacl-implementation.md index cbc5b8b4bc..e6ab6f4af9 100644 --- a/docs/contributing/shacl-implementation.md +++ b/docs/contributing/shacl-implementation.md @@ -153,6 +153,18 @@ Why the live db check for steps 3/4 instead of precomputed staged-flake hints? T Cost is bounded by the number of predicate-targeted shapes in the cache, not by data size — typically 0–10 per ledger. +## Property paths (`sh:path`) + +`sh:path` is not limited to a single predicate. `fluree-db-shacl/src/path.rs` compiles it into a `PropertyPath` AST and evaluates that AST to the set of *value nodes* a path reaches — the same set a plain predicate would produce via one `SPOT` scan. + +Supported forms: single predicate, `sh:inversePath` (over a single predicate), sequence paths, `sh:alternativePath`, `sh:zeroOrMorePath`, `sh:oneOrMorePath`, `sh:zeroOrOnePath`, and nesting of these. + +- **Compile** (`resolve_sh_path`): walks the `sh:path` object. It handles all three RDF encodings a path can arrive in — a bare predicate IRI, a Turtle blank-node expression (`sh:inversePath`, an `rdf:first`/`rdf:rest` sequence list, `sh:alternativePath` → list, the transitive `*Path` predicates), and the JSON-LD `@list` encoding (multiple ordered `sh:path`/`sh:alternativePath` flakes carrying a list index in flake metadata `m.i`). 
+- **Evaluate** (`eval_path`): forward step = `SPOT` scan; inverse step = `OPST` scan; sequence = chained steps over the reference frontier; alternative = union; `*/+/?` = BFS closure over reference nodes (reflexive for `*` and `?`). Value nodes are deduplicated (SHACL value nodes are a set). +- **Validation fast path**: `validate_property_shape` keeps the single-predicate `SPOT` scan when `PropertyPath::as_predicate()` is `Some`, so simple paths pay nothing for the AST. `sh:resultPath` in a violation is only populated for single-predicate paths. + +The one form deliberately **not** supported is the inverse of a composite path (`^(p1/p2)`); `resolve_sh_path` rejects it at compile time with a `ShaclError::InvalidConstraint` rather than silently misbehaving. A blank-node path whose structure lives in another graph is left unresolved so a later graph pass can complete it — and, failing that, `finalize` rejects it rather than treating the blank node as a predicate (the old silent-misbehavior mode). 
+ ## Staged validation loop `validate_staged_nodes` in `fluree-db-transact/src/stage.rs`: @@ -295,6 +307,7 @@ This is how we guard against tests that pass trivially but don't actually exerci | What | File | |------|------| | Shape compilation (Turtle/JSON-LD → `CompiledShape`) | `fluree-db-shacl/src/compile.rs` | +| Property path compile + evaluation (`sh:path`) | `fluree-db-shacl/src/path.rs` | | Shape cache with target indexes | `fluree-db-shacl/src/cache.rs` | | Per-focus validation engine | `fluree-db-shacl/src/validate.rs` | | Per-constraint validators (pure values) | `fluree-db-shacl/src/constraints/` | diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 8fc3b03f3a..81a35e46b2 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2084,3 +2084,268 @@ async fn shacl_and_with_inline_anonymous_shapes() { .unwrap_err(); assert_shacl_violation(err, "sh:and"); } + +// =========================================================================== +// Property Paths (sh:path expressions) +// =========================================================================== + +/// Assert that an error is a SHACL *compile/shape* error (not a data violation) +/// whose message contains `expected`. +fn assert_shacl_shape_error(err: ApiError, expected: &str) { + match err { + ApiError::Transact(TransactError::Shacl(inner)) => { + let message = inner.to_string(); + assert!( + message.contains(expected), + "expected shape error to contain '{expected}', got: {message}" + ); + } + other => panic!("expected SHACL shape error, got {other:?}"), + } +} + +/// `sh:inversePath` — a Parent must be pointed at by at least one `ex:parent`. 
+#[tokio::test] +async fn shacl_inverse_path() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:ParentShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Parent"}, + "sh:property": [{ + "@id": "ex:pshape_children", + "sh:path": {"sh:inversePath": {"@id": "ex:parent"}}, + "sh:minCount": 1 + }] + }); + + // Valid: ex:mom is a Parent and ex:kid points at her via ex:parent. + let ledger_ok = fluree.create_ledger("shacl/inv-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:mom", "@type": "ex:Parent"}, + {"@id": "ex:kid", "ex:parent": {"@id": "ex:mom"}} + ] + }), + ) + .await + .expect("parent with an inbound ex:parent edge should pass"); + + // Invalid: ex:childless is a Parent nobody points at → 0 inverse values. + let ledger_bad = fluree.create_ledger("shacl/inv-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:childless", + "@type": "ex:Parent" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1"); +} + +/// Sequence path (`ex:knows / schema:name`) via the JSON-LD `@list` encoding. 
+#[tokio::test] +async fn shacl_sequence_path() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:KnowsNamedShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Socialite"}, + "sh:property": [{ + "@id": "ex:pshape_knows_name", + "sh:path": {"@list": [{"@id": "ex:knows"}, {"@id": "schema:name"}]}, + "sh:minCount": 1, + "sh:datatype": {"@id": "xsd:string"} + }] + }); + + // Valid: alice knows bob, bob has a (string) name → sequence reaches "Bob". + let ledger_ok = fluree.create_ledger("shacl/seq-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:alice", "@type": "ex:Socialite", "ex:knows": {"@id": "ex:bob"}}, + {"@id": "ex:bob", "schema:name": "Bob"} + ] + }), + ) + .await + .expect("sequence path reaching a named acquaintance should pass"); + + // Invalid: carol is a Socialite who knows nobody → sequence reaches nothing. + let ledger_bad = fluree.create_ledger("shacl/seq-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:carol", + "@type": "ex:Socialite" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1"); +} + +/// Alternative path (`ex:email | ex:altEmail`) — a contact reached via *either* +/// branch satisfies the shape. 
+#[tokio::test] +async fn shacl_alternative_path() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:ContactShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Contact"}, + "sh:property": [{ + "@id": "ex:pshape_any_email", + "sh:path": {"sh:alternativePath": {"@list": [{"@id": "ex:email"}, {"@id": "ex:altEmail"}]}}, + "sh:minCount": 1 + }] + }); + + // Valid: dave has only ex:altEmail — the second branch must be evaluated. + let ledger_ok = fluree.create_ledger("shacl/alt-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:dave", + "@type": "ex:Contact", + "ex:altEmail": "dave@example.org" + }), + ) + .await + .expect("alternative path reaching a value via the second branch should pass"); + + // Invalid: eve has neither email predicate. + let ledger_bad = fluree.create_ledger("shacl/alt-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:eve", + "@type": "ex:Contact" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1"); +} + +/// Transitive path (`ex:parent+`, `sh:oneOrMorePath`) reaches all ancestors. +#[tokio::test] +async fn shacl_one_or_more_path() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + // ex:child must have at least 2 ancestors reachable through ex:parent+. 
+ let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:AncestryShape", + "@type": "sh:NodeShape", + "sh:targetNode": {"@id": "ex:child"}, + "sh:property": [{ + "@id": "ex:pshape_ancestors", + "sh:path": {"sh:oneOrMorePath": {"@id": "ex:parent"}}, + "sh:minCount": 2 + }] + }); + + // Valid: child → mom → grandma gives 2 transitive ancestors. + let ledger_ok = fluree.create_ledger("shacl/oom-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:child", "ex:parent": {"@id": "ex:mom"}}, + {"@id": "ex:mom", "ex:parent": {"@id": "ex:grandma"}}, + {"@id": "ex:grandma"} + ] + }), + ) + .await + .expect("two-hop ancestry should satisfy minCount 2"); + + // Invalid: child has a single (direct) parent only. + let ledger_bad = fluree.create_ledger("shacl/oom-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:child", "ex:parent": {"@id": "ex:mom"}}, + {"@id": "ex:mom"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 2"); +} + +/// An unsupported path form — the inverse of a composite path (`^(ex:a+)`) — +/// must be rejected loudly at shape-compile time, not silently misbehave. 
+#[tokio::test] +async fn shacl_unsupported_path_rejected() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:BadPathShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Thing"}, + "sh:property": [{ + "@id": "ex:pshape_bad", + "sh:path": {"sh:inversePath": {"sh:oneOrMorePath": {"@id": "ex:a"}}}, + "sh:minCount": 1 + }] + }); + + let ledger = fluree.create_ledger("shacl/badpath:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + // Compilation runs when a targetable instance triggers validation; the + // unsupported path must surface as a shape error rather than pass silently. + let err = fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:thing1", + "@type": "ex:Thing" + }), + ) + .await + .unwrap_err(); + assert_shacl_shape_error(err, "sh:inversePath"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index 347e8296a4..f91ba60951 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -4,10 +4,11 @@ //! efficient `CompiledShape` structures that can be used for validation. 
use crate::constraints::{Constraint, NestedShape, NodeConstraint}; -use crate::error::Result; +use crate::error::{Result, ShaclError}; +use crate::path::{resolve_sh_path, PropertyPath}; use crate::predicates; use fluree_db_core::{Flake, FlakeValue, GraphDbRef, IndexType, RangeMatch, RangeTest, Sid}; -use fluree_vocab::namespaces::{RDF, SHACL}; +use fluree_vocab::namespaces::{BLANK_NODE, RDF, SHACL}; use fluree_vocab::rdf_names; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -44,8 +45,8 @@ pub enum Severity { pub struct PropertyShape { /// The shape ID (blank node or IRI) pub id: ShapeId, - /// The property path (simplified: just a predicate for now) - pub path: Sid, + /// The compiled `sh:path` expression (a single predicate or a path AST). + pub path: PropertyPath, /// Constraints on this property pub constraints: Vec, /// Per-value structural constraints (sh:or/sh:and/sh:xone/sh:not on a property shape). @@ -139,7 +140,10 @@ struct ShapeData { /// Intermediate representation for property shapes #[derive(Default)] struct PropertyShapeData { + /// Raw `sh:path` object (a predicate IRI or a path-expression blank node). path: Option, + /// `path` compiled into a [`PropertyPath`] AST (filled by `resolve_paths`). + resolved_path: Option, constraints: Vec, severity: Severity, name: Option, @@ -258,11 +262,50 @@ impl ShapeCompiler { // spanning multiple graphs will still resolve on a later pass // because `expand_rdf_lists` walks transitively via `db.range`. compiler.expand_rdf_lists(*db).await?; + + // Resolve each property shape's `sh:path` into a path AST. Runs per + // graph so a path whose blank-node structure lives in this graph can + // resolve; a plain-predicate path resolves trivially on any graph. + compiler.resolve_paths(*db).await?; } compiler.finalize() } + /// Resolve raw `sh:path` objects into [`PropertyPath`] ASTs. 
+ /// + /// A plain predicate IRI resolves to [`PropertyPath::Predicate`]; a blank-node + /// path expression is walked into the full AST. A blank node that carries no + /// recognizable path structure in the current graph is left unresolved so a + /// later graph pass can complete it (and, failing that, `finalize` reports it + /// rather than silently treating the blank node as a predicate). + async fn resolve_paths(&mut self, db: GraphDbRef<'_>) -> Result<()> { + let pending: Vec = self + .property_shapes + .iter() + .filter(|(_, ps)| ps.resolved_path.is_none() && ps.path.is_some()) + .map(|(id, _)| id.clone()) + .collect(); + + for ps_id in pending { + let Some(resolved) = resolve_sh_path(db, &ps_id).await? else { + continue; + }; + // A blank node resolving to `Predicate(itself)` means no path + // structure was found in this graph — leave it for a later pass. + let meaningful = match &resolved { + PropertyPath::Predicate(sid) => sid.namespace_code != BLANK_NODE, + _ => true, + }; + if meaningful { + if let Some(ps) = self.property_shapes.get_mut(&ps_id) { + ps.resolved_path = Some(resolved); + } + } + } + Ok(()) + } + /// Expand RDF lists that were referenced by sh:in, sh:and, sh:or, sh:xone async fn expand_rdf_lists(&mut self, db: GraphDbRef<'_>) -> Result<()> { let rdf_first = Sid::new(RDF, rdf_names::FIRST); @@ -665,7 +708,17 @@ impl ShapeCompiler { let mut prop_shapes = Vec::new(); for ps_id in &data.property_shape_ids { if let Some(ps_data) = ps_map.get(ps_id) { - if let Some(path) = &ps_data.path { + if ps_data.path.is_some() { + // `sh:path` present but no AST => a blank-node path + // expression we could not resolve. Reject loudly instead + // of silently scanning a non-existent predicate. 
+ let path = ps_data.resolved_path.clone().ok_or_else(|| { + ShaclError::InvalidConstraint { + shape_id: ps_id.clone(), + message: "unsupported or unresolvable sh:path expression" + .to_string(), + } + })?; let constraints = build_constraints_from_ps_data(ps_data); // Check if this property shape's subject also has structural @@ -678,7 +731,7 @@ impl ShapeCompiler { prop_shapes.push(PropertyShape { id: ps_id.clone(), - path: path.clone(), + path, constraints, value_structural_constraints, severity: ps_data.severity, diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index f16f7dfe51..c6f8132f45 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -24,6 +24,16 @@ //! - Pair: `sh:equals`, `sh:disjoint`, `sh:lessThan`, `sh:lessThanOrEquals` //! - Logical: `sh:not`, `sh:and`, `sh:or`, `sh:xone` //! +//! # Property Paths (`sh:path`) +//! +//! Besides a single predicate IRI, `sh:path` supports property path expressions +//! (compiled by [`path::resolve_sh_path`] into a [`PropertyPath`] AST and evaluated +//! by [`path::eval_path`]): `sh:inversePath` (over a single predicate), sequence +//! paths (RDF lists), `sh:alternativePath`, `sh:zeroOrMorePath`, +//! `sh:oneOrMorePath`, and `sh:zeroOrOnePath` — including nesting of these. +//! The one unsupported form, the inverse of a composite path (`^(p1/p2)`), is +//! rejected at shape-compile time rather than silently misbehaving. +//! //! # Target Selection //! //! 
All five SHACL target types select focus nodes: @@ -71,12 +81,14 @@ pub mod cache; pub mod compile; pub mod constraints; pub mod error; +pub mod path; pub mod validate; pub use cache::{ShaclCache, ShaclCacheKey}; pub use compile::{CompiledShape, PropertyShape, Severity, ShapeId, TargetType}; pub use constraints::Constraint; pub use error::{Result, ShaclError}; +pub use path::PropertyPath; pub use validate::{CrossLedgerMembership, ShaclEngine, ValidationReport, ValidationResult}; /// SHACL namespace code (re-exported from fluree-vocab) diff --git a/fluree-db-shacl/src/path.rs b/fluree-db-shacl/src/path.rs new file mode 100644 index 0000000000..92107f50bd --- /dev/null +++ b/fluree-db-shacl/src/path.rs @@ -0,0 +1,423 @@ +//! SHACL property paths (`sh:path`) +//! +//! `sh:path` may be a single predicate IRI or a *property path expression* built +//! from blank nodes: `sh:inversePath`, a sequence (bare RDF list), `sh:alternativePath`, +//! `sh:zeroOrMorePath`, `sh:oneOrMorePath`, and `sh:zeroOrOnePath`. +//! +//! Compilation ([`resolve_sh_path`]) walks the blank-node structure into a +//! [`PropertyPath`] AST. Validation ([`eval_path`]) evaluates that AST against a +//! focus node to produce the set of *value nodes* the path reaches — the same set +//! that a simple predicate would produce via a single `SPOT` scan. +//! +//! Unsupported forms (e.g. the inverse of a composite path, `^(p1/p2)`) are +//! rejected at compile time with a clear error rather than silently misbehaving. + +use crate::error::{Result, ShaclError}; +use crate::predicates; +use fluree_db_core::{FlakeValue, GraphDbRef, IndexType, RangeMatch, RangeTest, Sid}; +use fluree_vocab::namespaces::{JSON_LD, RDF, SHACL}; +use fluree_vocab::rdf_names; +use std::collections::HashSet; +use std::future::Future; +use std::pin::Pin; + +/// A resolved `sh:path` expression. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PropertyPath { + /// A single predicate IRI (`ex:knows`). 
+ Predicate(Sid), + /// `sh:inversePath` — reversed traversal. Only the inverse of a single + /// predicate is supported; the inverse of a composite path is rejected. + Inverse(Sid), + /// A sequence path (RDF list of sub-paths): `p1 / p2 / …`. + Sequence(Vec<PropertyPath>), + /// `sh:alternativePath` (RDF list of sub-paths): `p1 | p2 | …`. + Alternative(Vec<PropertyPath>), + /// `sh:zeroOrMorePath`: `p*`. + ZeroOrMore(Box<PropertyPath>), + /// `sh:oneOrMorePath`: `p+`. + OneOrMore(Box<PropertyPath>), + /// `sh:zeroOrOnePath`: `p?`. + ZeroOrOne(Box<PropertyPath>), +} + +/// A value node reached by a path: `(value, datatype)`, mirroring a flake's +/// object + datatype columns. +pub type PathValue = (FlakeValue, Sid); + +/// Boxed future returned by the recursive async path helpers. +type PathFuture<'a, T> = Pin<Box<dyn Future<Output = Result<T>> + Send + 'a>>; + +impl PropertyPath { + /// The single predicate for a simple path, else `None`. + /// + /// Used both for the validation fast path (a plain `SPOT` scan) and for + /// `sh:resultPath` reporting, which can only name a single predicate. + pub fn as_predicate(&self) -> Option<&Sid> { + match self { + PropertyPath::Predicate(p) => Some(p), + _ => None, + } + } + + /// Whether this is a single predicate (the common, fast case). + pub fn is_simple(&self) -> bool { + matches!(self, PropertyPath::Predicate(_)) + } +} + +/// Datatype SID carried by reference (node) value nodes: `$id`. +fn ref_dt() -> Sid { + Sid::new(JSON_LD, "id") +} + +fn shacl(name: &str) -> Sid { + Sid::new(SHACL, name) +} + +/// Resolve the `sh:path` of a property shape subject into a [`PropertyPath`]. +/// +/// Handles all three encodings of `sh:path`: +/// - a single predicate IRI → [`PropertyPath::Predicate`]; +/// - a Turtle blank-node path expression (`sh:inversePath`, a bare RDF list +/// sequence, `sh:alternativePath`, `sh:zeroOrMorePath`, …); +/// - the JSON-LD `@list` sequence encoding, where multiple ordered `sh:path` +/// flakes (indexed via flake metadata) form the sequence. 
+/// +/// Returns `Ok(None)` if `ps_subject` has no usable `sh:path` in this graph +/// (e.g. a blank-node path whose structure lives in a different graph); the +/// caller may retry against another graph and ultimately reject if unresolved. +pub fn resolve_sh_path<'a>( + db: GraphDbRef<'a>, + ps_subject: &'a Sid, +) -> PathFuture<'a, Option<PropertyPath>> { + Box::pin(async move { + let members = ordered_objects(db, ps_subject, &shacl(predicates::PATH)).await?; + match members.len() { + 0 => Ok(None), + 1 => match &members[0] { + FlakeValue::Ref(obj) => Ok(Some(resolve_path_node(db, obj).await?)), + // sh:path with a literal object is invalid; skip. + _ => Ok(None), + }, + _ => { + // JSON-LD @list sequence: each ordered object is a path step. + let mut steps = Vec::new(); + for obj in members { + if let FlakeValue::Ref(sid) = obj { + steps.push(resolve_path_node(db, &sid).await?); + } + } + Ok(Some(PropertyPath::Sequence(steps))) + } + } + }) +} + +/// Resolve a single `sh:path` object node (a predicate IRI or a path-expression +/// blank node) into a [`PropertyPath`]. +fn resolve_path_node<'a>(db: GraphDbRef<'a>, node: &'a Sid) -> PathFuture<'a, PropertyPath> { + Box::pin(async move { + // sh:inversePath + if let Some(obj) = single_ref(db, node, &shacl(predicates::INVERSE_PATH)).await? { + let inner = resolve_path_node(db, &obj).await?; + return match inner { + PropertyPath::Predicate(p) => Ok(PropertyPath::Inverse(p)), + _ => Err(unsupported( + node, + "sh:inversePath is only supported over a single predicate", + )), + }; + } + + // sh:alternativePath (RDF list or JSON-LD @list of sub-paths) + if has_object(db, node, &shacl(predicates::ALTERNATIVE_PATH)).await? 
{ + let members = resolve_members(db, node, &shacl(predicates::ALTERNATIVE_PATH)).await?; + if members.is_empty() { + return Err(unsupported(node, "sh:alternativePath list is empty")); + } + return Ok(PropertyPath::Alternative(members)); + } + + // sh:zeroOrMorePath / sh:oneOrMorePath / sh:zeroOrOnePath + for (pred, wrap) in [ + ( + predicates::ZERO_OR_MORE_PATH, + PropertyPath::ZeroOrMore as fn(Box<PropertyPath>) -> PropertyPath, + ), + (predicates::ONE_OR_MORE_PATH, PropertyPath::OneOrMore), + (predicates::ZERO_OR_ONE_PATH, PropertyPath::ZeroOrOne), + ] { + if let Some(obj) = single_ref(db, node, &shacl(pred)).await? { + let inner = resolve_path_node(db, &obj).await?; + return Ok(wrap(Box::new(inner))); + } + } + + // Bare RDF list → sequence path. + let rdf_first = Sid::new(RDF, rdf_names::FIRST); + if has_object(db, node, &rdf_first).await? { + let members = resolve_rdf_list(db, node).await?; + match members.len() { + 0 => return Err(unsupported(node, "sh:path sequence list is empty")), + 1 => return Ok(members.into_iter().next().unwrap()), + _ => return Ok(PropertyPath::Sequence(members)), + } + } + + // No path-expression structure → a plain predicate IRI. + Ok(PropertyPath::Predicate(node.clone())) + }) +} + +/// Resolve the ordered members of a `(subject, predicate)` list, transparently +/// handling both the Turtle RDF-list encoding (a single object that heads an +/// `rdf:first`/`rdf:rest` list) and the JSON-LD `@list` encoding (multiple +/// ordered objects). +fn resolve_members<'a>( + db: GraphDbRef<'a>, + subject: &'a Sid, + predicate: &'a Sid, +) -> PathFuture<'a, Vec<PropertyPath>> { + Box::pin(async move { + let objects = ordered_objects(db, subject, predicate).await?; + + // Turtle RDF-list form: a single object that is itself a list head. + if let [FlakeValue::Ref(head)] = objects.as_slice() { + let rdf_first = Sid::new(RDF, rdf_names::FIRST); + if has_object(db, head, &rdf_first).await? 
{ + return resolve_rdf_list(db, head).await; + } + } + + // JSON-LD @list form (or a single direct member). + let mut out = Vec::new(); + for obj in objects { + if let FlakeValue::Ref(sid) = obj { + out.push(resolve_path_node(db, &sid).await?); + } + } + Ok(out) + }) +} + +/// Walk an `rdf:first`/`rdf:rest` list, resolving each element as a sub-path. +fn resolve_rdf_list<'a>( + db: GraphDbRef<'a>, + list_head: &'a Sid, +) -> PathFuture<'a, Vec> { + Box::pin(async move { + let rdf_first = Sid::new(RDF, rdf_names::FIRST); + let rdf_rest = Sid::new(RDF, rdf_names::REST); + let rdf_nil = Sid::new(RDF, rdf_names::NIL); + + let mut members = Vec::new(); + let mut current = list_head.clone(); + const MAX_LIST_LENGTH: usize = 10_000; + + for _ in 0..MAX_LIST_LENGTH { + if current == rdf_nil { + break; + } + let Some(first) = single_ref(db, ¤t, &rdf_first).await? else { + break; + }; + members.push(resolve_path_node(db, &first).await?); + + match single_ref(db, ¤t, &rdf_rest).await? { + Some(next) => current = next, + None => break, + } + } + Ok(members) + }) +} + +/// All objects of `(subject, predicate)`, ordered by the JSON-LD list index in +/// flake metadata (falling back to scan order when unindexed). +async fn ordered_objects( + db: GraphDbRef<'_>, + subject: &Sid, + predicate: &Sid, +) -> Result> { + let flakes = db + .range( + IndexType::Spot, + RangeTest::Eq, + RangeMatch::subject_predicate(subject.clone(), predicate.clone()), + ) + .await?; + let mut items: Vec<(i32, FlakeValue)> = flakes + .iter() + .enumerate() + .map(|(pos, f)| { + let idx = f.m.as_ref().and_then(|m| m.i).unwrap_or(pos as i32); + (idx, f.o.clone()) + }) + .collect(); + items.sort_by_key(|(i, _)| *i); + Ok(items.into_iter().map(|(_, v)| v).collect()) +} + +/// Evaluate a property path from `focus`, returning the reached value nodes as +/// `(value, datatype)` pairs — the direct analogue of the objects of a single +/// `SPOT` scan for a simple predicate. 
+pub fn eval_path<'a>(
+    db: GraphDbRef<'a>,
+    focus: &'a Sid,
+    path: &'a PropertyPath,
+) -> PathFuture<'a, Vec<(FlakeValue, Sid)>> {
+    // Boxed because evaluation is recursive: composite paths call back into
+    // `eval_path` for their sub-paths.
+    Box::pin(async move {
+        match path {
+            PropertyPath::Predicate(p) => forward_step(db, focus, p).await,
+            PropertyPath::Inverse(p) => inverse_step(db, focus, p).await,
+            PropertyPath::Sequence(steps) => eval_sequence(db, focus, steps).await,
+            PropertyPath::Alternative(alts) => {
+                // Union of every alternative, evaluated from the same focus node.
+                let mut out = Vec::new();
+                for alt in alts {
+                    out.extend(eval_path(db, focus, alt).await?);
+                }
+                Ok(dedup(out))
+            }
+            PropertyPath::ZeroOrMore(inner) => {
+                // Zero steps: the focus node itself is a value node.
+                let mut out = vec![(FlakeValue::Ref(focus.clone()), ref_dt())];
+                out.extend(closure(db, focus, inner).await?);
+                Ok(dedup(out))
+            }
+            // `closure` already returns a deduplicated set, so no second pass is needed.
+            PropertyPath::OneOrMore(inner) => closure(db, focus, inner).await,
+            PropertyPath::ZeroOrOne(inner) => {
+                // Zero steps (the focus node) plus exactly one application of `inner`.
+                let mut out = vec![(FlakeValue::Ref(focus.clone()), ref_dt())];
+                out.extend(eval_path(db, focus, inner).await?);
+                Ok(dedup(out))
+            }
+        }
+    })
+}
+
+/// Forward single-predicate step: objects of `(focus, p, ?)`.
+///
+/// Each object is paired with the datatype stored on its flake.
+async fn forward_step(
+    db: GraphDbRef<'_>,
+    focus: &Sid,
+    p: &Sid,
+) -> Result<Vec<(FlakeValue, Sid)>> {
+    let flakes = db
+        .range(
+            IndexType::Spot,
+            RangeTest::Eq,
+            RangeMatch::subject_predicate(focus.clone(), p.clone()),
+        )
+        .await?;
+    Ok(flakes.iter().map(|f| (f.o.clone(), f.dt.clone())).collect())
+}
+
+/// Inverse single-predicate step: subjects of `(?, p, focus)`.
+///
+/// The matching subjects are returned as reference values tagged with `ref_dt()`.
+async fn inverse_step(
+    db: GraphDbRef<'_>,
+    focus: &Sid,
+    p: &Sid,
+) -> Result<Vec<(FlakeValue, Sid)>> {
+    let flakes = db
+        .range(
+            IndexType::Opst,
+            RangeTest::Eq,
+            RangeMatch::predicate_object(p.clone(), FlakeValue::Ref(focus.clone())),
+        )
+        .await?;
+    Ok(flakes
+        .iter()
+        .map(|f| (FlakeValue::Ref(f.s.clone()), ref_dt()))
+        .collect())
+}
+
+/// Evaluate a sequence path: chain each step, carrying `(value, dt)` only for
+/// the final step. Intermediate steps must reach reference nodes to continue.
+async fn eval_sequence(
+    db: GraphDbRef<'_>,
+    focus: &Sid,
+    steps: &[PropertyPath],
+) -> Result<Vec<(FlakeValue, Sid)>> {
+    // Nodes the next step is evaluated from; starts as just the focus node.
+    let mut frontier: Vec<Sid> = vec![focus.clone()];
+
+    for (i, step) in steps.iter().enumerate() {
+        let is_last = i + 1 == steps.len();
+        let mut reached: Vec<(FlakeValue, Sid)> = Vec::new();
+        for node in &frontier {
+            reached.extend(eval_path(db, node, step).await?);
+        }
+        reached = dedup(reached);
+
+        if is_last {
+            return Ok(reached);
+        }
+        // Only reference values can be stepped from; literals reached by an
+        // intermediate step are dropped.
+        frontier = reached
+            .into_iter()
+            .filter_map(|(v, _)| match v {
+                FlakeValue::Ref(sid) => Some(sid),
+                _ => None,
+            })
+            .collect();
+        // `reached` is deduplicated on (value, datatype); collapse again on the
+        // node alone so each node is expanded once per step.
+        frontier.sort();
+        frontier.dedup();
+        if frontier.is_empty() {
+            return Ok(Vec::new());
+        }
+    }
+    // Only reached when `steps` is empty: an empty sequence yields no value nodes.
+    Ok(Vec::new())
+}
+
+/// Transitive closure of `inner` from `focus` (one or more steps), computed with
+/// a worklist over the reference nodes reached. Non-reference values are
+/// terminal value nodes.
+///
+/// The worklist is a `Vec` popped from the end, so nodes are expanded in LIFO
+/// order. `focus` appears in the result only if some step leads back to it.
+async fn closure(
+    db: GraphDbRef<'_>,
+    focus: &Sid,
+    inner: &PropertyPath,
+) -> Result<Vec<(FlakeValue, Sid)>> {
+    let mut out: Vec<(FlakeValue, Sid)> = Vec::new();
+    let mut visited: HashSet<Sid> = HashSet::new();
+    // Mark the start node as already scheduled. A cycle leading back to `focus`
+    // still records it as a value below, but does not expand it a second time.
+    visited.insert(focus.clone());
+    let mut queue: Vec<Sid> = vec![focus.clone()];
+
+    while let Some(node) = queue.pop() {
+        for (value, dt) in eval_path(db, &node, inner).await? {
+            if let FlakeValue::Ref(sid) = &value {
+                // `insert` returns true only the first time a node is seen.
+                if visited.insert(sid.clone()) {
+                    queue.push(sid.clone());
+                }
+            }
+            out.push((value, dt));
+        }
+    }
+    Ok(dedup(out))
+}
+
+/// Deduplicate value nodes (SHACL value nodes are a set).
+///
+/// The first occurrence of each `(value, datatype)` pair is kept, so the input
+/// order of the survivors is preserved.
+fn dedup(mut values: Vec<(FlakeValue, Sid)>) -> Vec<(FlakeValue, Sid)> {
+    // Keyed on the `Debug` rendering of each pair.
+    // NOTE(review): presumably because `FlakeValue` is not `Hash + Eq` — confirm;
+    // if it is, key on the values directly and avoid two allocations per element.
+    let mut seen: HashSet<(String, String)> = HashSet::new();
+    values.retain(|(v, dt)| seen.insert((format!("{v:?}"), format!("{dt:?}"))));
+    values
+}
+
+/// Fetch the single reference object of `(subject, predicate, ?)`, if any.
+async fn single_ref(db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid) -> Result<Option<Sid>> {
+    let flakes = db
+        .range(
+            IndexType::Spot,
+            RangeTest::Eq,
+            RangeMatch::subject_predicate(subject.clone(), predicate.clone()),
+        )
+        .await?;
+    // First reference object in scan order; literal objects are skipped, and any
+    // further reference objects are ignored.
+    Ok(flakes.iter().find_map(|f| match &f.o {
+        FlakeValue::Ref(sid) => Some(sid.clone()),
+        _ => None,
+    }))
+}
+
+/// Whether `(subject, predicate, ?)` has any object (regardless of type).
+///
+/// Unlike `single_ref`, a literal object counts as a match.
+async fn has_object(db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid) -> Result<bool> {
+    let flakes = db
+        .range(
+            IndexType::Spot,
+            RangeTest::Eq,
+            RangeMatch::subject_predicate(subject.clone(), predicate.clone()),
+        )
+        .await?;
+    Ok(!flakes.is_empty())
+}
+
+/// Build the `ShaclError::InvalidConstraint` used for path expressions this
+/// module cannot resolve, recording `shape_node` as the `shape_id`.
+fn unsupported(shape_node: &Sid, message: &str) -> ShaclError {
+    ShaclError::InvalidConstraint {
+        shape_id: shape_node.clone(),
+        message: message.to_string(),
+    }
+}
diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index b5013666a9..957766fe4a 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -641,10 +641,13 @@ fn validate_structural_constraint<'a>( .await?; // Collect declared properties from the shape's property shapes + // Only single-predicate property shapes declare a property + // for closed-shape purposes; complex paths have no single + // predicate to exempt. 
let declared_properties: std::collections::HashSet<&Sid> = parent_shape .property_shapes .iter() - .map(|ps| &ps.path) + .filter_map(|ps| ps.path.as_predicate()) .collect(); // Per SHACL spec section 4.8.1, rdf:type is implicitly ignored @@ -979,17 +982,28 @@ async fn validate_property_shape<'a>( ) -> Result> { let mut results = Vec::new(); - // Get all values for this property on the focus node - let flakes = db - .range( - IndexType::Spot, - RangeTest::Eq, - RangeMatch::subject_predicate(focus_node.clone(), prop_shape.path.clone()), - ) - .await?; - - let values: Vec = flakes.iter().map(|f| f.o.clone()).collect(); - let datatypes: Vec = flakes.iter().map(|f| f.dt.clone()).collect(); + // Get all value nodes reached by this property shape's path on the focus node. + // Simple single-predicate paths take the plain SPOT scan; complex paths + // (inverse/sequence/alternative/transitive) evaluate the path AST. + let (values, datatypes): (Vec, Vec) = + if let Some(pred) = prop_shape.path.as_predicate() { + let flakes = db + .range( + IndexType::Spot, + RangeTest::Eq, + RangeMatch::subject_predicate(focus_node.clone(), pred.clone()), + ) + .await?; + ( + flakes.iter().map(|f| f.o.clone()).collect(), + flakes.iter().map(|f| f.dt.clone()).collect(), + ) + } else { + crate::path::eval_path(db, focus_node, &prop_shape.path) + .await? 
+ .into_iter() + .unzip() + }; // Validate each constraint for constraint in &prop_shape.constraints { @@ -1019,7 +1033,7 @@ async fn validate_property_shape<'a>( for violation in violations { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1035,7 +1049,7 @@ async fn validate_property_shape<'a>( for violation in class_violations { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1052,7 +1066,7 @@ async fn validate_property_shape<'a>( for violation in violations { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1131,7 +1145,7 @@ async fn validate_property_value_structural_constraint<'a>( if !any_conforms && !nested_shapes.is_empty() { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1164,7 +1178,7 @@ async fn validate_property_value_structural_constraint<'a>( if !conforms { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), 
severity: prop_shape.severity, @@ -1204,7 +1218,7 @@ async fn validate_property_value_structural_constraint<'a>( if conforming_count == 0 { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1217,7 +1231,7 @@ async fn validate_property_value_structural_constraint<'a>( } else if conforming_count > 1 { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, @@ -1247,7 +1261,7 @@ async fn validate_property_value_structural_constraint<'a>( if conforms { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), + result_path: prop_shape.path.as_predicate().cloned(), source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, diff --git a/fluree-vocab/src/lib.rs b/fluree-vocab/src/lib.rs index e38c007290..593fefa88b 100644 --- a/fluree-vocab/src/lib.rs +++ b/fluree-vocab/src/lib.rs @@ -1147,6 +1147,25 @@ pub mod shacl_names { /// sh:path local name pub const PATH: &str = "path"; + // ======================================================================== + // Property Path Expressions + // ======================================================================== + + /// sh:inversePath local name (inverse path `^p`) + pub const INVERSE_PATH: &str = "inversePath"; + + /// sh:alternativePath local name (alternative path `p1|p2`, points to an RDF list) + pub const ALTERNATIVE_PATH: &str = "alternativePath"; + + /// sh:zeroOrMorePath local name (`p*`) + pub const ZERO_OR_MORE_PATH: &str = "zeroOrMorePath"; + + 
/// sh:oneOrMorePath local name (`p+`) + pub const ONE_OR_MORE_PATH: &str = "oneOrMorePath"; + + /// sh:zeroOrOnePath local name (`p?`) + pub const ZERO_OR_ONE_PATH: &str = "zeroOrOnePath"; + // ======================================================================== // Cardinality Constraints // ======================================================================== From b2ac314600eb7f6d12624ee8112a7b7add7238e7 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 07:38:16 -0400 Subject: [PATCH 06/23] docs(shacl): document property paths in the cookbook; drop contributor internals doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Property paths section to the SHACL cookbook covering inverse, sequence, alternative, and transitive (*/+/?) paths with Turtle and JSON-LD (@list) examples, plus the unsupported inverse-of-composite case. Remove docs/contributing/shacl-implementation.md — we don't keep a dedicated internals guide per feature. The user-facing semantics it carried (shapesSource, predicate-target discovery, per-graph config, value-sets) already live in the cookbook and config reference. Clean up its references in SUMMARY.md and contributing/README.md. 
--- docs/SUMMARY.md | 1 - docs/contributing/README.md | 13 - docs/contributing/shacl-implementation.md | 332 ---------------------- docs/guides/cookbook-shacl.md | 59 +++- 4 files changed, 57 insertions(+), 348 deletions(-) delete mode 100644 docs/contributing/shacl-implementation.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cf1073e6c4..32fef87cad 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -226,6 +226,5 @@ - [Dev setup](contributing/dev-setup.md) - [Tests](contributing/tests.md) - [W3C SPARQL compliance suite](contributing/sparql-compliance.md) - - [SHACL implementation](contributing/shacl-implementation.md) - [Adding tracing spans](contributing/tracing-guide.md) - [Releasing](contributing/releasing.md) diff --git a/docs/contributing/README.md b/docs/contributing/README.md index 3ee1d945fe..b8beefa273 100644 --- a/docs/contributing/README.md +++ b/docs/contributing/README.md @@ -41,19 +41,6 @@ Guide to the manifest-driven W3C compliance test suite: - Using Claude Code for compliance work - Architecture overview -### [SHACL Implementation](shacl-implementation.md) - -How SHACL validation is wired into Fluree, for contributors adding -constraints or fixing bugs: -- Pipeline: compile → cache → validate -- Crate layout (`fluree-db-shacl` / `-transact` / `-api`) -- Shared post-stage helper and its call sites -- Per-graph config, `f:shapesSource`, target-type resolution -- Adding a new constraint (walkthrough) -- Testing patterns (unit + integration + temp-revert regression trick) -- Known gaps (`sh:uniqueLang`, `sh:qualifiedValueShape`, cross-txn cache) - - ## How to Contribute ### Ways to Contribute diff --git a/docs/contributing/shacl-implementation.md b/docs/contributing/shacl-implementation.md deleted file mode 100644 index e6ab6f4af9..0000000000 --- a/docs/contributing/shacl-implementation.md +++ /dev/null @@ -1,332 +0,0 @@ -# SHACL Implementation - -This is the contributor-facing guide to how SHACL validation is wired into Fluree. 
It covers the pipeline, the crate layout, and the places you'll want to touch when fixing a bug or adding a constraint. - -User-facing docs: [Cookbook: SHACL Validation](../guides/cookbook-shacl.md) and [Setting Groups — SHACL](../ledger-config/setting-groups.md#shacl-defaults). - -## Pipeline at a glance - -``` -Transaction flakes - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ fluree-db-transact :: stage() │ -│ stages flakes into a StagedLedger (novelty overlay) │ -└─────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ fluree-db-api :: apply_shacl_policy_to_staged_view() │ -│ (shared post-stage helper — called from every write surface) │ -│ │ -│ 1. load_transaction_config(ledger) │ -│ 2. build_per_graph_shacl_policy(config, graph_delta) │ -│ → HashMap │ -│ 3. resolve_shapes_source_g_ids(config, snapshot) │ -│ → Vec (where to compile shapes from) │ -│ 4. ShaclEngine::from_dbs_with_overlay(&[GraphDbRef], ledger) │ -│ 5. validate_view_with_shacl(view, cache, ..., per_graph_policy)│ -│ → ShaclValidationOutcome { reject, warn } │ -│ 6. log warn bucket; propagate ShaclViolation for reject bucket │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Crate layout - -| Crate | Role | -|-------|------| -| `fluree-db-shacl` | SHACL engine: shape compilation, cache, per-node validation, constraint evaluators. **No transaction-layer concerns.** | -| `fluree-db-transact` | Staged-validation plumbing: `validate_view_with_shacl`, `validate_staged_nodes`. Knows about `StagedLedger`, staged flakes, and graph routing. Defines the per-graph policy types. | -| `fluree-db-api` | Config resolution, policy building, and the shared helper that every write surface (JSON-LD, Turtle, commit replay) calls through. | - -SHACL is feature-gated (`shacl`). See [Standards and feature flags](../reference/compatibility.md). 
- -## The shared post-stage helper - -All SHACL-enforced write surfaces route through **`apply_shacl_policy_to_staged_view`** in `fluree-db-api/src/tx.rs`: - -```rust -pub(crate) async fn apply_shacl_policy_to_staged_view( - view: &StagedLedger, - ctx: StagedShaclContext<'_>, -) -> Result<(), TransactError> -``` - -`StagedShaclContext` carries everything that varies between call sites: - -| Field | Populated by JSON-LD txn | Populated by Turtle insert | Populated by commit replay | -|-------|-------------------------|----------------------------|----------------------------| -| `graph_delta` | `Some(&txn.graph_delta)` (IRIs) | `None` | `Some(&routing.graph_iris)` | -| `graph_sids` | `Some(&graph_sids)` | `None` | `Some(&routing.graph_sids)` | -| `tracker` | `options.tracker` | `None` | `None` | - -Why not fold this into `fluree-db-transact`? Config resolution (three-tier merge, override control, per-graph lookup) is API-layer policy, not a staging primitive. Keeping the helper in `tx.rs` lets `fluree-db-transact` stay focused on staging mechanics. - -Call sites: -- `fluree-db-api/src/tx.rs::stage_with_config_shacl` (JSON-LD / SPARQL UPDATE txns) -- `fluree-db-api/src/tx.rs::stage_turtle_insert` (plain Turtle) -- `fluree-db-api/src/commit_transfer.rs` (push / replay) - -## Config resolution - -### Ledger-wide and per-graph policy - -`build_per_graph_shacl_policy(config, graph_delta)` returns `Option>`: - -- Graphs **absent from the map** are **disabled** — their staged subjects are skipped by the validator. -- `ShaclGraphPolicy { mode: ValidationMode }` controls warn vs reject for that graph. -- The default graph (g_id=0) always gets the ledger-wide resolved policy when SHACL is enabled. -- Every graph in `graph_delta` is resolved independently via `config_resolver::resolve_effective_config(config, Some(graph_iri))`, which applies the three-tier merge (query-time → per-graph → ledger-wide) under override-control rules. 
-- Returns `None` when every graph resolves to disabled → the helper short-circuits before building the SHACL engine. - -The transact layer's `validate_view_with_shacl` signature: - -```rust -pub async fn validate_view_with_shacl( - view: &StagedLedger, - shacl_cache: &ShaclCache, - graph_sids: Option<&HashMap>, - tracker: Option<&Tracker>, - per_graph_policy: Option<&HashMap>, -) -> Result -``` - -- `per_graph_policy = None`: treat every graph with staged flakes as `Reject` (legacy / shapes-exist-heuristic path). -- `per_graph_policy = Some(map)`: only graphs in the map participate; their mode drives the warn/reject split. - -Output: - -```rust -pub struct ShaclValidationOutcome { - pub reject_violations: Vec, - pub warn_violations: Vec, -} -``` - -The API helper logs the warn bucket and returns `TransactError::ShaclViolation` for the reject bucket. - -### `f:shapesSource` resolution - -`resolve_shapes_source_g_ids(config, snapshot)` in `tx.rs` is the sibling of `policy_builder::resolve_policy_source_g_ids` — identical shape, different namespace. Both: - -1. Start with `[0]` (default graph) when the source field is unset. -2. Map `f:defaultGraph` → `[0]`. -3. Map a named graph IRI to its registered `GraphId` via `snapshot.graph_registry.graph_id_for_iri`. -4. Reject unsupported dimensions: `f:atT`, `f:trustPolicy`, `f:rollbackGuard`, cross-ledger `f:ledger` (these surface as `TransactError::Parse`). - -`f:shapesSource` is **authoritative, not additive** — when set, shapes come exclusively from the configured graph(s). It's intentionally non-overridable at query/txn time; it can only be changed via a config-graph transaction. - -## Shape compilation from multiple graphs - -`ShapeCompiler::compile_from_dbs(&[GraphDbRef])` in `fluree-db-shacl/src/compile.rs` scans each input graph for every SHACL predicate (see the `shacl_predicates` list), accumulates into a single `ShapeCompiler`, then finalizes. 
Cross-graph `sh:and` / `sh:or` / `sh:xone` / `sh:in` list references still resolve because finalization runs once after all graphs are consumed. - -`ShaclEngine::from_dbs_with_overlay(&[GraphDbRef], ledger_id)` is the corresponding engine constructor. `from_db_with_overlay(db, ledger_id)` is a single-graph convenience that delegates to the multi-graph path via `slice::from_ref(&db)`. - -The engine's `SchemaHierarchy` is taken from the first graph's snapshot — hierarchy is schema-level and not graph-scoped. - -## Target-type resolution - -The cache (`fluree-db-shacl/src/cache.rs`) holds four indexes: - -| Field | Keyed by | Used for | -|-------|----------|----------| -| `by_target_class` | class Sid (with `rdfs:subClassOf*` expansion) | `sh:targetClass` | -| `by_target_node` | subject Sid | `sh:targetNode` | -| `by_target_subjects_of` | predicate Sid | `sh:targetSubjectsOf` | -| `by_target_objects_of` | predicate Sid | `sh:targetObjectsOf` | - -`ShaclEngine::validate_node` assembles applicable shapes for a focus node by: - -1. `shapes_for_node(focus)` — O(1) hashmap hit. -2. `shapes_for_class(type)` for each of the focus's `rdf:type` values — O(1) per type. -3. For each key `p` in `by_target_subjects_of`: existence check `db.range(SPOT, s=focus, p=p)` — if non-empty, shape applies. -4. For each key `p` in `by_target_objects_of`: existence check `db.range(OPST, p=p, o=focus)` — if non-empty, shape applies. - -Why the live db check for steps 3/4 instead of precomputed staged-flake hints? Three scenarios a hint-only approach can't cover: - -- **Base-state edge**: the triggering edge is already indexed; the current txn only touches another property. -- **Retraction-only**: the staged flake set for a focus contains retractions that don't remove the last matching edge. -- **Cross-graph routing**: a subject's edge exists in graph A but we're validating the subject in graph B — the per-graph db ref sees only B. 
- -`db.range()` returns only post-state assertions (retractions are filtered in the range pipeline — see `fluree-db-core/src/range.rs`), so the check is exactly "is this edge present in the post-txn view of this graph". - -Cost is bounded by the number of predicate-targeted shapes in the cache, not by data size — typically 0–10 per ledger. - -## Property paths (`sh:path`) - -`sh:path` is not limited to a single predicate. `fluree-db-shacl/src/path.rs` compiles it into a `PropertyPath` AST and evaluates that AST to the set of *value nodes* a path reaches — the same set a plain predicate would produce via one `SPOT` scan. - -Supported forms: single predicate, `sh:inversePath` (over a single predicate), sequence paths, `sh:alternativePath`, `sh:zeroOrMorePath`, `sh:oneOrMorePath`, `sh:zeroOrOnePath`, and nesting of these. - -- **Compile** (`resolve_sh_path`): walks the `sh:path` object. It handles all three RDF encodings a path can arrive in — a bare predicate IRI, a Turtle blank-node expression (`sh:inversePath`, an `rdf:first`/`rdf:rest` sequence list, `sh:alternativePath` → list, the transitive `*Path` predicates), and the JSON-LD `@list` encoding (multiple ordered `sh:path`/`sh:alternativePath` flakes carrying a list index in flake metadata `m.i`). -- **Evaluate** (`eval_path`): forward step = `SPOT` scan; inverse step = `OPST` scan; sequence = chained steps over the reference frontier; alternative = union; `*/+/?` = BFS closure over reference nodes (reflexive for `*` and `?`). Value nodes are deduplicated (SHACL value nodes are a set). -- **Validation fast path**: `validate_property_shape` keeps the single-predicate `SPOT` scan when `PropertyPath::as_predicate()` is `Some`, so simple paths pay nothing for the AST. `sh:resultPath` in a violation is only populated for single-predicate paths. 
- -The one form deliberately **not** supported is the inverse of a composite path (`^(p1/p2)`); `resolve_sh_path` rejects it at compile time with a `ShaclError::InvalidConstraint` rather than silently misbehaving. A blank-node path whose structure lives in another graph is left unresolved so a later graph pass can complete it — and, failing that, `finalize` rejects it rather than treating the blank node as a predicate (the old silent-misbehavior mode). - -## Staged validation loop - -`validate_staged_nodes` in `fluree-db-transact/src/stage.rs`: - -1. Partition staged flakes into `subjects_by_graph: HashMap>`. - - Every flake's subject is added (including retractions — class/node targets still need to see them). - - Every **assert** flake's Ref-object is also added to the graph's focus set (ensures `sh:targetObjectsOf` shapes fire on newly-referenced nodes). -2. For each `(g_id, subjects)`: - - If `enabled_graphs` is `Some` and `g_id` is not in it: **skip**. - - Build a per-graph `GraphDbRef` with `view` as overlay and `view.staged_t()` as `t`. - - Attach the tracker (if any) — fuel accounting works for SHACL range scans too. - - For each subject: fetch `rdf:type` flakes, then call `engine.validate_node(db, subject, &types)`. - - Tag each returned `ValidationResult` with `graph_id = Some(g_id)` so the caller can partition reject vs warn. - -### `sh:class` value membership and value-sets across graphs - -`sh:class` is validated by `validate_class_constraint` in `fluree-db-shacl/src/validate.rs`. There are **two distinct graph contexts** at play: - -- **Compilation graph** — where the shapes themselves are read from (`f:shapesSource`; see [Shape compilation from multiple graphs](#shape-compilation-from-multiple-graphs)). -- **Membership graph(s)** — where a value's `rdf:type` (and any `rdfs:subClassOf` hierarchy) is looked up to decide `sh:class` conformance. 
- -By default, membership is resolved against the **focus node's own data graph** (plus `g_id=0` for the `subClassOf` walk). That breaks the "shared value-set" pattern — e.g. a controlled list of US states (`ex:illinois a ex:USState`) referenced by records living in a different graph. To support it, `validate_view_with_shacl` receives the `f:shapesSource` graph ids as `membership_g_ids` and the engine unions them into the lookup: a value's `rdf:type` is read across `{focus data graph} ∪ {membership graphs}`, so the value-set vocabulary can live alongside the shapes. When `f:shapesSource` is unset (shapes in `g_id=0`), this degenerates to the historical behaviour. - -A **per-transaction memo** on the `ShaclEngine` (`class_cache`, keyed `(value, class, focus g_id)`) collapses repeated checks — inserting 100 records that all reference `ex:illinois` performs a single membership lookup. The engine is built fresh per transaction (`validate_view_with_shacl`) and shared across all focus nodes, so the memo is scoped to exactly one validation pass. Cache hits skip the range scan **and** its fuel charge, so per-transaction fuel depends on intra-transaction value repetition. - -**Cross-ledger value-sets.** When `f:shapesSource` is cross-ledger (`f:ledger`), the controlled vocabulary lives in the *model ledger* M alongside the shapes. Because M's ABox (`ex:illinois a ex:USState`) is *not* carried in the shapes wire (which projects only SHACL predicates + `rdf:list` internals), membership is resolved by **querying M live**: `stage_with_config_shacl` opens M at the resolved `t` (`load_graph_db_at_t`) and threads a `CrossLedgerMembership { model_db, data_ns_map }` down to `validate_class_constraint`. 
On a local miss, `value_conforms_cross_ledger` decodes the value/class Sids to IRIs via D's staged namespace map (`data_ns_map` — the base snapshot alone can't decode namespaces staged this txn), re-encodes them against M (whose split mode may differ), then does the `rdf:type` + `subClassOf` lookup in M's term space. Well-known predicates (`rdf:type`, `rdfs:subClassOf`) share global namespace codes, so only the user IRIs are translated. The per-txn memo covers cross-ledger verdicts too. M is pinned at the resolved `t` (latest at tx time), consistent with cross-ledger shapes. - -Scope limits (as of this writing): -- **Top-level property shapes only.** `sh:class` reached via a referenced/nested shape (`sh:and`/`or`/`xone`/`node` referencing a shape by id) passes `None` for the context and keeps the legacy data-graph lookup (no vocabulary union, no cross-ledger, no memo). -- **`f:atT` / trust dimensions** on the source are still rejected globally, so a cross-ledger value-set tracks M's latest committed state. - -### RDFS subclass fallback (`is_subclass_of`) - -When the indexed `SchemaHierarchy` doesn't know about a `rdfs:subClassOf` edge (e.g. asserted in the same or a recent unindexed transaction), `validate_class_constraint` (via `value_conforms_to_class`) calls `is_subclass_of(db, membership_g_ids, start, target)` which walks `rdfs:subClassOf` upward via BFS. - -Two invariants in that walk: - -- **Scope to `g_id=0` unioned with the membership graphs** via `rescope_to_graph(db, g)` — schema lives in the default graph (matching how `SchemaHierarchy::from_db_root_schema` is built), while a value-set vocabulary configured via `f:shapesSource` may declare a small class hierarchy in its own graph that must also be honoured. Subject may be in graph G but the `subClassOf` edge is looked up in the schema/vocabulary graphs. 
-- **Preserve tracker + other `GraphDbRef` fields** — `rescope_to_graph` uses `db` copy + `g_id` mutation rather than `GraphDbRef::new(..)`, which would reset `tracker`, `runtime_small_dicts`, and `eager`. There's a unit test pinning this (`rescope_to_graph_preserves_tracker_and_other_fields`). - -## Adding a new constraint - -### 1. Compile - -In `fluree-db-shacl/src/compile.rs`: - -- Add a variant to the `Constraint` enum (or `NodeConstraint` for node-level). -- Add the predicate name to the `shacl_predicates` array in `ShapeCompiler::compile_from_dbs`. -- Handle the predicate in `process_flake` (sets the right field on the intermediate shape builder). -- If the constraint takes arguments via an RDF list, extend `expand_rdf_lists`. - -### 2. Validate - -Pure per-value constraints (no db access) go in `fluree-db-shacl/src/constraints/`: - -- Add a `validate_(values, ..) -> Option` helper next to the similar ones in `cardinality.rs` / `value.rs` / etc. -- Wire it into the big match in `validate_constraint` in `fluree-db-shacl/src/validate.rs`. - -Constraints that need database access (`sh:class`, pair constraints) are handled **before** the pure dispatch, inside `validate_property_shape`. Pattern: - -```rust -Constraint::MyConstraint(target) => { - let helper_violations = validate_my_constraint(db, &values, target).await?; - for v in helper_violations { - results.push(ValidationResult { - focus_node: focus_node.clone(), - result_path: Some(prop_shape.path.clone()), - source_shape: parent_shape.id.clone(), - source_constraint: Some(prop_shape.id.clone()), - severity: prop_shape.severity, - message: v.message, - value: v.value, - graph_id: None, // tagged later in validate_staged_nodes - }); - } -} -``` - -### 3. Advertise - -Update `fluree-db-shacl/src/lib.rs`: -- Add the constraint to the **Supported Constraints** list. -- Remove from the **Not Yet Supported** section if it was listed. - -### 4. 
Test - -- Add a unit test next to your `validate_` helper for the pure logic. -- Add an integration test in `fluree-db-api/src/shacl_tests.rs` that transacts a shape + violating data + valid data. -- For a bug fix: temp-revert the fix, confirm the test fails, restore, confirm it passes. This pins the regression into the test. - -## Testing patterns - -### Integration tests - -Most SHACL integration tests live in `fluree-db-api/src/shacl_tests.rs` and use the `assert_shacl_violation(err, "substring")` helper. Pattern: - -```rust -let shape = json!({ /* sh:NodeShape with the constraint under test */ }); -let ledger = fluree.create_ledger("shacl/foo:main").await.unwrap(); -let ledger = fluree.upsert(ledger, &shape).await.unwrap().ledger; - -// Negative case -let err = fluree.upsert(ledger, &violating_data).await.unwrap_err(); -assert_shacl_violation(err, "expected message fragment"); - -// Positive case -fluree.upsert(ledger, &valid_data).await.expect("must pass"); -``` - -### Cross-graph / per-graph tests - -See `fluree-db-api/tests/it_config_graph.rs` for patterns that write config via TriG into the config graph, then stage transactions across multiple graphs. Examples: - -- `shacl_shapes_source_points_to_named_graph` — `f:shapesSource` wiring. -- `shacl_class_value_set_in_shapes_graph` — `sh:class` value-set defined in the `f:shapesSource` graph, referenced by data in another graph (cross-graph membership union + per-txn memo). -- `shacl_per_graph_disable_honored` — per-graph `shaclEnabled: false`. -- `shacl_per_graph_mode_warn_vs_reject` — mixed modes across graphs. -- `shacl_target_subjects_of_fires_on_base_state_edge` — base-state predicate-target discovery. - -### The temp-revert trick - -For every correctness-fix PR, confirm the regression test actually covers the bug: - -1. Apply the minimum temp-revert in the production code (comment out the fix with a `// TEMP REVERT:` marker). -2. Run the new test — it should **fail** with the expected symptom. -3. 
Restore the fix — test passes. -4. Commit the fix + the test together. - -This is how we guard against tests that pass trivially but don't actually exercise the fix. - -## Known gaps - -- **`sh:uniqueLang`, `sh:languageIn`** — parsed but not evaluated. Needs language-tag metadata on flakes, which isn't yet threaded through the validation path. -- **`sh:qualifiedValueShape` (+ `sh:qualifiedMinCount` / `sh:qualifiedMaxCount`)** — parsed but not evaluated. Needs recursive nested-shape counting. -- **Cross-transaction shape cache** — every call to `from_dbs_with_overlay` recompiles from scratch. `ShaclCacheKey` has a `schema_epoch` field that's ready to drive a shared `Arc` cache on the connection, but nothing populates it yet. Low priority until perf regressions are observed. -- **`sh:class` in referenced/nested shapes** — value-membership context (vocabulary graphs, cross-ledger model, per-txn memo) isn't threaded through the `sh:and`/`or`/`xone`/`node` referenced-shape path; those keep the legacy data-graph lookup. 
- -## Where to look in the code - -| What | File | -|------|------| -| Shape compilation (Turtle/JSON-LD → `CompiledShape`) | `fluree-db-shacl/src/compile.rs` | -| Property path compile + evaluation (`sh:path`) | `fluree-db-shacl/src/path.rs` | -| Shape cache with target indexes | `fluree-db-shacl/src/cache.rs` | -| Per-focus validation engine | `fluree-db-shacl/src/validate.rs` | -| Per-constraint validators (pure values) | `fluree-db-shacl/src/constraints/` | -| Staged-validation loop (per-graph) | `fluree-db-transact/src/stage.rs::validate_staged_nodes` | -| Public transact entry + outcome split | `fluree-db-transact/src/stage.rs::validate_view_with_shacl` | -| Policy types (`ShaclGraphPolicy`, `ShaclValidationOutcome`) | `fluree-db-transact/src/stage.rs` | -| Shared post-stage helper | `fluree-db-api/src/tx.rs::apply_shacl_policy_to_staged_view` | -| Per-graph policy builder | `fluree-db-api/src/tx.rs::build_per_graph_shacl_policy` | -| `f:shapesSource` resolver | `fluree-db-api/src/tx.rs::resolve_shapes_source_g_ids` | -| JSON-LD / SPARQL txn call site | `fluree-db-api/src/tx.rs::stage_with_config_shacl` | -| Turtle insert call site | `fluree-db-api/src/tx.rs::stage_turtle_insert` | -| Commit replay call site | `fluree-db-api/src/commit_transfer.rs` | -| Config field definition | `fluree-db-core/src/ledger_config.rs::ShaclDefaults` | -| Config graph parser | `fluree-db-api/src/config_resolver.rs::read_shacl_defaults` | -| Effective-config merge | `fluree-db-api/src/config_resolver.rs::merge_shacl_opts` | - -## Related - -- [Cookbook: SHACL Validation](../guides/cookbook-shacl.md) — user-facing usage guide -- [Setting Groups — SHACL](../ledger-config/setting-groups.md#shacl-defaults) — config reference -- [Override Control](../ledger-config/override-control.md) — three-tier precedence and monotonicity rules -- [Crate map](../reference/crate-map.md) — layering overview diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 
7fe09b18b5..5f66910a08 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -94,6 +94,62 @@ ex:PersonShape a sh:NodeShape ; See [Predicate-target shapes](#predicate-target-shapes) for notes on how the staged-path validator discovers focus nodes for `sh:targetSubjectsOf` / `sh:targetObjectsOf`. +## Property paths + +`sh:path` is usually a single predicate, but it can also be a **property path expression**. The path is evaluated against the focus node to produce the set of *value nodes* the constraints then apply to — so `sh:minCount`, `sh:datatype`, `sh:class`, etc. all work over a path exactly as they do over a plain predicate. + +| Path form | Turtle syntax | Reaches | +|-----------|---------------|---------| +| Predicate | `sh:path ex:knows` | objects of `ex:knows` | +| Inverse | `sh:path [ sh:inversePath ex:parent ]` | subjects that point at the focus via `ex:parent` | +| Sequence | `sh:path ( ex:knows schema:name )` | names of the people the focus knows | +| Alternative | `sh:path [ sh:alternativePath ( ex:email ex:altEmail ) ]` | values via **either** predicate | +| Zero-or-more | `sh:path [ sh:zeroOrMorePath ex:parent ]` | the focus **and** all transitive `ex:parent` ancestors | +| One-or-more | `sh:path [ sh:oneOrMorePath ex:parent ]` | all transitive `ex:parent` ancestors (excludes the focus) | +| Zero-or-one | `sh:path [ sh:zeroOrOnePath ex:parent ]` | the focus and its direct parents | + +These nest: `sh:path ( [ sh:inversePath ex:parent ] schema:name )` reaches the names of the focus's children. + +```turtle +# Every Parent must have at least one child (something points at it via ex:parent), +# and each place a Person knows-of must be named. +ex:ParentShape a sh:NodeShape ; + sh:targetClass ex:Parent ; + sh:property [ + sh:path [ sh:inversePath ex:parent ] ; + sh:minCount 1 ; + sh:message "A Parent must have at least one child" + ] . 
+ +ex:SocialiteShape a sh:NodeShape ; + sh:targetClass ex:Socialite ; + sh:property [ + sh:path ( ex:knows schema:name ) ; + sh:datatype xsd:string ; + sh:minCount 1 + ] . +``` + +In **JSON-LD**, a sequence path is written with `@list`, and the blank-node forms are written as nested objects: + +```json +{ + "@id": "ex:SocialiteShape", + "@type": "sh:NodeShape", + "sh:targetClass": { "@id": "ex:Socialite" }, + "sh:property": [{ + "sh:path": { "@list": [ { "@id": "ex:knows" }, { "@id": "schema:name" } ] }, + "sh:datatype": { "@id": "xsd:string" }, + "sh:minCount": 1 + }, { + "sh:path": { "sh:inversePath": { "@id": "ex:parent" } }, + "sh:minCount": 1 + }] +} +``` + +**Not supported:** the inverse of a composite path (e.g. `[ sh:inversePath ( ex:a ex:b ) ]`). `sh:inversePath` may only wrap a single predicate. An unsupported or unresolvable path is rejected when the shape is compiled — the transaction fails with a shape error rather than silently passing. + ## Constraint patterns ### Cardinality — required and multi-valued @@ -457,7 +513,7 @@ The following SHACL constructs are parsed/compiled but currently **no-ops** at v - `sh:uniqueLang`, `sh:languageIn` — require language-tag metadata on flakes, which isn't yet threaded through the validation path. - `sh:qualifiedValueShape` (+ `sh:qualifiedMinCount` / `sh:qualifiedMaxCount`) — requires recursive nested-shape counting. -These are tracked in the SHACL compliance effort. Contributors: see [Contributing / SHACL implementation](../contributing/shacl-implementation.md). +These are tracked in the SHACL compliance effort. 
## Shapes are data @@ -481,4 +537,3 @@ Because shapes live as regular RDF in your ledger: - [Setting Groups — SHACL](../ledger-config/setting-groups.md#shacl-defaults) — Configuration reference for `f:shaclDefaults` - [Override Control](../ledger-config/override-control.md) — Per-graph / query-time override rules - [Writing Config Data](../ledger-config/writing-config.md) — How to transact into the config graph -- [Contributing / SHACL implementation](../contributing/shacl-implementation.md) — How the pipeline works internally (for contributors) From ac4afaf2a7b4c2da1ad37381c8df4212e3d462d7 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 07:48:12 -0400 Subject: [PATCH 07/23] feat(shacl): surface sh:message as the violation message sh:message was compiled onto property and node shapes but never read when building validation results, so custom messages documented in the SHACL cookbook were silently ignored. Violations now use the property shape's sh:message (property constraints and per-value logical constraints) or the node shape's sh:message (sh:closed and node-level logical constraints), falling back to the generated message when no custom message is declared. Nested anonymous shapes keep generated messages (NestedShape carries no message). 
--- fluree-db-api/src/shacl_tests.rs | 77 +++++++++++++++++++++++++ fluree-db-shacl/src/lib.rs | 3 + fluree-db-shacl/src/validate.rs | 99 +++++++++++++++++++------------- 3 files changed, 138 insertions(+), 41 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 81a35e46b2..db0c797eaa 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2349,3 +2349,80 @@ async fn shacl_unsupported_path_rejected() { .unwrap_err(); assert_shacl_shape_error(err, "sh:inversePath"); } + +// =========================================================================== +// Custom violation messages (sh:message) +// =========================================================================== + +/// `sh:message` on a property shape replaces the generated message. +#[tokio::test] +async fn shacl_custom_message_property_shape() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:PersonShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Person"}, + "sh:property": [{ + "@id": "ex:pshape_name", + "sh:path": {"@id": "schema:name"}, + "sh:minCount": 1, + "sh:message": "Every person must have a name" + }] + }); + + let ledger = fluree.create_ledger("shacl/msg-prop:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + let err = fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:nameless", + "@type": "ex:Person" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "Every person must have a name"); +} + +/// `sh:message` on a node shape replaces the generated message for node-level +/// constraints (`sh:closed` here). 
+#[tokio::test] +async fn shacl_custom_message_node_shape_closed() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:StrictShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Strict"}, + "sh:closed": true, + "sh:message": "Only declared properties are allowed on a Strict record", + "sh:property": [{ + "@id": "ex:pshape_label", + "sh:path": {"@id": "ex:label"} + }] + }); + + let ledger = fluree.create_ledger("shacl/msg-closed:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + let err = fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:record1", + "@type": "ex:Strict", + "ex:label": "ok", + "ex:extra": "not allowed" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "Only declared properties are allowed"); +} diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index c6f8132f45..b97f4d368f 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -23,6 +23,9 @@ //! - Closed: `sh:closed`, `sh:ignoredProperties` //! - Pair: `sh:equals`, `sh:disjoint`, `sh:lessThan`, `sh:lessThanOrEquals` //! - Logical: `sh:not`, `sh:and`, `sh:or`, `sh:xone` +//! - Messages: `sh:message` on a property shape (or on the node shape for +//! `sh:closed` and node-level logical constraints) replaces the generated +//! violation message //! //! # Property Paths (`sh:path`) //! 
diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 957766fe4a..2bea1ff135 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -666,10 +666,9 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: format!( - "Property {} not allowed by closed shape", - prop.name - ), + message: parent_shape.message.clone().unwrap_or_else(|| { + format!("Property {} not allowed by closed shape", prop.name) + }), value: Some(flake.o.clone()), graph_id: None, }); @@ -701,10 +700,12 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: format!( - "Node conforms to shape {} which is not allowed (sh:not)", - nested_shape.id.name - ), + message: parent_shape.message.clone().unwrap_or_else(|| { + format!( + "Node conforms to shape {} which is not allowed (sh:not)", + nested_shape.id.name + ) + }), value: None, graph_id: None, }); @@ -731,7 +732,9 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: format!("sh:and constraint - {}", r.message), + message: parent_shape.message.clone().unwrap_or_else(|| { + format!("sh:and constraint - {}", r.message) + }), value: r.value, graph_id: None, }); @@ -776,10 +779,12 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: format!( - "Node does not conform to any shape in sh:or. Violations: {}", - all_messages.join("; ") - ), + message: parent_shape.message.clone().unwrap_or_else(|| { + format!( + "Node does not conform to any shape in sh:or. 
Violations: {}", + all_messages.join("; ") + ) + }), value: None, graph_id: None, }); @@ -816,7 +821,9 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: "Node does not conform to any shape in sh:xone".to_string(), + message: parent_shape.message.clone().unwrap_or_else(|| { + "Node does not conform to any shape in sh:xone".to_string() + }), value: None, graph_id: None, }); @@ -827,11 +834,13 @@ fn validate_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: None, severity: Severity::Violation, - message: format!( - "Node conforms to {} shapes in sh:xone (must be exactly 1): {}", - conforming_count, - conforming_shapes.join(", ") - ), + message: parent_shape.message.clone().unwrap_or_else(|| { + format!( + "Node conforms to {} shapes in sh:xone (must be exactly 1): {}", + conforming_count, + conforming_shapes.join(", ") + ) + }), value: None, graph_id: None, }); @@ -1037,7 +1046,7 @@ async fn validate_property_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: violation.message, + message: prop_shape.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); @@ -1053,7 +1062,7 @@ async fn validate_property_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: violation.message, + message: prop_shape.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); @@ -1070,7 +1079,7 @@ async fn validate_property_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: violation.message, + message: prop_shape.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); @@ -1149,11 +1158,13 @@ async fn 
validate_property_value_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: format!( - "Value {:?} does not conform to any shape in sh:or (tried: {})", - value, - all_messages.join(", ") - ), + message: prop_shape.message.clone().unwrap_or_else(|| { + format!( + "Value {:?} does not conform to any shape in sh:or (tried: {})", + value, + all_messages.join(", ") + ) + }), value: Some(value.clone()), graph_id: None, }); @@ -1182,10 +1193,12 @@ async fn validate_property_value_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: format!( - "Value {:?} does not conform to shape {} (sh:and)", - value, nested.id.name - ), + message: prop_shape.message.clone().unwrap_or_else(|| { + format!( + "Value {:?} does not conform to shape {} (sh:and)", + value, nested.id.name + ) + }), value: Some(value.clone()), graph_id: None, }); @@ -1222,9 +1235,9 @@ async fn validate_property_value_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: format!( - "Value {value:?} does not conform to any shape in sh:xone" - ), + message: prop_shape.message.clone().unwrap_or_else(|| { + format!("Value {value:?} does not conform to any shape in sh:xone") + }), value: Some(value.clone()), graph_id: None, }); @@ -1235,9 +1248,11 @@ async fn validate_property_value_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: format!( - "Value {value:?} conforms to {conforming_count} shapes in sh:xone (must be exactly 1)" - ), + message: prop_shape.message.clone().unwrap_or_else(|| { + format!( + "Value {value:?} conforms to {conforming_count} shapes in sh:xone (must be exactly 1)" + ) + }), value: 
Some(value.clone()), graph_id: None, }); @@ -1265,10 +1280,12 @@ async fn validate_property_value_structural_constraint<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(prop_shape.id.clone()), severity: prop_shape.severity, - message: format!( - "Value {:?} conforms to shape {} which is not allowed (sh:not)", - value, nested.id.name - ), + message: prop_shape.message.clone().unwrap_or_else(|| { + format!( + "Value {:?} conforms to shape {} which is not allowed (sh:not)", + value, nested.id.name + ) + }), value: Some(value.clone()), graph_id: None, }); From 728e4c8be875f3590025fbc9e0872ca381a6bd2d Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 07:50:54 -0400 Subject: [PATCH 08/23] fix(shacl): evaluate complex paths in nested shapes; scope unresolvable paths to targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Property-path hardening follow-ups: - Nested shape members (sh:or/sh:and/sh:xone/sh:not) now carry the compiled PropertyPath and evaluate it, instead of scanning the path blank node as a bare predicate. A complex path on a nested member (e.g. an inverse path inside sh:or) no longer silently never matches. - An unsupported or unresolvable sh:path now compiles to PropertyPath::Unresolvable(reason) instead of failing shape compilation. The reason surfaces as a violation only when the owning shape fires on a focus node, so one broken shape no longer wedges every transaction on the ledger — the failure is scoped to that shape's targets. - references_blank_node() walks the whole AST so a path whose structure lives in a not-yet-scanned graph isn't accepted with a bnode predicate. - ordered_objects keeps indexed and unindexed flakes from interleaving. - literal members in a sequence path now error instead of being dropped. - closure() seeds visited with the focus (no re-expansion on cycles). - reuse fluree_db_core::id_datatype_sid() for ref value-node datatypes. 
Tests: complex-path-in-nested-or, unsupported-path scoped-to-targets. --- docs/guides/cookbook-shacl.md | 2 +- fluree-db-api/src/shacl_tests.rs | 120 +++++++++++++++++++++---- fluree-db-shacl/src/compile.rs | 62 ++++++++----- fluree-db-shacl/src/constraints/mod.rs | 3 +- fluree-db-shacl/src/lib.rs | 6 +- fluree-db-shacl/src/path.rs | 82 ++++++++++++----- fluree-db-shacl/src/validate.rs | 73 ++++++++++++--- 7 files changed, 269 insertions(+), 79 deletions(-) diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 5f66910a08..78703a8d2b 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -148,7 +148,7 @@ In **JSON-LD**, a sequence path is written with `@list`, and the blank-node form } ``` -**Not supported:** the inverse of a composite path (e.g. `[ sh:inversePath ( ex:a ex:b ) ]`). `sh:inversePath` may only wrap a single predicate. An unsupported or unresolvable path is rejected when the shape is compiled — the transaction fails with a shape error rather than silently passing. +**Not supported:** the inverse of a composite path (e.g. `[ sh:inversePath ( ex:a ex:b ) ]`). `sh:inversePath` may only wrap a single predicate. An unsupported or unresolvable path doesn't silently pass — it produces a violation whenever the owning shape validates a focus node, so any transaction touching data that shape targets is rejected with a clear message. A broken path only affects the shape's own targets, not unrelated writes. 
## Constraint patterns diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index db0c797eaa..67a9eedf66 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2089,21 +2089,6 @@ async fn shacl_and_with_inline_anonymous_shapes() { // Property Paths (sh:path expressions) // =========================================================================== -/// Assert that an error is a SHACL *compile/shape* error (not a data violation) -/// whose message contains `expected`. -fn assert_shacl_shape_error(err: ApiError, expected: &str) { - match err { - ApiError::Transact(TransactError::Shacl(inner)) => { - let message = inner.to_string(); - assert!( - message.contains(expected), - "expected shape error to contain '{expected}', got: {message}" - ); - } - other => panic!("expected SHACL shape error, got {other:?}"), - } -} - /// `sh:inversePath` — a Parent must be pointed at by at least one `ex:parent`. #[tokio::test] async fn shacl_inverse_path() { @@ -2334,8 +2319,9 @@ async fn shacl_unsupported_path_rejected() { let ledger = fluree.create_ledger("shacl/badpath:main").await.unwrap(); let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; - // Compilation runs when a targetable instance triggers validation; the - // unsupported path must surface as a shape error rather than pass silently. + // The unsupported path surfaces as a violation when the shape fires on a + // targeted focus node — scoped to the shape's targets, not a ledger-wide + // compile failure — and must not silently pass. let err = fluree .upsert( ledger, @@ -2347,7 +2333,105 @@ async fn shacl_unsupported_path_rejected() { ) .await .unwrap_err(); - assert_shacl_shape_error(err, "sh:inversePath"); + assert_shacl_violation(err, "inversePath"); +} + +/// An unsupported path on a node that the shape does **not** target must not +/// block unrelated writes — the failure is scoped to the shape's focus nodes. 
+#[tokio::test] +async fn shacl_unsupported_path_scoped_to_targets() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:BadPathShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Thing"}, + "sh:property": [{ + "@id": "ex:pshape_bad", + "sh:path": {"sh:inversePath": {"sh:oneOrMorePath": {"@id": "ex:a"}}}, + "sh:minCount": 1 + }] + }); + + let ledger = fluree + .create_ledger("shacl/badpath-scoped:main") + .await + .unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + // A subject that is not an ex:Thing is unaffected by the broken shape. + fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:unrelated", + "@type": "ex:Widget", + "schema:name": "fine" + }), + ) + .await + .expect("writes to non-targeted subjects must not be blocked by a broken shape"); +} + +/// A complex path on a member of `sh:or` must be *evaluated*, not scanned as a +/// bare blank-node predicate. Here a Doc conforms if it has a title OR is cited +/// by something (`^ex:cites`); a doc satisfied only via the inverse-path member +/// must pass — the pre-fix bug scanned the path bnode and never matched. +#[tokio::test] +async fn shacl_complex_path_in_nested_or() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:DocShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Doc"}, + "sh:or": {"@list": [ + {"sh:path": {"@id": "ex:title"}, "sh:minCount": 1}, + {"sh:path": {"sh:inversePath": {"@id": "ex:cites"}}, "sh:minCount": 1} + ]} + }); + + // Valid: doc1 has no title but IS cited → satisfied via the inverse-path member. 
+ let ledger_ok = fluree + .create_ledger("shacl/nested-path-ok:main") + .await + .unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:doc1", "@type": "ex:Doc"}, + {"@id": "ex:paper", "ex:cites": {"@id": "ex:doc1"}} + ] + }), + ) + .await + .expect("a Doc satisfied only via the inverse-path sh:or member should pass"); + + // Invalid: doc2 has no title and is cited by nothing → both members fail. + let ledger_bad = fluree + .create_ledger("shacl/nested-path-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:doc2", + "@type": "ex:Doc" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:or"); } // =========================================================================== diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index f91ba60951..e3d19025f9 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -4,11 +4,11 @@ //! efficient `CompiledShape` structures that can be used for validation. use crate::constraints::{Constraint, NestedShape, NodeConstraint}; -use crate::error::{Result, ShaclError}; +use crate::error::Result; use crate::path::{resolve_sh_path, PropertyPath}; use crate::predicates; use fluree_db_core::{Flake, FlakeValue, GraphDbRef, IndexType, RangeMatch, RangeTest, Sid}; -use fluree_vocab::namespaces::{BLANK_NODE, RDF, SHACL}; +use fluree_vocab::namespaces::{RDF, SHACL}; use fluree_vocab::rdf_names; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -288,16 +288,25 @@ impl ShapeCompiler { .collect(); for ps_id in pending { - let Some(resolved) = resolve_sh_path(db, &ps_id).await? 
else { - continue; - }; - // A blank node resolving to `Predicate(itself)` means no path - // structure was found in this graph — leave it for a later pass. - let meaningful = match &resolved { - PropertyPath::Predicate(sid) => sid.namespace_code != BLANK_NODE, - _ => true, + let resolved = match resolve_sh_path(db, &ps_id).await { + Ok(Some(path)) => path, + // No usable sh:path in this graph — leave for a later pass. + Ok(None) => continue, + // Unsupported form (e.g. inverse of a composite path). Record the + // reason as an `Unresolvable` path — surfaced as a violation when + // the shape fires, not as a ledger-wide compile failure. The error + // only fires once the structure is present, so it's graph-correct. + Err(err) => { + if let Some(ps) = self.property_shapes.get_mut(&ps_id) { + ps.resolved_path = Some(PropertyPath::Unresolvable(err.to_string())); + } + continue; + } }; - if meaningful { + // A path still referencing a blank node anywhere in its AST wasn't + // fully resolved here (its structure lives in a graph not yet + // scanned) — leave it for a later pass. + if !resolved.references_blank_node() { if let Some(ps) = self.property_shapes.get_mut(&ps_id) { ps.resolved_path = Some(resolved); } @@ -709,16 +718,11 @@ impl ShapeCompiler { for ps_id in &data.property_shape_ids { if let Some(ps_data) = ps_map.get(ps_id) { if ps_data.path.is_some() { - // `sh:path` present but no AST => a blank-node path - // expression we could not resolve. Reject loudly instead - // of silently scanning a non-existent predicate. - let path = ps_data.resolved_path.clone().ok_or_else(|| { - ShaclError::InvalidConstraint { - shape_id: ps_id.clone(), - message: "unsupported or unresolvable sh:path expression" - .to_string(), - } - })?; + // `sh:path` present. If it never resolved to an AST it + // becomes an `Unresolvable` path, surfaced as a violation + // only when this shape fires — not a compile error that + // would wedge every transaction on the ledger. 
+ let path = resolved_path_of(ps_data); let constraints = build_constraints_from_ps_data(ps_data); // Check if this property shape's subject also has structural @@ -812,6 +816,16 @@ fn build_constraints_from_ps_data(ps_data: &PropertyShapeData) -> Vec PropertyPath { + ps_data.resolved_path.clone().unwrap_or_else(|| { + PropertyPath::Unresolvable("unsupported or unresolvable sh:path expression".to_string()) + }) +} + /// Build a `NestedShape` for a member of sh:or/sh:and/sh:xone/sh:not, /// inlining value-level or property constraints from `PropertyShapeData` /// when the member is an anonymous shape. @@ -828,11 +842,13 @@ fn build_nested_shape(sid: &ShapeId, ps_map: &HashMap)>, + pub property_constraints: Vec<(PropertyPath, Vec)>, /// Node-level constraints pub node_constraints: Vec, /// Value-level constraints (e.g. sh:datatype on an anonymous shape without sh:path). diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index b97f4d368f..1955c1f7ca 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -34,8 +34,10 @@ //! by [`path::eval_path`]): `sh:inversePath` (over a single predicate), sequence //! paths (RDF lists), `sh:alternativePath`, `sh:zeroOrMorePath`, //! `sh:oneOrMorePath`, and `sh:zeroOrOnePath` — including nesting of these. -//! The one unsupported form, the inverse of a composite path (`^(p1/p2)`), is -//! rejected at shape-compile time rather than silently misbehaving. +//! The one unsupported form, the inverse of a composite path (`^(p1/p2)`), +//! compiles to [`PropertyPath::Unresolvable`] and surfaces as a violation when +//! the owning shape fires on a focus node — scoped to that shape's targets +//! rather than failing every transaction on the ledger. //! //! # Target Selection //! diff --git a/fluree-db-shacl/src/path.rs b/fluree-db-shacl/src/path.rs index 92107f50bd..789872061a 100644 --- a/fluree-db-shacl/src/path.rs +++ b/fluree-db-shacl/src/path.rs @@ -9,13 +9,17 @@ //! 
focus node to produce the set of *value nodes* the path reaches — the same set //! that a simple predicate would produce via a single `SPOT` scan. //! -//! Unsupported forms (e.g. the inverse of a composite path, `^(p1/p2)`) are -//! rejected at compile time with a clear error rather than silently misbehaving. +//! Unsupported forms (e.g. the inverse of a composite path, `^(p1/p2)`) compile +//! to [`PropertyPath::Unresolvable`] rather than silently misbehaving; the +//! reason is surfaced as a violation when the owning shape fires on a focus +//! node, keeping the failure scoped to that shape's targets. use crate::error::{Result, ShaclError}; use crate::predicates; -use fluree_db_core::{FlakeValue, GraphDbRef, IndexType, RangeMatch, RangeTest, Sid}; -use fluree_vocab::namespaces::{JSON_LD, RDF, SHACL}; +use fluree_db_core::{ + id_datatype_sid, FlakeValue, GraphDbRef, IndexType, RangeMatch, RangeTest, Sid, +}; +use fluree_vocab::namespaces::{BLANK_NODE, RDF, SHACL}; use fluree_vocab::rdf_names; use std::collections::HashSet; use std::future::Future; @@ -39,6 +43,11 @@ pub enum PropertyPath { OneOrMore(Box), /// `sh:zeroOrOnePath`: `p?`. ZeroOrOne(Box), + /// A path that could not be compiled (unsupported form, or a blank-node + /// expression whose structure never resolved). Carries the reason. Rather + /// than failing shape compilation for the whole ledger, this is surfaced as + /// a violation only when the owning shape actually fires on a focus node. + Unresolvable(String), } /// A value node reached by a path: `(value, datatype)`, mirroring a flake's @@ -64,11 +73,30 @@ impl PropertyPath { pub fn is_simple(&self) -> bool { matches!(self, PropertyPath::Predicate(_)) } -} -/// Datatype SID carried by reference (node) value nodes: `$id`. -fn ref_dt() -> Sid { - Sid::new(JSON_LD, "id") + /// The unresolvable reason, if this path failed to compile. 
+ pub fn unresolvable_reason(&self) -> Option<&str> { + match self { + PropertyPath::Unresolvable(reason) => Some(reason), + _ => None, + } + } + + /// Whether any predicate anywhere in the AST is a blank node — i.e. the + /// path structure was not fully resolved (its sub-structure lives in a + /// graph not yet scanned). Used as a belt-and-braces resolution check. + pub fn references_blank_node(&self) -> bool { + match self { + PropertyPath::Predicate(p) | PropertyPath::Inverse(p) => p.namespace_code == BLANK_NODE, + PropertyPath::Sequence(steps) | PropertyPath::Alternative(steps) => { + steps.iter().any(PropertyPath::references_blank_node) + } + PropertyPath::ZeroOrMore(inner) + | PropertyPath::OneOrMore(inner) + | PropertyPath::ZeroOrOne(inner) => inner.references_blank_node(), + PropertyPath::Unresolvable(_) => false, + } + } } fn shacl(name: &str) -> Sid { @@ -104,8 +132,16 @@ pub fn resolve_sh_path<'a>( // JSON-LD @list sequence: each ordered object is a path step. let mut steps = Vec::new(); for obj in members { - if let FlakeValue::Ref(sid) = obj { - steps.push(resolve_path_node(db, &sid).await?); + match obj { + FlakeValue::Ref(sid) => steps.push(resolve_path_node(db, &sid).await?), + // A literal in a sequence path is invalid; reject rather + // than silently dropping the step. + _ => { + return Err(unsupported( + ps_subject, + "sh:path sequence step is a literal", + )) + } } } Ok(Some(PropertyPath::Sequence(steps))) @@ -247,16 +283,16 @@ async fn ordered_objects( RangeMatch::subject_predicate(subject.clone(), predicate.clone()), ) .await?; - let mut items: Vec<(i32, FlakeValue)> = flakes + // Order by the JSON-LD list index when present; unindexed flakes keep their + // scan order and sort after indexed ones (never interleaved). JSON-LD `@list` + // always stamps `m.i`, so in practice all-or-none carry an index. 
+ let mut items: Vec<(Option, usize, FlakeValue)> = flakes .iter() .enumerate() - .map(|(pos, f)| { - let idx = f.m.as_ref().and_then(|m| m.i).unwrap_or(pos as i32); - (idx, f.o.clone()) - }) + .map(|(pos, f)| (f.m.as_ref().and_then(|m| m.i), pos, f.o.clone())) .collect(); - items.sort_by_key(|(i, _)| *i); - Ok(items.into_iter().map(|(_, v)| v).collect()) + items.sort_by_key(|(idx, pos, _)| (idx.is_none(), idx.unwrap_or(0), *pos)); + Ok(items.into_iter().map(|(_, _, v)| v).collect()) } /// Evaluate a property path from `focus`, returning the reached value nodes as @@ -280,16 +316,19 @@ pub fn eval_path<'a>( Ok(dedup(out)) } PropertyPath::ZeroOrMore(inner) => { - let mut out = vec![(FlakeValue::Ref(focus.clone()), ref_dt())]; + let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid())]; out.extend(closure(db, focus, inner).await?); Ok(dedup(out)) } PropertyPath::OneOrMore(inner) => Ok(dedup(closure(db, focus, inner).await?)), PropertyPath::ZeroOrOne(inner) => { - let mut out = vec![(FlakeValue::Ref(focus.clone()), ref_dt())]; + let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid())]; out.extend(eval_path(db, focus, inner).await?); Ok(dedup(out)) } + // Never evaluated: validation surfaces a violation for the owning + // shape before reaching value evaluation. Defensive empty set. + PropertyPath::Unresolvable(_) => Ok(Vec::new()), } }) } @@ -317,7 +356,7 @@ async fn inverse_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result Result> { let mut out: Vec<(FlakeValue, Sid)> = Vec::new(); - let mut visited: HashSet = HashSet::new(); + // Seed `visited` with the focus so a cycle back to it isn't re-expanded. 
+ let mut visited: HashSet = HashSet::from([focus.clone()]); let mut queue: Vec = vec![focus.clone()]; while let Some(node) = queue.pop() { diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 2bea1ff135..25c8c20720 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -896,17 +896,48 @@ fn validate_nested_shape<'a>( // Validate property constraints for (path, constraints) in &nested.property_constraints { - // Get all values for this property on the focus node - let flakes = db - .range( - IndexType::Spot, - RangeTest::Eq, - RangeMatch::subject_predicate(focus_node.clone(), path.clone()), - ) - .await?; + // A path that never compiled surfaces as a violation on this member. + if let Some(reason) = path.unresolvable_reason() { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: None, + source_shape: parent_shape.id.clone(), + source_constraint: Some(nested.id.clone()), + severity: Severity::Violation, + message: format!("Unsupported sh:path expression: {reason}"), + value: None, + graph_id: None, + }); + continue; + } - let values: Vec = flakes.iter().map(|f| f.o.clone()).collect(); - let datatypes: Vec = flakes.iter().map(|f| f.dt.clone()).collect(); + // Value nodes reached by the member's path. Simple predicate → SPOT + // scan; complex path → evaluate the AST (same as top-level shapes). + let (values, datatypes): (Vec, Vec) = + if let Some(pred) = path.as_predicate() { + let flakes = db + .range( + IndexType::Spot, + RangeTest::Eq, + RangeMatch::subject_predicate(focus_node.clone(), pred.clone()), + ) + .await?; + ( + flakes.iter().map(|f| f.o.clone()).collect(), + flakes.iter().map(|f| f.dt.clone()).collect(), + ) + } else { + crate::path::eval_path(db, focus_node, path) + .await? 
+ .into_iter() + .unzip() + }; + + let result_path = path.as_predicate().cloned(); + let path_label = path + .as_predicate() + .map(|p| p.name.to_string()) + .unwrap_or_else(|| "path".to_string()); // Validate each constraint for constraint in constraints { @@ -931,13 +962,13 @@ fn validate_nested_shape<'a>( if source_values != target_values { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(path.clone()), + result_path: result_path.clone(), source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, message: format!( "Value set for {} does not equal value set for {}", - path.name, target_prop.name + path_label, target_prop.name ), value: None, graph_id: None, @@ -949,7 +980,7 @@ fn validate_nested_shape<'a>( for violation in violations { results.push(ValidationResult { focus_node: focus_node.clone(), - result_path: Some(path.clone()), + result_path: result_path.clone(), source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, @@ -991,6 +1022,22 @@ async fn validate_property_shape<'a>( ) -> Result> { let mut results = Vec::new(); + // A path that never compiled surfaces here (only for focus nodes this shape + // actually targets) rather than as a ledger-wide compile failure. + if let Some(reason) = prop_shape.path.unresolvable_reason() { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: None, + source_shape: parent_shape.id.clone(), + source_constraint: Some(prop_shape.id.clone()), + severity: prop_shape.severity, + message: format!("Unsupported sh:path expression: {reason}"), + value: None, + graph_id: None, + }); + return Ok(results); + } + // Get all value nodes reached by this property shape's path on the focus node. // Simple single-predicate paths take the plain SPOT scan; complex paths // (inverse/sequence/alternative/transitive) evaluate the path AST. 
From ae14bd44a441903da7ca40372d82ffe161b76376 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 08:11:56 -0400 Subject: [PATCH 09/23] feat(shacl): close the silently-broken constraint gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six SHACL constructs compiled (or loaded) without error but never constrained data. All now enforce: - sh:node: new NodeConstraint::Node compiled like the other shape-ref constraints — per-value on property shapes, focus-node on node shapes, with anonymous inline shapes inlined via build_nested_shape. Recursive shape references over cyclic data (FriendShape -> knows -> sh:node FriendShape) terminate via a (focus, shape) active-check guard in validate_shape that assumes conformance on re-entry. - Value constraints directly on a node shape (no sh:path) previously accumulated in a path-less PropertyShapeData that finalize() dropped; they now become node_constraints evaluated against the focus node, and sh:message/sh:name that landed on that entry backfill the shape. - sh:deactivated is now parsed; deactivated node and property shapes are ignored entirely, including when referenced via sh:node or logical constraints. - Implicit class targets: a shape that is also rdfs:Class / owl:Class targets its own instances (bound-object rdf:type scans, cost scales with declared classes). - sh:qualifiedValueShape + sh:qualifiedMin/MaxCount: conforming values are counted against the qualified shape (top-level property shapes; qualifiedValueShapesDisjoint remains unsupported). - sh:ignoredProperties in Turtle RDF-list form is now expanded; the unexpanded list-head blank node was previously treated as the ignored property, so closed shapes rejected the actual members. 
--- docs/guides/cookbook-shacl.md | 64 +++- fluree-db-api/src/shacl_tests.rs | 468 +++++++++++++++++++++++++ fluree-db-shacl/src/compile.rs | 173 ++++++++- fluree-db-shacl/src/constraints/mod.rs | 20 +- fluree-db-shacl/src/lib.rs | 20 +- fluree-db-shacl/src/validate.rs | 281 ++++++++++++++- fluree-vocab/src/lib.rs | 6 + 7 files changed, 995 insertions(+), 37 deletions(-) diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 78703a8d2b..3064725bab 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -91,6 +91,7 @@ ex:PersonShape a sh:NodeShape ; | `sh:targetNode ` | The specific subject `` | | `sh:targetSubjectsOf

<p>` | Every subject that currently has predicate `<p>` | | `sh:targetObjectsOf <p>` | Every node that currently appears as the object of `<p>
` | +| implicit (shape `a rdfs:Class`) | A shape that is also a class targets its own instances — no explicit target needed | See [Predicate-target shapes](#predicate-target-shapes) for notes on how the staged-path validator discovers focus nodes for `sh:targetSubjectsOf` / `sh:targetObjectsOf`. @@ -274,6 +275,67 @@ ex:ContactShape a sh:NodeShape ; Available: `sh:not`, `sh:and`, `sh:or`, `sh:xone`. +### Shape-based constraints (`sh:node`) + +`sh:node` validates a node against another node shape. On a property shape it +applies to each value; directly on a node shape it applies to the focus node +itself. The referenced shape is usually targetless — it fires only where it is +referenced. + +```turtle +ex:AddressShape a sh:NodeShape ; + sh:property [ sh:path ex:postalCode ; sh:minCount 1 ] . + +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:address ; + sh:node ex:AddressShape + ] . +``` + +Recursive references are safe: a shape may reference itself (directly or via a +chain), and validation over cyclic data (e.g. a mutual `ex:knows` graph) +terminates — a node already being validated against a shape higher in the +evaluation is assumed conforming, matching common SHACL engine behavior. + +### Qualified value shapes + +`sh:qualifiedValueShape` counts how many values conform to a shape and checks +the count against `sh:qualifiedMinCount` / `sh:qualifiedMaxCount` — unlike +`sh:node`, values that don't conform are fine as long as enough do. + +```turtle +ex:TeamShape a sh:NodeShape ; + sh:targetClass ex:Team ; + sh:property [ + sh:path ex:member ; + sh:qualifiedValueShape ex:BadgedMemberShape ; + sh:qualifiedMinCount 1 + ] . +``` + +(`sh:qualifiedValueShapesDisjoint` is not supported.) + +### Constraints on the node itself + +Value constraints declared directly on a node shape (without `sh:path`) apply +to the focus node. 
Combined with a predicate target this restricts which nodes +may appear in a position: + +```turtle +# Only ex:active / ex:inactive may be used as an ex:status value. +ex:StatusShape a sh:NodeShape ; + sh:targetObjectsOf ex:status ; + sh:in ( ex:active ex:inactive ) . +``` + +### Deactivating a shape + +`sh:deactivated true` turns a shape off without deleting it — it stops firing +for its targets and is treated as conforming when referenced via `sh:node` or +logical constraints. + ### Closed shapes ```turtle @@ -511,7 +573,7 @@ All three routes go through the same post-stage helper, so the ledger's configur The following SHACL constructs are parsed/compiled but currently **no-ops** at validation time. Shapes using them load without error but don't constrain data: - `sh:uniqueLang`, `sh:languageIn` — require language-tag metadata on flakes, which isn't yet threaded through the validation path. -- `sh:qualifiedValueShape` (+ `sh:qualifiedMinCount` / `sh:qualifiedMaxCount`) — requires recursive nested-shape counting. +- `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for qualified value shapes (the counting form of `sh:qualifiedValueShape` is supported). These are tracked in the SHACL compliance effort. diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 67a9eedf66..f4386383dd 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2510,3 +2510,471 @@ async fn shacl_custom_message_node_shape_closed() { .unwrap_err(); assert_shacl_violation(err, "Only declared properties are allowed"); } + +// =========================================================================== +// sh:node (shape-based constraint) +// =========================================================================== + +/// `sh:node` referencing a named (targetless) node shape: each value of the +/// property must conform to it. 
+#[tokio::test] +async fn shacl_node_named_shape_on_property() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@graph": [ + { + "@id": "ex:AddressShape", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_postal", + "sh:path": {"@id": "ex:postalCode"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:PersonShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Person"}, + "sh:property": [{ + "@id": "ex:pshape_address", + "sh:path": {"@id": "ex:address"}, + "sh:node": {"@id": "ex:AddressShape"} + }] + } + ] + }); + + // Valid: the address node has a postal code. + let ledger_ok = fluree.create_ledger("shacl/node-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:alice", + "@type": "ex:Person", + "ex:address": {"@id": "ex:addr1", "ex:postalCode": "12345"} + }), + ) + .await + .expect("address conforming to AddressShape should pass"); + + // Invalid: the address node lacks a postal code. + let ledger_bad = fluree.create_ledger("shacl/node-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:bob", + "@type": "ex:Person", + "ex:address": {"@id": "ex:addr2", "ex:street": "Main St"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:node"); +} + +/// `sh:node` directly on a node shape: the focus node itself must conform to +/// the referenced shape. 
+#[tokio::test] +async fn shacl_node_on_node_shape() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@graph": [ + { + "@id": "ex:EmployeeShape", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_empid", + "sh:path": {"@id": "ex:employeeId"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:ManagerShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Manager"}, + "sh:node": {"@id": "ex:EmployeeShape"} + } + ] + }); + + // Valid: manager carries an employeeId, so it conforms to EmployeeShape. + let ledger_ok = fluree.create_ledger("shacl/nodens-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:carol", + "@type": "ex:Manager", + "ex:employeeId": "E-77" + }), + ) + .await + .expect("manager with employeeId should pass"); + + // Invalid: manager without an employeeId. + let ledger_bad = fluree.create_ledger("shacl/nodens-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:dave", + "@type": "ex:Manager" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:node"); +} + +/// A recursive shape reference over cyclic data must terminate: FriendShape +/// requires a name and validates `ex:knows` values against itself, while the +/// data forms a knows-cycle (alice ↔ bob). 
+#[tokio::test] +async fn shacl_node_recursive_shape_cyclic_data() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:FriendShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Friend"}, + "sh:property": [ + { + "@id": "ex:pshape_friend_name", + "sh:path": {"@id": "schema:name"}, + "sh:minCount": 1 + }, + { + "@id": "ex:pshape_friend_knows", + "sh:path": {"@id": "ex:knows"}, + "sh:node": {"@id": "ex:FriendShape"} + } + ] + }); + + // Valid: both nodes named; the knows-cycle must not hang validation. + let ledger_ok = fluree.create_ledger("shacl/rec-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:alice", "@type": "ex:Friend", "schema:name": "Alice", + "ex:knows": {"@id": "ex:bob"}}, + {"@id": "ex:bob", "@type": "ex:Friend", "schema:name": "Bob", + "ex:knows": {"@id": "ex:alice"}} + ] + }), + ) + .await + .expect("cyclic knows-graph with conforming nodes should pass"); + + // Invalid: bob has no name, so alice's knows-value fails FriendShape. 
+ let ledger_bad = fluree.create_ledger("shacl/rec-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:carol", "@type": "ex:Friend", "schema:name": "Carol", + "ex:knows": {"@id": "ex:mallory"}}, + {"@id": "ex:mallory", "ex:knows": {"@id": "ex:carol"}} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:node"); +} + +// =========================================================================== +// Node-shape value constraints, sh:deactivated, implicit class targets, +// sh:qualifiedValueShape, and Turtle-list sh:ignoredProperties +// =========================================================================== + +/// Value constraints directly on a node shape (no sh:path) apply to the focus +/// node itself — here `sh:in` restricts which nodes may appear as the object +/// of ex:status. +#[tokio::test] +async fn shacl_value_constraint_on_node_shape() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:StatusShape", + "@type": "sh:NodeShape", + "sh:targetObjectsOf": {"@id": "ex:status"}, + "sh:in": [{"@id": "ex:active"}, {"@id": "ex:inactive"}] + }); + + let ledger_ok = fluree.create_ledger("shacl/nodeval-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:task1", + "ex:status": {"@id": "ex:active"} + }), + ) + .await + .expect("status in the allowed set should pass"); + + let ledger_bad = fluree + .create_ledger("shacl/nodeval-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": 
"ex:task2", + "ex:status": {"@id": "ex:bogus"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "not in the allowed set"); +} + +/// A shape with `sh:deactivated true` must not fire at all. +#[tokio::test] +async fn shacl_deactivated_shape_ignored() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:NameShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Person"}, + "sh:deactivated": true, + "sh:property": [{ + "@id": "ex:pshape_deact_name", + "sh:path": {"@id": "schema:name"}, + "sh:minCount": 1 + }] + }); + + let ledger = fluree.create_ledger("shacl/deact:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + // Would violate minCount if the shape were active. + fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:nameless", + "@type": "ex:Person" + }), + ) + .await + .expect("deactivated shape must not reject data"); +} + +/// Implicit class target: a subject that is both `rdfs:Class` and a node shape +/// targets its own instances without an explicit sh:targetClass. 
+#[tokio::test] +async fn shacl_implicit_class_target() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:Employee", + "@type": ["rdfs:Class", "sh:NodeShape"], + "sh:property": [{ + "@id": "ex:pshape_impl_empid", + "sh:path": {"@id": "ex:employeeId"}, + "sh:minCount": 1 + }] + }); + + let ledger_ok = fluree.create_ledger("shacl/impl-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:e1", + "@type": "ex:Employee", + "ex:employeeId": "E-1" + }), + ) + .await + .expect("instance with employeeId should pass"); + + let ledger_bad = fluree.create_ledger("shacl/impl-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:e2", + "@type": "ex:Employee" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1"); +} + +/// `sh:qualifiedValueShape` + `sh:qualifiedMinCount`: at least N values must +/// conform to the qualified shape. +#[tokio::test] +async fn shacl_qualified_value_shape_min_count() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@graph": [ + { + "@id": "ex:BadgedShape", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_badge", + "sh:path": {"@id": "ex:badge"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:TeamShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Team"}, + "sh:property": [{ + "@id": "ex:pshape_member", + "sh:path": {"@id": "ex:member"}, + "sh:qualifiedValueShape": {"@id": "ex:BadgedShape"}, + "sh:qualifiedMinCount": 1 + }] + } + ] + }); + + // Valid: one of the two members carries a badge. 
+ let ledger_ok = fluree.create_ledger("shacl/qual-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:teamA", "@type": "ex:Team", + "ex:member": [{"@id": "ex:m1"}, {"@id": "ex:m2"}]}, + {"@id": "ex:m1", "ex:badge": "B-1"}, + {"@id": "ex:m2", "ex:role": "guest"} + ] + }), + ) + .await + .expect("team with one badged member should pass"); + + // Invalid: no member conforms to BadgedShape. + let ledger_bad = fluree.create_ledger("shacl/qual-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:teamB", "@type": "ex:Team", + "ex:member": [{"@id": "ex:m3"}]}, + {"@id": "ex:m3", "ex:role": "guest"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1 value(s) conforming"); +} + +/// `sh:ignoredProperties` written as a Turtle RDF list: the list members must +/// be honored (pre-fix, the unexpanded list-head blank node was treated as the +/// ignored property, so the real members were rejected by sh:closed). +#[tokio::test] +async fn shacl_ignored_properties_turtle_list() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shapes_ttl = r#" + @prefix sh: . + @prefix ex: . + + ex:AuditedShape a sh:NodeShape ; + sh:targetClass ex:Audited ; + sh:closed true ; + sh:ignoredProperties ( ex:internal ex:auditLog ) ; + sh:property [ sh:path ex:label ] . 
+ "#; + + let ledger_ok = fluree.create_ledger("shacl/ignored-ok:main").await.unwrap(); + let ledger_ok = fluree + .stage_owned(ledger_ok) + .upsert_turtle(shapes_ttl) + .execute() + .await + .unwrap() + .ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:rec1", + "@type": "ex:Audited", + "ex:label": "ok", + "ex:internal": "meta" + }), + ) + .await + .expect("list-declared ignored property must be allowed on a closed shape"); + + let ledger_bad = fluree + .create_ledger("shacl/ignored-bad:main") + .await + .unwrap(); + let ledger_bad = fluree + .stage_owned(ledger_bad) + .upsert_turtle(shapes_ttl) + .execute() + .await + .unwrap() + .ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:rec2", + "@type": "ex:Audited", + "ex:label": "ok", + "ex:other": "not allowed" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "not allowed by closed shape"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index e3d19025f9..eaab46c9b4 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -117,6 +117,9 @@ struct ShapeData { is_closed: Option, /// sh:ignoredProperties (list of property SIDs) ignored_properties: HashSet, + /// sh:node - references to shapes the node (or each property value, when + /// this entry backs a property shape) must conform to + node_shapes: Vec, /// sh:not - reference to a shape that must NOT match not_shape: Option, /// sh:and - reference to RDF list head (expanded during list processing) @@ -154,6 +157,15 @@ struct PropertyShapeData { pattern_string: Option, /// sh:in list values (accumulated from RDF list traversal) in_values: Vec, + /// sh:deactivated — a deactivated property shape is skipped entirely + deactivated: bool, + /// sh:qualifiedValueShape — reference to the shape conforming values are + /// counted against (combined with the counts in finalize) + qualified_shape: 
Option, + /// sh:qualifiedMinCount + qualified_min: Option, + /// sh:qualifiedMaxCount + qualified_max: Option, } impl ShapeCompiler { @@ -231,12 +243,18 @@ impl ShapeCompiler { // Language predicates::UNIQUE_LANG, predicates::LANGUAGE_IN, + // Shape-based constraints + predicates::NODE, + predicates::QUALIFIED_VALUE_SHAPE, + predicates::QUALIFIED_MIN_COUNT, + predicates::QUALIFIED_MAX_COUNT, // Logical constraints predicates::NOT, predicates::AND, predicates::OR, predicates::XONE, // Metadata + predicates::DEACTIVATED, predicates::SEVERITY, predicates::MESSAGE, predicates::NAME, @@ -244,6 +262,7 @@ impl ShapeCompiler { // Query each input graph for all SHACL predicates, accumulating into // one compiler so cross-graph sh:and/or/xone/sh:in references resolve. + let mut class_typed: HashSet = HashSet::new(); for db in dbs { for pred_name in &shacl_predicates { let pred = Sid::new(SHACL, pred_name); @@ -256,6 +275,28 @@ impl ShapeCompiler { } } + // Collect subjects typed as a class — a shape that is also a class + // implicitly targets its own instances (SHACL "implicit class + // targets"). Bound-object scans, so cost scales with the number of + // declared classes, not the data. + let rdf_type = Sid::new(RDF, rdf_names::TYPE); + for class_class in [ + Sid::new(fluree_vocab::namespaces::RDFS, "Class"), + Sid::new(fluree_vocab::namespaces::OWL, "Class"), + ] { + let flakes = db + .range( + IndexType::Opst, + RangeTest::Eq, + RangeMatch::predicate_object( + rdf_type.clone(), + FlakeValue::Ref(class_class), + ), + ) + .await?; + class_typed.extend(flakes.iter().map(|f| f.s.clone())); + } + // Expand rdf:first/rdf:rest lists referenced by sh:in / sh:and / // sh:or / sh:xone / sh:ignoredProperties. 
Run after each graph so // that lists whose head lives in this graph can resolve — a list @@ -269,9 +310,26 @@ impl ShapeCompiler { compiler.resolve_paths(*db).await?; } + compiler.apply_implicit_class_targets(&class_typed); compiler.finalize() } + /// Add an implicit-class target to every compiled shape that is also + /// declared a class (`rdfs:Class` / `owl:Class`): per SHACL, such a shape + /// targets all instances of itself. + fn apply_implicit_class_targets(&mut self, class_typed: &HashSet) { + for (id, data) in &mut self.shapes { + if class_typed.contains(id) + && !data + .targets + .iter() + .any(|t| matches!(t, TargetType::ImplicitClass(c) if c == id)) + { + data.targets.push(TargetType::ImplicitClass(id.clone())); + } + } + } + /// Resolve raw `sh:path` objects into [`PropertyPath`] ASTs. /// /// A plain predicate IRI resolves to [`PropertyPath::Predicate`]; a blank-node @@ -345,6 +403,36 @@ impl ShapeCompiler { } } + // Expand sh:ignoredProperties RDF-list heads (Turtle encoding). JSON-LD + // @list flattens to one flake per member, so members arrive directly; + // a Turtle list arrives as a single blank-node head that must be + // walked, otherwise the head itself would be treated as the ignored + // property and the real members would be rejected by sh:closed. + let ignored_candidates: Vec<(Sid, Sid)> = self + .shapes + .iter() + .flat_map(|(shape_id, sd)| { + sd.ignored_properties + .iter() + .map(|p| (shape_id.clone(), p.clone())) + }) + .collect(); + for (shape_id, head) in ignored_candidates { + let values = traverse_rdf_list(db, &head, &rdf_first, &rdf_rest, &rdf_nil).await?; + if values.is_empty() { + // Not a list head in this graph — a plain property IRI. 
+ continue; + } + if let Some(sd) = self.shapes.get_mut(&shape_id) { + sd.ignored_properties.remove(&head); + for v in values { + if let FlakeValue::Ref(p) = v { + sd.ignored_properties.insert(p); + } + } + } + } + // Collect logical constraint list heads let mut and_lists: Vec<(Sid, Sid)> = Vec::new(); let mut or_lists: Vec<(Sid, Sid)> = Vec::new(); @@ -620,6 +708,31 @@ impl ShapeCompiler { } } + // Shape-based constraints + name if name == predicates::NODE => { + if let FlakeValue::Ref(shape_ref) = &flake.o { + self.get_or_create_shape(&flake.s) + .node_shapes + .push(shape_ref.clone()); + } + } + name if name == predicates::QUALIFIED_VALUE_SHAPE => { + if let FlakeValue::Ref(shape_ref) = &flake.o { + self.get_or_create_property_shape(&flake.s).qualified_shape = + Some(shape_ref.clone()); + } + } + name if name == predicates::QUALIFIED_MIN_COUNT => { + if let FlakeValue::Long(n) = &flake.o { + self.get_or_create_property_shape(&flake.s).qualified_min = Some(*n as usize); + } + } + name if name == predicates::QUALIFIED_MAX_COUNT => { + if let FlakeValue::Long(n) = &flake.o { + self.get_or_create_property_shape(&flake.s).qualified_max = Some(*n as usize); + } + } + // Logical constraints (node-level) name if name == predicates::NOT => { if let FlakeValue::Ref(shape_ref) = &flake.o { @@ -652,6 +765,17 @@ impl ShapeCompiler { } // Metadata + name if name == predicates::DEACTIVATED => { + if let FlakeValue::Boolean(v) = &flake.o { + // The subject may be a node shape, a property shape, or + // both maps may hold an entry for it — deactivate wherever + // it appears so the shape is ignored entirely. 
+ if let Some(ps) = self.property_shapes.get_mut(&flake.s) { + ps.deactivated = *v; + } + self.get_or_create_shape(&flake.s).deactivated = *v; + } + } name if name == predicates::SEVERITY => { if let FlakeValue::Ref(sev) = &flake.o { let severity = parse_severity(sev); @@ -717,13 +841,27 @@ impl ShapeCompiler { let mut prop_shapes = Vec::new(); for ps_id in &data.property_shape_ids { if let Some(ps_data) = ps_map.get(ps_id) { + if ps_data.deactivated { + continue; + } if ps_data.path.is_some() { // `sh:path` present. If it never resolved to an AST it // becomes an `Unresolvable` path, surfaced as a violation // only when this shape fires — not a compile error that // would wedge every transaction on the ledger. let path = resolved_path_of(ps_data); - let constraints = build_constraints_from_ps_data(ps_data); + let mut constraints = build_constraints_from_ps_data(ps_data); + + // sh:qualifiedValueShape needs the shape map to inline + // the qualified shape, so it's attached here rather + // than in build_constraints_from_ps_data. + if let Some(q_ref) = &ps_data.qualified_shape { + constraints.push(Constraint::QualifiedValueShape { + shape: Arc::new(build_nested_shape(q_ref, &ps_map)), + min_count: ps_data.qualified_min, + max_count: ps_data.qualified_max, + }); + } // Check if this property shape's subject also has structural // constraints (e.g. sh:or on a property shape). If so, build @@ -760,15 +898,31 @@ impl ShapeCompiler { // Add logical constraints (sh:not, sh:and, sh:or, sh:xone) structural_constraints.extend(build_logical_constraints(data, &ps_map)); + // Value constraints declared directly on the node shape (no + // sh:path) accumulate in a path-less PropertyShapeData entry keyed + // by the shape's own Sid; per spec they apply to the focus node + // itself. Metadata (sh:message / sh:name) that landed on that entry + // also belongs to the node shape. 
+ let mut node_constraints = data.node_constraints.clone(); + let mut message = data.message.clone(); + let mut name = data.name.clone(); + if let Some(own_ps) = ps_map.get(id) { + if own_ps.path.is_none() { + node_constraints.extend(build_constraints_from_ps_data(own_ps)); + message = message.or_else(|| own_ps.message.clone()); + name = name.or_else(|| own_ps.name.clone()); + } + } + compiled.push(CompiledShape { id: id.clone(), targets: data.targets.clone(), property_shapes: prop_shapes, - node_constraints: data.node_constraints.clone(), + node_constraints, structural_constraints, severity: data.severity, - name: data.name.clone(), - message: data.message.clone(), + name, + message, deactivated: data.deactivated, }); } @@ -862,14 +1016,21 @@ fn build_nested_shape(sid: &ShapeId, ps_map: &HashMap, ) -> Vec { let mut constraints = Vec::new(); + for shape_ref in &data.node_shapes { + constraints.push(NodeConstraint::Node(Arc::new(build_nested_shape( + shape_ref, ps_map, + )))); + } + if let Some(ref shape_ref) = data.not_shape { constraints.push(NodeConstraint::Not(Arc::new(build_nested_shape( shape_ref, ps_map, diff --git a/fluree-db-shacl/src/constraints/mod.rs b/fluree-db-shacl/src/constraints/mod.rs index 85f8e15c80..02d794b297 100644 --- a/fluree-db-shacl/src/constraints/mod.rs +++ b/fluree-db-shacl/src/constraints/mod.rs @@ -73,10 +73,11 @@ pub enum Constraint { LanguageIn(Vec), // Qualified value shape constraints - /// sh:qualifiedValueShape with min/max counts + /// sh:qualifiedValueShape with min/max counts: the number of values + /// conforming to the nested shape must fall within the counts. 
QualifiedValueShape { /// The nested shape to validate against - shape: Arc, + shape: Arc, /// sh:qualifiedMinCount min_count: Option, /// sh:qualifiedMaxCount @@ -84,15 +85,6 @@ pub enum Constraint { }, } -/// A qualified shape for sh:qualifiedValueShape -#[derive(Debug, Clone, PartialEq)] -pub struct QualifiedShape { - /// The shape ID - pub id: Sid, - /// Constraints to apply - pub constraints: Vec, -} - /// Node-level constraints (applied to the focus node, not property values) #[derive(Debug, Clone, PartialEq)] pub enum NodeConstraint { @@ -104,6 +96,11 @@ pub enum NodeConstraint { ignored_properties: HashSet, }, + /// sh:node - the node must conform to the referenced node shape. On a node + /// shape this applies to the focus node; on a property shape it applies to + /// each value node individually. + Node(Arc), + // Logical constraints /// sh:not - the nested shape must NOT match Not(Arc), @@ -178,6 +175,7 @@ impl NodeConstraint { ignored_properties.len() ) } + NodeConstraint::Node(shape) => format!("sh:node {}", shape.id.name), NodeConstraint::Not(_) => "sh:not".to_string(), NodeConstraint::And(shapes) => format!("sh:and ({} shapes)", shapes.len()), NodeConstraint::Or(shapes) => format!("sh:or ({} shapes)", shapes.len()), diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index 1955c1f7ca..114fa66200 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -23,6 +23,21 @@ //! - Closed: `sh:closed`, `sh:ignoredProperties` //! - Pair: `sh:equals`, `sh:disjoint`, `sh:lessThan`, `sh:lessThanOrEquals` //! - Logical: `sh:not`, `sh:and`, `sh:or`, `sh:xone` +//! - Shape-based: `sh:node` — on a property shape each value node must conform +//! to the referenced node shape; on a node shape the focus node itself must. +//! Recursive shape references (e.g. `FriendShape → knows → sh:node +//! FriendShape` over cyclic data) terminate: a `(focus, shape)` pair already +//! being validated on the call stack is assumed conforming. +//! 
- Qualified: `sh:qualifiedValueShape` with `sh:qualifiedMinCount` / +//! `sh:qualifiedMaxCount` — counts the values conforming to the qualified +//! shape (top-level property shapes; `sh:qualifiedValueShapesDisjoint` is +//! not supported) +//! - Node-shape value constraints: per-value constraints declared directly on +//! a node shape (no `sh:path`) apply to the focus node itself +//! - `sh:deactivated` — a deactivated shape is ignored entirely, including +//! when referenced via `sh:node` or logical constraints +//! - Implicit class targets: a shape that is also an `rdfs:Class` / +//! `owl:Class` targets its own instances //! - Messages: `sh:message` on a property shape (or on the node shape for //! `sh:closed` and node-level logical constraints) replaces the generated //! violation message @@ -61,8 +76,9 @@ //! //! - `sh:uniqueLang`, `sh:languageIn` — require access to language-tag metadata //! on flakes, which is not yet threaded through the validation path. -//! - `sh:qualifiedValueShape` (+ `sh:qualifiedMinCount` / `sh:qualifiedMaxCount`) -//! — requires recursive nested-shape validation counting. +//! - `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for +//! qualified value shapes (the counting form of `sh:qualifiedValueShape` is +//! supported). //! //! # Example //! diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 25c8c20720..15497998a1 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -4,7 +4,7 @@ //! against SHACL shapes. 
use crate::cache::{ShaclCache, ShaclCacheKey}; -use crate::compile::{CompiledShape, PropertyShape, Severity, ShapeCompiler, TargetType}; +use crate::compile::{CompiledShape, PropertyShape, Severity, ShapeCompiler, ShapeId, TargetType}; use crate::constraints::cardinality::{validate_max_count, validate_min_count}; use crate::constraints::datatype::{validate_datatype, validate_node_kind}; use crate::constraints::pattern::{validate_max_length, validate_min_length, validate_pattern}; @@ -35,6 +35,17 @@ use std::collections::{HashMap, HashSet}; /// data graph. type ClassMembershipCache = Mutex>; +/// `(focus node, shape id)` pairs currently being validated on the call stack. +/// +/// Recursive shape references are legal SHACL (`FriendShape → sh:node +/// FriendShape` via a property) and cyclic *data* would otherwise recurse +/// forever. On re-entry of an already-active pair the engine assumes +/// conformance — the standard terminating interpretation, since the spec +/// leaves recursive validation undefined. One set is created per top-level +/// validation entry point; entries are removed on exit, so the set only +/// reflects the live call stack. +type ActiveShapeChecks = Mutex>; + /// Threaded context for resolving `sh:class` value membership: the extra /// vocabulary graphs to union into the `rdf:type` / `rdfs:subClassOf` lookup /// (the `f:shapesSource` graph[s]) plus the per-transaction memo. 
`Copy` @@ -276,13 +287,15 @@ impl ShaclEngine { cache: &self.class_cache, cross_ledger, }; + let active = ActiveShapeChecks::default(); for shape in applicable_shapes { if shape.deactivated { continue; } let shape_results = - validate_shape(db, focus_node, shape, &all_shapes, Some(class_ctx)).await?; + validate_shape(db, focus_node, shape, &all_shapes, Some(class_ctx), &active) + .await?; results.extend(shape_results); } @@ -326,8 +339,16 @@ impl ShaclEngine { let focus_nodes = get_focus_nodes(db, shape, self.hierarchy.as_ref()).await?; for focus_node in focus_nodes { - let results = - validate_shape(db, &focus_node, shape, &all_shapes, Some(class_ctx)).await?; + let active = ActiveShapeChecks::default(); + let results = validate_shape( + db, + &focus_node, + shape, + &all_shapes, + Some(class_ctx), + &active, + ) + .await?; all_results.extend(results); } } @@ -584,31 +605,123 @@ fn validate_shape<'a>( shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], class_ctx: Option>, + active: &'a ActiveShapeChecks, ) -> std::pin::Pin>> + Send + 'a>> { Box::pin(async move { + // A deactivated shape is ignored entirely — including when reached via + // sh:node / logical shape references, not just from target selection. + if shape.deactivated { + return Ok(Vec::new()); + } + + // Recursion guard: a (focus, shape) pair already on the call stack + // (via sh:node / logical shape references over cyclic data) is assumed + // conforming so validation terminates. 
+ let guard_key = (focus_node.clone(), shape.id.clone()); + if !active.lock().insert(guard_key.clone()) { + return Ok(Vec::new()); + } + let mut results = Vec::new(); // Validate property shapes for prop_shape in &shape.property_shapes { - let prop_results = - validate_property_shape(db, focus_node, prop_shape, shape, all_shapes, class_ctx) - .await?; + let prop_results = validate_property_shape( + db, focus_node, prop_shape, shape, all_shapes, class_ctx, active, + ) + .await?; results.extend(prop_results); } + // Value constraints declared directly on the node shape apply to the + // focus node itself (a node shape's value nodes = the focus node). + if !shape.node_constraints.is_empty() { + let node_results = + validate_node_value_constraints(db, focus_node, shape, class_ctx).await?; + results.extend(node_results); + } + // Validate structural constraints (closed, logical) for constraint in &shape.structural_constraints { - let constraint_results = - validate_structural_constraint(db, focus_node, constraint, shape, all_shapes) - .await?; + let constraint_results = validate_structural_constraint( + db, focus_node, constraint, shape, all_shapes, active, + ) + .await?; results.extend(constraint_results); } + active.lock().remove(&guard_key); Ok(results) }) } +/// Validate value constraints declared directly on a node shape (no `sh:path`) +/// against the focus node itself. Per spec, a node shape's value-node set is +/// exactly the focus node, so per-value constraints (`sh:in`, `sh:hasValue`, +/// `sh:nodeKind`, `sh:class`, ranges, …) evaluate over `[focus]`. 
+async fn validate_node_value_constraints<'a>( + db: GraphDbRef<'a>, + focus_node: &Sid, + shape: &'a CompiledShape, + class_ctx: Option>, +) -> Result> { + let mut results = Vec::new(); + let values = [FlakeValue::Ref(focus_node.clone())]; + let datatypes = [fluree_db_core::id_datatype_sid()]; + + let push = |violation: ConstraintViolation, results: &mut Vec| { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: None, + source_shape: shape.id.clone(), + source_constraint: None, + severity: shape.severity, + message: shape.message.clone().unwrap_or(violation.message), + value: violation.value, + graph_id: None, + }); + }; + + for constraint in &shape.node_constraints { + match constraint { + Constraint::Equals(target_prop) + | Constraint::Disjoint(target_prop) + | Constraint::LessThan(target_prop) + | Constraint::LessThanOrEquals(target_prop) => { + let target_flakes = db + .range( + IndexType::Spot, + RangeTest::Eq, + RangeMatch::subject_predicate(focus_node.clone(), target_prop.clone()), + ) + .await?; + let target_values: Vec = + target_flakes.iter().map(|f| f.o.clone()).collect(); + for violation in + validate_pair_constraint(constraint, &values, &target_values, &target_prop.name) + { + push(violation, &mut results); + } + } + Constraint::Class(expected_class) => { + for violation in + validate_class_constraint(db, &values, expected_class, class_ctx).await? + { + push(violation, &mut results); + } + } + _ => { + for violation in validate_constraint(constraint, &values, &datatypes)? { + push(violation, &mut results); + } + } + } + } + + Ok(results) +} + /// Validate a structural (node-level) constraint /// /// Note: This function uses `Box::pin` for recursive calls to avoid infinitely-sized futures. 
@@ -618,6 +731,7 @@ fn validate_structural_constraint<'a>( constraint: &'a crate::constraints::NodeConstraint, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + active: &'a ActiveShapeChecks, ) -> std::pin::Pin>> + Send + 'a>> { Box::pin(async move { @@ -677,6 +791,39 @@ fn validate_structural_constraint<'a>( } } + NodeConstraint::Node(nested_shape) => { + // sh:node - the focus node must conform to the referenced shape + let nested_results = validate_nested_shape( + db, + focus_node, + nested_shape.as_ref(), + parent_shape, + all_shapes, + active, + ) + .await?; + let has_violations = nested_results + .iter() + .any(|r| r.severity == Severity::Violation); + if has_violations { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: None, + source_shape: parent_shape.id.clone(), + source_constraint: None, + severity: Severity::Violation, + message: parent_shape.message.clone().unwrap_or_else(|| { + format!( + "Node does not conform to shape {} (sh:node)", + nested_shape.id.name + ) + }), + value: None, + graph_id: None, + }); + } + } + NodeConstraint::Not(nested_shape) => { // sh:not - the nested shape must NOT match let nested_results = validate_nested_shape( @@ -685,6 +832,7 @@ fn validate_structural_constraint<'a>( nested_shape.as_ref(), parent_shape, all_shapes, + active, ) .await?; // If the nested shape has NO violations, that's a violation of sh:not. 
@@ -721,6 +869,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + active, ) .await?; // Include violations from the nested shape @@ -755,6 +904,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + active, ) .await?; let has_violations = nested_results @@ -803,6 +953,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + active, ) .await?; let has_violations = nested_results @@ -862,6 +1013,7 @@ fn validate_nested_shape<'a>( nested: &'a NestedShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + active: &'a ActiveShapeChecks, ) -> std::pin::Pin>> + Send + 'a>> { Box::pin(async move { @@ -876,7 +1028,7 @@ fn validate_nested_shape<'a>( // legacy data-graph lookup (no `f:shapesSource` vocabulary union // and no shared memo) — the value-set feature targets top-level // property shapes. - return validate_shape(db, focus_node, ref_shape, all_shapes, None).await; + return validate_shape(db, focus_node, ref_shape, all_shapes, None, active).await; } // Shape not found and no inline constraints — treat as unresolved. // Return a violation to prevent sh:or from being trivially true. 
@@ -1002,6 +1154,7 @@ fn validate_nested_shape<'a>( node_constraint, parent_shape, all_shapes, + active, ) .await?; results.extend(nested_results); @@ -1012,6 +1165,7 @@ fn validate_nested_shape<'a>( } /// Validate a focus node against a property shape +#[allow(clippy::too_many_arguments)] async fn validate_property_shape<'a>( db: GraphDbRef<'a>, focus_node: &Sid, @@ -1019,6 +1173,7 @@ async fn validate_property_shape<'a>( parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], class_ctx: Option>, + active: &'a ActiveShapeChecks, ) -> Result> { let mut results = Vec::new(); @@ -1115,6 +1270,58 @@ async fn validate_property_shape<'a>( }); } } + Constraint::QualifiedValueShape { + shape, + min_count, + max_count, + } => { + let mut conforming = 0usize; + for (i, value) in values.iter().enumerate() { + let conforms = check_value_against_nested_shape( + db, + value, + datatypes.get(i), + shape, + parent_shape, + all_shapes, + active, + ) + .await?; + if conforms { + conforming += 1; + } + } + + let mut qualified_messages: Vec = Vec::new(); + if let Some(min) = min_count { + if conforming < *min { + qualified_messages.push(format!( + "Expected at least {} value(s) conforming to shape {} but found {}", + min, shape.id.name, conforming + )); + } + } + if let Some(max) = max_count { + if conforming > *max { + qualified_messages.push(format!( + "Expected at most {} value(s) conforming to shape {} but found {}", + max, shape.id.name, conforming + )); + } + } + for message in qualified_messages { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: prop_shape.path.as_predicate().cloned(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(prop_shape.id.clone()), + severity: prop_shape.severity, + message: prop_shape.message.clone().unwrap_or(message), + value: None, + graph_id: None, + }); + } + } _ => { // Handle other constraints let violations = validate_constraint(constraint, &values, &datatypes)?; @@ -1147,6 
+1354,7 @@ async fn validate_property_shape<'a>( prop_shape, parent_shape, all_shapes, + active, ) .await?; results.extend(structural_results); @@ -1170,6 +1378,7 @@ async fn validate_property_value_structural_constraint<'a>( prop_shape: &PropertyShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + active: &'a ActiveShapeChecks, ) -> Result> { let mut results = Vec::new(); @@ -1189,6 +1398,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + active, ) .await?; if conforms { @@ -1231,6 +1441,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + active, ) .await?; if !conforms { @@ -1268,6 +1479,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + active, ) .await?; if conforms { @@ -1307,6 +1519,40 @@ async fn validate_property_value_structural_constraint<'a>( } } + NodeConstraint::Node(nested) => { + // sh:node - each value must conform to the referenced shape + for (i, value) in values.iter().enumerate() { + let dt = datatypes.get(i); + let conforms = check_value_against_nested_shape( + db, + value, + dt, + nested, + parent_shape, + all_shapes, + active, + ) + .await?; + if !conforms { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: prop_shape.path.as_predicate().cloned(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(prop_shape.id.clone()), + severity: prop_shape.severity, + message: prop_shape.message.clone().unwrap_or_else(|| { + format!( + "Value {:?} does not conform to shape {} (sh:node)", + value, nested.id.name + ) + }), + value: Some(value.clone()), + graph_id: None, + }); + } + } + } + NodeConstraint::Not(nested) => { // For each value, the nested shape must NOT accept it for (i, value) in values.iter().enumerate() { @@ -1318,6 +1564,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + 
active, ) .await?; if conforms { @@ -1362,6 +1609,7 @@ async fn check_value_against_nested_shape<'a>( nested: &'a NestedShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + active: &'a ActiveShapeChecks, ) -> Result { // If the nested shape has value-level constraints (e.g. sh:datatype without sh:path), // check them directly against the value/datatype. @@ -1385,7 +1633,7 @@ async fn check_value_against_nested_shape<'a>( // For IRI/blank-node values, evaluate the nested shape against the value as a focus node if let FlakeValue::Ref(sid) = value { let nested_results = - validate_nested_shape(db, sid, nested, parent_shape, all_shapes).await?; + validate_nested_shape(db, sid, nested, parent_shape, all_shapes, active).await?; let has_violations = nested_results .iter() .any(|r| r.severity == Severity::Violation); @@ -1539,11 +1787,10 @@ fn validate_constraint( // Requires checking the language tag from flake metadata, not FlakeValue } - // Qualified value shape - requires nested validation - Constraint::QualifiedValueShape { .. } => { - // TODO: Implement qualified value shape validation - // This requires recursive shape validation - } + // Qualified value shape needs db access for nested-shape conformance + // counting — handled in `validate_property_shape` (this function is + // the pure-values path without a snapshot). + Constraint::QualifiedValueShape { .. 
} => {} } Ok(violations) diff --git a/fluree-vocab/src/lib.rs b/fluree-vocab/src/lib.rs index 593fefa88b..20094b1f88 100644 --- a/fluree-vocab/src/lib.rs +++ b/fluree-vocab/src/lib.rs @@ -1186,6 +1186,9 @@ pub mod shacl_names { /// sh:nodeKind local name pub const NODE_KIND: &str = "nodeKind"; + /// sh:node local name (value/focus must conform to the referenced node shape) + pub const NODE: &str = "node"; + /// sh:class local name pub const CLASS: &str = "class"; @@ -1325,6 +1328,9 @@ pub mod shacl_names { // Severity Levels // ======================================================================== + /// sh:deactivated local name (true = the shape is ignored entirely) + pub const DEACTIVATED: &str = "deactivated"; + /// sh:severity local name pub const SEVERITY: &str = "severity"; From 8e7890efa6cdfaf9f1c882fdc53b560fcf600115 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 08:25:04 -0400 Subject: [PATCH 10/23] fix(shacl): honor sh:severity on node-level structural constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh:closed, sh:node, and the logical constraints (sh:not/and/or/xone) hardcoded Severity::Violation on their results, so a shape marked sh:severity sh:Warning still rejected transactions for those constraints while property constraints honored severity correctly. Structural results now carry the shape's severity. Nested-shape internal results keep Violation — they are conformance signals for the logical operators, not surfaced directly. 
--- fluree-db-api/src/shacl_tests.rs | 40 ++++++++++++++++++++++++++++++++ fluree-db-shacl/src/validate.rs | 14 +++++------ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index f4386383dd..bae2705b52 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2978,3 +2978,43 @@ async fn shacl_ignored_properties_turtle_list() { .unwrap_err(); assert_shacl_violation(err, "not allowed by closed shape"); } + +/// `sh:severity sh:Warning` on a node shape must apply to node-level +/// structural constraints too — a warn-severity closed shape must not reject +/// the transaction (property constraints already honored severity). +#[tokio::test] +async fn shacl_warning_severity_on_closed_shape_does_not_reject() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:AdvisoryClosedShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Advisory"}, + "sh:closed": true, + "sh:severity": {"@id": "sh:Warning"}, + "sh:property": [{ + "@id": "ex:pshape_advisory_label", + "sh:path": {"@id": "ex:label"} + }] + }); + + let ledger = fluree.create_ledger("shacl/warnsev:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + // ex:extra is undeclared — a Violation-severity closed shape would reject + // this (covered by shacl_closed_constraint); Warning severity must not. 
+ fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:adv1", + "@type": "ex:Advisory", + "ex:label": "ok", + "ex:extra": "advisory only" + }), + ) + .await + .expect("warn-severity closed shape must not reject the transaction"); +} diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 15497998a1..7570646b19 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -779,7 +779,7 @@ fn validate_structural_constraint<'a>( result_path: Some(prop.clone()), source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { format!("Property {} not allowed by closed shape", prop.name) }), @@ -811,7 +811,7 @@ fn validate_structural_constraint<'a>( result_path: None, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { format!( "Node does not conform to shape {} (sh:node)", @@ -847,7 +847,7 @@ fn validate_structural_constraint<'a>( result_path: None, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { format!( "Node conforms to shape {} which is not allowed (sh:not)", @@ -880,7 +880,7 @@ fn validate_structural_constraint<'a>( result_path: r.result_path, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { format!("sh:and constraint - {}", r.message) }), @@ -928,7 +928,7 @@ fn validate_structural_constraint<'a>( result_path: None, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, 
message: parent_shape.message.clone().unwrap_or_else(|| { format!( "Node does not conform to any shape in sh:or. Violations: {}", @@ -971,7 +971,7 @@ fn validate_structural_constraint<'a>( result_path: None, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { "Node does not conform to any shape in sh:xone".to_string() }), @@ -984,7 +984,7 @@ fn validate_structural_constraint<'a>( result_path: None, source_shape: parent_shape.id.clone(), source_constraint: None, - severity: Severity::Violation, + severity: parent_shape.severity, message: parent_shape.message.clone().unwrap_or_else(|| { format!( "Node conforms to {} shapes in sh:xone (must be exactly 1): {}", From 96103df72c2067ac0915c9f1521e7c8c6b5be953 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 08:26:45 -0400 Subject: [PATCH 11/23] fix(shacl): match sh:pattern against the lexical form of literals sh:pattern unconditionally rejected every non-string value, so legitimate shapes over numeric, boolean, or date/time literals (e.g. ^\d{4}$ on an integer vintage year) always violated. Values now match on their lexical form per SPARQL STR() semantics. Non-literals still violate: blank nodes per spec; IRIs because matching them needs namespace decoding this pure path doesn't have (noted as a follow-up). 
--- fluree-db-api/src/shacl_tests.rs | 57 ++++++++++++++++++ fluree-db-shacl/src/constraints/pattern.rs | 69 ++++++++++++++++++---- 2 files changed, 115 insertions(+), 11 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index bae2705b52..287c216a42 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3018,3 +3018,60 @@ async fn shacl_warning_severity_on_closed_shape_does_not_reject() { .await .expect("warn-severity closed shape must not reject the transaction"); } + +/// `sh:pattern` matches the lexical form of non-string literals (SPARQL +/// `STR()` semantics) — an integer year matches `^\d{4}$` instead of being +/// rejected as "not a string". +#[tokio::test] +async fn shacl_pattern_on_integer_literal() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:VintageShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Wine"}, + "sh:property": [{ + "@id": "ex:pshape_vintage", + "sh:path": {"@id": "ex:vintage"}, + "sh:pattern": "^\\d{4}$" + }] + }); + + let ledger_ok = fluree + .create_ledger("shacl/pattern-int-ok:main") + .await + .unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:wine1", + "@type": "ex:Wine", + "ex:vintage": 2024 + }), + ) + .await + .expect("4-digit integer must match ^\\d{4}$ via its lexical form"); + + let ledger_bad = fluree + .create_ledger("shacl/pattern-int-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:wine2", + "@type": "ex:Wine", + "ex:vintage": 12345 + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "does not match pattern"); +} diff --git 
a/fluree-db-shacl/src/constraints/pattern.rs b/fluree-db-shacl/src/constraints/pattern.rs index 345278ad14..9cb04b6505 100644 --- a/fluree-db-shacl/src/constraints/pattern.rs +++ b/fluree-db-shacl/src/constraints/pattern.rs @@ -5,25 +5,54 @@ use crate::error::{Result, ShaclError}; use fluree_db_core::FlakeValue; use regex::Regex; +/// The lexical form a literal is matched against for `sh:pattern`, mirroring +/// SPARQL `STR()`. `None` for non-literals: blank nodes always violate per +/// spec, and IRI matching would need namespace decoding that this pure path +/// doesn't have. +fn pattern_lexical_form(value: &FlakeValue) -> Option { + match value { + FlakeValue::String(s) => Some(s.clone()), + FlakeValue::Long(n) => Some(n.to_string()), + FlakeValue::Double(n) => Some(n.to_string()), + FlakeValue::Boolean(b) => Some(b.to_string()), + FlakeValue::BigInt(n) => Some(n.to_string()), + FlakeValue::Decimal(d) => Some(d.to_string()), + FlakeValue::DateTime(v) => Some(v.original().to_string()), + FlakeValue::Date(v) => Some(v.original().to_string()), + FlakeValue::Time(v) => Some(v.original().to_string()), + FlakeValue::GYear(v) => Some(v.original().to_string()), + FlakeValue::GYearMonth(v) => Some(v.original().to_string()), + FlakeValue::GMonth(v) => Some(v.original().to_string()), + FlakeValue::GDay(v) => Some(v.original().to_string()), + FlakeValue::GMonthDay(v) => Some(v.original().to_string()), + FlakeValue::YearMonthDuration(v) => Some(v.original().to_string()), + FlakeValue::DayTimeDuration(v) => Some(v.original().to_string()), + FlakeValue::Duration(v) => Some(v.original().to_string()), + FlakeValue::Json(s) => Some(s.clone()), + FlakeValue::GeoPoint(v) => Some(v.to_string()), + FlakeValue::Ref(_) | FlakeValue::Vector(_) | FlakeValue::Null => None, + } +} + /// Validate sh:pattern constraint /// -/// Checks that a string value matches the regular expression pattern. 
+/// Matches the value's lexical form (per SPARQL `STR()`) against the regular +/// expression — so numeric, boolean, and date/time literals participate, not +/// just strings. pub fn validate_pattern( value: &FlakeValue, pattern: &str, flags: Option<&str>, ) -> Result> { - let string_value = match value { - FlakeValue::String(s) => s.as_str(), - _ => { - // Non-string values fail pattern matching - return Ok(Some(ConstraintViolation { - constraint: Constraint::Pattern(pattern.to_string(), flags.map(String::from)), - value: Some(value.clone()), - message: "Pattern constraint requires a string value".to_string(), - })); - } + let Some(string_value) = pattern_lexical_form(value) else { + // Blank nodes / IRIs / non-literals fail pattern matching + return Ok(Some(ConstraintViolation { + constraint: Constraint::Pattern(pattern.to_string(), flags.map(String::from)), + value: Some(value.clone()), + message: "Pattern constraint cannot be applied to a non-literal value".to_string(), + })); }; + let string_value = string_value.as_str(); // Build regex with optional flags let regex_pattern = if let Some(f) = flags { @@ -141,6 +170,24 @@ mod tests { assert!(result.is_some()); } + #[test] + fn test_pattern_numeric_literal_matches_lexical_form() { + // Non-string literals match on their lexical form (SPARQL STR()), + // not unconditionally violate. 
+ let year = FlakeValue::Long(2024); + assert!(validate_pattern(&year, r"^\d{4}$", None).unwrap().is_none()); + let too_long = FlakeValue::Long(12345); + assert!(validate_pattern(&too_long, r"^\d{4}$", None) + .unwrap() + .is_some()); + } + + #[test] + fn test_pattern_ref_still_violates() { + let value = FlakeValue::Ref(fluree_db_core::Sid::new(100, "thing")); + assert!(validate_pattern(&value, ".*", None).unwrap().is_some()); + } + #[test] fn test_pattern_case_insensitive() { let value = FlakeValue::String("HELLO".to_string()); From 37200fe2aa1b3025eedf02e5fae9df1b01ca041c Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 08:57:36 -0400 Subject: [PATCH 12/23] fix(shacl): evaluate sh:class and qualified shapes inside nested members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two db-access constraints silently no-oped inside nested shapes: - sh:class on an inline logical member (sh:or [ sh:path p ; sh:class C ]) or an anonymous sh:node value shape fell through to the pure constraint dispatch, which skips db-access constraints — the check always passed. Nested property constraints and value shapes now resolve class membership, and the ClassMembershipCtx (f:shapesSource vocabulary graphs, per-txn memo, cross-ledger model) is threaded through the nested/referenced-shape paths instead of being dropped at the named-ref boundary, so value-set lookups behave the same at any nesting depth. - sh:qualifiedValueShape on a property shape used as a logical member was never compiled into the member. build_nested_shape now attaches it (cycle-guarded via a seen-set: a qualified reference cycle between anonymous property shapes falls back to named-ref resolution, where the runtime recursion guard applies), and the nested constraint loop counts conformance like the top-level arm. 
--- fluree-db-api/src/shacl_tests.rs | 190 +++++++++++++++++++++++++++++++ fluree-db-shacl/src/compile.rs | 100 +++++++++++----- fluree-db-shacl/src/lib.rs | 4 +- fluree-db-shacl/src/validate.rs | 106 +++++++++++++++-- 4 files changed, 361 insertions(+), 39 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 287c216a42..ff1a3b9bf1 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3075,3 +3075,193 @@ async fn shacl_pattern_on_integer_literal() { .unwrap_err(); assert_shacl_violation(err, "does not match pattern"); } + +// =========================================================================== +// sh:class / sh:qualifiedValueShape inside nested shapes +// =========================================================================== + +/// `sh:class` inside an inline `sh:or` member was a silent no-op (the nested +/// property-constraint loop skipped db-access constraints). +#[tokio::test] +async fn shacl_class_inside_or_member() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:OwnedShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Asset"}, + "sh:or": [ + { + "@id": "ex:or_owner_person", + "sh:path": {"@id": "ex:owner"}, + "sh:minCount": 1, + "sh:class": {"@id": "ex:Person"} + } + ] + }); + + // Valid: owner is a Person. 
+ let ledger_ok = fluree.create_ledger("shacl/orclass-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:asset1", "@type": "ex:Asset", "ex:owner": {"@id": "ex:alice"}}, + {"@id": "ex:alice", "@type": "ex:Person"} + ] + }), + ) + .await + .expect("Person owner satisfies the sh:or member"); + + // Invalid: owner exists but is not a Person — pre-fix the sh:class check + // silently passed, so the member (and the sh:or) conformed. + let ledger_bad = fluree + .create_ledger("shacl/orclass-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:asset2", "@type": "ex:Asset", "ex:owner": {"@id": "ex:acme"}}, + {"@id": "ex:acme", "@type": "ex:Company"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:or"); +} + +/// `sh:qualifiedValueShape` on a property shape used as an `sh:or` member — +/// nested members now count conforming values instead of silently no-oping. 
+#[tokio::test] +async fn shacl_qualified_inside_or_member() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@graph": [ + { + "@id": "ex:BadgedShape2", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_badge2", + "sh:path": {"@id": "ex:badge"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:TeamShape2", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Squad"}, + "sh:or": [{ + "@id": "ex:or_badged_member", + "sh:path": {"@id": "ex:member"}, + "sh:qualifiedValueShape": {"@id": "ex:BadgedShape2"}, + "sh:qualifiedMinCount": 1 + }] + } + ] + }); + + let ledger_ok = fluree.create_ledger("shacl/orqual-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:squadA", "@type": "ex:Squad", "ex:member": {"@id": "ex:sm1"}}, + {"@id": "ex:sm1", "ex:badge": "B-9"} + ] + }), + ) + .await + .expect("badged member satisfies the qualified count in the sh:or member"); + + let ledger_bad = fluree.create_ledger("shacl/orqual-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:squadB", "@type": "ex:Squad", "ex:member": {"@id": "ex:sm2"}}, + {"@id": "ex:sm2", "ex:role": "guest"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:or"); +} + +/// `sh:node` with an inline anonymous value shape carrying `sh:class`: the +/// value must be an instance of the class (needs db access in the +/// value-constraint path). 
+#[tokio::test] +async fn shacl_node_inline_class_value_shape() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:CuratedShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Exhibit"}, + "sh:property": [{ + "@id": "ex:pshape_artifact", + "sh:path": {"@id": "ex:artifact"}, + "sh:node": {"@id": "ex:anon_artifact_class", "sh:class": {"@id": "ex:Artifact"}} + }] + }); + + let ledger_ok = fluree + .create_ledger("shacl/nodeclass-ok:main") + .await + .unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:ex1", "@type": "ex:Exhibit", "ex:artifact": {"@id": "ex:vase"}}, + {"@id": "ex:vase", "@type": "ex:Artifact"} + ] + }), + ) + .await + .expect("artifact typed ex:Artifact conforms to the inline sh:node class shape"); + + let ledger_bad = fluree + .create_ledger("shacl/nodeclass-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:ex2", "@type": "ex:Exhibit", "ex:artifact": {"@id": "ex:rock"}}, + {"@id": "ex:rock", "@type": "ex:Pebble"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:node"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index eaab46c9b4..6177218c4b 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -855,12 +855,9 @@ impl ShapeCompiler { // sh:qualifiedValueShape needs the shape map to inline // the qualified shape, so it's attached here rather // than in build_constraints_from_ps_data. 
- if let Some(q_ref) = &ps_data.qualified_shape { - constraints.push(Constraint::QualifiedValueShape { - shape: Arc::new(build_nested_shape(q_ref, &ps_map)), - min_count: ps_data.qualified_min, - max_count: ps_data.qualified_max, - }); + if let Some(q) = qualified_constraint(ps_data, &ps_map, &mut HashSet::new()) + { + constraints.push(q); } // Check if this property shape's subject also has structural @@ -980,40 +977,83 @@ fn resolved_path_of(ps_data: &PropertyShapeData) -> PropertyPath { }) } -/// Build a `NestedShape` for a member of sh:or/sh:and/sh:xone/sh:not, -/// inlining value-level or property constraints from `PropertyShapeData` -/// when the member is an anonymous shape. +/// Build a `NestedShape` for a member of sh:or/sh:and/sh:xone/sh:not/sh:node +/// or a qualified value shape, inlining value-level or property constraints +/// from `PropertyShapeData` when the member is an anonymous shape. fn build_nested_shape(sid: &ShapeId, ps_map: &HashMap) -> NestedShape { - if let Some(ps_data) = ps_map.get(sid) { + build_nested_shape_inner(sid, ps_map, &mut HashSet::new()) +} + +/// Recursive worker for [`build_nested_shape`]. `seen` holds the shape ids on +/// the current inlining stack: a qualified-shape reference cycle between +/// anonymous property shapes would otherwise inline forever. On re-entry the +/// member is left bare, deferring to named-ref resolution at validation time +/// (where the runtime recursion guard applies). +fn build_nested_shape_inner( + sid: &ShapeId, + ps_map: &HashMap, + seen: &mut HashSet, +) -> NestedShape { + let bare = || NestedShape { + id: sid.clone(), + property_constraints: Vec::new(), + node_constraints: Vec::new(), + value_constraints: Vec::new(), + }; + if !seen.insert(sid.clone()) { + return bare(); + } + + let nested = if let Some(ps_data) = ps_map.get(sid) { if ps_data.path.is_none() { // Anonymous shape with constraints but no sh:path — these are // value-level constraints (e.g. sh:datatype on the value node). 
let value_constraints = build_constraints_from_ps_data(ps_data); - return NestedShape { + NestedShape { id: sid.clone(), property_constraints: Vec::new(), node_constraints: Vec::new(), value_constraints, - }; + } + } else { + // Has sh:path — inline as a property constraint on the nested + // shape, carrying the compiled path AST (so complex paths on a + // nested member are evaluated, not scanned as a bare blank-node + // predicate). + let mut constraints = build_constraints_from_ps_data(ps_data); + if let Some(q) = qualified_constraint(ps_data, ps_map, seen) { + constraints.push(q); + } + NestedShape { + id: sid.clone(), + property_constraints: vec![(resolved_path_of(ps_data), constraints)], + node_constraints: Vec::new(), + value_constraints: Vec::new(), + } } - // Has sh:path — inline as a property constraint on the nested shape, - // carrying the compiled path AST (so complex paths on a nested member - // are evaluated, not scanned as a bare blank-node predicate). - let constraints = build_constraints_from_ps_data(ps_data); - return NestedShape { - id: sid.clone(), - property_constraints: vec![(resolved_path_of(ps_data), constraints)], - node_constraints: Vec::new(), - value_constraints: Vec::new(), - }; - } - // Named shape reference — constraints will be resolved at validation time - NestedShape { - id: sid.clone(), - property_constraints: Vec::new(), - node_constraints: Vec::new(), - value_constraints: Vec::new(), - } + } else { + // Named shape reference — constraints resolve at validation time. + bare() + }; + + seen.remove(sid); + nested +} + +/// The `sh:qualifiedValueShape` constraint for a property shape, if declared. 
+fn qualified_constraint( + ps_data: &PropertyShapeData, + ps_map: &HashMap, + seen: &mut HashSet, +) -> Option { + ps_data + .qualified_shape + .as_ref() + .map(|q_ref| Constraint::QualifiedValueShape { + shape: Arc::new(build_nested_shape_inner(q_ref, ps_map, seen)), + min_count: ps_data.qualified_min, + max_count: ps_data.qualified_max, + }) } /// Build shape-based and logical `NodeConstraint`s (sh:node, sh:not, sh:and, diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index 114fa66200..80bddbb9e6 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -30,8 +30,8 @@ //! being validated on the call stack is assumed conforming. //! - Qualified: `sh:qualifiedValueShape` with `sh:qualifiedMinCount` / //! `sh:qualifiedMaxCount` — counts the values conforming to the qualified -//! shape (top-level property shapes; `sh:qualifiedValueShapesDisjoint` is -//! not supported) +//! shape, including on property shapes used as logical-constraint members +//! (`sh:qualifiedValueShapesDisjoint` is not supported) //! - Node-shape value constraints: per-value constraints declared directly on //! a node shape (no `sh:path`) apply to the focus node itself //! 
- `sh:deactivated` — a deactivated shape is ignored entirely, including diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 7570646b19..ebdea9923f 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -645,7 +645,7 @@ fn validate_shape<'a>( // Validate structural constraints (closed, logical) for constraint in &shape.structural_constraints { let constraint_results = validate_structural_constraint( - db, focus_node, constraint, shape, all_shapes, active, + db, focus_node, constraint, shape, all_shapes, class_ctx, active, ) .await?; results.extend(constraint_results); @@ -731,6 +731,7 @@ fn validate_structural_constraint<'a>( constraint: &'a crate::constraints::NodeConstraint, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + class_ctx: Option>, active: &'a ActiveShapeChecks, ) -> std::pin::Pin>> + Send + 'a>> { @@ -799,6 +800,7 @@ fn validate_structural_constraint<'a>( nested_shape.as_ref(), parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -832,6 +834,7 @@ fn validate_structural_constraint<'a>( nested_shape.as_ref(), parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -869,6 +872,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -904,6 +908,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -953,6 +958,7 @@ fn validate_structural_constraint<'a>( nested.as_ref(), parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1013,6 +1019,7 @@ fn validate_nested_shape<'a>( nested: &'a NestedShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + class_ctx: Option>, active: &'a ActiveShapeChecks, ) -> std::pin::Pin>> + Send + 'a>> { @@ -1024,11 +1031,8 @@ fn validate_nested_shape<'a>( && nested.value_constraints.is_empty() { if let Some(ref_shape) = all_shapes.iter().find(|s| s.id == nested.id) { - // 
`sh:class` reached via a referenced/nested shape keeps the - // legacy data-graph lookup (no `f:shapesSource` vocabulary union - // and no shared memo) — the value-set feature targets top-level - // property shapes. - return validate_shape(db, focus_node, ref_shape, all_shapes, None, active).await; + return validate_shape(db, focus_node, ref_shape, all_shapes, class_ctx, active) + .await; } // Shape not found and no inline constraints — treat as unresolved. // Return a violation to prevent sh:or from being trivially true. @@ -1127,6 +1131,66 @@ fn validate_nested_shape<'a>( }); } } + Constraint::Class(expected_class) => { + let violations = + validate_class_constraint(db, &values, expected_class, class_ctx) + .await?; + for violation in violations { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: result_path.clone(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(nested.id.clone()), + severity: Severity::Violation, + message: violation.message, + value: violation.value, + graph_id: None, + }); + } + } + Constraint::QualifiedValueShape { + shape, + min_count, + max_count, + } => { + let mut conforming = 0usize; + for (i, value) in values.iter().enumerate() { + let conforms = check_value_against_nested_shape( + db, + value, + datatypes.get(i), + shape, + parent_shape, + all_shapes, + class_ctx, + active, + ) + .await?; + if conforms { + conforming += 1; + } + } + let below = min_count.map(|min| conforming < min).unwrap_or(false); + let above = max_count.map(|max| conforming > max).unwrap_or(false); + if below || above { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: result_path.clone(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(nested.id.clone()), + severity: Severity::Violation, + message: format!( + "Found {} value(s) conforming to shape {} (expected {}..{})", + conforming, + shape.id.name, + min_count.map_or_else(|| "0".into(), |n| n.to_string()), + 
max_count.map_or_else(|| "*".into(), |n| n.to_string()), + ), + value: None, + graph_id: None, + }); + } + } _ => { let violations = validate_constraint(constraint, &values, &datatypes)?; for violation in violations { @@ -1154,6 +1218,7 @@ fn validate_nested_shape<'a>( node_constraint, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1284,6 +1349,7 @@ async fn validate_property_shape<'a>( shape, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1354,6 +1420,7 @@ async fn validate_property_shape<'a>( prop_shape, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1378,6 +1445,7 @@ async fn validate_property_value_structural_constraint<'a>( prop_shape: &PropertyShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + class_ctx: Option>, active: &'a ActiveShapeChecks, ) -> Result> { let mut results = Vec::new(); @@ -1398,6 +1466,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1441,6 +1510,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1479,6 +1549,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1530,6 +1601,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1564,6 +1636,7 @@ async fn validate_property_value_structural_constraint<'a>( nested, parent_shape, all_shapes, + class_ctx, active, ) .await?; @@ -1602,6 +1675,7 @@ async fn validate_property_value_structural_constraint<'a>( /// the value and datatype. For IRI/blank-node values (`FlakeValue::Ref`), /// delegates to `validate_nested_shape` which can look up the value as a /// focus node in the database. 
+#[allow(clippy::too_many_arguments)] async fn check_value_against_nested_shape<'a>( db: GraphDbRef<'a>, value: &FlakeValue, @@ -1609,11 +1683,28 @@ async fn check_value_against_nested_shape<'a>( nested: &'a NestedShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], + class_ctx: Option>, active: &'a ActiveShapeChecks, ) -> Result { // If the nested shape has value-level constraints (e.g. sh:datatype without sh:path), // check them directly against the value/datatype. if !nested.value_constraints.is_empty() { + // sh:class on an anonymous value shape needs db access for the + // rdf:type lookup; the pure constraint-set path below skips it. + for constraint in &nested.value_constraints { + if let Constraint::Class(expected_class) = constraint { + let violations = validate_class_constraint( + db, + std::slice::from_ref(value), + expected_class, + class_ctx, + ) + .await?; + if !violations.is_empty() { + return Ok(false); + } + } + } let dt_arr: [Sid; 1]; let dt_slice: &[Sid] = match datatype { Some(dt) => { @@ -1633,7 +1724,8 @@ async fn check_value_against_nested_shape<'a>( // For IRI/blank-node values, evaluate the nested shape against the value as a focus node if let FlakeValue::Ref(sid) = value { let nested_results = - validate_nested_shape(db, sid, nested, parent_shape, all_shapes, active).await?; + validate_nested_shape(db, sid, nested, parent_shape, all_shapes, class_ctx, active) + .await?; let has_violations = nested_results .iter() .any(|r| r.severity == Severity::Violation); From dcb6e6f1e83711c6088d8fc766d3b66df925bf00 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 09:26:39 -0400 Subject: [PATCH 13/23] feat(shacl): enforce sh:uniqueLang and sh:languageIn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both constraints were parsed but silently unenforced — the last of the loads-fine-does-nothing pair. 
Language tags already live in flake metadata (FlakeMeta::lang); validation now carries a langs column parallel to values/datatypes, including through property-path evaluation (PathValue gains the language tag; inverse/closure steps and the focus node itself are untagged). - sh:languageIn matches via SPARQL langMatches basic filtering (RFC 4647): case-insensitive, "en" matches "en-US", "*" matches any tag; untagged values violate. - sh:uniqueLang true reports one violation per duplicated tag; untagged values are ignored. - Compile fix: sh:languageIn previously produced one singleton constraint per JSON-LD list member (an unsatisfiable conjunction had it ever been enforced) and dropped the Turtle RDF-list form entirely. Tags now accumulate into a single constraint, with the Turtle list head expanded like sh:in. --- docs/guides/cookbook-shacl.md | 20 +++- fluree-db-api/src/shacl_tests.rs | 141 ++++++++++++++++++++++++ fluree-db-shacl/src/compile.rs | 43 ++++++-- fluree-db-shacl/src/constraints/lang.rs | 135 +++++++++++++++++++++++ fluree-db-shacl/src/constraints/mod.rs | 1 + fluree-db-shacl/src/lib.rs | 9 +- fluree-db-shacl/src/path.rs | 71 +++++++----- fluree-db-shacl/src/validate.rs | 75 ++++++++----- 8 files changed, 428 insertions(+), 67 deletions(-) create mode 100644 fluree-db-shacl/src/constraints/lang.rs diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 3064725bab..9ff934565c 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -202,6 +202,23 @@ ex:UserShape a sh:NodeShape ; `sh:pattern` accepts an optional `sh:flags` string (e.g. `"i"` for case-insensitive). +### Language constraints + +`sh:languageIn` restricts values to language-tagged literals whose tag matches +one of the given basic language ranges (`"en"` also matches `"en-US"`, per +SPARQL `langMatches`); untagged values violate. `sh:uniqueLang true` forbids +two values of the property from sharing a language tag. 
+ +```turtle +ex:LabelShape a sh:NodeShape ; + sh:targetClass ex:Labeled ; + sh:property [ + sh:path ex:label ; + sh:languageIn ( "en" "fr" ) ; + sh:uniqueLang true + ] . +``` + ### Node kind ```turtle @@ -570,9 +587,6 @@ All three routes go through the same post-stage helper, so the ledger's configur ## Not yet supported -The following SHACL constructs are parsed/compiled but currently **no-ops** at validation time. Shapes using them load without error but don't constrain data: - -- `sh:uniqueLang`, `sh:languageIn` — require language-tag metadata on flakes, which isn't yet threaded through the validation path. - `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for qualified value shapes (the counting form of `sh:qualifiedValueShape` is supported). These are tracked in the SHACL compliance effort. diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index ff1a3b9bf1..8a9ea8caec 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3265,3 +3265,144 @@ async fn shacl_node_inline_class_value_shape() { .unwrap_err(); assert_shacl_violation(err, "sh:node"); } + +// =========================================================================== +// Language constraints (sh:languageIn, sh:uniqueLang) +// =========================================================================== + +/// `sh:languageIn` — labels must carry a language tag matching one of the +/// allowed basic ranges ("en" also matches "en-US"). +#[tokio::test] +async fn shacl_language_in() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:LabelShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Labeled"}, + "sh:property": [{ + "@id": "ex:pshape_lang_label", + "sh:path": {"@id": "ex:label"}, + "sh:languageIn": ["en", "fr"] + }] + }); + + // Valid: en, fr, and en-US (basic language-range match) labels. 
+ let ledger_ok = fluree.create_ledger("shacl/langin-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:thing1", + "@type": "ex:Labeled", + "ex:label": [ + {"@value": "colour", "@language": "en"}, + {"@value": "couleur", "@language": "fr"}, + {"@value": "color", "@language": "en-US"} + ] + }), + ) + .await + .expect("en / fr / en-US labels are all within sh:languageIn (en fr)"); + + // Invalid: a German label. + let ledger_bad = fluree.create_ledger("shacl/langin-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:thing2", + "@type": "ex:Labeled", + "ex:label": {"@value": "Farbe", "@language": "de"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "not in the allowed set"); + + // Invalid: a plain (untagged) string. + let ledger_plain = fluree + .create_ledger("shacl/langin-plain:main") + .await + .unwrap(); + let ledger_plain = fluree + .upsert(ledger_plain, &shape_txn) + .await + .unwrap() + .ledger; + let err = fluree + .upsert( + ledger_plain, + &json!({ + "@context": context.clone(), + "@id": "ex:thing3", + "@type": "ex:Labeled", + "ex:label": "no language" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "no language tag"); +} + +/// `sh:uniqueLang` — no two values of the property may share a language tag. 
+#[tokio::test] +async fn shacl_unique_lang() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:UniqueLabelShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Translated"}, + "sh:property": [{ + "@id": "ex:pshape_unique_label", + "sh:path": {"@id": "ex:label"}, + "sh:uniqueLang": true + }] + }); + + // Valid: one label per language. + let ledger_ok = fluree.create_ledger("shacl/uniq-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:doc1", + "@type": "ex:Translated", + "ex:label": [ + {"@value": "colour", "@language": "en"}, + {"@value": "couleur", "@language": "fr"} + ] + }), + ) + .await + .expect("distinct language tags satisfy sh:uniqueLang"); + + // Invalid: two English labels. + let ledger_bad = fluree.create_ledger("shacl/uniq-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:doc2", + "@type": "ex:Translated", + "ex:label": [ + {"@value": "colour", "@language": "en"}, + {"@value": "color", "@language": "en"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "more than one value"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index 6177218c4b..de38e70051 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -157,6 +157,9 @@ struct PropertyShapeData { pattern_string: Option, /// sh:in list values (accumulated from RDF list traversal) in_values: Vec, + /// sh:languageIn values (String tags from JSON-LD @list flattening, or a + /// single Ref to a Turtle RDF-list head expanded in expand_rdf_lists) + language_in_values: Vec, /// sh:deactivated — a deactivated 
property shape is skipped entirely deactivated: bool, /// sh:qualifiedValueShape — reference to the shape conforming values are @@ -383,6 +386,7 @@ impl ShapeCompiler { // If in_values contains a single Ref, it might be an RDF list head that needs expansion let mut in_list_expansions: Vec<(Sid, Sid)> = Vec::new(); // (property_shape_id, list_head) + let mut lang_list_expansions: Vec<(Sid, Sid)> = Vec::new(); for (ps_id, ps_data) in &self.property_shapes { // Check if in_values has a single Ref value (potential RDF list head) if ps_data.in_values.len() == 1 { @@ -390,6 +394,12 @@ impl ShapeCompiler { in_list_expansions.push((ps_id.clone(), list_head.clone())); } } + // Same Turtle encoding for sh:languageIn. + if ps_data.language_in_values.len() == 1 { + if let FlakeValue::Ref(list_head) = &ps_data.language_in_values[0] { + lang_list_expansions.push((ps_id.clone(), list_head.clone())); + } + } } // Expand RDF list references @@ -402,6 +412,14 @@ impl ShapeCompiler { } } } + for (ps_id, list_head) in lang_list_expansions { + let values = traverse_rdf_list(db, &list_head, &rdf_first, &rdf_rest, &rdf_nil).await?; + if !values.is_empty() { + if let Some(ps_data) = self.property_shapes.get_mut(&ps_id) { + ps_data.language_in_values = values; + } + } + } // Expand sh:ignoredProperties RDF-list heads (Turtle encoding). 
JSON-LD // @list flattens to one flake per member, so members arrive directly; @@ -699,13 +717,9 @@ impl ShapeCompiler { } } name if name == predicates::LANGUAGE_IN => { - // Points to an RDF list of language tags - simplified for now - if let FlakeValue::String(lang) = &flake.o { - self.add_property_constraint( - &flake.s, - Constraint::LanguageIn(vec![lang.clone()]), - ); - } + self.get_or_create_property_shape(&flake.s) + .language_in_values + .push(flake.o.clone()); } // Shape-based constraints @@ -941,6 +955,21 @@ fn build_constraints_from_ps_data(ps_data: &PropertyShapeData) -> Vec = ps_data + .language_in_values + .iter() + .filter_map(|v| match v { + FlakeValue::String(s) => Some(s.clone()), + _ => None, + }) + .collect(); + if !langs.is_empty() { + constraints.push(Constraint::LanguageIn(langs)); + } + } + // Add Pattern constraint with flags if present if let Some(pattern) = &ps_data.pattern_string { constraints.push(Constraint::Pattern( diff --git a/fluree-db-shacl/src/constraints/lang.rs b/fluree-db-shacl/src/constraints/lang.rs new file mode 100644 index 0000000000..b6dfeac6ff --- /dev/null +++ b/fluree-db-shacl/src/constraints/lang.rs @@ -0,0 +1,135 @@ +//! Language-tag constraint validators (sh:uniqueLang, sh:languageIn) +//! +//! Language tags live in flake metadata (`FlakeMeta::lang`), threaded into +//! validation as a `langs` slice parallel to `values` / `datatypes`. + +use super::{Constraint, ConstraintViolation}; +use fluree_db_core::FlakeValue; +use std::collections::HashMap; + +/// Validate sh:uniqueLang: no two values may share a language tag. +/// +/// Values without a language tag are ignored (the constraint only concerns +/// language-tagged literals). Returns one violation per duplicated tag. 
+pub fn validate_unique_lang( + values: &[FlakeValue], + langs: &[Option], +) -> Vec { + let mut counts: HashMap<&str, usize> = HashMap::new(); + for lang in langs.iter().flatten() { + *counts.entry(lang.as_str()).or_default() += 1; + } + + let mut out = Vec::new(); + let mut duplicated: Vec<&str> = counts + .into_iter() + .filter(|(_, n)| *n > 1) + .map(|(lang, _)| lang) + .collect(); + duplicated.sort_unstable(); + for lang in duplicated { + // Report the first value carrying the duplicated tag. + let value = langs + .iter() + .position(|l| l.as_deref() == Some(lang)) + .and_then(|i| values.get(i)) + .cloned(); + out.push(ConstraintViolation { + constraint: Constraint::UniqueLang(true), + value, + message: format!("Language tag \"{lang}\" is used by more than one value"), + }); + } + out +} + +/// Validate sh:languageIn for a single value: the value must be a +/// language-tagged literal whose tag matches one of the allowed basic +/// language ranges. +pub fn validate_language_in( + value: &FlakeValue, + lang: Option<&str>, + allowed: &[String], +) -> Option { + let violation = |message: String| { + Some(ConstraintViolation { + constraint: Constraint::LanguageIn(allowed.to_vec()), + value: Some(value.clone()), + message, + }) + }; + + let Some(lang) = lang else { + return violation(format!( + "Value {value:?} has no language tag (sh:languageIn requires one of {allowed:?})" + )); + }; + if allowed.iter().any(|range| lang_matches(lang, range)) { + None + } else { + violation(format!( + "Language tag \"{lang}\" is not in the allowed set {allowed:?}" + )) + } +} + +/// SPARQL `langMatches` basic filtering (RFC 4647 §3.3.1): case-insensitive; +/// the range matches the tag exactly or as a prefix followed by `-`, and `*` +/// matches any tag. 
+fn lang_matches(tag: &str, range: &str) -> bool { + if range == "*" { + return !tag.is_empty(); + } + let tag = tag.to_ascii_lowercase(); + let range = range.to_ascii_lowercase(); + tag == range + || (tag.len() > range.len() + && tag.starts_with(&range) + && tag.as_bytes()[range.len()] == b'-') +} + +#[cfg(test)] +mod tests { + use super::*; + + fn s(v: &str) -> FlakeValue { + FlakeValue::String(v.to_string()) + } + + #[test] + fn unique_lang_flags_duplicates() { + let values = [s("colour"), s("color"), s("couleur")]; + let langs = [ + Some("en".to_string()), + Some("en".to_string()), + Some("fr".to_string()), + ]; + let violations = validate_unique_lang(&values, &langs); + assert_eq!(violations.len(), 1); + assert!(violations[0].message.contains("\"en\"")); + } + + #[test] + fn unique_lang_ignores_untagged_values() { + let values = [s("a"), s("b")]; + let langs = [None, None]; + assert!(validate_unique_lang(&values, &langs).is_empty()); + } + + #[test] + fn language_in_matches_exact_and_subtag() { + let allowed = vec!["en".to_string(), "fr".to_string()]; + assert!(validate_language_in(&s("hi"), Some("en"), &allowed).is_none()); + // Basic language range: "en" matches "en-US". + assert!(validate_language_in(&s("hi"), Some("en-US"), &allowed).is_none()); + assert!(validate_language_in(&s("hallo"), Some("de"), &allowed).is_some()); + // Prefix without a subtag separator must NOT match. 
+ assert!(validate_language_in(&s("hi"), Some("eng"), &allowed).is_some()); + } + + #[test] + fn language_in_rejects_untagged_values() { + let allowed = vec!["en".to_string()]; + assert!(validate_language_in(&s("plain"), None, &allowed).is_some()); + } +} diff --git a/fluree-db-shacl/src/constraints/mod.rs b/fluree-db-shacl/src/constraints/mod.rs index 02d794b297..8faf6e73c2 100644 --- a/fluree-db-shacl/src/constraints/mod.rs +++ b/fluree-db-shacl/src/constraints/mod.rs @@ -5,6 +5,7 @@ pub mod cardinality; pub mod datatype; +pub mod lang; pub mod pair; pub mod pattern; pub mod value; diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index 80bddbb9e6..0588bb7983 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -22,6 +22,8 @@ //! - Value: `sh:hasValue`, `sh:in` //! - Closed: `sh:closed`, `sh:ignoredProperties` //! - Pair: `sh:equals`, `sh:disjoint`, `sh:lessThan`, `sh:lessThanOrEquals` +//! - Language: `sh:uniqueLang`, `sh:languageIn` (basic language-range matching +//! per SPARQL `langMatches`; tags come from flake metadata) //! - Logical: `sh:not`, `sh:and`, `sh:or`, `sh:xone` //! - Shape-based: `sh:node` — on a property shape each value node must conform //! to the referenced node shape; on a node shape the focus node itself must. @@ -69,13 +71,6 @@ //! //! # Not Yet Supported //! -//! The following constraints are parsed/compiled but are **not enforced** at -//! validation time. Shapes using these will load without error but their -//! constraints will silently pass. Plan to fix under the SHACL compliance -//! effort tracked in the repo. -//! -//! - `sh:uniqueLang`, `sh:languageIn` — require access to language-tag metadata -//! on flakes, which is not yet threaded through the validation path. //! - `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for //! qualified value shapes (the counting form of `sh:qualifiedValueShape` is //! supported). 
diff --git a/fluree-db-shacl/src/path.rs b/fluree-db-shacl/src/path.rs index 789872061a..17724115b9 100644 --- a/fluree-db-shacl/src/path.rs +++ b/fluree-db-shacl/src/path.rs @@ -50,9 +50,25 @@ pub enum PropertyPath { Unresolvable(String), } -/// A value node reached by a path: `(value, datatype)`, mirroring a flake's -/// object + datatype columns. -pub type PathValue = (FlakeValue, Sid); +/// A value node reached by a path: `(value, datatype, language tag)`, +/// mirroring a flake's object, datatype, and metadata language columns. +pub type PathValue = (FlakeValue, Sid, Option); + +/// Split path values into the parallel `(values, datatypes, langs)` columns +/// the constraint validators consume. +pub fn split_path_values( + values: Vec, +) -> (Vec, Vec, Vec>) { + let mut vs = Vec::with_capacity(values.len()); + let mut dts = Vec::with_capacity(values.len()); + let mut langs = Vec::with_capacity(values.len()); + for (v, dt, lang) in values { + vs.push(v); + dts.push(dt); + langs.push(lang); + } + (vs, dts, langs) +} /// Boxed future returned by the recursive async path helpers. type PathFuture<'a, T> = Pin> + Send + 'a>>; @@ -296,8 +312,8 @@ async fn ordered_objects( } /// Evaluate a property path from `focus`, returning the reached value nodes as -/// `(value, datatype)` pairs — the direct analogue of the objects of a single -/// `SPOT` scan for a simple predicate. +/// `(value, datatype, language)` tuples — the direct analogue of the objects +/// of a single `SPOT` scan for a simple predicate. 
pub fn eval_path<'a>( db: GraphDbRef<'a>, focus: &'a Sid, @@ -316,13 +332,13 @@ pub fn eval_path<'a>( Ok(dedup(out)) } PropertyPath::ZeroOrMore(inner) => { - let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid())]; + let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid(), None)]; out.extend(closure(db, focus, inner).await?); Ok(dedup(out)) } PropertyPath::OneOrMore(inner) => Ok(dedup(closure(db, focus, inner).await?)), PropertyPath::ZeroOrOne(inner) => { - let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid())]; + let mut out = vec![(FlakeValue::Ref(focus.clone()), id_datatype_sid(), None)]; out.extend(eval_path(db, focus, inner).await?); Ok(dedup(out)) } @@ -334,7 +350,7 @@ pub fn eval_path<'a>( } /// Forward single-predicate step: objects of `(focus, p, ?)`. -async fn forward_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result> { +async fn forward_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result> { let flakes = db .range( IndexType::Spot, @@ -342,11 +358,20 @@ async fn forward_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result, focus: &Sid, p: &Sid) -> Result> { +async fn inverse_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result> { let flakes = db .range( IndexType::Opst, @@ -356,7 +381,7 @@ async fn inverse_step(db: GraphDbRef<'_>, focus: &Sid, p: &Sid) -> Result, focus: &Sid, steps: &[PropertyPath], -) -> Result> { +) -> Result> { let mut frontier: Vec = vec![focus.clone()]; for (i, step) in steps.iter().enumerate() { let is_last = i + 1 == steps.len(); - let mut reached: Vec<(FlakeValue, Sid)> = Vec::new(); + let mut reached: Vec = Vec::new(); for node in &frontier { reached.extend(eval_path(db, node, step).await?); } @@ -382,7 +407,7 @@ async fn eval_sequence( } frontier = reached .into_iter() - .filter_map(|(v, _)| match v { + .filter_map(|(v, _, _)| match v { FlakeValue::Ref(sid) => Some(sid), _ => None, }) @@ -398,33 +423,29 @@ async fn eval_sequence( /// Transitive closure of `inner` 
from `focus` (one or more steps), BFS over the /// reference nodes reached. Non-reference values are terminal value nodes. -async fn closure( - db: GraphDbRef<'_>, - focus: &Sid, - inner: &PropertyPath, -) -> Result> { - let mut out: Vec<(FlakeValue, Sid)> = Vec::new(); +async fn closure(db: GraphDbRef<'_>, focus: &Sid, inner: &PropertyPath) -> Result> { + let mut out: Vec = Vec::new(); // Seed `visited` with the focus so a cycle back to it isn't re-expanded. let mut visited: HashSet = HashSet::from([focus.clone()]); let mut queue: Vec = vec![focus.clone()]; while let Some(node) = queue.pop() { - for (value, dt) in eval_path(db, &node, inner).await? { + for (value, dt, lang) in eval_path(db, &node, inner).await? { if let FlakeValue::Ref(sid) = &value { if visited.insert(sid.clone()) { queue.push(sid.clone()); } } - out.push((value, dt)); + out.push((value, dt, lang)); } } Ok(dedup(out)) } /// Deduplicate value nodes (SHACL value nodes are a set). -fn dedup(mut values: Vec<(FlakeValue, Sid)>) -> Vec<(FlakeValue, Sid)> { - let mut seen: HashSet<(String, String)> = HashSet::new(); - values.retain(|(v, dt)| seen.insert((format!("{v:?}"), format!("{dt:?}")))); +fn dedup(mut values: Vec) -> Vec { + let mut seen: HashSet = HashSet::new(); + values.retain(|(v, dt, lang)| seen.insert(format!("{v:?}|{dt:?}|{lang:?}"))); values } diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index ebdea9923f..370a5e82fd 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -7,6 +7,7 @@ use crate::cache::{ShaclCache, ShaclCacheKey}; use crate::compile::{CompiledShape, PropertyShape, Severity, ShapeCompiler, ShapeId, TargetType}; use crate::constraints::cardinality::{validate_max_count, validate_min_count}; use crate::constraints::datatype::{validate_datatype, validate_node_kind}; +use crate::constraints::lang::{validate_language_in, validate_unique_lang}; use crate::constraints::pattern::{validate_max_length, validate_min_length, 
validate_pattern}; use crate::constraints::value::{ validate_has_value, validate_in, validate_max_exclusive, validate_max_inclusive, @@ -712,7 +713,7 @@ async fn validate_node_value_constraints<'a>( } } _ => { - for violation in validate_constraint(constraint, &values, &datatypes)? { + for violation in validate_constraint(constraint, &values, &datatypes, &[None])? { push(violation, &mut results); } } @@ -1069,7 +1070,7 @@ fn validate_nested_shape<'a>( // Value nodes reached by the member's path. Simple predicate → SPOT // scan; complex path → evaluate the AST (same as top-level shapes). - let (values, datatypes): (Vec, Vec) = + let (values, datatypes, langs): (Vec, Vec, Vec>) = if let Some(pred) = path.as_predicate() { let flakes = db .range( @@ -1081,12 +1082,15 @@ fn validate_nested_shape<'a>( ( flakes.iter().map(|f| f.o.clone()).collect(), flakes.iter().map(|f| f.dt.clone()).collect(), + flakes + .iter() + .map(|f| f.m.as_ref().and_then(|m| m.lang.clone())) + .collect(), ) } else { - crate::path::eval_path(db, focus_node, path) - .await? - .into_iter() - .unzip() + crate::path::split_path_values( + crate::path::eval_path(db, focus_node, path).await?, + ) }; let result_path = path.as_predicate().cloned(); @@ -1159,6 +1163,7 @@ fn validate_nested_shape<'a>( db, value, datatypes.get(i), + langs.get(i).and_then(|l| l.as_deref()), shape, parent_shape, all_shapes, @@ -1192,7 +1197,8 @@ fn validate_nested_shape<'a>( } } _ => { - let violations = validate_constraint(constraint, &values, &datatypes)?; + let violations = + validate_constraint(constraint, &values, &datatypes, &langs)?; for violation in violations { results.push(ValidationResult { focus_node: focus_node.clone(), @@ -1260,8 +1266,9 @@ async fn validate_property_shape<'a>( // Get all value nodes reached by this property shape's path on the focus node. // Simple single-predicate paths take the plain SPOT scan; complex paths - // (inverse/sequence/alternative/transitive) evaluate the path AST. 
- let (values, datatypes): (Vec, Vec) = + // (inverse/sequence/alternative/transitive) evaluate the path AST. The + // language column feeds sh:uniqueLang / sh:languageIn. + let (values, datatypes, langs): (Vec, Vec, Vec>) = if let Some(pred) = prop_shape.path.as_predicate() { let flakes = db .range( @@ -1273,12 +1280,15 @@ async fn validate_property_shape<'a>( ( flakes.iter().map(|f| f.o.clone()).collect(), flakes.iter().map(|f| f.dt.clone()).collect(), + flakes + .iter() + .map(|f| f.m.as_ref().and_then(|m| m.lang.clone())) + .collect(), ) } else { - crate::path::eval_path(db, focus_node, &prop_shape.path) - .await? - .into_iter() - .unzip() + crate::path::split_path_values( + crate::path::eval_path(db, focus_node, &prop_shape.path).await?, + ) }; // Validate each constraint @@ -1346,6 +1356,7 @@ async fn validate_property_shape<'a>( db, value, datatypes.get(i), + langs.get(i).and_then(|l| l.as_deref()), shape, parent_shape, all_shapes, @@ -1390,7 +1401,7 @@ async fn validate_property_shape<'a>( } _ => { // Handle other constraints - let violations = validate_constraint(constraint, &values, &datatypes)?; + let violations = validate_constraint(constraint, &values, &datatypes, &langs)?; for violation in violations { results.push(ValidationResult { @@ -1416,6 +1427,7 @@ async fn validate_property_shape<'a>( focus_node, &values, &datatypes, + &langs, structural, prop_shape, parent_shape, @@ -1441,6 +1453,7 @@ async fn validate_property_value_structural_constraint<'a>( focus_node: &Sid, values: &[FlakeValue], datatypes: &[Sid], + langs: &[Option], constraint: &'a NodeConstraint, prop_shape: &PropertyShape, parent_shape: &'a CompiledShape, @@ -1463,6 +1476,7 @@ async fn validate_property_value_structural_constraint<'a>( db, value, dt, + langs.get(i).and_then(|l| l.as_deref()), nested, parent_shape, all_shapes, @@ -1507,6 +1521,7 @@ async fn validate_property_value_structural_constraint<'a>( db, value, dt, + langs.get(i).and_then(|l| l.as_deref()), nested, parent_shape, 
all_shapes, @@ -1546,6 +1561,7 @@ async fn validate_property_value_structural_constraint<'a>( db, value, dt, + langs.get(i).and_then(|l| l.as_deref()), nested, parent_shape, all_shapes, @@ -1598,6 +1614,7 @@ async fn validate_property_value_structural_constraint<'a>( db, value, dt, + langs.get(i).and_then(|l| l.as_deref()), nested, parent_shape, all_shapes, @@ -1633,6 +1650,7 @@ async fn validate_property_value_structural_constraint<'a>( db, value, dt, + langs.get(i).and_then(|l| l.as_deref()), nested, parent_shape, all_shapes, @@ -1680,6 +1698,7 @@ async fn check_value_against_nested_shape<'a>( db: GraphDbRef<'a>, value: &FlakeValue, datatype: Option<&Sid>, + lang: Option<&str>, nested: &'a NestedShape, parent_shape: &'a CompiledShape, all_shapes: &'a [&'a CompiledShape], @@ -1713,10 +1732,12 @@ async fn check_value_against_nested_shape<'a>( } None => &[], }; + let lang_arr = [lang.map(str::to_string)]; let violations = validate_constraint_set( &nested.value_constraints, std::slice::from_ref(value), dt_slice, + &lang_arr, )?; return Ok(violations.is_empty()); } @@ -1742,10 +1763,11 @@ fn validate_constraint_set( constraints: &[Constraint], values: &[FlakeValue], datatypes: &[Sid], + langs: &[Option], ) -> Result> { let mut all_violations = Vec::new(); for constraint in constraints { - let violations = validate_constraint(constraint, values, datatypes)?; + let violations = validate_constraint(constraint, values, datatypes, langs)?; all_violations.extend(violations); } Ok(all_violations) @@ -1756,6 +1778,7 @@ fn validate_constraint( constraint: &Constraint, values: &[FlakeValue], datatypes: &[Sid], + langs: &[Option], ) -> Result> { let mut violations = Vec::new(); @@ -1866,17 +1889,19 @@ fn validate_constraint( | Constraint::LessThan(_) | Constraint::LessThanOrEquals(_) => {} - // Language constraints - // Note: Language tags are stored in the flake's datatype field (rdf:langString) - // with the language as a separate attribute. 
Full validation requires access to - // language metadata which is not available in this simplified validation path. - Constraint::UniqueLang(_unique) => { - // TODO: Implement when language metadata is available - // Requires checking the language tag from flake metadata, not FlakeValue + // Language constraints (tags come from flake metadata via `langs`) + Constraint::UniqueLang(unique) => { + if *unique { + violations.extend(validate_unique_lang(values, langs)); + } } - Constraint::LanguageIn(_allowed_langs) => { - // TODO: Implement when language metadata is available - // Requires checking the language tag from flake metadata, not FlakeValue + Constraint::LanguageIn(allowed) => { + for (i, value) in values.iter().enumerate() { + let lang = langs.get(i).and_then(|l| l.as_deref()); + if let Some(v) = validate_language_in(value, lang, allowed) { + violations.push(v); + } + } } // Qualified value shape needs db access for nested-shape conformance From b259515e32ca61997bcc6d50a4c0c60b1687a19e Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 09:30:55 -0400 Subject: [PATCH 14/23] fix(shacl): compare numerics by value in sh:in and sh:hasValue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Membership used term equality, so sh:in (1.0 2.0) never matched an integer 1 and sh:hasValue 42 never matched a decimal 42.00 — while the range facets already compared across numeric representations via numeric_cmp. sh:in / sh:hasValue now use the same value equality for numeric pairs; everything else keeps term equality. 
--- fluree-db-shacl/src/constraints/value.rs | 26 ++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/fluree-db-shacl/src/constraints/value.rs b/fluree-db-shacl/src/constraints/value.rs index e060983c90..a70ceccbb7 100644 --- a/fluree-db-shacl/src/constraints/value.rs +++ b/fluree-db-shacl/src/constraints/value.rs @@ -3,6 +3,15 @@ use super::{Constraint, ConstraintViolation}; use fluree_db_core::FlakeValue; +/// Value equality for sh:hasValue / sh:in: numeric literals compare by value +/// across representations (1 == 1.0 == 1.00), everything else by term equality. +fn values_equal(a: &FlakeValue, b: &FlakeValue) -> bool { + if a.is_numeric() && b.is_numeric() { + return a.numeric_cmp(b) == Some(std::cmp::Ordering::Equal); + } + a == b +} + /// Validate sh:hasValue constraint /// /// Checks that the value set contains the expected value. @@ -10,7 +19,7 @@ pub fn validate_has_value( values: &[FlakeValue], expected: &FlakeValue, ) -> Option { - if values.contains(expected) { + if values.iter().any(|v| values_equal(v, expected)) { None } else { Some(ConstraintViolation { @@ -25,7 +34,7 @@ pub fn validate_has_value( /// /// Checks that a value is in the allowed set. pub fn validate_in(value: &FlakeValue, allowed: &[FlakeValue]) -> Option { - if allowed.contains(value) { + if allowed.iter().any(|a| values_equal(value, a)) { None } else { Some(ConstraintViolation { @@ -214,6 +223,19 @@ mod tests { assert!(validate_max_exclusive(&FlakeValue::Long(11), &max).is_some()); } + #[test] + fn test_in_and_has_value_numeric_across_representations() { + // sh:in / sh:hasValue use value equality for numerics: an integer 1 + // matches a decimal 1.0 constraint value and vice versa. 
+ let allowed = vec![dec("1.0"), dec("2.0")]; + assert!(validate_in(&FlakeValue::Long(1), &allowed).is_none()); + assert!(validate_in(&FlakeValue::Long(3), &allowed).is_some()); + + let values = vec![FlakeValue::Long(42)]; + assert!(validate_has_value(&values, &dec("42.00")).is_none()); + assert!(validate_has_value(&values, &dec("42.5")).is_some()); + } + #[test] fn test_compare_mixed_numeric() { // Long vs Double comparison From fc303748dc74ea9f469a4aa909d26cab35921a1e Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 09:33:53 -0400 Subject: [PATCH 15/23] fix(shacl): apply string facets to IRIs via their full decoded IRI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh:pattern / sh:minLength / sh:maxLength on IRI values previously violated unconditionally. Per SPARQL STR() they now match against the full IRI: value vectors containing non-blank IRI refs are rewritten with the decoded IRI string before the string-facet dispatch (top-level and nested property constraints). Blank nodes still fail per spec, and an IRI whose namespace was allocated in the same transaction cannot be decoded against the base snapshot — it fails closed with the generic message rather than silently passing. --- fluree-db-api/src/shacl_tests.rs | 91 ++++++++++++++++++++++++++++++++ fluree-db-shacl/src/lib.rs | 6 ++- fluree-db-shacl/src/validate.rs | 69 +++++++++++++++++++++++- 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 8a9ea8caec..1d03dea7f6 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3406,3 +3406,94 @@ async fn shacl_unique_lang() { .unwrap_err(); assert_shacl_violation(err, "more than one value"); } + +/// `sh:pattern` on IRI values matches the full decoded IRI (SPARQL `STR()`), +/// not the SID name fragment — and non-matching IRIs violate. 
+#[tokio::test] +async fn shacl_pattern_on_iri_values() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + // Links must point into example.org. + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:LinkShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Bookmark"}, + "sh:property": [{ + "@id": "ex:pshape_link", + "sh:path": {"@id": "ex:link"}, + "sh:pattern": "^http://example\\.org/" + }] + }); + + let ledger_ok = fluree.create_ledger("shacl/iripat-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:bm1", + "@type": "ex:Bookmark", + "ex:link": {"@id": "http://example.org/ns/page1"} + }), + ) + .await + .expect("IRI under example.org must match the pattern via its full IRI"); + + // Violating IRI in a namespace committed by an earlier transaction — the + // full IRI decodes and fails the pattern with the precise message. + let ledger_bad = fluree.create_ledger("shacl/iripat-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let ledger_bad = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "http://other.example.com/page", + "ex:note": "registers the foreign namespace" + }), + ) + .await + .unwrap() + .ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:bm2", + "@type": "ex:Bookmark", + "ex:link": {"@id": "http://other.example.com/page"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "does not match pattern"); + + // An IRI whose namespace is first seen in this very transaction can't be + // decoded against the base snapshot — pattern fails closed (still a + // violation, with the generic non-literal message). 
+ let ledger_fresh = fluree + .create_ledger("shacl/iripat-fresh:main") + .await + .unwrap(); + let ledger_fresh = fluree + .upsert(ledger_fresh, &shape_txn) + .await + .unwrap() + .ledger; + let err = fluree + .upsert( + ledger_fresh, + &json!({ + "@context": context.clone(), + "@id": "ex:bm3", + "@type": "ex:Bookmark", + "ex:link": {"@id": "http://brand-new.example.net/page"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "Pattern constraint"); +} diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index 0588bb7983..6c70992409 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -18,7 +18,11 @@ //! - Cardinality: `sh:minCount`, `sh:maxCount` //! - Value type: `sh:datatype`, `sh:nodeKind`, `sh:class` (with RDFS subclass reasoning) //! - Value range: `sh:minInclusive`, `sh:maxInclusive`, `sh:minExclusive`, `sh:maxExclusive` -//! - String: `sh:pattern`, `sh:minLength`, `sh:maxLength` +//! - String: `sh:pattern`, `sh:minLength`, `sh:maxLength` — literals match on +//! their lexical form and IRIs on the full decoded IRI (per SPARQL `STR()`). +//! An IRI whose namespace was allocated in the same transaction can't be +//! decoded against the base snapshot and fails closed; blank nodes fail per +//! spec. //! - Value: `sh:hasValue`, `sh:in` //! - Closed: `sh:closed`, `sh:ignoredProperties` //! 
- Pair: `sh:equals`, `sh:disjoint`, `sh:lessThan`, `sh:lessThanOrEquals` diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 370a5e82fd..35f052749c 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -19,7 +19,7 @@ use fluree_db_core::{ FlakeValue, GraphDbRef, GraphId, IndexType, LedgerSnapshot, NoOverlay, RangeMatch, RangeTest, SchemaHierarchy, Sid, }; -use fluree_vocab::namespaces::RDF; +use fluree_vocab::namespaces::{BLANK_NODE, RDF}; use fluree_vocab::rdf_names; use parking_lot::Mutex; use std::collections::{HashMap, HashSet}; @@ -1196,6 +1196,27 @@ fn validate_nested_shape<'a>( }); } } + Constraint::Pattern(..) + | Constraint::MinLength(_) + | Constraint::MaxLength(_) + if has_iri_ref(&values) => + { + let effective = stringify_iri_values(db, &values); + let violations = + validate_constraint(constraint, &effective, &datatypes, &langs)?; + for violation in violations { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: result_path.clone(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(nested.id.clone()), + severity: Severity::Violation, + message: violation.message, + value: violation.value, + graph_id: None, + }); + } + } _ => { let violations = validate_constraint(constraint, &values, &datatypes, &langs)?; @@ -1399,6 +1420,25 @@ async fn validate_property_shape<'a>( }); } } + Constraint::Pattern(..) | Constraint::MinLength(_) | Constraint::MaxLength(_) + if has_iri_ref(&values) => + { + // String facets apply to STR(iri) — decode IRI refs first. 
+ let effective = stringify_iri_values(db, &values); + let violations = validate_constraint(constraint, &effective, &datatypes, &langs)?; + for violation in violations { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: prop_shape.path.as_predicate().cloned(), + source_shape: parent_shape.id.clone(), + source_constraint: Some(prop_shape.id.clone()), + severity: prop_shape.severity, + message: prop_shape.message.clone().unwrap_or(violation.message), + value: violation.value, + graph_id: None, + }); + } + } _ => { // Handle other constraints let violations = validate_constraint(constraint, &values, &datatypes, &langs)?; @@ -1758,6 +1798,33 @@ async fn check_value_against_nested_shape<'a>( Ok(false) } +/// Replace IRI refs with their full-IRI string form for the string facets +/// (`sh:pattern` / `sh:minLength` / `sh:maxLength`), per SPARQL `STR()`. +/// Blank nodes stay refs (string facets fail on them, per spec); an IRI whose +/// namespace can't be decoded (e.g. allocated in this very transaction) also +/// stays a ref and fails closed. +fn stringify_iri_values(db: GraphDbRef<'_>, values: &[FlakeValue]) -> Vec { + values + .iter() + .map(|v| match v { + FlakeValue::Ref(sid) if sid.namespace_code != BLANK_NODE => db + .snapshot + .decode_sid(sid) + .map(FlakeValue::String) + .unwrap_or_else(|| v.clone()), + _ => v.clone(), + }) + .collect() +} + +/// Whether any value is a non-blank IRI ref (candidate for +/// [`stringify_iri_values`]). +fn has_iri_ref(values: &[FlakeValue]) -> bool { + values + .iter() + .any(|v| matches!(v, FlakeValue::Ref(sid) if sid.namespace_code != BLANK_NODE)) +} + /// Apply multiple constraints to a set of values and collect all violations. 
fn validate_constraint_set( constraints: &[Constraint], From 8f9f90b97af9b5853b7693dad41f803106dfd209 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 09:35:14 -0400 Subject: [PATCH 16/23] feat(shacl): surface sh:message on anonymous nested member shapes NestedShape now carries the member's sh:message (populated from the anonymous property-shape entry at compile time; named references keep resolving their message from the referenced CompiledShape at validation time). Nested property-constraint violations prefer it over the generated text, so a custom message on an sh:and/or/xone/not/node member reaches the transaction error. --- fluree-db-api/src/shacl_tests.rs | 35 ++++++++++++++++++++++++++ fluree-db-shacl/src/compile.rs | 3 +++ fluree-db-shacl/src/constraints/mod.rs | 3 +++ fluree-db-shacl/src/validate.rs | 32 ++++++++++++----------- 4 files changed, 59 insertions(+), 14 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 1d03dea7f6..b0a5745dc6 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3497,3 +3497,38 @@ async fn shacl_pattern_on_iri_values() { .unwrap_err(); assert_shacl_violation(err, "Pattern constraint"); } + +/// `sh:message` on an anonymous member inside a logical constraint surfaces in +/// the violation (via the sh:and wrapper here). 
+#[tokio::test] +async fn shacl_custom_message_on_nested_member() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:CheckedShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Checked"}, + "sh:and": [{ + "@id": "ex:and_member_code", + "sh:path": {"@id": "ex:code"}, + "sh:minCount": 1, + "sh:message": "A Checked record always needs a code" + }] + }); + + let ledger = fluree.create_ledger("shacl/nestmsg:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:rec1", + "@type": "ex:Checked" + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "A Checked record always needs a code"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index de38e70051..dbc8b21502 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -1028,6 +1028,7 @@ fn build_nested_shape_inner( property_constraints: Vec::new(), node_constraints: Vec::new(), value_constraints: Vec::new(), + message: None, }; if !seen.insert(sid.clone()) { return bare(); @@ -1043,6 +1044,7 @@ fn build_nested_shape_inner( property_constraints: Vec::new(), node_constraints: Vec::new(), value_constraints, + message: ps_data.message.clone(), } } else { // Has sh:path — inline as a property constraint on the nested @@ -1058,6 +1060,7 @@ fn build_nested_shape_inner( property_constraints: vec![(resolved_path_of(ps_data), constraints)], node_constraints: Vec::new(), value_constraints: Vec::new(), + message: ps_data.message.clone(), } } } else { diff --git a/fluree-db-shacl/src/constraints/mod.rs b/fluree-db-shacl/src/constraints/mod.rs index 8faf6e73c2..c357e4f7c7 100644 --- a/fluree-db-shacl/src/constraints/mod.rs +++ b/fluree-db-shacl/src/constraints/mod.rs @@ -125,6 +125,9 @@ pub struct NestedShape { 
/// Value-level constraints (e.g. sh:datatype on an anonymous shape without sh:path). /// These constrain the focus node's own value/datatype rather than a nested property. pub value_constraints: Vec<Constraint>, + /// sh:message declared on an anonymous member shape. Named references get + /// their message from the referenced CompiledShape at validation time. + pub message: Option<String>, } impl Constraint { diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index 35f052749c..e2c8a15567 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -1126,10 +1126,12 @@ fn validate_nested_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, - message: format!( - "Value set for {} does not equal value set for {}", - path_label, target_prop.name - ), + message: nested.message.clone().unwrap_or_else(|| { + format!( + "Value set for {} does not equal value set for {}", + path_label, target_prop.name + ) + }), value: None, graph_id: None, }); @@ -1146,7 +1148,7 @@ fn validate_nested_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, - message: violation.message, + message: nested.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); @@ -1184,13 +1186,15 @@ fn validate_nested_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, - message: format!( - "Found {} value(s) conforming to shape {} (expected {}..{})", - conforming, - shape.id.name, - min_count.map_or_else(|| "0".into(), |n| n.to_string()), - max_count.map_or_else(|| "*".into(), |n| n.to_string()), - ), + message: nested.message.clone().unwrap_or_else(|| { + format!( + "Found {} value(s) conforming to shape {} (expected {}..{})", + conforming, + shape.id.name, + min_count.map_or_else(|| "0".into(), |n| n.to_string()), +
max_count.map_or_else(|| "*".into(), |n| n.to_string()), + ) + }), value: None, graph_id: None, }); @@ -1211,7 +1215,7 @@ fn validate_nested_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, - message: violation.message, + message: nested.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); @@ -1227,7 +1231,7 @@ fn validate_nested_shape<'a>( source_shape: parent_shape.id.clone(), source_constraint: Some(nested.id.clone()), severity: Severity::Violation, - message: violation.message, + message: nested.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, }); From 7be42afb2408661d6b6e3141911bcbab9f1f561a Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 09:37:23 -0400 Subject: [PATCH 17/23] feat(shacl): enforce sh:qualifiedValueShapesDisjoint A qualified value shape marked disjoint now excludes values that conform to a sibling qualified shape (the qualified shapes declared by the other property shapes of the same node shape, gathered in a finalize pass). The canonical crew example works: requiring one pilot and one navigator as distinct members rejects a single member holding both roles, while the same data passes without disjointness. Sibling collection is top-level only; qualified constraints inside logical members keep counting without disjointness. --- docs/guides/cookbook-shacl.md | 7 +- fluree-db-api/src/shacl_tests.rs | 109 +++++++++++++++++++++++++ fluree-db-shacl/src/compile.rs | 47 +++++++++++ fluree-db-shacl/src/constraints/mod.rs | 6 ++ fluree-db-shacl/src/lib.rs | 11 +-- fluree-db-shacl/src/validate.rs | 27 +++++- 6 files changed, 199 insertions(+), 8 deletions(-) diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 9ff934565c..cb71488975 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -332,7 +332,9 @@ ex:TeamShape a sh:NodeShape ; ] . 
``` -(`sh:qualifiedValueShapesDisjoint` is not supported.) +`sh:qualifiedValueShapesDisjoint true` additionally excludes values that +conform to a *sibling* qualified shape — e.g. a crew needing one pilot and one +navigator as distinct members rejects a single member holding both roles. ### Constraints on the node itself @@ -587,7 +589,8 @@ All three routes go through the same post-stage helper, so the ledger's configur ## Not yet supported -- `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for qualified value shapes (the counting form of `sh:qualifiedValueShape` is supported). +- `sh:targetNode` with a literal value — only IRI/blank-node targets are compiled. +- `sh:sparql` (SPARQL-based constraints). These are tracked in the SHACL compliance effort. diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index b0a5745dc6..7daec8b166 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3532,3 +3532,112 @@ async fn shacl_custom_message_on_nested_member() { .unwrap_err(); assert_shacl_violation(err, "A Checked record always needs a code"); } + +/// `sh:qualifiedValueShapesDisjoint` — a value conforming to a sibling +/// qualified shape doesn't count. A crew needs a pilot and a navigator as +/// distinct members: one member holding both roles satisfies the counts only +/// without disjointness. 
+#[tokio::test] +async fn shacl_qualified_value_shapes_disjoint() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let crew_shape = |disjoint: bool| { + json!({ + "@context": context.clone(), + "@graph": [ + { + "@id": "ex:PilotShape", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_license", + "sh:path": {"@id": "ex:license"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:NavigatorShape", + "@type": "sh:NodeShape", + "sh:property": [{ + "@id": "ex:pshape_chart", + "sh:path": {"@id": "ex:chart"}, + "sh:minCount": 1 + }] + }, + { + "@id": "ex:CrewShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Crew"}, + "sh:property": [ + { + "@id": "ex:pshape_pilot", + "sh:path": {"@id": "ex:member"}, + "sh:qualifiedValueShape": {"@id": "ex:PilotShape"}, + "sh:qualifiedMinCount": 1, + "sh:qualifiedValueShapesDisjoint": disjoint + }, + { + "@id": "ex:pshape_navigator", + "sh:path": {"@id": "ex:member"}, + "sh:qualifiedValueShape": {"@id": "ex:NavigatorShape"}, + "sh:qualifiedMinCount": 1, + "sh:qualifiedValueShapesDisjoint": disjoint + } + ] + } + ] + }) + }; + // One member holds a license, a different member holds a chart. + let distinct_crew = json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:crew1", "@type": "ex:Crew", + "ex:member": [{"@id": "ex:p1"}, {"@id": "ex:n1"}]}, + {"@id": "ex:p1", "ex:license": "L-1"}, + {"@id": "ex:n1", "ex:chart": "C-1"} + ] + }); + // A single member holds both the license and the chart. + let dual_role_crew = json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:crew2", "@type": "ex:Crew", "ex:member": {"@id": "ex:b1"}}, + {"@id": "ex:b1", "ex:license": "L-2", "ex:chart": "C-2"} + ] + }); + + // Distinct members satisfy both qualified counts under disjointness. 
+ let ledger = fluree.create_ledger("shacl/disj-ok:main").await.unwrap(); + let ledger = fluree + .upsert(ledger, &crew_shape(true)) + .await + .unwrap() + .ledger; + fluree + .upsert(ledger, &distinct_crew) + .await + .expect("distinct pilot and navigator satisfy disjoint qualified counts"); + + // A dual-role member conforms to the sibling shape too, so it counts for + // neither → both qualifiedMinCounts fail. + let ledger = fluree.create_ledger("shacl/disj-bad:main").await.unwrap(); + let ledger = fluree + .upsert(ledger, &crew_shape(true)) + .await + .unwrap() + .ledger; + let err = fluree.upsert(ledger, &dual_role_crew).await.unwrap_err(); + assert_shacl_violation(err, "at least 1 value(s) conforming"); + + // Control: without disjointness the dual-role member counts for both. + let ledger = fluree.create_ledger("shacl/disj-off:main").await.unwrap(); + let ledger = fluree + .upsert(ledger, &crew_shape(false)) + .await + .unwrap() + .ledger; + fluree + .upsert(ledger, &dual_role_crew) + .await + .expect("without disjointness a dual-role member satisfies both counts"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index dbc8b21502..e1248cd22a 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -169,6 +169,8 @@ struct PropertyShapeData { qualified_min: Option, /// sh:qualifiedMaxCount qualified_max: Option, + /// sh:qualifiedValueShapesDisjoint + qualified_disjoint: bool, } impl ShapeCompiler { @@ -251,6 +253,7 @@ impl ShapeCompiler { predicates::QUALIFIED_VALUE_SHAPE, predicates::QUALIFIED_MIN_COUNT, predicates::QUALIFIED_MAX_COUNT, + predicates::QUALIFIED_VALUE_SHAPES_DISJOINT, // Logical constraints predicates::NOT, predicates::AND, @@ -746,6 +749,12 @@ impl ShapeCompiler { self.get_or_create_property_shape(&flake.s).qualified_max = Some(*n as usize); } } + name if name == predicates::QUALIFIED_VALUE_SHAPES_DISJOINT => { + if let FlakeValue::Boolean(v) = &flake.o { + 
+ self.get_or_create_property_shape(&flake.s) + .qualified_disjoint = *v; + } + } // Logical constraints (node-level) name if name == predicates::NOT => { @@ -895,6 +904,42 @@ impl ShapeCompiler { } } + // Sibling disjointness: a disjoint qualified constraint consults + // the qualified shapes declared by the OTHER property shapes of + // this node shape. + let all_qualified: Vec<(usize, Arc<NestedShape>)> = prop_shapes + .iter() + .enumerate() + .flat_map(|(i, ps)| { + ps.constraints + .iter() + .filter_map(move |constraint| match constraint { + Constraint::QualifiedValueShape { shape, .. } => { + Some((i, Arc::clone(shape))) + } + _ => None, + }) + }) + .collect(); + if all_qualified.len() > 1 { + for (i, ps) in prop_shapes.iter_mut().enumerate() { + for constraint in &mut ps.constraints { + if let Constraint::QualifiedValueShape { + disjoint: true, + sibling_shapes, + .. + } = constraint + { + *sibling_shapes = all_qualified + .iter() + .filter(|(j, _)| *j != i) + .map(|(_, s)| Arc::clone(s)) + .collect(); + } + } + } + } + // Build structural constraints (closed + logical) let mut structural_constraints = Vec::new(); @@ -1085,6 +1130,8 @@ fn qualified_constraint( shape: Arc::new(build_nested_shape_inner(q_ref, ps_map, seen)), min_count: ps_data.qualified_min, max_count: ps_data.qualified_max, + disjoint: ps_data.qualified_disjoint, + sibling_shapes: Vec::new(), }) } diff --git a/fluree-db-shacl/src/constraints/mod.rs b/fluree-db-shacl/src/constraints/mod.rs index c357e4f7c7..564ba635ce 100644 --- a/fluree-db-shacl/src/constraints/mod.rs +++ b/fluree-db-shacl/src/constraints/mod.rs @@ -83,6 +83,12 @@ pub enum Constraint { min_count: Option<usize>, /// sh:qualifiedMaxCount max_count: Option<usize>, + /// sh:qualifiedValueShapesDisjoint — when true, a value only counts + /// if it does NOT conform to any sibling qualified shape. + disjoint: bool, + /// Qualified shapes of the other property shapes of the same node + /// shape (filled during finalize; consulted only when `disjoint`).
+ sibling_shapes: Vec<Arc<NestedShape>>, }, } diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index 6c70992409..a0a2ab0f8f 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -36,8 +36,9 @@ //! being validated on the call stack is assumed conforming. //! - Qualified: `sh:qualifiedValueShape` with `sh:qualifiedMinCount` / //! `sh:qualifiedMaxCount` — counts the values conforming to the qualified -//! shape, including on property shapes used as logical-constraint members -//! (`sh:qualifiedValueShapesDisjoint` is not supported) +//! shape, including on property shapes used as logical-constraint members. +//! `sh:qualifiedValueShapesDisjoint` excludes values that conform to a +//! sibling qualified shape (top-level property shapes) //! - Node-shape value constraints: per-value constraints declared directly on //! a node shape (no `sh:path`) apply to the focus node itself //! - `sh:deactivated` — a deactivated shape is ignored entirely, including @@ -75,9 +76,9 @@ //! //! # Not Yet Supported //! -//! - `sh:qualifiedValueShapesDisjoint` — sibling-shape disjointness for -//! qualified value shapes (the counting form of `sh:qualifiedValueShape` is -//! supported). +//! - `sh:targetNode` with a literal value — only IRI/blank-node targets are +//! compiled (focus nodes are subject ids throughout the engine). +//! - `sh:sparql` (SPARQL-based constraints). //! //! # Example //! diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index e2c8a15567..ae9a0a7880 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -1158,6 +1158,7 @@ fn validate_nested_shape<'a>( shape, min_count, max_count, + ..
} => { let mut conforming = 0usize; for (i, value) in values.iter().enumerate() { @@ -1374,10 +1375,12 @@ async fn validate_property_shape<'a>( shape, min_count, max_count, + disjoint, + sibling_shapes, } => { let mut conforming = 0usize; for (i, value) in values.iter().enumerate() { - let conforms = check_value_against_nested_shape( + let mut conforms = check_value_against_nested_shape( db, value, datatypes.get(i), @@ -1389,6 +1392,28 @@ async fn validate_property_shape<'a>( active, ) .await?; + // Disjointness: a value conforming to a sibling qualified + // shape does not count toward this one. + if conforms && *disjoint { + for sibling in sibling_shapes { + if check_value_against_nested_shape( + db, + value, + datatypes.get(i), + langs.get(i).and_then(|l| l.as_deref()), + sibling, + parent_shape, + all_shapes, + class_ctx, + active, + ) + .await? + { + conforms = false; + break; + } + } + } if conforms { conforming += 1; } From a46380ee0c4f8ef12cd5a0d29d554d53afe3dc5d Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 10:14:44 -0400 Subject: [PATCH 18/23] docs(shacl): document that bulk import intentionally bypasses validation Bulk import is a trusted, high-throughput load path: validate source data against the shapes before importing; transaction-time validation keeps the ledger clean from there. --- docs/guides/cookbook-shacl.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index cb71488975..79e2179288 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -23,6 +23,12 @@ Fluree decides whether to run SHACL validation on each transaction using this or This means you can start using SHACL **without writing any config** — just transact shapes and they're enforced. +**Bulk import is deliberately exempt.** The bulk-import pipeline never runs +SHACL — it is a trusted, high-throughput load path. 
If your source data must +conform, validate it *before* importing (e.g. run a SHACL report over the +source with your shapes) so the ledger starts clean; transaction-time +validation keeps it clean from there. + The `shacl` feature must be enabled at build time (it's on by default for the server and CLI binaries). See [Standards and feature flags](../reference/compatibility.md). ## Enabling SHACL via the config graph From 51f28eb44efc996f44cf5bbec259575e0a38ead9 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 10:19:45 -0400 Subject: [PATCH 19/23] chore(shacl): drop needless raw-string hashes in Turtle test fixture --- fluree-db-api/src/shacl_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 7daec8b166..ff7f236c0a 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2919,7 +2919,7 @@ async fn shacl_qualified_value_shape_min_count() { async fn shacl_ignored_properties_turtle_list() { let fluree = FlureeBuilder::memory().build_memory(); let context = shacl_context(); - let shapes_ttl = r#" + let shapes_ttl = r" @prefix sh: . @prefix ex: . @@ -2928,7 +2928,7 @@ async fn shacl_ignored_properties_turtle_list() { sh:closed true ; sh:ignoredProperties ( ex:internal ex:auditLog ) ; sh:property [ sh:path ex:label ] . - "#; + "; let ledger_ok = fluree.create_ledger("shacl/ignored-ok:main").await.unwrap(); let ledger_ok = fluree From 0c0f46d358cf2f421c30fe92698520420482e1b2 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 11:02:15 -0400 Subject: [PATCH 20/23] fix(shacl): evaluate value-only nested members; unify focus/value string facets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fixes: - A value-only anonymous member of a node-level logical constraint (sh:or ([ sh:class ex:C ]), sh:not [ sh:nodeKind ... 
]) produced no checks at all — validate_nested_shape never evaluated value_constraints against the focus node, so such members always conformed. The focus-node evaluation core is now shared (focus_value_violations) between direct node-shape constraints and nested value-only members, including pair constraints, sh:class, and string facets. - String facets are now consistent everywhere: node-shape constraints and anonymous value shapes stringify IRI values (STR(iri)) like top-level property shapes, and sh:minLength/sh:maxLength use the same lexical form as sh:pattern — non-literals (blank nodes, undecodable IRIs) violate instead of measuring the SID name fragment. - sh:severity on a node shape with direct value constraints was lost (the metadata arms prefer the path-less property-shape entry; message and name were backfilled but severity was not). - sh:uniqueLang counts tags case-insensitively per BCP 47. - Nested qualified constraints honor disjoint/sibling fields uniformly (siblings remain empty for members not referenced via sh:property, which matches the spec's sibling definition). - Removed the now-unused validate_constraint_set. 
--- fluree-db-api/src/shacl_tests.rs | 153 +++++++++++++++++ fluree-db-shacl/src/compile.rs | 8 +- fluree-db-shacl/src/constraints/lang.rs | 16 +- fluree-db-shacl/src/constraints/pattern.rs | 57 +++---- fluree-db-shacl/src/validate.rs | 185 ++++++++++++++------- 5 files changed, 317 insertions(+), 102 deletions(-) diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index ff7f236c0a..49bdf06a1c 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -3641,3 +3641,156 @@ async fn shacl_qualified_value_shapes_disjoint() { .await .expect("without disjointness a dual-role member satisfies both counts"); } + +// =========================================================================== +// Review fixes: value-only nested members, focus string facets, severity +// =========================================================================== + +/// A value-only anonymous member of a node-level logical constraint +/// (`sh:or ([ sh:class ex:Person ])`) must be evaluated against the focus +/// node — previously it produced no checks and always conformed. 
+#[tokio::test] +async fn shacl_value_only_member_in_node_level_or() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:ActorShape", + "@type": "sh:NodeShape", + "sh:targetObjectsOf": {"@id": "ex:actor"}, + "sh:or": [ + {"@id": "ex:or_person", "sh:class": {"@id": "ex:Person"}}, + {"@id": "ex:or_org", "sh:class": {"@id": "ex:Organization"}} + ] + }); + + let ledger_ok = fluree.create_ledger("shacl/valmem-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:event1", "ex:actor": {"@id": "ex:alice"}}, + {"@id": "ex:alice", "@type": "ex:Person"} + ] + }), + ) + .await + .expect("a Person actor satisfies the value-only sh:or member"); + + let ledger_bad = fluree.create_ledger("shacl/valmem-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:event2", "ex:actor": {"@id": "ex:hal"}}, + {"@id": "ex:hal", "@type": "ex:Robot"} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "sh:or"); +} + +/// String facets declared directly on a node shape apply to the focus node's +/// full decoded IRI, not reject it as a non-literal. +#[tokio::test] +async fn shacl_pattern_on_node_shape_focus_iri() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + // Anything used as an ex:ref must be an example.org IRI. 
+ let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:RefShape", + "@type": "sh:NodeShape", + "sh:targetObjectsOf": {"@id": "ex:ref"}, + "sh:pattern": "^http://example\\.org/" + }); + + let ledger_ok = fluree + .create_ledger("shacl/focuspat-ok:main") + .await + .unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@id": "ex:doc1", + "ex:ref": {"@id": "ex:target1"} + }), + ) + .await + .expect("focus IRI under example.org matches the node-shape pattern"); + + // Register the foreign namespace first so the violating focus decodes. + let ledger_bad = fluree + .create_ledger("shacl/focuspat-bad:main") + .await + .unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let ledger_bad = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "http://elsewhere.example.net/thing", + "ex:note": "registers namespace" + }), + ) + .await + .unwrap() + .ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@id": "ex:doc2", + "ex:ref": {"@id": "http://elsewhere.example.net/thing"} + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "does not match pattern"); +} + +/// `sh:severity sh:Warning` on a node shape carrying direct value constraints +/// must not reject (severity routes through the path-less metadata entry). 
+#[tokio::test] +async fn shacl_warning_severity_on_node_value_constraint() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:AdvisoryStatusShape", + "@type": "sh:NodeShape", + "sh:targetObjectsOf": {"@id": "ex:state"}, + "sh:severity": {"@id": "sh:Warning"}, + "sh:in": [{"@id": "ex:on"}, {"@id": "ex:off"}] + }); + + let ledger = fluree.create_ledger("shacl/warnval:main").await.unwrap(); + let ledger = fluree.upsert(ledger, &shape_txn).await.unwrap().ledger; + + // Out-of-set value — would reject under Violation severity (covered by + // shacl_value_constraint_on_node_shape); Warning must not. + fluree + .upsert( + ledger, + &json!({ + "@context": context.clone(), + "@id": "ex:device1", + "ex:state": {"@id": "ex:standby"} + }), + ) + .await + .expect("warn-severity node value constraint must not reject"); +} diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index e1248cd22a..bcf69e6a9e 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -962,11 +962,17 @@ impl ShapeCompiler { let mut node_constraints = data.node_constraints.clone(); let mut message = data.message.clone(); let mut name = data.name.clone(); + let mut severity = data.severity; if let Some(own_ps) = ps_map.get(id) { if own_ps.path.is_none() { node_constraints.extend(build_constraints_from_ps_data(own_ps)); message = message.or_else(|| own_ps.message.clone()); name = name.or_else(|| own_ps.name.clone()); + // sh:severity routes to the path-less entry too (the + // metadata arms prefer the property-shape map). 
+ if severity == Severity::Violation { + severity = own_ps.severity; + } } } @@ -976,7 +982,7 @@ impl ShapeCompiler { property_shapes: prop_shapes, node_constraints, structural_constraints, - severity: data.severity, + severity, name, message, deactivated: data.deactivated, diff --git a/fluree-db-shacl/src/constraints/lang.rs b/fluree-db-shacl/src/constraints/lang.rs index b6dfeac6ff..c766f66147 100644 --- a/fluree-db-shacl/src/constraints/lang.rs +++ b/fluree-db-shacl/src/constraints/lang.rs @@ -15,13 +15,14 @@ pub fn validate_unique_lang( values: &[FlakeValue], langs: &[Option], ) -> Vec { - let mut counts: HashMap<&str, usize> = HashMap::new(); + // BCP 47 language tags are case-insensitive ("en" and "EN" collide). + let mut counts: HashMap = HashMap::new(); for lang in langs.iter().flatten() { - *counts.entry(lang.as_str()).or_default() += 1; + *counts.entry(lang.to_ascii_lowercase()).or_default() += 1; } let mut out = Vec::new(); - let mut duplicated: Vec<&str> = counts + let mut duplicated: Vec = counts .into_iter() .filter(|(_, n)| *n > 1) .map(|(lang, _)| lang) @@ -31,7 +32,7 @@ pub fn validate_unique_lang( // Report the first value carrying the duplicated tag. 
let value = langs .iter() - .position(|l| l.as_deref() == Some(lang)) + .position(|l| l.as_deref().is_some_and(|t| t.eq_ignore_ascii_case(&lang))) .and_then(|i| values.get(i)) .cloned(); out.push(ConstraintViolation { @@ -109,6 +110,13 @@ mod tests { assert!(violations[0].message.contains("\"en\"")); } + #[test] + fn unique_lang_is_case_insensitive() { + let values = [s("colour"), s("color")]; + let langs = [Some("en".to_string()), Some("EN".to_string())]; + assert_eq!(validate_unique_lang(&values, &langs).len(), 1); + } + #[test] fn unique_lang_ignores_untagged_values() { let values = [s("a"), s("b")]; diff --git a/fluree-db-shacl/src/constraints/pattern.rs b/fluree-db-shacl/src/constraints/pattern.rs index 9cb04b6505..a8da14dd1b 100644 --- a/fluree-db-shacl/src/constraints/pattern.rs +++ b/fluree-db-shacl/src/constraints/pattern.rs @@ -89,8 +89,17 @@ pub fn validate_pattern( } /// Validate sh:minLength constraint +/// +/// Applies to the value's lexical form (per SPARQL `STR()`); non-literals +/// (blank nodes, undecodable IRIs) violate per spec. 
pub fn validate_min_length(value: &FlakeValue, min: usize) -> Option { - let len = string_length(value); + let Some(len) = lexical_length(value) else { + return Some(ConstraintViolation { + constraint: Constraint::MinLength(min), + value: Some(value.clone()), + message: "Length constraint cannot be applied to a non-literal value".to_string(), + }); + }; if len < min { Some(ConstraintViolation { @@ -104,8 +113,17 @@ pub fn validate_min_length(value: &FlakeValue, min: usize) -> Option Option { - let len = string_length(value); + let Some(len) = lexical_length(value) else { + return Some(ConstraintViolation { + constraint: Constraint::MaxLength(max), + value: Some(value.clone()), + message: "Length constraint cannot be applied to a non-literal value".to_string(), + }); + }; if len > max { Some(ConstraintViolation { @@ -118,38 +136,9 @@ pub fn validate_max_length(value: &FlakeValue, max: usize) -> Option usize { - match value { - FlakeValue::String(s) => s.chars().count(), - FlakeValue::Long(n) => n.to_string().len(), - FlakeValue::Double(n) => n.to_string().len(), - FlakeValue::Boolean(b) => { - if *b { - 4 - } else { - 5 - } - } // "true" or "false" - FlakeValue::Ref(sid) => sid.name.len(), - FlakeValue::Vector(v) => v.len(), // Length of vector - FlakeValue::Null => 0, - FlakeValue::Json(s) => s.chars().count(), - FlakeValue::BigInt(n) => n.to_string().len(), - FlakeValue::Decimal(d) => d.to_string().len(), - FlakeValue::DateTime(dt) => dt.original().len(), - FlakeValue::Date(d) => d.original().len(), - FlakeValue::Time(t) => t.original().len(), - FlakeValue::GYear(v) => v.original().len(), - FlakeValue::GYearMonth(v) => v.original().len(), - FlakeValue::GMonth(v) => v.original().len(), - FlakeValue::GDay(v) => v.original().len(), - FlakeValue::GMonthDay(v) => v.original().len(), - FlakeValue::YearMonthDuration(v) => v.original().len(), - FlakeValue::DayTimeDuration(v) => v.original().len(), - FlakeValue::Duration(v) => v.original().len(), - FlakeValue::GeoPoint(v) 
=> v.to_string().len(), // "POINT(lng lat)" - } +/// Character count of the value's lexical form; `None` for non-literals. +fn lexical_length(value: &FlakeValue) -> Option { + pattern_lexical_form(value).map(|s| s.chars().count()) } #[cfg(test)] diff --git a/fluree-db-shacl/src/validate.rs b/fluree-db-shacl/src/validate.rs index ae9a0a7880..c2b10e3e78 100644 --- a/fluree-db-shacl/src/validate.rs +++ b/fluree-db-shacl/src/validate.rs @@ -667,12 +667,11 @@ async fn validate_node_value_constraints<'a>( shape: &'a CompiledShape, class_ctx: Option>, ) -> Result> { - let mut results = Vec::new(); - let values = [FlakeValue::Ref(focus_node.clone())]; - let datatypes = [fluree_db_core::id_datatype_sid()]; - - let push = |violation: ConstraintViolation, results: &mut Vec| { - results.push(ValidationResult { + let violations = + focus_value_violations(db, focus_node, &shape.node_constraints, class_ctx).await?; + Ok(violations + .into_iter() + .map(|violation| ValidationResult { focus_node: focus_node.clone(), result_path: None, source_shape: shape.id.clone(), @@ -681,10 +680,25 @@ async fn validate_node_value_constraints<'a>( message: shape.message.clone().unwrap_or(violation.message), value: violation.value, graph_id: None, - }); - }; + }) + .collect()) +} + +/// Evaluate value constraints against the focus node itself — the shared core +/// for constraints declared directly on a node shape and for anonymous +/// value-only members of logical constraints (`sh:or ([ sh:class ex:C ])`). +/// The focus is an IRI ref, so string facets match its full decoded IRI. 
+async fn focus_value_violations<'a>( + db: GraphDbRef<'a>, + focus_node: &Sid, + constraints: &[Constraint], + class_ctx: Option>, +) -> Result> { + let mut violations = Vec::new(); + let values = [FlakeValue::Ref(focus_node.clone())]; + let datatypes = [fluree_db_core::id_datatype_sid()]; - for constraint in &shape.node_constraints { + for constraint in constraints { match constraint { Constraint::Equals(target_prop) | Constraint::Disjoint(target_prop) @@ -699,28 +713,39 @@ async fn validate_node_value_constraints<'a>( .await?; let target_values: Vec = target_flakes.iter().map(|f| f.o.clone()).collect(); - for violation in - validate_pair_constraint(constraint, &values, &target_values, &target_prop.name) - { - push(violation, &mut results); - } + violations.extend(validate_pair_constraint( + constraint, + &values, + &target_values, + &target_prop.name, + )); } Constraint::Class(expected_class) => { - for violation in - validate_class_constraint(db, &values, expected_class, class_ctx).await? - { - push(violation, &mut results); - } + violations.extend( + validate_class_constraint(db, &values, expected_class, class_ctx).await?, + ); + } + Constraint::Pattern(..) | Constraint::MinLength(_) | Constraint::MaxLength(_) => { + let effective = stringify_iri_values(db, &values); + violations.extend(validate_constraint( + constraint, + &effective, + &datatypes, + &[None], + )?); } _ => { - for violation in validate_constraint(constraint, &values, &datatypes, &[None])? { - push(violation, &mut results); - } + violations.extend(validate_constraint( + constraint, + &values, + &datatypes, + &[None], + )?); } } } - Ok(results) + Ok(violations) } /// Validate a structural (node-level) constraint @@ -1051,6 +1076,28 @@ fn validate_nested_shape<'a>( let mut results = Vec::new(); + // Value constraints on an anonymous member apply to the focus node + // itself (no sh:path — the member's value-node set is the focus). 
+ // Without this, a value-only member like `sh:or ([ sh:class ex:C ])` + // would be treated as conforming with no checks. + if !nested.value_constraints.is_empty() { + let violations = + focus_value_violations(db, focus_node, &nested.value_constraints, class_ctx) + .await?; + for violation in violations { + results.push(ValidationResult { + focus_node: focus_node.clone(), + result_path: None, + source_shape: parent_shape.id.clone(), + source_constraint: Some(nested.id.clone()), + severity: Severity::Violation, + message: nested.message.clone().unwrap_or(violation.message), + value: violation.value, + graph_id: None, + }); + } + } + // Validate property constraints for (path, constraints) in &nested.property_constraints { // A path that never compiled surfaces as a violation on this member. @@ -1158,11 +1205,12 @@ fn validate_nested_shape<'a>( shape, min_count, max_count, - .. + disjoint, + sibling_shapes, } => { let mut conforming = 0usize; for (i, value) in values.iter().enumerate() { - let conforms = check_value_against_nested_shape( + let mut conforms = check_value_against_nested_shape( db, value, datatypes.get(i), @@ -1174,6 +1222,26 @@ fn validate_nested_shape<'a>( active, ) .await?; + if conforms && *disjoint { + for sibling in sibling_shapes { + if check_value_against_nested_shape( + db, + value, + datatypes.get(i), + langs.get(i).and_then(|l| l.as_deref()), + sibling, + parent_shape, + all_shapes, + class_ctx, + active, + ) + .await? + { + conforms = false; + break; + } + } + } if conforms { conforming += 1; } @@ -1777,22 +1845,6 @@ async fn check_value_against_nested_shape<'a>( // If the nested shape has value-level constraints (e.g. sh:datatype without sh:path), // check them directly against the value/datatype. if !nested.value_constraints.is_empty() { - // sh:class on an anonymous value shape needs db access for the - // rdf:type lookup; the pure constraint-set path below skips it. 
- for constraint in &nested.value_constraints { - if let Constraint::Class(expected_class) = constraint { - let violations = validate_class_constraint( - db, - std::slice::from_ref(value), - expected_class, - class_ctx, - ) - .await?; - if !violations.is_empty() { - return Ok(false); - } - } - } let dt_arr: [Sid; 1]; let dt_slice: &[Sid] = match datatype { Some(dt) => { @@ -1802,13 +1854,35 @@ async fn check_value_against_nested_shape<'a>( None => &[], }; let lang_arr = [lang.map(str::to_string)]; - let violations = validate_constraint_set( - &nested.value_constraints, - std::slice::from_ref(value), - dt_slice, - &lang_arr, - )?; - return Ok(violations.is_empty()); + for constraint in &nested.value_constraints { + let conforms = match constraint { + // sh:class needs db access for the rdf:type lookup. + Constraint::Class(expected_class) => validate_class_constraint( + db, + std::slice::from_ref(value), + expected_class, + class_ctx, + ) + .await? + .is_empty(), + // String facets match STR(iri) for IRI values. + Constraint::Pattern(..) | Constraint::MinLength(_) | Constraint::MaxLength(_) => { + let effective = stringify_iri_values(db, std::slice::from_ref(value)); + validate_constraint(constraint, &effective, dt_slice, &lang_arr)?.is_empty() + } + _ => validate_constraint( + constraint, + std::slice::from_ref(value), + dt_slice, + &lang_arr, + )? + .is_empty(), + }; + if !conforms { + return Ok(false); + } + } + return Ok(true); } // For IRI/blank-node values, evaluate the nested shape against the value as a focus node @@ -1854,21 +1928,6 @@ fn has_iri_ref(values: &[FlakeValue]) -> bool { .any(|v| matches!(v, FlakeValue::Ref(sid) if sid.namespace_code != BLANK_NODE)) } -/// Apply multiple constraints to a set of values and collect all violations. 
-fn validate_constraint_set( - constraints: &[Constraint], - values: &[FlakeValue], - datatypes: &[Sid], - langs: &[Option], -) -> Result> { - let mut all_violations = Vec::new(); - for constraint in constraints { - let violations = validate_constraint(constraint, values, datatypes, langs)?; - all_violations.extend(violations); - } - Ok(all_violations) -} - /// Validate a constraint against a set of values fn validate_constraint( constraint: &Constraint, From ed1e7ccba771a23c0e3f9c484ea21b2cb4ba50f9 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 11:07:22 -0400 Subject: [PATCH 21/23] feat(shacl): support inverse over any property path; strict path parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fixes: - sh:inversePath was limited to a single predicate, rejecting valid SHACL like ^(p1/p2). Inversion now rewrites into the AST: inverse of a sequence is the reversed sequence of inverses, inverse of an alternative distributes, inverse of a closure wraps the inverted inner path, and a double inverse collapses. - Malformed paths error instead of compiling nondeterministically: path operators take exactly one value (multiple distinct refs for sh:inversePath / closures error via sole_ref/operand_path), and multiple sh:path or operator-operand objects only form a sequence under the JSON-LD @list encoding (every flake carries a list index) — un-indexed multiples are distinct assertions and error. Operator operands accept both encodings via operand_path. - fluree-vocab: full-IRI constants for the path operators, sh:node, and sh:deactivated to match the local-name additions. The unsupported-path tests now use a genuinely malformed fixture (a literal sequence step); a new test covers ^(parent/parent). 
--- docs/guides/cookbook-shacl.md | 2 +- fluree-db-api/src/shacl_tests.rs | 65 +++++++++++++-- fluree-db-shacl/src/lib.rs | 9 +- fluree-db-shacl/src/path.rs | 136 +++++++++++++++++++++++++------ fluree-vocab/src/lib.rs | 21 +++++ 5 files changed, 200 insertions(+), 33 deletions(-) diff --git a/docs/guides/cookbook-shacl.md b/docs/guides/cookbook-shacl.md index 79e2179288..1bb94d0f87 100644 --- a/docs/guides/cookbook-shacl.md +++ b/docs/guides/cookbook-shacl.md @@ -108,7 +108,7 @@ See [Predicate-target shapes](#predicate-target-shapes) for notes on how the sta | Path form | Turtle syntax | Reaches | |-----------|---------------|---------| | Predicate | `sh:path ex:knows` | objects of `ex:knows` | -| Inverse | `sh:path [ sh:inversePath ex:parent ]` | subjects that point at the focus via `ex:parent` | +| Inverse | `sh:path [ sh:inversePath ex:parent ]` | subjects that point at the focus via `ex:parent` (works over any path: `[ sh:inversePath ( ex:a ex:b ) ]` reaches nodes two hops upstream) | | Sequence | `sh:path ( ex:knows schema:name )` | names of the people the focus knows | | Alternative | `sh:path [ sh:alternativePath ( ex:email ex:altEmail ) ]` | values via **either** predicate | | Zero-or-more | `sh:path [ sh:zeroOrMorePath ex:parent ]` | the focus **and** all transitive `ex:parent` ancestors | diff --git a/fluree-db-api/src/shacl_tests.rs b/fluree-db-api/src/shacl_tests.rs index 49bdf06a1c..e2c3d07204 100644 --- a/fluree-db-api/src/shacl_tests.rs +++ b/fluree-db-api/src/shacl_tests.rs @@ -2298,8 +2298,8 @@ async fn shacl_one_or_more_path() { assert_shacl_violation(err, "at least 2"); } -/// An unsupported path form — the inverse of a composite path (`^(ex:a+)`) — -/// must be rejected loudly at shape-compile time, not silently misbehave. +/// A malformed path — a literal step inside a sequence — must surface as a +/// violation when the shape fires, not silently misbehave. 
#[tokio::test] async fn shacl_unsupported_path_rejected() { let fluree = FlureeBuilder::memory().build_memory(); @@ -2311,7 +2311,7 @@ async fn shacl_unsupported_path_rejected() { "sh:targetClass": {"@id": "ex:Thing"}, "sh:property": [{ "@id": "ex:pshape_bad", - "sh:path": {"sh:inversePath": {"sh:oneOrMorePath": {"@id": "ex:a"}}}, + "sh:path": {"@list": [{"@id": "ex:a"}, "not-a-path"]}, "sh:minCount": 1 }] }); @@ -2333,7 +2333,7 @@ async fn shacl_unsupported_path_rejected() { ) .await .unwrap_err(); - assert_shacl_violation(err, "inversePath"); + assert_shacl_violation(err, "literal"); } /// An unsupported path on a node that the shape does **not** target must not @@ -2349,7 +2349,7 @@ async fn shacl_unsupported_path_scoped_to_targets() { "sh:targetClass": {"@id": "ex:Thing"}, "sh:property": [{ "@id": "ex:pshape_bad", - "sh:path": {"sh:inversePath": {"sh:oneOrMorePath": {"@id": "ex:a"}}}, + "sh:path": {"@list": [{"@id": "ex:a"}, "not-a-path"]}, "sh:minCount": 1 }] }); @@ -3794,3 +3794,58 @@ async fn shacl_warning_severity_on_node_value_constraint() { .await .expect("warn-severity node value constraint must not reject"); } + +/// Inverse of a composite path — `^(ex:parent/ex:parent)` — is valid SHACL and +/// now rewrites into the AST: a Grandparent must have at least one grandchild. +#[tokio::test] +async fn shacl_inverse_of_sequence_path() { + let fluree = FlureeBuilder::memory().build_memory(); + let context = shacl_context(); + let shape_txn = json!({ + "@context": context.clone(), + "@id": "ex:GrandparentShape", + "@type": "sh:NodeShape", + "sh:targetClass": {"@id": "ex:Grandparent"}, + "sh:property": [{ + "@id": "ex:pshape_grandkids", + "sh:path": {"sh:inversePath": {"@list": [{"@id": "ex:parent"}, {"@id": "ex:parent"}]}}, + "sh:minCount": 1 + }] + }); + + // Valid: child → mom → grandma, so grandma has a grandchild via ^(parent/parent). 
+ let ledger_ok = fluree.create_ledger("shacl/invseq-ok:main").await.unwrap(); + let ledger_ok = fluree.upsert(ledger_ok, &shape_txn).await.unwrap().ledger; + fluree + .upsert( + ledger_ok, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:grandma", "@type": "ex:Grandparent"}, + {"@id": "ex:mom", "ex:parent": {"@id": "ex:grandma"}}, + {"@id": "ex:kid", "ex:parent": {"@id": "ex:mom"}} + ] + }), + ) + .await + .expect("grandchild reachable via inverse sequence path should pass"); + + // Invalid: grandma has a child but no grandchild. + let ledger_bad = fluree.create_ledger("shacl/invseq-bad:main").await.unwrap(); + let ledger_bad = fluree.upsert(ledger_bad, &shape_txn).await.unwrap().ledger; + let err = fluree + .upsert( + ledger_bad, + &json!({ + "@context": context.clone(), + "@graph": [ + {"@id": "ex:gran2", "@type": "ex:Grandparent"}, + {"@id": "ex:son", "ex:parent": {"@id": "ex:gran2"}} + ] + }), + ) + .await + .unwrap_err(); + assert_shacl_violation(err, "at least 1"); +} diff --git a/fluree-db-shacl/src/lib.rs b/fluree-db-shacl/src/lib.rs index a0a2ab0f8f..09489b0960 100644 --- a/fluree-db-shacl/src/lib.rs +++ b/fluree-db-shacl/src/lib.rs @@ -53,11 +53,12 @@ //! //! Besides a single predicate IRI, `sh:path` supports property path expressions //! (compiled by [`path::resolve_sh_path`] into a [`PropertyPath`] AST and evaluated -//! by [`path::eval_path`]): `sh:inversePath` (over a single predicate), sequence -//! paths (RDF lists), `sh:alternativePath`, `sh:zeroOrMorePath`, +//! by [`path::eval_path`]): `sh:inversePath` (over any path — the inverse of a +//! composite rewrites into the AST, e.g. `^(p1/p2)` becomes `^p2/^p1`), +//! sequence paths (RDF lists), `sh:alternativePath`, `sh:zeroOrMorePath`, //! `sh:oneOrMorePath`, and `sh:zeroOrOnePath` — including nesting of these. -//! The one unsupported form, the inverse of a composite path (`^(p1/p2)`), -//! compiles to [`PropertyPath::Unresolvable`] and surfaces as a violation when +//! 
Malformed paths (a literal step, multiple un-listed values for an operator) +//! compile to [`PropertyPath::Unresolvable`] and surface as a violation when //! the owning shape fires on a focus node — scoped to that shape's targets //! rather than failing every transaction on the ledger. //! diff --git a/fluree-db-shacl/src/path.rs b/fluree-db-shacl/src/path.rs index 17724115b9..6b1aa3460a 100644 --- a/fluree-db-shacl/src/path.rs +++ b/fluree-db-shacl/src/path.rs @@ -119,6 +119,26 @@ fn shacl(name: &str) -> Sid { Sid::new(SHACL, name) } +/// Rewrite a path into its inverse: `^(p1/p2)` becomes `^p2/^p1`, +/// `^(p1|p2)` becomes `^p1|^p2`, `^(p*)` becomes `(^p)*`, and `^^p` +/// collapses back to `p`. `Unresolvable` passes through unchanged. +fn invert(path: PropertyPath) -> PropertyPath { + match path { + PropertyPath::Predicate(p) => PropertyPath::Inverse(p), + PropertyPath::Inverse(p) => PropertyPath::Predicate(p), + PropertyPath::Sequence(steps) => { + PropertyPath::Sequence(steps.into_iter().rev().map(invert).collect()) + } + PropertyPath::Alternative(alts) => { + PropertyPath::Alternative(alts.into_iter().map(invert).collect()) + } + PropertyPath::ZeroOrMore(inner) => PropertyPath::ZeroOrMore(Box::new(invert(*inner))), + PropertyPath::OneOrMore(inner) => PropertyPath::OneOrMore(Box::new(invert(*inner))), + PropertyPath::ZeroOrOne(inner) => PropertyPath::ZeroOrOne(Box::new(invert(*inner))), + PropertyPath::Unresolvable(reason) => PropertyPath::Unresolvable(reason), + } +} + /// Resolve the `sh:path` of a property shape subject into a [`PropertyPath`]. 
/// /// Handles all three encodings of `sh:path`: @@ -136,7 +156,8 @@ pub fn resolve_sh_path<'a>( ps_subject: &'a Sid, ) -> PathFuture<'a, Option> { Box::pin(async move { - let members = ordered_objects(db, ps_subject, &shacl(predicates::PATH)).await?; + let (members, all_indexed) = + ordered_objects(db, ps_subject, &shacl(predicates::PATH)).await?; match members.len() { 0 => Ok(None), 1 => match &members[0] { @@ -144,6 +165,10 @@ pub fn resolve_sh_path<'a>( // sh:path with a literal object is invalid; skip. _ => Ok(None), }, + // Multiple sh:path objects are only a sequence under the JSON-LD + // @list encoding (every flake carries a list index). Un-indexed + // multiples are distinct sh:path assertions — a malformed shape. + _ if !all_indexed => Err(unsupported(ps_subject, "multiple sh:path values")), _ => { // JSON-LD @list sequence: each ordered object is a path step. let mut steps = Vec::new(); @@ -170,16 +195,10 @@ pub fn resolve_sh_path<'a>( /// blank node) into a [`PropertyPath`]. fn resolve_path_node<'a>(db: GraphDbRef<'a>, node: &'a Sid) -> PathFuture<'a, PropertyPath> { Box::pin(async move { - // sh:inversePath - if let Some(obj) = single_ref(db, node, &shacl(predicates::INVERSE_PATH)).await? { - let inner = resolve_path_node(db, &obj).await?; - return match inner { - PropertyPath::Predicate(p) => Ok(PropertyPath::Inverse(p)), - _ => Err(unsupported( - node, - "sh:inversePath is only supported over a single predicate", - )), - }; + // sh:inversePath — inverse of any path, rewritten into the AST + // (inverse of a sequence = reversed sequence of inverses, etc.). + if let Some(inner) = operand_path(db, node, &shacl(predicates::INVERSE_PATH)).await? 
{ + return Ok(invert(inner)); } // sh:alternativePath (RDF list or JSON-LD @list of sub-paths) @@ -200,8 +219,7 @@ fn resolve_path_node<'a>(db: GraphDbRef<'a>, node: &'a Sid) -> PathFuture<'a, Pr (predicates::ONE_OR_MORE_PATH, PropertyPath::OneOrMore), (predicates::ZERO_OR_ONE_PATH, PropertyPath::ZeroOrOne), ] { - if let Some(obj) = single_ref(db, node, &shacl(pred)).await? { - let inner = resolve_path_node(db, &obj).await?; + if let Some(inner) = operand_path(db, node, &shacl(pred)).await? { return Ok(wrap(Box::new(inner))); } } @@ -232,7 +250,7 @@ fn resolve_members<'a>( predicate: &'a Sid, ) -> PathFuture<'a, Vec> { Box::pin(async move { - let objects = ordered_objects(db, subject, predicate).await?; + let (objects, all_indexed) = ordered_objects(db, subject, predicate).await?; // Turtle RDF-list form: a single object that is itself a list head. if let [FlakeValue::Ref(head)] = objects.as_slice() { @@ -241,6 +259,15 @@ fn resolve_members<'a>( return resolve_rdf_list(db, head).await; } } + if objects.len() > 1 && !all_indexed { + return Err(unsupported( + subject, + &format!( + "multiple values for {} in a path expression", + predicate.name + ), + )); + } // JSON-LD @list form (or a single direct member). let mut out = Vec::new(); @@ -271,12 +298,12 @@ fn resolve_rdf_list<'a>( if current == rdf_nil { break; } - let Some(first) = single_ref(db, ¤t, &rdf_first).await? else { + let Some(first) = sole_ref(db, ¤t, &rdf_first).await? else { break; }; members.push(resolve_path_node(db, &first).await?); - match single_ref(db, ¤t, &rdf_rest).await? { + match sole_ref(db, ¤t, &rdf_rest).await? { Some(next) => current = next, None => break, } @@ -286,12 +313,14 @@ fn resolve_rdf_list<'a>( } /// All objects of `(subject, predicate)`, ordered by the JSON-LD list index in -/// flake metadata (falling back to scan order when unindexed). +/// flake metadata (falling back to scan order when unindexed). 
The flag +/// reports whether every flake carried a list index — multiple objects +/// WITHOUT indexes are separate assertions, not a list encoding. async fn ordered_objects( db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid, -) -> Result> { +) -> Result<(Vec, bool)> { let flakes = db .range( IndexType::Spot, @@ -308,7 +337,8 @@ async fn ordered_objects( .map(|(pos, f)| (f.m.as_ref().and_then(|m| m.i), pos, f.o.clone())) .collect(); items.sort_by_key(|(idx, pos, _)| (idx.is_none(), idx.unwrap_or(0), *pos)); - Ok(items.into_iter().map(|(_, _, v)| v).collect()) + let all_indexed = items.iter().all(|(idx, _, _)| idx.is_some()); + Ok((items.into_iter().map(|(_, _, v)| v).collect(), all_indexed)) } /// Evaluate a property path from `focus`, returning the reached value nodes as @@ -449,8 +479,55 @@ fn dedup(mut values: Vec) -> Vec { values } -/// Fetch the single reference object of `(subject, predicate, ?)`, if any. -async fn single_ref(db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid) -> Result> { +/// Resolve a path operator's operand: a single reference resolves as one +/// sub-path; multiple references are a sequence only under the JSON-LD @list +/// encoding (every flake indexed) — un-indexed multiples are a malformed path. 
+fn operand_path<'a>( + db: GraphDbRef<'a>, + subject: &'a Sid, + predicate: &'a Sid, +) -> PathFuture<'a, Option> { + Box::pin(async move { + let (objects, all_indexed) = ordered_objects(db, subject, predicate).await?; + match objects.len() { + 0 => Ok(None), + 1 => match &objects[0] { + FlakeValue::Ref(obj) => Ok(Some(resolve_path_node(db, obj).await?)), + _ => Err(unsupported( + subject, + &format!("{} operand is a literal", predicate.name), + )), + }, + _ if !all_indexed => Err(unsupported( + subject, + &format!( + "multiple values for {} in a path expression", + predicate.name + ), + )), + _ => { + let mut steps = Vec::new(); + for obj in objects { + match obj { + FlakeValue::Ref(sid) => steps.push(resolve_path_node(db, &sid).await?), + _ => { + return Err(unsupported( + subject, + &format!("{} sequence step is a literal", predicate.name), + )) + } + } + } + Ok(Some(PropertyPath::Sequence(steps))) + } + } + }) +} + +/// Fetch the sole reference object of `(subject, predicate, ?)`, if any. +/// Path operators take exactly one value — multiple distinct references are a +/// malformed path and error rather than compiling nondeterministically. +async fn sole_ref(db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid) -> Result> { let flakes = db .range( IndexType::Spot, @@ -458,10 +535,23 @@ async fn single_ref(db: GraphDbRef<'_>, subject: &Sid, predicate: &Sid) -> Resul RangeMatch::subject_predicate(subject.clone(), predicate.clone()), ) .await?; - Ok(flakes.iter().find_map(|f| match &f.o { + let mut refs = flakes.iter().filter_map(|f| match &f.o { FlakeValue::Ref(sid) => Some(sid.clone()), _ => None, - })) + }); + let first = refs.next(); + if let Some(first) = &first { + if refs.any(|other| &other != first) { + return Err(unsupported( + subject, + &format!( + "multiple values for {} in a path expression", + predicate.name + ), + )); + } + } + Ok(first) } /// Whether `(subject, predicate, ?)` has any object (regardless of type). 
diff --git a/fluree-vocab/src/lib.rs b/fluree-vocab/src/lib.rs index 20094b1f88..c355f6faf3 100644 --- a/fluree-vocab/src/lib.rs +++ b/fluree-vocab/src/lib.rs @@ -874,6 +874,27 @@ pub mod shacl { /// sh:path IRI pub const PATH: &str = "http://www.w3.org/ns/shacl#path"; + /// sh:inversePath IRI + pub const INVERSE_PATH: &str = "http://www.w3.org/ns/shacl#inversePath"; + + /// sh:alternativePath IRI + pub const ALTERNATIVE_PATH: &str = "http://www.w3.org/ns/shacl#alternativePath"; + + /// sh:zeroOrMorePath IRI + pub const ZERO_OR_MORE_PATH: &str = "http://www.w3.org/ns/shacl#zeroOrMorePath"; + + /// sh:oneOrMorePath IRI + pub const ONE_OR_MORE_PATH: &str = "http://www.w3.org/ns/shacl#oneOrMorePath"; + + /// sh:zeroOrOnePath IRI + pub const ZERO_OR_ONE_PATH: &str = "http://www.w3.org/ns/shacl#zeroOrOnePath"; + + /// sh:node IRI + pub const NODE: &str = "http://www.w3.org/ns/shacl#node"; + + /// sh:deactivated IRI + pub const DEACTIVATED: &str = "http://www.w3.org/ns/shacl#deactivated"; + // ======================================================================== // Cardinality Constraints // ======================================================================== From 2aa17551c83f1ad24ce7669e06ff2083d34d7234 Mon Sep 17 00:00:00 2001 From: bplatz Date: Thu, 2 Jul 2026 11:08:05 -0400 Subject: [PATCH 22/23] fix(shacl): exclude own qualified shape by value from disjoint siblings Per the spec's sibling definition the set excludes the constraint's own sh:qualifiedValueShape by value, so two property shapes referencing the same qualified shape don't disqualify each other's values. 
--- fluree-db-shacl/src/compile.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index bcf69e6a9e..35f9164941 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -925,14 +925,19 @@ impl ShapeCompiler { for (i, ps) in prop_shapes.iter_mut().enumerate() { for constraint in &mut ps.constraints { if let Constraint::QualifiedValueShape { + shape, disjoint: true, sibling_shapes, .. } = constraint { + // Per spec the sibling set excludes the + // constraint's own qualified shape by value, not + // just by position. + let own_id = shape.id.clone(); *sibling_shapes = all_qualified .iter() - .filter(|(j, _)| *j != i) + .filter(|(j, s)| *j != i && s.id != own_id) .map(|(_, s)| Arc::clone(s)) .collect(); } From 2063e087a9b54aa059a4d27711d2f089209512f1 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 4 Jul 2026 20:31:16 -0400 Subject: [PATCH 23/23] perf(shacl): skip empty path/class probes in per-transaction compile Three review-driven optimizations to compile_from_dbs, which recompiles shapes on every transaction against a shape-bearing ledger: - resolve_path_node: short-circuit IRI-valued sh:path to a plain predicate before the six operator probes. Per the SHACL spec only blank nodes carry path-expression structure, so N property shapes no longer add ~6N empty SPOT scans per compile. Restores the base branch's zero-scan behavior. - dedup: key value nodes on a structural (FlakeValue, Sid, Option) tuple instead of a per-value format! heap allocation, dropping the O(n) Debug-format pass on closure/alternative output. - implicit class targets: hoist the two Opst rdf:type->Class scans out of the per-graph loop and skip them entirely when no shapes were found. Cross-graph resolution is preserved (runs after all graphs processed). 
--- fluree-db-shacl/src/compile.rs | 49 +++++++++++++++++++--------------- fluree-db-shacl/src/path.rs | 12 +++++++-- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/fluree-db-shacl/src/compile.rs b/fluree-db-shacl/src/compile.rs index 35f9164941..ad1fc0b224 100644 --- a/fluree-db-shacl/src/compile.rs +++ b/fluree-db-shacl/src/compile.rs @@ -281,28 +281,6 @@ impl ShapeCompiler { } } - // Collect subjects typed as a class — a shape that is also a class - // implicitly targets its own instances (SHACL "implicit class - // targets"). Bound-object scans, so cost scales with the number of - // declared classes, not the data. - let rdf_type = Sid::new(RDF, rdf_names::TYPE); - for class_class in [ - Sid::new(fluree_vocab::namespaces::RDFS, "Class"), - Sid::new(fluree_vocab::namespaces::OWL, "Class"), - ] { - let flakes = db - .range( - IndexType::Opst, - RangeTest::Eq, - RangeMatch::predicate_object( - rdf_type.clone(), - FlakeValue::Ref(class_class), - ), - ) - .await?; - class_typed.extend(flakes.iter().map(|f| f.s.clone())); - } - // Expand rdf:first/rdf:rest lists referenced by sh:in / sh:and / // sh:or / sh:xone / sh:ignoredProperties. Run after each graph so // that lists whose head lives in this graph can resolve — a list @@ -316,6 +294,33 @@ impl ShapeCompiler { compiler.resolve_paths(*db).await?; } + // Implicit class targets: a subject targets its own instances only when + // it is *both* a declared shape and typed as a class. When no shapes were + // found there is nothing to match, so skip the class-discovery scans + // entirely. Runs after all graphs are processed so a class declaration in + // one graph resolves against a shape defined in another. 
+ if !compiler.shapes.is_empty() { + let rdf_type = Sid::new(RDF, rdf_names::TYPE); + for db in dbs { + for class_class in [ + Sid::new(fluree_vocab::namespaces::RDFS, "Class"), + Sid::new(fluree_vocab::namespaces::OWL, "Class"), + ] { + let flakes = db + .range( + IndexType::Opst, + RangeTest::Eq, + RangeMatch::predicate_object( + rdf_type.clone(), + FlakeValue::Ref(class_class), + ), + ) + .await?; + class_typed.extend(flakes.iter().map(|f| f.s.clone())); + } + } + } + compiler.apply_implicit_class_targets(&class_typed); compiler.finalize() } diff --git a/fluree-db-shacl/src/path.rs b/fluree-db-shacl/src/path.rs index 6b1aa3460a..73a9ad23ba 100644 --- a/fluree-db-shacl/src/path.rs +++ b/fluree-db-shacl/src/path.rs @@ -195,6 +195,14 @@ pub fn resolve_sh_path<'a>( /// blank node) into a [`PropertyPath`]. fn resolve_path_node<'a>(db: GraphDbRef<'a>, node: &'a Sid) -> PathFuture<'a, PropertyPath> { Box::pin(async move { + // An IRI-valued sh:path is always a plain predicate (SHACL spec); only + // blank nodes carry path-expression structure. Short-circuit the common + // case to avoid six empty operator probes per property shape — resolve + // runs per shape per compile, and compiles happen per transaction. + if node.namespace_code != BLANK_NODE { + return Ok(PropertyPath::Predicate(node.clone())); + } + // sh:inversePath — inverse of any path, rewritten into the AST // (inverse of a sequence = reversed sequence of inverses, etc.). if let Some(inner) = operand_path(db, node, &shacl(predicates::INVERSE_PATH)).await? { @@ -474,8 +482,8 @@ async fn closure(db: GraphDbRef<'_>, focus: &Sid, inner: &PropertyPath) -> Resul /// Deduplicate value nodes (SHACL value nodes are a set). 
fn dedup(mut values: Vec) -> Vec { - let mut seen: HashSet = HashSet::new(); - values.retain(|(v, dt, lang)| seen.insert(format!("{v:?}|{dt:?}|{lang:?}"))); + let mut seen: HashSet<(FlakeValue, Sid, Option)> = HashSet::new(); + values.retain(|(v, dt, lang)| seen.insert((v.clone(), dt.clone(), lang.clone()))); values }