Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .fluree-memory/repo.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,22 @@ mem:fact-01ktyzzbs4ewxqpgvssvj72tt5 a mem:Fact ;
mem:createdAt "2026-06-12T22:41:50.884413+00:00"^^xsd:dateTime ;
mem:rationale "Continuation point for the decimal fix series; the remaining priority-4 cluster (canonical identity) is the prerequisite for issues #1324/#1325." .

mem:fact-01kwpj81fpnp0rk5s952wf3shd a mem:Fact ;
mem:content "FIXED (branch fix/filtered-delete-staging-hang): range_with_overlay point lookups paid a full overlay-translation tax per call (unbounded novelty walk + dict-translate + sort in binary_range_eq_v3) → per-flake lookup loops were O(calls × novelty log novelty); livelocked filtered-DELETE staging >900s. Two-layer fix: (1) staging list-meta hydration grouped per (g_id, s, p), FIRST-match dt-compatible meta per retraction — NOT distinct-per-duplicate: duplicate-value @list entries must lose exactly one entry per distinct WHERE binding, pinned by it_join_batched_overlay object-probe-list-retract; (2) cross-call LRU in binary_range.rs keyed (store_id, index_t, OverlayProvider::content_version, to_t, g, index) + overlay_window_for_range narrowing. content_version = globally-unique stamp refreshed on every Novelty mutation (per-instance epochs collide across divergent clones); wrapper overlays return None → uncached." ;
mem:tag "delete" ;
mem:tag "novelty" ;
mem:tag "overlay" ;
mem:tag "performance" ;
mem:tag "range-provider" ;
mem:tag "staging" ;
mem:tag "transact" ;
mem:scope mem:repo ;
mem:artifactRef "fluree-db-query/src/binary_range.rs" ;
mem:artifactRef "fluree-db-transact/src/stage.rs" ;
mem:branch "fix/filtered-delete-staging-hang" ;
mem:createdAt "2026-07-04T12:39:23.382379+00:00"^^xsd:dateTime ;
mem:rationale "range_with_overlay looks like a cheap point lookup at call sites but is O(novelty log novelty) per call; future per-flake lookup loops will reintroduce this livelock class." .

mem:decision-01kwphem9r3gvcvj2j5842ddt6 a mem:Decision ;
mem:content "Fulltext positioning: f:fullTextDefaults (#config graph) is the RECOMMENDED path — values keep standard xsd:string/rdf:langString datatypes (external RDF consumers see ordinary literals) and language-tagged values get per-language analyzers. The @fulltext datatype REPLACES the stored datatype with Fluree-specific f:fullText, so docs/guidance must not push it as the default; it's a quick-start for siloed databases or properties orthogonal to the core data model. Docs repositioned 2026-07 (fulltext.md, indexing-and-search README, cookbook-search, concepts/datatypes)." ;
mem:tag "datatypes" ;
Expand Down
254 changes: 254 additions & 0 deletions fluree-db-api/tests/it_transact_list_retract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,260 @@ async fn wildcard_delete_retracts_all_distinct_list_entries() {
);
}

/// Regression test for the filtered-DELETE staging livelock on a ledger
/// whose matched data lives entirely in novelty.
///
/// The ledger is made novelty-heavy by inserting two documents' chunks,
/// deleting everything, and re-inserting them, with no index rebuild in
/// between. The update under test has the field-reported shape
/// `{?s tag <doc>} {?s ?p ?o} DELETE {?s ?p ?o}` and matches subjects that
/// carry large `@list` vectors.
///
/// List-index hydration is grouped by (graph, subject, predicate) — one
/// range lookup per group — so this pins that the grouped path still fills
/// `m.i` on every retraction: every list entry and scalar of the tagged
/// subjects must be retracted, and untagged subjects must be untouched.
#[tokio::test]
async fn filtered_delete_retracts_tagged_subjects_with_lists_in_novelty() {
    // Fixture shape; the expected triple count further down is derived from
    // these so the two cannot drift apart.
    const CHUNKS_PER_DOC: u64 = 4;
    const VECTOR_LEN: u64 = 32;

    let fluree = FlureeBuilder::memory().build_memory();
    let empty_ledger = fluree
        .create_ledger("tx/list-retract-filtered:main")
        .await
        .expect("create");

    let list_ctx = json!({
        "ex": "http://example.org/",
        "ex:vector": { "@container": "@list" }
    });

    // Builds the JSON-LD payload for one source document: each chunk subject
    // links to `ex:doc-{tag}`, carries a label, and holds an `@list` vector.
    let make_docs = |tag: &str| {
        let mut subjects: Vec<JsonValue> = Vec::new();
        for i in 0..CHUNKS_PER_DOC {
            let vector: Vec<f64> = (0..VECTOR_LEN)
                .map(|k| (i * 100 + k) as f64 * 0.5)
                .collect();
            subjects.push(json!({
                "@id": format!("ex:chunk-{tag}-{i}"),
                "ex:sourceDocument": { "@id": format!("ex:doc-{tag}") },
                "ex:label": format!("chunk {i} of {tag}"),
                "ex:vector": vector
            }));
        }
        json!({ "@context": list_ctx, "@graph": subjects })
    };

    let docs_a = make_docs("a");
    let docs_b = make_docs("b");
    let delete_everything = json!({
        "where": { "@id": "?s", "?p": "?o" },
        "delete": { "@id": "?s", "?p": "?o" }
    });

    // Novelty-heavy state: assert, retract, and re-assert every flake without
    // an index rebuild, so all matched data sits in the novelty overlay.
    let seeded_a = fluree
        .insert(empty_ledger, &docs_a)
        .await
        .expect("insert a");
    let seeded_b = fluree
        .insert(seeded_a.ledger, &docs_b)
        .await
        .expect("insert b");
    let wiped = fluree
        .update(seeded_b.ledger, &delete_everything)
        .await
        .expect("delete everything");
    let restored_a = fluree
        .insert(wiped.ledger, &docs_a)
        .await
        .expect("re-insert a");
    let restored_b = fluree
        .insert(restored_a.ledger, &docs_b)
        .await
        .expect("re-insert b");

    // Counts every triple of the subjects linked to `ex:doc-{tag}`; yields 0
    // when the result carries no numeric first cell.
    let count_all = |ledger: fluree_db_api::LedgerState, tag: &'static str| {
        let fluree = &fluree;
        async move {
            let sparql = format!(
                "PREFIX ex: <http://example.org/> \
                 SELECT (COUNT(*) AS ?c) WHERE {{ \
                 ?s ex:sourceDocument ex:doc-{tag} . ?s ?p ?o }}"
            );
            let result = support::query_sparql(fluree, &ledger, &sparql)
                .await
                .expect("sparql count");
            let jsonld = result
                .to_jsonld_async(ledger.as_graph_db_ref(0))
                .await
                .expect("to_jsonld_async");
            let rows = jsonld.as_array().expect("array result");
            let first_cell = rows
                .first()
                .and_then(|row| row.as_array())
                .and_then(|cells| cells.first());
            first_cell.and_then(|cell| cell.as_u64()).unwrap_or(0)
        }
    };

    // Per subject: 1 sourceDocument + 1 label + VECTOR_LEN list entries.
    let per_doc_triples = CHUNKS_PER_DOC * (1 + 1 + VECTOR_LEN);
    let doc_a_before = count_all(restored_b.ledger.clone(), "a").await;
    assert_eq!(
        doc_a_before, per_doc_triples,
        "precondition: doc-a chunks fully re-inserted into novelty"
    );

    // The reported shape: tag pattern + wildcard pattern, wildcard delete.
    let filtered_delete = json!({
        "@context": { "ex": "http://example.org/" },
        "where": [
            { "@id": "?s", "ex:sourceDocument": { "@id": "ex:doc-a" } },
            { "@id": "?s", "?p": "?o" }
        ],
        "delete": { "@id": "?s", "?p": "?o" }
    });
    let out = fluree
        .update(restored_b.ledger, &filtered_delete)
        .await
        .expect("filtered delete");

    let doc_a_after = count_all(out.ledger.clone(), "a").await;
    assert_eq!(
        doc_a_after, 0,
        "every triple of the tagged subjects must be retracted, including \
         all @list entries — survivors mean grouped hydration failed to \
         populate `m.i` on some retraction"
    );

    let doc_b_after = count_all(out.ledger, "b").await;
    assert_eq!(
        doc_b_after, per_doc_triples,
        "untagged doc-b subjects must be untouched by the filtered delete"
    );
}

/// Filtered-delete regression test over an indexed base with novelty on top.
///
/// A binary index is published mid-history, so staging's list-meta hydration
/// lookups route through the V3 range provider (`binary_range_eq_v3`) and its
/// cross-call overlay translation cache, with the delete-everything +
/// re-insert novelty stacked on top of the persisted base.
///
/// Pins that cached overlay translations are (a) correct on repeated
/// same-state lookups and (b) invalidated across the intervening commits — a
/// stale entry would surface pre-delete flakes or miss re-inserted ones,
/// breaking the counts asserted here.
#[tokio::test]
async fn filtered_delete_with_lists_on_indexed_base_plus_novelty() {
    // Fixture shape; the expected triple count further down is derived from
    // these so the two cannot drift apart.
    const CHUNKS_PER_DOC: u64 = 4;
    const VECTOR_LEN: u64 = 32;

    let fluree = FlureeBuilder::memory().build_memory();
    let ledger_id = "tx/list-retract-indexed:main";
    let empty_ledger = fluree.create_ledger(ledger_id).await.expect("create");

    let list_ctx = json!({
        "ex": "http://example.org/",
        "ex:vector": { "@container": "@list" }
    });

    // Builds the JSON-LD payload for one source document: each chunk subject
    // links to `ex:doc-{tag}`, carries a label, and holds an `@list` vector.
    let make_docs = |tag: &str| {
        let mut subjects: Vec<JsonValue> = Vec::new();
        for i in 0..CHUNKS_PER_DOC {
            let vector: Vec<f64> = (0..VECTOR_LEN)
                .map(|k| (i * 100 + k) as f64 * 0.5)
                .collect();
            subjects.push(json!({
                "@id": format!("ex:chunk-{tag}-{i}"),
                "ex:sourceDocument": { "@id": format!("ex:doc-{tag}") },
                "ex:label": format!("chunk {i} of {tag}"),
                "ex:vector": vector
            }));
        }
        json!({ "@context": list_ctx, "@graph": subjects })
    };

    let docs_a = make_docs("a");
    let docs_b = make_docs("b");

    // Base state: both docs inserted, then persisted into a binary index.
    let seeded_a = fluree
        .insert(empty_ledger, &docs_a)
        .await
        .expect("insert a");
    fluree
        .insert(seeded_a.ledger, &docs_b)
        .await
        .expect("insert b");
    support::rebuild_and_publish_index(&fluree, ledger_id).await;
    let indexed = fluree.ledger(ledger_id).await.expect("reload indexed");

    // Novelty on top of the index: delete everything, re-insert both docs.
    let delete_everything = json!({
        "where": { "@id": "?s", "?p": "?o" },
        "delete": { "@id": "?s", "?p": "?o" }
    });
    let wiped = fluree
        .update(indexed, &delete_everything)
        .await
        .expect("delete everything");
    let restored_a = fluree
        .insert(wiped.ledger, &docs_a)
        .await
        .expect("re-insert a");
    let restored_b = fluree
        .insert(restored_a.ledger, &docs_b)
        .await
        .expect("re-insert b");

    // Counts every triple of the subjects linked to `ex:doc-{tag}`; yields 0
    // when the result carries no numeric first cell.
    let count_all = |ledger: fluree_db_api::LedgerState, tag: &'static str| {
        let fluree = &fluree;
        async move {
            let sparql = format!(
                "PREFIX ex: <http://example.org/> \
                 SELECT (COUNT(*) AS ?c) WHERE {{ \
                 ?s ex:sourceDocument ex:doc-{tag} . ?s ?p ?o }}"
            );
            let result = support::query_sparql(fluree, &ledger, &sparql)
                .await
                .expect("sparql count");
            let jsonld = result
                .to_jsonld_async(ledger.as_graph_db_ref(0))
                .await
                .expect("to_jsonld_async");
            let rows = jsonld.as_array().expect("array result");
            let first_cell = rows
                .first()
                .and_then(|row| row.as_array())
                .and_then(|cells| cells.first());
            first_cell.and_then(|cell| cell.as_u64()).unwrap_or(0)
        }
    };

    // Per subject: 1 sourceDocument + 1 label + VECTOR_LEN list entries.
    let per_doc_triples = CHUNKS_PER_DOC * (1 + 1 + VECTOR_LEN);
    let doc_a_before = count_all(restored_b.ledger.clone(), "a").await;
    assert_eq!(
        doc_a_before, per_doc_triples,
        "precondition: doc-a re-inserted into novelty over the indexed base"
    );

    // Same reported shape as the novelty-only case: tag pattern + wildcard
    // pattern in the WHERE, wildcard delete.
    let filtered_delete = json!({
        "@context": { "ex": "http://example.org/" },
        "where": [
            { "@id": "?s", "ex:sourceDocument": { "@id": "ex:doc-a" } },
            { "@id": "?s", "?p": "?o" }
        ],
        "delete": { "@id": "?s", "?p": "?o" }
    });
    let out = fluree
        .update(restored_b.ledger, &filtered_delete)
        .await
        .expect("filtered delete");

    let doc_a_after = count_all(out.ledger.clone(), "a").await;
    assert_eq!(
        doc_a_after, 0,
        "every triple of the tagged subjects must be retracted through the \
         indexed range-provider path, including all @list entries"
    );

    let doc_b_after = count_all(out.ledger, "b").await;
    assert_eq!(
        doc_b_after, per_doc_triples,
        "untagged doc-b subjects must be untouched"
    );
}

/// Companion to the three-entry case: retracting a single-entry `@list`
/// where the asserted flake has `m.i = 0`. Pins the hydration behavior
/// for the simplest case.
Expand Down
14 changes: 14 additions & 0 deletions fluree-db-core/src/overlay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,20 @@ pub trait OverlayProvider: Send + Sync {
false
}

/// Globally-unique version stamp of this overlay's current content.
///
/// Intended for keying caches of data derived from a full overlay walk
/// (e.g. V3 overlay-op translations shared across `range_with_overlay`
/// calls).
///
/// # Contract
///
/// Unlike [`Self::epoch`] — which is only unique within one overlay
/// instance's lineage — implementations must guarantee that **no two
/// overlays whose `for_each_overlay_flake` output differs ever report
/// the same version**, across instances, clones, and overlay types.
///
/// # Returns
///
/// `Some(version)` when the implementation upholds the guarantee above.
/// `None` (the default) when no such guarantee exists; callers must then
/// skip caching and derive from a fresh walk.
fn content_version(&self) -> Option<u64> {
// Conservative default: an implementation that does not opt in makes no
// uniqueness promise, so nothing derived from it may be cached.
None
}

/// Push overlay flakes for a leaf's range to the callback
///
/// # Arguments
Expand Down
9 changes: 9 additions & 0 deletions fluree-db-core/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,15 @@ impl<O: OverlayProvider + ?Sized> OverlayProvider for SizedOverlayRef<'_, O> {
/// Forwards to the wrapped overlay's `epoch`, so the wrapper reports the
/// same value as the overlay it refers to.
fn epoch(&self) -> u64 {
self.0.epoch()
}

/// Forwards to the wrapped overlay's `is_effectively_empty`, so the wrapper
/// answers exactly as the overlay it refers to would.
fn is_effectively_empty(&self) -> bool {
self.0.is_effectively_empty()
}

/// Forwards to the wrapped overlay's `content_version`, returning whatever
/// that overlay reports (including `None`) unchanged.
fn content_version(&self) -> Option<u64> {
self.0.content_version()
}

fn for_each_overlay_flake(
&self,
g_id: GraphId,
Expand Down
Loading