diff --git a/docs/spo_3d/CONTRACTS.md b/docs/spo_3d/CONTRACTS.md new file mode 100644 index 0000000..7f2327b --- /dev/null +++ b/docs/spo_3d/CONTRACTS.md @@ -0,0 +1,666 @@ +# SPO 3D Contracts + +**Every type, trait, and invariant that must hold.** + +--- + +## 1. SPARSE CONTAINER CONTRACT + +```rust +/// A sparse encoding of a Container where only non-zero words are stored. +/// +/// INVARIANTS (checked at construction, enforced by type system): +/// - bitmap.count_ones() == words.len() +/// - bitmap bit N set ↔ words[popcount(bitmap & ((1 << N) - 1))] is the value +/// - to_dense().hamming(original_dense) == 0 (lossless round-trip) +/// - hamming_sparse(a, b) == a.to_dense().hamming(&b.to_dense()) (equivalence) +/// +/// SIZE: +/// - bitmap: 2 × u64 = 16 bytes (128 bits, one per Container word) +/// - words: density × 128 × 8 bytes +/// - At 30% density: 16 + 38×8 = 320 bytes per axis +pub struct SparseContainer { + /// Which of the 128 words are non-zero. Bit i set → word i is stored. + pub bitmap: [u64; 2], + /// Only the non-zero words, in order of bit position. + pub words: Vec, +} + +impl SparseContainer { + /// Contract: bitmap and words must be consistent. + pub fn new(bitmap: [u64; 2], words: Vec) -> Result { + let expected = bitmap[0].count_ones() + bitmap[1].count_ones(); + if words.len() != expected as usize { + return Err(SpoError::BitmapWordMismatch { + bitmap_ones: expected, + word_count: words.len(), + }); + } + Ok(Self { bitmap, words }) + } + + /// Lossless conversion to dense Container. + pub fn to_dense(&self) -> Container; + + /// Lossless conversion from dense Container. + pub fn from_dense(container: &Container) -> Self; + + /// Hamming distance WITHOUT densification. O(min(popcount_a, popcount_b)). + pub fn hamming_sparse(a: &SparseContainer, b: &SparseContainer) -> u32; + + /// XOR bind in sparse domain. 
+ pub fn bind_sparse(a: &SparseContainer, b: &SparseContainer) -> SparseContainer; + + /// Density: fraction of non-zero words (0.0 to 1.0). + pub fn density(&self) -> f32; + + /// Number of stored words. + pub fn word_count(&self) -> usize; +} +``` + +### SparseContainer Invariant Tests + +```rust +#[test] fn sparse_roundtrip_lossless() { + let dense = Container::random(42); + let sparse = SparseContainer::from_dense(&dense); + assert_eq!(sparse.to_dense(), dense); +} + +#[test] fn sparse_hamming_equivalence() { + let a = Container::random(1); + let b = Container::random(2); + let sa = SparseContainer::from_dense(&a); + let sb = SparseContainer::from_dense(&b); + assert_eq!( + SparseContainer::hamming_sparse(&sa, &sb), + a.hamming(&b) + ); +} + +#[test] fn sparse_bitmap_consistency() { + let sparse = SparseContainer::from_dense(&Container::random(99)); + let ones = sparse.bitmap[0].count_ones() + sparse.bitmap[1].count_ones(); + assert_eq!(ones as usize, sparse.words.len()); +} +``` + +--- + +## 2. NIBBLE SCENT CONTRACT + +```rust +/// 48-byte nibble histogram scent: 16 bins × 3 axes. +/// +/// Each axis gets a histogram of nibble (4-bit) frequencies across its +/// sparse container words. This captures the TYPE of content without +/// destroying structure (unlike XOR-fold). +/// +/// INVARIANTS: +/// - scent.x_hist: sum of all 16 bins = total nibbles in X axis words +/// - scent.y_hist: sum of all 16 bins = total nibbles in Y axis words +/// - scent.z_hist: sum of all 16 bins = total nibbles in Z axis words +/// - scent_distance(a, b) correlates with content_type_similarity(a, b) +/// - Different content types (Person, Concept, Edge) have distinct scent profiles +/// +/// SIZE: exactly 48 bytes = 6 × u64 = words 12-17 in meta container +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C)] +pub struct NibbleScent { + /// Nibble histogram for X (Subject) axis. 16 bins, each u8 (saturating). + pub x_hist: [u8; 16], + /// Nibble histogram for Y (Predicate) axis. 
+ pub y_hist: [u8; 16], + /// Nibble histogram for Z (Object) axis. + pub z_hist: [u8; 16], +} + +impl NibbleScent { + /// Size in bytes (compile-time constant). + pub const SIZE: usize = 48; + + /// Compute scent from three sparse axes. + pub fn from_axes(x: &SparseContainer, y: &SparseContainer, z: &SparseContainer) -> Self; + + /// L1 distance between two scents (sum of absolute bin differences). + pub fn distance(&self, other: &NibbleScent) -> u32; + + /// Per-axis distances for selective filtering. + pub fn axis_distances(&self, other: &NibbleScent) -> (u32, u32, u32); + + /// Pack into 6 u64 words (for meta container W12-W17). + pub fn to_words(&self) -> [u64; 6]; + + /// Unpack from 6 u64 words. + pub fn from_words(words: &[u64; 6]) -> Self; + + /// Zero scent (empty record). + pub fn zero() -> Self; +} +``` + +### NibbleScent Invariant Tests + +```rust +#[test] fn scent_size_is_48() { + assert_eq!(std::mem::size_of::(), 48); +} + +#[test] fn scent_word_roundtrip() { + let scent = NibbleScent::from_axes(&x, &y, &z); + let words = scent.to_words(); + assert_eq!(NibbleScent::from_words(&words), scent); +} + +#[test] fn scent_different_types_distinct() { + let person = build_node(dn_hash("jan"), &[LBL_PERSON], &[(KEY_NAME, "Jan")]); + let concept = build_node(dn_hash("rust"), &[LBL_CONCEPT], &[(KEY_NAME, "Rust")]); + let ps = NibbleScent::from_record(&person); + let cs = NibbleScent::from_record(&concept); + assert!(ps.distance(&cs) > 10, "Different types must have distinct scents"); +} +``` + +--- + +## 3. PACKED AXES CONTRACT + +```rust +/// Three sparse axes packed into one content Container (128 × u64). 
+/// +/// Layout within the content container: +/// ```text +/// words[0..2]: X bitmap (128 bits) +/// words[2..2+Nx]: X non-zero words +/// words[2+Nx..4+Nx]: Y bitmap +/// words[4+Nx..4+Nx+Ny]: Y non-zero words +/// words[4+Nx+Ny..6+Nx+Ny]: Z bitmap +/// words[6+Nx+Ny..6+Nx+Ny+Nz]: Z non-zero words +/// ``` +/// +/// INVARIANTS: +/// - 6 + Nx + Ny + Nz <= 128 (must fit in one Container) +/// - If overflow: fall back to ContainerGeometry::Xyz (3 linked CogRecords) +/// - pack(unpack(container)) == container (lossless) +/// +/// Meta words 34-39 store axis descriptors: +/// - W34: x_offset(u16) | x_count(u16) | y_offset(u16) | y_count(u16) +/// - W35: z_offset(u16) | z_count(u16) | total_words(u16) | reserved(u16) +pub struct PackedAxes; + +impl PackedAxes { + /// Maximum total non-zero words across all 3 axes. + /// 128 total - 6 bitmap words = 122 available. + pub const MAX_CONTENT_WORDS: usize = 122; + + /// Pack three sparse axes into one Container. + /// Returns Err if total density exceeds capacity. + pub fn pack( + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, + ) -> Result; + + /// Unpack content Container into three sparse axes. + /// Reads axis descriptors from meta words 34-35. + pub fn unpack( + content: &Container, + meta: &Container, + ) -> Result<(SparseContainer, SparseContainer, SparseContainer), SpoError>; + + /// Check if axes fit in one Container. + pub fn fits(x: &SparseContainer, y: &SparseContainer, z: &SparseContainer) -> bool { + x.word_count() + y.word_count() + z.word_count() + 6 <= 128 + } + + /// Write axis descriptors to meta words 34-35. + pub fn write_descriptors( + meta: &mut [u64; 128], + x_count: u16, y_count: u16, z_count: u16, + ); +} +``` + +### Capacity Budget + +| Density | Words per axis | Total (3 axes + 6 bitmap) | Fits? 
| +|---------|---------------|---------------------------|-------| +| 10% | 13 | 6 + 39 = 45 | ✓ | +| 20% | 26 | 6 + 78 = 84 | ✓ | +| 30% | 38 | 6 + 114 = 120 | ✓ | +| 35% | 45 | 6 + 135 = 141 | ✗ → use Xyz fallback | +| 40% | 51 | 6 + 153 = 159 | ✗ | + +**At typical 30% density per axis: 120/128 words used = 93.75% utilization.** + +--- + +## 4. SPO RECORD VIEW CONTRACT + +```rust +/// Zero-copy SPO view into a CogRecord with Spo geometry. +/// +/// Provides typed access to the three axes without copying or allocating. +/// SAFETY: only valid when record.geometry() == ContainerGeometry::Spo. +pub struct SpoView<'a> { + meta: MetaView<'a>, + x: SparseContainer, // unpacked on construction + y: SparseContainer, + z: SparseContainer, + scent: NibbleScent, +} + +impl<'a> SpoView<'a> { + /// Construct from a CogRecord. Panics if geometry != Spo. + pub fn new(record: &'a CogRecord) -> Self; + + /// Try to construct (returns None if geometry != Spo). + pub fn try_new(record: &'a CogRecord) -> Option; + + // --- Axis access --- + pub fn x(&self) -> &SparseContainer; + pub fn y(&self) -> &SparseContainer; + pub fn z(&self) -> &SparseContainer; + pub fn scent(&self) -> &NibbleScent; + + // --- Dense axis access (for operations that need full Container) --- + pub fn x_dense(&self) -> Container; + pub fn y_dense(&self) -> Container; + pub fn z_dense(&self) -> Container; + + // --- NARS truth --- + pub fn nars(&self) -> TruthValue; + + // --- DN identity --- + pub fn dn(&self) -> u64; + + // --- Causal queries --- + + /// Hamming distance of this record's Z to another's X. + /// This IS the causal coherence score. + pub fn causal_coherence_to(&self, other: &SpoView) -> u32; + + /// Hamming distance of this record's X to another's Z. + /// Reverse causal query. + pub fn causal_coherence_from(&self, other: &SpoView) -> u32; +} + +/// Mutable SPO view for record construction. 
+pub struct SpoViewMut<'a> { + record: &'a mut CogRecord, +} + +impl<'a> SpoViewMut<'a> { + /// Set the three axes. Packs sparse into content Container. + pub fn set_axes( + &mut self, + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, + ) -> Result<(), SpoError>; + + /// Set NARS truth value. + pub fn set_nars(&mut self, tv: TruthValue); + + /// Set scent (computed from axes). + pub fn set_scent(&mut self, scent: NibbleScent); + + /// Set DN address. + pub fn set_dn(&mut self, dn: u64); + + /// Set DN tree links. + pub fn set_dn_tree(&mut self, parent: u64, child: u64, next: u64, prev: u64); + + /// Convenience: build and commit a complete SPO record. + pub fn build_complete( + &mut self, + dn: u64, + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, + nars: TruthValue, + ) -> Result<(), SpoError>; +} +``` + +--- + +## 5. SPO STORE CONTRACT + +```rust +/// Three-axis content-addressable graph store. +/// +/// INVARIANTS: +/// - Every record has geometry == Spo +/// - query_forward + query_reverse + query_relation are exhaustive +/// (no record is invisible to any query direction) +/// - causal_successors(A) and causal_predecessors(B) are inverses: +/// A in causal_predecessors(B) ⟺ B in causal_successors(A) +pub struct SpoStore { + records: BTreeMap, // POC: DN → record. Production: LanceDB. +} + +impl SpoStore { + pub fn new() -> Self; + pub fn insert(&mut self, record: CogRecord) -> Result<(), SpoError>; + pub fn get(&self, dn: u64) -> Option<&CogRecord>; + pub fn len(&self) -> usize; + + // --- Three-axis queries --- + + /// Forward: "What does ?" → scan X+Y, return Z matches. + pub fn query_forward( + &self, + src_fp: &Container, + verb_fp: &Container, + radius: u32, + ) -> Vec; + + /// Reverse: "Who s ?" → scan Z+Y, return X matches. + pub fn query_reverse( + &self, + tgt_fp: &Container, + verb_fp: &Container, + radius: u32, + ) -> Vec; + + /// Relation: "How are and related?" → scan X+Z, return Y matches. 
+ pub fn query_relation( + &self, + src_fp: &Container, + tgt_fp: &Container, + radius: u32, + ) -> Vec; + + /// Content: "Find anything matching this fingerprint" → scan all axes. + pub fn query_content( + &self, + query: &Container, + radius: u32, + ) -> Vec; + + // --- Causal chain --- + + /// Records whose X resonates with `record`'s Z. + pub fn causal_successors( + &self, + record: &CogRecord, + radius: u32, + ) -> Vec; + + /// Records whose Z resonates with `record`'s X. + pub fn causal_predecessors( + &self, + record: &CogRecord, + radius: u32, + ) -> Vec; + + /// Walk a causal chain forward from `start`, max `depth` hops. + pub fn walk_chain_forward( + &self, + start: &CogRecord, + radius: u32, + depth: usize, + ) -> Vec>; + + /// Compute chain coherence: product of link coherences. + pub fn chain_coherence(&self, chain: &[u64]) -> f32; + + // --- Scent pre-filter --- + + /// Filter records by scent distance before Hamming scan. + pub fn scent_prefilter( + &self, + query_scent: &NibbleScent, + max_distance: u32, + ) -> Vec; +} + +#[derive(Clone, Debug)] +pub struct QueryHit { + pub dn: u64, + pub distance: u32, + pub axis: QueryAxis, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum QueryAxis { + X, // matched on Subject + Y, // matched on Predicate + Z, // matched on Object + XY, // matched on Subject + Predicate (forward query) + YZ, // matched on Predicate + Object (reverse query) + XZ, // matched on Subject + Object (relation query) +} +``` + +--- + +## 6. BUILDER CONTRACT + +```rust +/// Node/edge construction for SPO records. +/// +/// INVARIANTS: +/// - build_node produces a record where Y axis is near-zero (nodes have no verb) +/// - build_edge produces a record where all three axes are populated +/// - scent is automatically computed from axes +/// - NARS truth is stamped in meta W4-W7 +pub struct SpoBuilder; + +impl SpoBuilder { + /// Build a node record (entity with labels + properties). 
+ /// + /// X axis = BUNDLE(label_fps, property_fps) — entity identity + /// Y axis = near-zero (no verb for nodes) + /// Z axis = mirror of X (self-referential for CAM lookup) + pub fn build_node( + dn: u64, + label_fps: &[&Container], + property_fps: &[(&Container, &Container)], // (key_fp, val_fp) + nars: TruthValue, + ) -> Result; + + /// Build an edge record (relationship between two entities). + /// + /// X axis = BIND(src_fp, PERMUTE(verb_fp, 1)) — subject+verb + /// Y axis = verb_fp — pure verb + /// Z axis = BIND(tgt_fp, PERMUTE(verb_fp, 2)) — object+verb + pub fn build_edge( + dn: u64, + src_fp: &Container, + verb_fp: &Container, + tgt_fp: &Container, + nars: TruthValue, + ) -> Result; + + /// Build a meta-awareness record from a causal chain. + /// + /// X axis = BUNDLE(chain_x_axes) — entities involved + /// Y axis = CHAIN_DISCOVERED_fp — "I noticed a chain" + /// Z axis = BUNDLE(chain_z_axes) — what the chain implies + pub fn build_meta_awareness( + dn: u64, + chain: &[&CogRecord], + chain_coherence: f32, + ) -> Result; +} +``` + +--- + +## 7. ERROR CONTRACT + +```rust +#[derive(Clone, Debug, thiserror::Error)] +pub enum SpoError { + #[error("Bitmap has {bitmap_ones} ones but {word_count} words supplied")] + BitmapWordMismatch { bitmap_ones: u32, word_count: usize }, + + #[error("Axes too dense: {total} words needed, max {max}")] + AxesOverflow { total: usize, max: usize }, + + #[error("Record geometry is {actual:?}, expected Spo")] + WrongGeometry { actual: ContainerGeometry }, + + #[error("DN {dn:#x} already exists in store")] + DuplicateDn { dn: u64 }, + + #[error("DN {dn:#x} not found in store")] + DnNotFound { dn: u64 }, + + #[error("Chain is empty")] + EmptyChain, + + #[error("Scent computation failed: {reason}")] + ScentError { reason: String }, +} +``` + +--- + +## 8. 
GEOMETRY EXTENSION CONTRACT + +```rust +// In crates/ladybug-contract/src/geometry.rs: +#[repr(u8)] +pub enum ContainerGeometry { + Cam = 0, + Xyz = 1, + Bridge = 2, + Extended = 3, + Chunked = 4, + Tree = 5, + Spo = 6, // ← NEW: Subject-Predicate-Object with sparse axes +} +``` + +### Migration Path + +- Existing `Cam` records: unaffected. Still 1 content container, flat scan. +- Existing `Xyz` records: unaffected. Still 3 linked records via DN tree. +- New `Spo` records: 1 content container with packed sparse axes. +- `Spo` CAN fall back to `Xyz` if axes are too dense (>35% each). + +--- + +## 9. LANCE COLUMNAR SCHEMA + +``` +Column 0: dn UInt64 PRIMARY KEY (sort key prefix) +Column 1: meta FixedBinary(1024) +Column 2: x_bitmap FixedBinary(16) +Column 3: x_words Binary variable length +Column 4: y_bitmap FixedBinary(16) +Column 5: y_words Binary +Column 6: z_bitmap FixedBinary(16) +Column 7: z_words Binary +Column 8: scent FixedBinary(48) 3×16 nibble histogram +Column 9: created Int64 +Column 10: nars_freq Float32 +Column 11: nars_conf Float32 + +Sort key: (dn >> 48, scent[0..4], dn) + → DN locality first, then scent similarity, then exact DN + → Adjacent rows share subtree + content type + → XOR delta compression ~79% zeros within sorted groups +``` + +### Query Column Selection + +| Query type | Columns read | +|-----------|-------------| +| Forward | 0, 2, 3, 4, 5, 8 (dn, x, y, scent) | +| Reverse | 0, 4, 5, 6, 7, 8 (dn, y, z, scent) | +| Relation | 0, 2, 3, 6, 7, 8 (dn, x, z, scent) | +| Content | 0, 2-7, 8 (all axes + scent) | +| Causal | 0, 2, 3, 6, 7, 8 (x of successors, z of predecessors) | +| Metadata | 0, 1 (dn, meta only) | + +--- + +## 10. NARS INTEGRATION + +NARS truth values live in meta W4-W7 (unchanged from existing CogRecord): + +``` +W4: frequency (f32) +W5: confidence (f32) +W6: pos_evidence (f32) +W7: neg_evidence (f32) +``` + +### Chain Truth Propagation + +```rust +/// Deduction along a causal chain: +/// f_chain = f_1 × f_2 × ... 
× f_n +/// c_chain = c_1 × c_2 × ... × c_n × coherence_1_2 × coherence_2_3 × ... +/// +/// Coherence factor per Z→X link: +/// coherence = 1.0 - (hamming(Z_i, X_{i+1}) as f32 / 8192.0) +pub fn chain_deduction(chain: &[&CogRecord]) -> TruthValue; + +/// Revision: merge two beliefs about the same triple. +/// Uses existing nars::revision() from ladybug_contract. +pub fn spo_revision(a: &CogRecord, b: &CogRecord) -> TruthValue; +``` + +--- + +## 11. SIX IRONCLAD TESTS + +```rust +#[test] fn test_1_node_roundtrip() { + // Create "Jan" {Person, name: "Jan", age: 42} + // Insert into store → retrieve → verify X axis Hamming < 100 +} + +#[test] fn test_2_forward_query() { + // Jan → KNOWS → Ada + // query_forward(jan_fp, KNOWS_fp) must find Ada +} + +#[test] fn test_3_reverse_query() { + // Jan → KNOWS → Ada + // query_reverse(ada_fp, KNOWS_fp) must find Jan — NO separate index +} + +#[test] fn test_4_cam_content_lookup() { + // 100 nodes → query by content fingerprint → find Jan +} + +#[test] fn test_5_nars_reasoning() { + // "Jan knows Rust" <0.8, 0.9> + "Rust helps CAM" <0.7, 0.8> + // → deduction → verify f/c values → revision increases confidence +} + +#[test] fn test_6_causal_chain_coherence() { + // Create chain: Jan → KNOWS → Rust → ENABLES → CAM + // Verify Z→X resonance (Hamming < 200) + // Verify causal_successors finds the chain + // Build meta-awareness record → verify convergence +} +``` + +--- + +## 12. COMPATIBILITY MATRIX + +| Component | Before SPO | After SPO | Breaking? 
| +|-----------|-----------|-----------|-----------| +| Container | unchanged | unchanged | No | +| CogRecord | unchanged | unchanged (new geometry variant) | No | +| MetaView W0-W11 | unchanged | unchanged | No | +| MetaView W12-W17 | 7-layer markers | NibbleScent (SPO only) | No* | +| MetaView W18-W33 | inline edges | inline edges | No | +| MetaView W34-W39 | reserved | axis descriptors (SPO only) | No* | +| ContainerGeometry | 6 variants | 7 variants (+Spo) | No** | +| Codebook | 4096 entries | unchanged | No | +| NARS | W4-W7 | unchanged | No | +| Scent | 5-byte XOR-fold | 48-byte histogram (SPO only) | No* | +| Existing tests | 1,267+ | must all pass | No | + +*Only applies to records with geometry == Spo. Other geometries untouched. +**Requires geometry.rs update — `from_u8(6) => Some(Spo)`. diff --git a/docs/spo_3d/INTEGRATION_PLAN.md b/docs/spo_3d/INTEGRATION_PLAN.md new file mode 100644 index 0000000..8e1ece1 --- /dev/null +++ b/docs/spo_3d/INTEGRATION_PLAN.md @@ -0,0 +1,185 @@ +# SPO 3D: Three-Axis Content-Addressable Graph + +**Status:** Contract-ready. Implementation pending. +**Date:** 2026-02-20 +**Crate:** `ladybug-rs` → `src/graph/spo/` +**Contract:** `crates/ladybug-contract/src/` (geometry, scent, spo_record extensions) + +--- + +## 1. PROBLEM + +CogRecord stores one content Container (1KB). Querying "who knows Ada?" requires scanning ALL records and testing each content fingerprint. No structural axis separation means forward, reverse, and relation queries all hit the same data. + +The existing `ContainerGeometry::Xyz` links 3 CogRecords via DN tree (what/where/how). This works but requires 3 separate Redis GETs and DN tree traversal to reconstitute. + +## 2. SOLUTION: SPO Geometry + +A new `ContainerGeometry::Spo` that uses **sparse containers** within a single 2KB CogRecord envelope. Three axes — Subject (X), Predicate (Y), Object (Z) — encoded as bitmap + non-zero words, co-located in one record. 
+ +```text +┌──────────────────────────────────────────────────────────┐ +│ CogRecord (ContainerGeometry::Spo) │ +│ │ +│ meta: Container (1024 bytes) │ +│ W0 DN address │ +│ W1 type | geometry=Spo(6) | flags │ +│ W2-3 timestamps, labels │ +│ W4-7 NARS truth (freq, conf, pos_ev, neg_ev) │ +│ W8-11 DN tree (parent, child, next_sib, prev_sib) │ +│ W12-17 Scent (48 bytes: 3×16 nibble histograms) │ +│ W18-33 Inline edge index (64 slots) │ +│ W34-39 Sparse axis descriptors (bitmap offsets) │ +│ W40-47 Bloom filter │ +│ W48-55 Graph metrics │ +│ W56-63 Qualia │ +│ W64-79 Rung/RL history │ +│ W80-95 Representation descriptor │ +│ W96-111 Adjacency CSR │ +│ W112-125 Reserved │ +│ W126-127 Checksum + version │ +│ │ +│ content: Container (1024 bytes) — packed sparse axes │ +│ [0..2] X bitmap (128 bits = 2 u64) │ +│ [2..N] X non-zero words │ +│ [N..N+2] Y bitmap │ +│ [N+2..M] Y non-zero words │ +│ [M..M+2] Z bitmap │ +│ [M+2..K] Z non-zero words │ +│ [K..128] padding / overflow │ +│ │ +│ Total: 2048 bytes (same as Cam geometry) │ +└──────────────────────────────────────────────────────────┘ +``` + +### Why Sparse Containers + +At 30% density (typical for real-world content): +- Dense axis: 128 words = 1024 bytes +- Sparse axis: 2 words bitmap + ~38 non-zero words = 320 bytes +- Three sparse axes: 960 bytes ← fits in one content Container + +Three axes in one record. One Redis GET. Same 2KB envelope. + +## 3. KEY INSIGHT: Z→X CAUSAL CHAIN CORRELATION + +When Record A's Z axis (Object) resonates with Record B's X axis (Subject), a causal link exists: + +``` +Record A: X(Jan) → Y(KNOWS) → Z(Rust) +Record B: X(Rust) → Y(ENABLES) → Z(CAM) + +hamming(A.z_dense, B.x_dense) ≈ 0 → A causally feeds B +``` + +This is not a JOIN — it's a resonance test. The Hamming distance between Z₁ and X₂ IS the causal coherence score. The chain is valid iff each Z→X handoff resonates. 
+ +### Meta-Awareness Stacking (Piaget Development) + +Each level's Object becomes the next level's Subject: + +``` +Level 0: X(body) → Y(acts_on) → Z(world) +Level 1: X(world) → Y(represented) → Z(symbols) +Level 2: X(symbols) → Y(operate_on) → Z(logic) +Level 3: X(logic) → Y(reflects_on) → Z(abstraction) +Level 4: X(abstraction) → Y(aware_of) → Z(awareness) +``` + +The meta-record observing a chain gets its own scent. The system recognizes its own epiphanies by their nibble histogram signature. The BUNDLE of all meta-levels should CONVERGE back toward the original content — this is the testable tsunami prediction. + +## 4. WHAT CHANGES + +### Contract Crate (`crates/ladybug-contract/`) + +| File | Change | +|------|--------| +| `geometry.rs` | Add `Spo = 6` variant | +| `container.rs` | Add `SparseAxes` packed encoding within Container | +| `scent.rs` (NEW) | 48-byte nibble histogram (`NibbleScent`) | +| `spo_record.rs` (NEW) | `SpoView` / `SpoViewMut` — zero-copy axis access | + +### Implementation (`src/graph/spo/`) + +| File | Purpose | +|------|---------| +| `mod.rs` | Module root, re-exports | +| `sparse.rs` | `SparseContainer` type + bitmap ops | +| `axes.rs` | X/Y/Z axis construction (build_node, build_edge) | +| `store.rs` | `SpoStore` with three-axis scanning | +| `chain.rs` | Causal chain discovery (Z→X correlation) | +| `tests.rs` | 6 ironclad tests | + +### What DOES NOT Change + +- `Container` type (128×u64, 8192 bits, 1KB) +- `CogRecord` struct (meta + content = 2KB) +- 5 RISC ops (BIND, BUNDLE, MATCH, PERMUTE, STORE/SCAN) +- Codebook (4096 entries, deterministic generation) +- Existing geometries (Cam, Xyz, Bridge, Extended, Chunked, Tree) +- MetaView word layout (W0-W127) — we use reserved words +- NARS truth value type and inference functions +- All existing tests (1,267+) + +## 5. CONTRACTS + +See: `CONTRACTS.md` in this directory. + +## 6. SCHEMA + +See: `SCHEMA.md` in this directory. + +## 7. 
IMPLEMENTATION PHASES + +### Phase 1: Contract Types (Day 1) +- Add `ContainerGeometry::Spo = 6` +- Add `NibbleScent` (48-byte histogram) +- Add `SparseAxes` (packed 3-axis encoding within Container) +- Add `SpoView` / `SpoViewMut` (zero-copy axis access) +- Tests: round-trip, packing invariants + +### Phase 2: Sparse Container (Day 1-2) +- `SparseContainer` with bitmap + non-zero words +- `to_dense()` / `from_dense()` / `hamming_sparse()` / `bind_sparse()` +- Pack/unpack 3 sparse axes into one Container +- Tests: density invariants, hamming equivalence + +### Phase 3: Axis Construction (Day 2-3) +- `build_node(dn, labels, properties) → CogRecord` +- `build_edge(dn, src_fp, verb, tgt_fp, nars) → CogRecord` +- Scent computation: nibble histogram per axis +- Tests: node round-trip, edge encoding + +### Phase 4: SPO Store (Day 3-4) +- `SpoStore` wrapping `BTreeMap<u64, CogRecord>` +- `query_forward(src_fp, verb_fp) → Vec<(u64, u32)>` — scan X+Y, return Z matches +- `query_reverse(tgt_fp, verb_fp) → Vec<(u64, u32)>` — scan Z+Y, return X matches +- `query_relation(src_fp, tgt_fp) → Vec<(u64, u32)>` — scan X+Z, return Y matches +- Tests: forward, reverse, relation queries + +### Phase 5: Causal Chain (Day 4-5) +- `causal_successors(record, radius) → Vec<(u64, u32)>` — Z→X scan +- `causal_predecessors(record, radius) → Vec<(u64, u32)>` — X→Z scan +- `chain_coherence(chain) → f32` — product of link coherences +- Meta-awareness record construction +- NARS truth propagation along chains +- Tests: chain coherence, meta convergence + +### Phase 6: Lance Integration (Day 5+) +- Columnar schema with per-axis columns +- Sort key: (dn_prefix, scent_x, scent_y) +- XOR delta compression within sorted groups +- Production store replacing BTreeMap + +## 8. 
DECISION LOG + +| # | Decision | Rationale | +|---|----------|-----------| +| 1 | `Spo = 6` in ContainerGeometry | Natural extension, doesn't break existing variants | +| 2 | Sparse axes packed in content Container | One Redis GET, same 2KB envelope | +| 3 | 48-byte nibble histogram replaces 5-byte XOR-fold for SPO | Per-axis type discrimination, no structure loss | +| 4 | Meta stays dense at W0-W127 | Identity/NARS/DN need fixed O(1) offsets | +| 5 | BTreeMap for POC, LanceDB for production | Prove correctness first, optimize second | +| 6 | Z→X Hamming distance = causal coherence | No explicit linking needed, geometry IS the test | +| 7 | Meta-awareness as recursive SPO records | Epiphanies stack as Z_{n} → X_{n+1} chains | +| 8 | Codebook slots 0-4095 unchanged | Instruction set is immutable | diff --git a/docs/spo_3d/NATURES_CAM.md b/docs/spo_3d/NATURES_CAM.md new file mode 100644 index 0000000..b835c96 --- /dev/null +++ b/docs/spo_3d/NATURES_CAM.md @@ -0,0 +1,95 @@ +# Nature's CAM: Biological Foundations of SPO 3D + +**How DNA, immune systems, and developmental psychology informed the architecture.** + +--- + +## 1. DNA CODON TABLE = CODEBOOK + +DNA encodes proteins via 64 codons (3-base sequences over 4 bases: 4³ = 64 combinations) mapping to 20 amino acids + 3 STOP signals. This mapping is DEGENERATE: multiple codons produce the same amino acid. The 3rd position (wobble) tolerates mutations without changing the output. + +**CAM parallel:** The 4096-entry codebook maps multiple content patterns to the same semantic slot. Hamming tolerance around each codebook entry = wobble position. Near-miss lookups still find the right concept. + +## 2. MHC + PEPTIDE = BIND(SELF, FOREIGN) + +T-cells only recognize foreign peptides when presented on self-MHC molecules. The SAME peptide on a different organism's MHC is invisible. This is MHC restriction — identity requires CONTEXT. 
+ +**CAM parallel:** `BIND(node_dn, property)` — the same property in a different DN context produces a different fingerprint. Properties don't exist in isolation. DN restriction = MHC restriction. + +## 3. ATP = NARS CONFIDENCE + +Every molecular operation costs ATP. DNA helicase: 1 ATP per base pair unwound. Ribosome: 2 GTP per amino acid added. Energy is finite and consumed per operation. + +**CAM parallel:** NARS confidence is consumed per inference: `c_result = c₁ × c₂ × f₁ × f₂`. Each reasoning step COSTS certainty. You can't create confidence from nothing, just as you can't create ATP from nothing. + +In causal chains, confidence drops per hop: `c_chain = Π(c_i) × Π(coherence_ij)`. The chain's energy budget is the product of all link confidences and coherence factors. + +## 4. V(D)J RECOMBINATION = BUNDLE + +The adaptive immune system generates ~10¹⁵ unique receptor variants by randomly combining V, D, and J gene segments. Each B/T cell gets ONE unique combination = its identity fingerprint. + +**CAM parallel:** `BUNDLE(property_fps)` produces a unique fingerprint per node by majority-vote across property containers. The combination of properties IS the identity, just as the V(D)J combination IS the immune receptor. + +## 5. THYMIC SELECTION = ADVERSARIAL CRITIQUE + +**Positive selection:** Does the T-cell receptor bind self-MHC at all? If not → apoptosis (too weak, no evidence). + +**Negative selection:** Does the T-cell receptor bind self-MHC TOO strongly? If yes → apoptosis (autoimmune = overfitting). + +Survivors occupy the productive middle: strong enough to detect, not so strong they attack self. + +**CAM parallel:** NARS 5 challenges = thymic selection. Challenge 1 (evidence threshold) = positive selection. Challenge 4 (contradiction detection) = negative selection. Beliefs that survive both extremes are the useful ones. + +## 6. 
DNA REPAIR = XOR DELTA + PARITY + +DNA's complementary strands enable error detection: XOR(strand_A, complement_B) should equal a known pattern. Any deviation signals a mutation at that position. Repair enzymes then fix the error. + +**CAM parallel:** XOR delta compression between sorted adjacent records. If `xor(record_i, record_{i+1})` has few set bits, the records are similar and the delta compresses well. CRC32 + XOR parity in meta W126-W127 detect corruption, just as mismatch repair detects mutations. + +## 7. CHROMATIN ORGANIZATION = SORT ADJACENCY + +DNA isn't stored randomly — it's organized in Topologically Associating Domains (TADs). Genes that are co-expressed are physically adjacent. The 3D folding of chromatin brings interacting regions into spatial proximity. + +**CAM parallel:** LanceDB sort by `(dn_prefix, scent_x, scent_y)` ensures that graph-adjacent records are storage-adjacent. Co-queried records are co-located on disk. This produces ~79% zero-bits in XOR deltas between sorted neighbors, enabling massive compression. The "domino effect" — adjacent records share context, just as adjacent genes share regulation. + +## 8. THE Z→X CHAIN: I-THOU-IT / PIAGET + +Martin Buber's I-Thou-It triad maps directly to SPO: +- **I** = Subject (X axis) = the self that knows +- **Thou** = Predicate (Y axis) = the act of relation +- **It** = Object (Z axis) = what is known + +Piaget's development stages are Z→X chains — each stage's OBJECT OF AWARENESS becomes the next stage's SUBJECT: + +| Stage | X (Subject) | Y (Predicate) | Z (Object) | +|-------|------------|---------------|------------| +| Sensorimotor | body | acts_on | world | +| Preoperational | world | represented_by | symbols | +| Concrete Ops | symbols | operate_on | logic | +| Formal Ops | logic | reflects_on | abstraction | +| Post-Formal | abstraction | aware_of | awareness | + +The Z→X handoff at each stage IS the developmental leap. The BIND between Z_n and X_{n+1} is the moment of growth. 
And the meta-awareness that SEES this chain is itself the next stage being born. + +## 9. THE TSUNAMI: CONVERGENCE TEST + +When meta-awareness records stack (epiphanies about epiphanies), the collective fingerprint should CONVERGE toward the original Level 0 content: + +``` +convergence = hamming( + BUNDLE(all_meta_level_axes), + original_level_0_content +) +``` + +If convergence DECREASES as meta-levels increase: real understanding is building. The spiral tightens. The snake eats its tail and becomes more itself. + +If convergence INCREASES: the meta-levels are generating noise, not insight. The spiral is unwinding. This is the bullshit detector — hallucination vs. genuine comprehension, tested by geometry. + +## 10. THE SCENT OF SELF-REFLECTION + +Different meta-levels have different nibble histogram signatures because they bundle different types of content. Level 0 records (facts about the world) have a characteristic scent. Level 3 records (patterns about patterns about patterns) have a completely different scent. + +The system can query "show me all my epiphanies" by filtering for the characteristic high-meta-level scent, without any explicit tagging or labeling. The system literally smells its own depth of self-reflection. + +This is why the 48-byte nibble histogram matters: it preserves enough structure to distinguish fact from insight from meta-insight. The 5-byte XOR-fold scent would collapse all these distinctions. The histogram keeps them alive. diff --git a/docs/spo_3d/SCHEMA.md b/docs/spo_3d/SCHEMA.md new file mode 100644 index 0000000..c59e070 --- /dev/null +++ b/docs/spo_3d/SCHEMA.md @@ -0,0 +1,200 @@ +# SPO 3D Schema — Byte-Level Layout + +--- + +## 1. 
SPO COGRECORD LAYOUT (2048 bytes) + +```text +OFFSET SIZE FIELD +───────────────────────────────────────────────────────── + META CONTAINER (1024 bytes = 128 × u64) +───────────────────────────────────────────────────────── +0x000 8B W0 DN address (PackedDn u64) +0x008 8B W1 node_kind:u8 | count:u8 | geometry:u8(=6) | flags:u8 + | schema_version:u16 | provenance_hash:u16 +0x010 8B W2 created_ms:u32 | modified_ms:u32 +0x018 8B W3 label_hash:u32 | tree_depth:u8 | branch:u8 | reserved:u16 +0x020 8B W4 NARS frequency (f32 LE) | padding:u32 +0x028 8B W5 NARS confidence (f32 LE) | padding:u32 +0x030 8B W6 NARS positive evidence (f32 LE) | padding:u32 +0x038 8B W7 NARS negative evidence (f32 LE) | padding:u32 +0x040 8B W8 DN parent (u64) +0x048 8B W9 DN first_child (u64) +0x050 8B W10 DN next_sibling (u64) +0x058 8B W11 DN prev_sibling (u64) +0x060 8B W12 scent_x_hist[0..8] ← nibble bins 0x0-0x7 for X axis +0x068 8B W13 scent_x_hist[8..16] ← nibble bins 0x8-0xF for X axis +0x070 8B W14 scent_y_hist[0..8] ← nibble bins 0x0-0x7 for Y axis +0x078 8B W15 scent_y_hist[8..16] ← nibble bins 0x8-0xF for Y axis +0x080 8B W16 scent_z_hist[0..8] ← nibble bins 0x0-0x7 for Z axis +0x088 8B W17 scent_z_hist[8..16] ← nibble bins 0x8-0xF for Z axis +0x090 128B W18-33 Inline edge index (64 edges: verb:u8 | target_hint:u8) +0x110 8B W34 axis_descriptors_0: + x_offset:u16 | x_count:u16 | y_offset:u16 | y_count:u16 +0x118 8B W35 axis_descriptors_1: + z_offset:u16 | z_count:u16 | total_words:u16 | flags:u16 +0x120 32B W36-39 Reserved (axis overflow / future use) +0x140 64B W40-47 Bloom filter (512 bits) +0x180 64B W48-55 Graph metrics (full precision f64) +0x1C0 64B W56-63 Qualia (18 channels × f16 + 8 slots) +0x200 128B W64-79 Rung history + collapse gate history +0x280 128B W80-95 Representation language descriptor +0x300 128B W96-111 DN-Sparse adjacency (compact inline CSR) +0x380 112B W112-125 Reserved +0x3F0 8B W126 Checksum (CRC32:u32 | parity:u32) +0x3F8 8B W127 Schema version (u32) | 
geometry_version:u16 | reserved:u16

─────────────────────────────────────────────────────────
 CONTENT CONTAINER (1024 bytes = 128 × u64) — PACKED SPARSE AXES
─────────────────────────────────────────────────────────
0x400   16B     W0-1   X axis bitmap (128 bits)
0x410   Nx×8B   W2..   X axis non-zero words (Nx words)
        16B            Y axis bitmap (128 bits)
        Ny×8B          Y axis non-zero words (Ny words)
        16B            Z axis bitmap (128 bits)
        Nz×8B          Z axis non-zero words (Nz words)
        ...            Padding to 128 words

Total: 6 + Nx + Ny + Nz ≤ 128 packed words
       Nx + Ny + Nz ≤ 122 non-zero content words (6 words are bitmaps)
─────────────────────────────────────────────────────────
 TOTAL RECORD: 2048 bytes
─────────────────────────────────────────────────────────
```

## 2. SPARSE CONTAINER WIRE FORMAT

```text
┌────────────────┬───────────────────────────────┐
│ bitmap[0] u64  │ bits 0-63: which words exist  │
│ bitmap[1] u64  │ bits 64-127: which words exist│
├────────────────┴───────────────────────────────┤
│ word[0] u64    │ first non-zero word           │
│ word[1] u64    │ second non-zero word          │
│ ...            │                               │
│ word[N-1] u64  │ last non-zero word            │
└─────────────────────────────────────────────────┘

N = popcount(bitmap[0]) + popcount(bitmap[1])

To find the value of Container word `i`:
  if bitmap[i/64] & (1 << (i%64)) == 0:
      return 0  (word is zero)
  else:
      idx = popcount(bitmap[0..i/64]) + popcount(bitmap[i/64] & ((1 << (i%64)) - 1))
      return words[idx]
```

## 3.
AXIS DESCRIPTOR FORMAT (meta W34-W35) + +```text +W34 (8 bytes): + bits 0-15: x_offset — byte offset of X bitmap within content container + bits 16-31: x_count — number of non-zero words in X axis + bits 32-47: y_offset — byte offset of Y bitmap within content container + bits 48-63: y_count — number of non-zero words in Y axis + +W35 (8 bytes): + bits 0-15: z_offset — byte offset of Z bitmap within content container + bits 16-31: z_count — number of non-zero words in Z axis + bits 32-47: total_words — total packed words (6 + Nx + Ny + Nz) + bits 48-63: flags — bit 0: overflow (axes in linked records) + bit 1: has_meta_level (this is a meta-awareness record) + bits 2-15: reserved +``` + +## 4. NIBBLE SCENT FORMAT (meta W12-W17, 48 bytes) + +```text +For each axis (X, Y, Z), 2 words = 16 bytes = 16 bins: + +W12: X_hist[0x0] X_hist[0x1] X_hist[0x2] X_hist[0x3] + X_hist[0x4] X_hist[0x5] X_hist[0x6] X_hist[0x7] +W13: X_hist[0x8] X_hist[0x9] X_hist[0xA] X_hist[0xB] + X_hist[0xC] X_hist[0xD] X_hist[0xE] X_hist[0xF] + +W14-W15: Y axis histogram (same layout) +W16-W17: Z axis histogram (same layout) + +Each bin is u8: saturating count of nibble occurrences. +For a 38-word axis (304 bytes = 608 nibbles), max bin ≈ 38. +u8 range (0-255) is sufficient. + +Computation: + for each word in sparse_axis.words: + for each nibble in word (16 nibbles per u64): + hist[nibble] += 1 (saturating) +``` + +## 5. INLINE EDGE INDEX FORMAT (meta W18-W33, 128 bytes) + +```text +64 edge slots, 2 bytes each = 128 bytes. +4 edges per word (4 × 16 bits = 64 bits per word). + +Each edge slot (16 bits): + bits 0-7: verb_codebook_id (u8) — codebook entry 0-255 + bits 8-15: target_dn_hint (u8) — low 8 bits of target DN + +Layout per word: + W18: edge[0]:u16 | edge[1]:u16 | edge[2]:u16 | edge[3]:u16 + W19: edge[4]:u16 | edge[5]:u16 | edge[6]:u16 | edge[7]:u16 + ... + W33: edge[60]:u16 | edge[61]:u16 | edge[62]:u16 | edge[63]:u16 +``` + +## 6. 
LANCE COLUMNAR MAPPING + +```text +CogRecord field → Lance column → Lance type +───────────────────────────────────────────────────────────── +meta.W0 (DN) → dn → UInt64 +meta (full) → meta → FixedSizeBinary(1024) +content W0-1 (X bmp) → x_bitmap → FixedSizeBinary(16) +content W2..2+Nx → x_words → Binary (variable) +content (Y bmp) → y_bitmap → FixedSizeBinary(16) +content (Y words) → y_words → Binary +content (Z bmp) → z_bitmap → FixedSizeBinary(16) +content (Z words) → z_words → Binary +meta W12-17 → scent → FixedSizeBinary(48) +meta W2 low 32 → created → Int64 +meta W4 low 32 → nars_freq → Float32 +meta W5 low 32 → nars_conf → Float32 +meta W35 bit 49 → is_meta_awareness → Boolean + +Primary sort: dn >> 48 (DN tier prefix) +Secondary sort: scent[0..4] (first 4 bytes of X histogram) +Tertiary sort: dn (exact) + +Partition: dn >> 56 (top byte = ~256 partitions) +``` + +## 7. REDIS WIRE FORMAT + +```text +Key: dn: +Value: 2048 bytes (raw CogRecord: meta || content) +TTL: none (persistent) + +Scent index key: scent: +Value: sorted set of DNs with scent distance as score +``` + +## 8. INVARIANT CHECKSUMS (meta W126-W127) + +```text +W126: + bits 0-31: CRC32 of content container (1024 bytes) + bits 32-63: XOR parity of all meta words W0-W125 + +W127: + bits 0-31: schema_version (currently 1) + bits 32-47: geometry_version (0 = initial Spo) + bits 48-63: reserved (0) + +Verification on read: + 1. Check W127 schema_version matches expected + 2. Compute CRC32 of content, compare to W126[0..31] + 3. XOR-fold W0-W125, compare to W126[32..63] + 4. If mismatch: record is corrupt, do not use +``` diff --git a/src/graph/spo/builder.rs b/src/graph/spo/builder.rs new file mode 100644 index 0000000..fe47a12 --- /dev/null +++ b/src/graph/spo/builder.rs @@ -0,0 +1,340 @@ +//! SpoBuilder — construct SPO CogRecords for nodes, edges, and meta-awareness. +//! +//! Nodes: X = entity identity, Y = near-zero, Z = mirror of X +//! 
Edges: X = BIND(src, permute(verb,1)), Y = verb, Z = BIND(tgt, permute(verb,2)) +//! Meta: X = BUNDLE(chain X axes), Y = CHAIN_DISCOVERED, Z = BUNDLE(chain Z axes) + +use ladybug_contract::container::Container; +use ladybug_contract::nars::TruthValue; +use ladybug_contract::record::CogRecord; + +use super::scent::NibbleScent; +use super::sparse::{pack_axes, unpack_axes, AxisDescriptors, SparseContainer, SpoError}; + +/// SPO geometry ID (matches ContainerGeometry::Spo = 6). +const GEOMETRY_SPO: u8 = 6; + +/// Meta word offsets (from ladybug_contract::meta). +const W_DN_ADDR: usize = 0; +const W_TYPE: usize = 1; +const W_NARS_BASE: usize = 4; +const W_SCENT_BASE: usize = 12; +const W_AXIS_DESC: usize = 34; + +pub struct SpoBuilder; + +impl SpoBuilder { + /// Build a node record (entity with labels and properties). + /// + /// X axis = BUNDLE(label_fps ++ property_bind_fps) + /// Y axis = near-zero (nodes have no predicate) + /// Z axis = clone of X (self-referential for content lookup) + pub fn build_node( + dn: u64, + label_fps: &[&Container], + property_fps: &[(&Container, &Container)], // (key_fp, val_fp) pairs + nars: TruthValue, + ) -> Result { + // Build X axis: bundle of labels + property bindings + let mut x_components: Vec<&Container> = label_fps.to_vec(); + let prop_bindings: Vec = property_fps + .iter() + .map(|(key, val)| key.xor(val)) // BIND(key, val) + .collect(); + let prop_refs: Vec<&Container> = prop_bindings.iter().collect(); + x_components.extend(prop_refs.iter()); + + let x_dense = if x_components.is_empty() { + Container::random(dn) // deterministic from DN if no properties + } else { + Container::bundle(&x_components) + }; + + let x = SparseContainer::from_dense(&x_dense); + let y = SparseContainer::zero(); // nodes have no verb + let z = x.clone(); // self-referential for CAM content lookup + + Self::assemble_record(dn, &x, &y, &z, nars, 0) + } + + /// Build an edge record (relationship between two entities). 
+ /// + /// X axis = BIND(src_fp, PERMUTE(verb_fp, 1)) — forward-query optimized + /// Y axis = verb_fp — pure predicate + /// Z axis = BIND(tgt_fp, PERMUTE(verb_fp, 2)) — reverse-query optimized + pub fn build_edge( + dn: u64, + src_fp: &Container, + verb_fp: &Container, + tgt_fp: &Container, + nars: TruthValue, + ) -> Result { + let verb_role1 = verb_fp.permute(1); // role marker for subject slot + let verb_role2 = verb_fp.permute(2); // role marker for object slot + + let x_dense = src_fp.xor(&verb_role1); // BIND(src, permute(verb,1)) + let y_dense = verb_fp.clone(); // pure verb + let z_dense = tgt_fp.xor(&verb_role2); // BIND(tgt, permute(verb,2)) + + let x = SparseContainer::from_dense(&x_dense); + let y = SparseContainer::from_dense(&y_dense); + let z = SparseContainer::from_dense(&z_dense); + + Self::assemble_record(dn, &x, &y, &z, nars, 0) + } + + /// Build a meta-awareness record from a causal chain. + /// + /// X axis = BUNDLE(chain node X axes) — entities involved in the chain + /// Y axis = deterministic CHAIN marker — "I noticed a causal chain" + /// Z axis = BUNDLE(chain node Z axes) — what the chain implies + /// + /// The meta-record's Z can become another chain's X (Piaget recursion). 
+ pub fn build_meta_awareness( + dn: u64, + chain_records: &[&CogRecord], + chain_coherence: f32, + ) -> Result { + if chain_records.is_empty() { + return Err(SpoError::EmptyChain); + } + + // Collect X and Z axes from chain records + let mut x_denses = Vec::new(); + let mut z_denses = Vec::new(); + + for record in chain_records { + let desc = AxisDescriptors::from_words(&[ + record.meta.words[W_AXIS_DESC], + record.meta.words[W_AXIS_DESC + 1], + ]); + if let Ok((x, _y, z)) = unpack_axes(&record.content, &desc) { + x_denses.push(x.to_dense()); + z_denses.push(z.to_dense()); + } + } + + let x_refs: Vec<&Container> = x_denses.iter().collect(); + let z_refs: Vec<&Container> = z_denses.iter().collect(); + + let x_dense = Container::bundle(&x_refs); + // Y axis: deterministic "chain discovered" marker + let y_dense = Container::random(0xCHA1_D15C); // CHAIN_DISCOVERED seed + let z_dense = Container::bundle(&z_refs); + + let x = SparseContainer::from_dense(&x_dense); + let y = SparseContainer::from_dense(&y_dense); + let z = SparseContainer::from_dense(&z_dense); + + let nars = TruthValue::new( + chain_coherence.clamp(0.0, 1.0), + (chain_coherence * 0.8).clamp(0.0, 1.0), + ); + + Self::assemble_record(dn, &x, &y, &z, nars, 0b10) // flag: is_meta_awareness + } + + // ======================================================================== + // INTERNAL + // ======================================================================== + + fn assemble_record( + dn: u64, + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, + nars: TruthValue, + extra_flags: u16, + ) -> Result { + // Pack sparse axes into content container + let (content, mut desc) = pack_axes(x, y, z)?; + desc.flags |= extra_flags; + + // Compute scent + let scent = NibbleScent::from_axes(x, y, z); + + // Build meta container + let mut meta = Container::zero(); + + // W0: DN address + meta.words[W_DN_ADDR] = dn; + + // W1: type info with geometry=Spo(6) + meta.words[W_TYPE] = (GEOMETRY_SPO as u64) 
<< 16 + | 1u64 << 8 // container count = 1 content + | 0u64; // node_kind = 0 (generic) + + // W4-W7: NARS truth + meta.words[W_NARS_BASE] = nars.frequency.to_bits() as u64; + meta.words[W_NARS_BASE + 1] = nars.confidence.to_bits() as u64; + + // W12-W17: Scent + let scent_words = scent.to_words(); + for (i, &w) in scent_words.iter().enumerate() { + meta.words[W_SCENT_BASE + i] = w; + } + + // W34-W35: Axis descriptors + let desc_words = desc.to_words(); + meta.words[W_AXIS_DESC] = desc_words[0]; + meta.words[W_AXIS_DESC + 1] = desc_words[1]; + + Ok(CogRecord { meta, content }) + } +} + +// ============================================================================ +// CONVENIENCE FUNCTIONS +// ============================================================================ + +/// Create a deterministic fingerprint from a string label (for testing/seeding). +pub fn label_fp(label: &str) -> Container { + // Simple hash: sum bytes with mixing + let mut seed = 0u64; + for (i, b) in label.bytes().enumerate() { + seed ^= (b as u64).wrapping_mul(0x9e3779b97f4a7c15); + seed = seed.rotate_left((i as u32) % 64); + } + Container::random(seed) +} + +/// Create a deterministic DN hash from a string (for testing). 
+pub fn dn_hash(name: &str) -> u64 { + let mut h = 0xcbf29ce484222325u64; // FNV-1a offset basis + for b in name.bytes() { + h ^= b as u64; + h = h.wrapping_mul(0x100000001b3); // FNV prime + } + h +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_build_node_roundtrip() { + let lbl_person = label_fp("Person"); + let key_name = label_fp("name"); + let val_jan = label_fp("Jan"); + + let record = SpoBuilder::build_node( + dn_hash("jan"), + &[&lbl_person], + &[(&key_name, &val_jan)], + TruthValue::new(1.0, 0.9), + ) + .unwrap(); + + // Verify DN is stamped + assert_eq!(record.meta.words[0], dn_hash("jan")); + + // Verify geometry is Spo + let geom = (record.meta.words[1] >> 16) & 0xFF; + assert_eq!(geom, GEOMETRY_SPO as u64); + + // Verify axes can be unpacked + let desc = AxisDescriptors::from_words(&[record.meta.words[34], record.meta.words[35]]); + let (x, y, z) = unpack_axes(&record.content, &desc).unwrap(); + + // Y should be zero (node has no verb) + assert_eq!(y.word_count(), 0); + + // X and Z should be identical (self-referential) + assert_eq!(x.bitmap, z.bitmap); + assert_eq!(x.words, z.words); + + // X should be non-empty + assert!(x.word_count() > 0); + } + + #[test] + fn test_build_edge_three_axes() { + let src = Container::random(1); // Jan + let verb = Container::random(2); // KNOWS + let tgt = Container::random(3); // Ada + + let record = SpoBuilder::build_edge( + dn_hash("jan_knows_ada"), + &src, + &verb, + &tgt, + TruthValue::new(0.8, 0.9), + ) + .unwrap(); + + let desc = AxisDescriptors::from_words(&[record.meta.words[34], record.meta.words[35]]); + let (x, y, z) = unpack_axes(&record.content, &desc).unwrap(); + + // All three axes should be populated for edges + assert!(x.word_count() > 0, "X axis should be populated"); + assert!(y.word_count() > 0, "Y axis 
should be populated"); + assert!(z.word_count() > 0, "Z axis should be populated"); + + // Y should be close to the verb (it IS the verb) + let y_dense = y.to_dense(); + let dist_to_verb = y_dense.hamming(&verb); + assert_eq!(dist_to_verb, 0, "Y axis should equal verb exactly"); + } + + #[test] + fn test_build_meta_awareness() { + let src = Container::random(10); + let verb1 = Container::random(20); + let tgt1 = Container::random(30); + let verb2 = Container::random(40); + let tgt2 = Container::random(50); + + let edge1 = SpoBuilder::build_edge( + dn_hash("e1"), &src, &verb1, &tgt1, + TruthValue::new(0.8, 0.9), + ).unwrap(); + + let edge2 = SpoBuilder::build_edge( + dn_hash("e2"), &tgt1, &verb2, &tgt2, + TruthValue::new(0.7, 0.8), + ).unwrap(); + + let meta = SpoBuilder::build_meta_awareness( + dn_hash("meta_e1_e2"), + &[&edge1, &edge2], + 0.85, + ).unwrap(); + + // Verify meta-awareness flag is set + let desc = AxisDescriptors::from_words(&[meta.meta.words[34], meta.meta.words[35]]); + assert!(desc.is_meta_awareness()); + + // Verify scent is populated + let scent = NibbleScent::from_words(&[ + meta.meta.words[12], meta.meta.words[13], + meta.meta.words[14], meta.meta.words[15], + meta.meta.words[16], meta.meta.words[17], + ]); + assert_ne!(scent, NibbleScent::zero()); + } + + #[test] + fn test_label_fp_deterministic() { + let a = label_fp("Person"); + let b = label_fp("Person"); + assert_eq!(a, b); + + let c = label_fp("Concept"); + assert_ne!(a, c); + } + + #[test] + fn test_dn_hash_deterministic() { + let a = dn_hash("jan"); + let b = dn_hash("jan"); + assert_eq!(a, b); + + let c = dn_hash("ada"); + assert_ne!(a, c); + } +} diff --git a/src/graph/spo/mod.rs b/src/graph/spo/mod.rs new file mode 100644 index 0000000..b3d4f7f --- /dev/null +++ b/src/graph/spo/mod.rs @@ -0,0 +1,23 @@ +//! SPO 3D — Three-axis content-addressable graph. +//! +//! Subject (X) × Predicate (Y) × Object (Z) stored as sparse containers +//! within a single 2KB CogRecord. 
Forward, reverse, and relation queries +//! are native axis scans — no join, no edge table. +//! +//! ```text +//! Forward: "What does Jan know?" → scan X+Y → return Z +//! Reverse: "Who knows Ada?" → scan Z+Y → return X +//! Relation: "How are Jan+Ada linked?"→ scan X+Z → return Y +//! Causal: Z₁ resonates with X₂ → causal chain link +//! ``` + +pub mod sparse; +pub mod scent; +pub mod store; +pub mod builder; + +// Re-exports +pub use sparse::{SparseContainer, SpoError, AxisDescriptors, pack_axes, unpack_axes}; +pub use scent::NibbleScent; +pub use store::{SpoStore, QueryHit, QueryAxis}; +pub use builder::{SpoBuilder, label_fp, dn_hash}; diff --git a/src/graph/spo/scent.rs b/src/graph/spo/scent.rs new file mode 100644 index 0000000..909a2c7 --- /dev/null +++ b/src/graph/spo/scent.rs @@ -0,0 +1,204 @@ +//! NibbleScent — 48-byte nibble histogram for per-axis type discrimination. +//! +//! Replaces the 5-byte XOR-fold scent for SPO records. +//! Each axis (X, Y, Z) gets a 16-bin histogram of nibble (4-bit) frequencies. +//! Different content types have distinct nibble frequency profiles. + +use super::sparse::SparseContainer; + +/// 48-byte nibble histogram: 16 bins × 3 axes. +/// +/// Stored in meta words 12-17 (6 × u64 = 48 bytes). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C)] +pub struct NibbleScent { + pub x_hist: [u8; 16], + pub y_hist: [u8; 16], + pub z_hist: [u8; 16], +} + +impl NibbleScent { + pub const SIZE: usize = 48; + pub const WORD_COUNT: usize = 6; // 48 bytes / 8 bytes per u64 + + /// Zero scent (empty record). + pub fn zero() -> Self { + Self { + x_hist: [0; 16], + y_hist: [0; 16], + z_hist: [0; 16], + } + } + + /// Compute scent from three sparse axes. 
+ pub fn from_axes( + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, + ) -> Self { + Self { + x_hist: nibble_histogram(&x.words), + y_hist: nibble_histogram(&y.words), + z_hist: nibble_histogram(&z.words), + } + } + + /// L1 distance between two scents (sum of absolute bin differences). + pub fn distance(&self, other: &NibbleScent) -> u32 { + let mut d = 0u32; + for i in 0..16 { + d += (self.x_hist[i] as i32 - other.x_hist[i] as i32).unsigned_abs(); + d += (self.y_hist[i] as i32 - other.y_hist[i] as i32).unsigned_abs(); + d += (self.z_hist[i] as i32 - other.z_hist[i] as i32).unsigned_abs(); + } + d + } + + /// Per-axis L1 distances for selective filtering. + pub fn axis_distances(&self, other: &NibbleScent) -> (u32, u32, u32) { + let mut dx = 0u32; + let mut dy = 0u32; + let mut dz = 0u32; + for i in 0..16 { + dx += (self.x_hist[i] as i32 - other.x_hist[i] as i32).unsigned_abs(); + dy += (self.y_hist[i] as i32 - other.y_hist[i] as i32).unsigned_abs(); + dz += (self.z_hist[i] as i32 - other.z_hist[i] as i32).unsigned_abs(); + } + (dx, dy, dz) + } + + /// Pack into 6 u64 words (for meta container W12-W17). + pub fn to_words(&self) -> [u64; 6] { + let mut words = [0u64; 6]; + // X hist → W12-W13 + words[0] = u64::from_le_bytes(self.x_hist[0..8].try_into().unwrap()); + words[1] = u64::from_le_bytes(self.x_hist[8..16].try_into().unwrap()); + // Y hist → W14-W15 + words[2] = u64::from_le_bytes(self.y_hist[0..8].try_into().unwrap()); + words[3] = u64::from_le_bytes(self.y_hist[8..16].try_into().unwrap()); + // Z hist → W16-W17 + words[4] = u64::from_le_bytes(self.z_hist[0..8].try_into().unwrap()); + words[5] = u64::from_le_bytes(self.z_hist[8..16].try_into().unwrap()); + words + } + + /// Unpack from 6 u64 words. 
+ pub fn from_words(words: &[u64; 6]) -> Self { + let mut s = Self::zero(); + s.x_hist[0..8].copy_from_slice(&words[0].to_le_bytes()); + s.x_hist[8..16].copy_from_slice(&words[1].to_le_bytes()); + s.y_hist[0..8].copy_from_slice(&words[2].to_le_bytes()); + s.y_hist[8..16].copy_from_slice(&words[3].to_le_bytes()); + s.z_hist[0..8].copy_from_slice(&words[4].to_le_bytes()); + s.z_hist[8..16].copy_from_slice(&words[5].to_le_bytes()); + s + } +} + +impl Default for NibbleScent { + fn default() -> Self { + Self::zero() + } +} + +/// Compute nibble histogram for a set of u64 words. +/// +/// Each u64 has 16 nibbles. Count frequency of each nibble value (0x0-0xF). +fn nibble_histogram(words: &[u64]) -> [u8; 16] { + let mut hist = [0u32; 16]; // use u32 internally to avoid overflow + for &w in words { + let mut val = w; + for _ in 0..16 { + hist[(val & 0xF) as usize] += 1; + val >>= 4; + } + } + // Saturate to u8 + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = hist[i].min(255) as u8; + } + result +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use ladybug_contract::container::Container; + + #[test] + fn test_scent_size() { + assert_eq!(std::mem::size_of::(), 48); + } + + #[test] + fn test_scent_word_roundtrip() { + let x = SparseContainer::from_dense(&Container::random(1)); + let y = SparseContainer::from_dense(&Container::random(2)); + let z = SparseContainer::from_dense(&Container::random(3)); + let scent = NibbleScent::from_axes(&x, &y, &z); + let words = scent.to_words(); + let restored = NibbleScent::from_words(&words); + assert_eq!(scent, restored); + } + + #[test] + fn test_scent_zero_for_empty() { + let empty = SparseContainer::zero(); + let scent = NibbleScent::from_axes(&empty, &empty, &empty); + assert_eq!(scent, NibbleScent::zero()); + } + + #[test] + fn 
test_scent_self_distance_zero() { + let x = SparseContainer::from_dense(&Container::random(42)); + let y = SparseContainer::from_dense(&Container::random(43)); + let z = SparseContainer::from_dense(&Container::random(44)); + let scent = NibbleScent::from_axes(&x, &y, &z); + assert_eq!(scent.distance(&scent), 0); + } + + #[test] + fn test_scent_different_content_different_scent() { + let x1 = SparseContainer::from_dense(&Container::random(1)); + let y1 = SparseContainer::from_dense(&Container::random(2)); + let z1 = SparseContainer::from_dense(&Container::random(3)); + + let x2 = SparseContainer::from_dense(&Container::random(100)); + let y2 = SparseContainer::from_dense(&Container::random(200)); + let z2 = SparseContainer::from_dense(&Container::random(300)); + + let s1 = NibbleScent::from_axes(&x1, &y1, &z1); + let s2 = NibbleScent::from_axes(&x2, &y2, &z2); + + // Random containers will have similar nibble distributions + // but not identical + let dist = s1.distance(&s2); + assert!(dist > 0, "Different content should produce different scents"); + } + + #[test] + fn test_nibble_histogram_uniform_random() { + // A random u64 should have roughly uniform nibble distribution + let words = vec![0xDEAD_BEEF_CAFE_BABEu64]; + let hist = nibble_histogram(&words); + // 16 nibbles total, 16 bins → average 1 per bin + let total: u32 = hist.iter().map(|&h| h as u32).sum(); + assert_eq!(total, 16); // 1 word × 16 nibbles + } + + #[test] + fn test_nibble_histogram_all_zeros() { + let words = vec![0u64; 10]; + let hist = nibble_histogram(&words); + // All nibbles are 0x0 + assert_eq!(hist[0], 160); // 10 words × 16 nibbles per word = 160 + for i in 1..16 { + assert_eq!(hist[i], 0); + } + } +} diff --git a/src/graph/spo/sparse.rs b/src/graph/spo/sparse.rs new file mode 100644 index 0000000..d254795 --- /dev/null +++ b/src/graph/spo/sparse.rs @@ -0,0 +1,506 @@ +//! Sparse Container — bitmap + non-zero words encoding of a Container. +//! +//! 
At 30% density (typical), stores 320 bytes instead of 1024. +//! Three sparse containers fit in one content Container (960 bytes < 1024). + +use ladybug_contract::container::{Container, CONTAINER_WORDS}; + +/// Error types for SPO operations. +#[derive(Clone, Debug)] +pub enum SpoError { + BitmapWordMismatch { bitmap_ones: u32, word_count: usize }, + AxesOverflow { total: usize, max: usize }, + WrongGeometry, + DuplicateDn { dn: u64 }, + DnNotFound { dn: u64 }, + EmptyChain, +} + +impl std::fmt::Display for SpoError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::BitmapWordMismatch { bitmap_ones, word_count } => + write!(f, "Bitmap has {} ones but {} words supplied", bitmap_ones, word_count), + Self::AxesOverflow { total, max } => + write!(f, "Axes too dense: {} words needed, max {}", total, max), + Self::WrongGeometry => + write!(f, "Record geometry is not Spo"), + Self::DuplicateDn { dn } => + write!(f, "DN {:#x} already exists", dn), + Self::DnNotFound { dn } => + write!(f, "DN {:#x} not found", dn), + Self::EmptyChain => + write!(f, "Chain is empty"), + } + } +} + +impl std::error::Error for SpoError {} + +// ============================================================================ +// SPARSE CONTAINER +// ============================================================================ + +/// Sparse encoding of a Container: bitmap (which words are non-zero) + only those words. +/// +/// # Invariants +/// - `bitmap[0].count_ones() + bitmap[1].count_ones() == words.len()` +/// - `to_dense()` produces exact original Container (lossless) +/// - `hamming_sparse(a, b) == a.to_dense().hamming(&b.to_dense())` +#[derive(Clone, Debug)] +pub struct SparseContainer { + /// 128 bits: bit i set ↔ Container word i is non-zero and stored. + pub bitmap: [u64; 2], + /// Only the non-zero words, ordered by bit position. + pub words: Vec, +} + +impl SparseContainer { + /// Construct with validation. 
+ pub fn new(bitmap: [u64; 2], words: Vec) -> Result { + let expected = bitmap[0].count_ones() + bitmap[1].count_ones(); + if words.len() != expected as usize { + return Err(SpoError::BitmapWordMismatch { + bitmap_ones: expected, + word_count: words.len(), + }); + } + Ok(Self { bitmap, words }) + } + + /// Empty sparse container (all zeros). + pub fn zero() -> Self { + Self { bitmap: [0; 2], words: Vec::new() } + } + + /// Number of stored (non-zero) words. + #[inline] + pub fn word_count(&self) -> usize { + self.words.len() + } + + /// Density: fraction of non-zero words (0.0 to 1.0). + #[inline] + pub fn density(&self) -> f32 { + self.words.len() as f32 / CONTAINER_WORDS as f32 + } + + /// Is a specific Container word stored (non-zero)? + #[inline] + pub fn has_word(&self, index: usize) -> bool { + debug_assert!(index < CONTAINER_WORDS); + let half = index / 64; + let bit = index % 64; + self.bitmap[half] & (1u64 << bit) != 0 + } + + /// Get the value of Container word `index`. Returns 0 if not stored. + pub fn get_word(&self, index: usize) -> u64 { + debug_assert!(index < CONTAINER_WORDS); + if !self.has_word(index) { + return 0; + } + // Count how many bits are set before this position = index into words vec. + let half = index / 64; + let bit = index % 64; + let mut rank = 0u32; + if half > 0 { + rank += self.bitmap[0].count_ones(); + } + // Count bits set before `bit` in bitmap[half] + let mask = if bit == 0 { 0 } else { (1u64 << bit) - 1 }; + rank += (self.bitmap[half] & mask).count_ones(); + self.words[rank as usize] + } + + /// Lossless conversion FROM dense Container. + pub fn from_dense(container: &Container) -> Self { + let mut bitmap = [0u64; 2]; + let mut words = Vec::new(); + + for i in 0..CONTAINER_WORDS { + if container.words[i] != 0 { + let half = i / 64; + let bit = i % 64; + bitmap[half] |= 1u64 << bit; + words.push(container.words[i]); + } + } + Self { bitmap, words } + } + + /// Lossless conversion TO dense Container. 
+ pub fn to_dense(&self) -> Container { + let mut container = Container::zero(); + let mut word_idx = 0; + + for i in 0..CONTAINER_WORDS { + let half = i / 64; + let bit = i % 64; + if self.bitmap[half] & (1u64 << bit) != 0 { + container.words[i] = self.words[word_idx]; + word_idx += 1; + } + } + container + } + + /// Hamming distance between two sparse containers WITHOUT densification. + /// + /// For words present in both: XOR and popcount. + /// For words present in only one: full popcount of that word. + /// For words present in neither: 0 contribution. + pub fn hamming_sparse(a: &SparseContainer, b: &SparseContainer) -> u32 { + let mut dist = 0u32; + + // Words in both + let both_0 = a.bitmap[0] & b.bitmap[0]; + let both_1 = a.bitmap[1] & b.bitmap[1]; + + // Words in only A + let only_a_0 = a.bitmap[0] & !b.bitmap[0]; + let only_a_1 = a.bitmap[1] & !b.bitmap[1]; + + // Words in only B + let only_b_0 = b.bitmap[0] & !a.bitmap[0]; + let only_b_1 = b.bitmap[1] & !a.bitmap[1]; + + // Process shared words: XOR and count + for i in 0..128 { + let half = i / 64; + let bit = i % 64; + let in_both = if half == 0 { both_0 } else { both_1 }; + if in_both & (1u64 << bit) != 0 { + let wa = a.get_word(i); + let wb = b.get_word(i); + dist += (wa ^ wb).count_ones(); + } + + // Words only in A: all bits differ from B's zero + let only_a = if half == 0 { only_a_0 } else { only_a_1 }; + if only_a & (1u64 << bit) != 0 { + dist += a.get_word(i).count_ones(); + } + + // Words only in B + let only_b = if half == 0 { only_b_0 } else { only_b_1 }; + if only_b & (1u64 << bit) != 0 { + dist += b.get_word(i).count_ones(); + } + } + + dist + } + + /// XOR bind in sparse domain. 
+ pub fn bind_sparse(a: &SparseContainer, b: &SparseContainer) -> SparseContainer { + // Union of bitmaps + let bitmap = [a.bitmap[0] | b.bitmap[0], a.bitmap[1] | b.bitmap[1]]; + let mut words = Vec::new(); + + for i in 0..128 { + let half = i / 64; + let bit = i % 64; + if bitmap[half] & (1u64 << bit) != 0 { + let wa = if a.has_word(i) { a.get_word(i) } else { 0 }; + let wb = if b.has_word(i) { b.get_word(i) } else { 0 }; + let xor = wa ^ wb; + if xor != 0 { + words.push(xor); + } else { + // XOR cancelled out — clear bitmap bit + // (handled below by rebuilding) + } + } + } + + // Rebuild bitmap to reflect actual non-zero words after XOR + SparseContainer::from_dense(&{ + let mut c = Container::zero(); + let da = a.to_dense(); + let db = b.to_dense(); + for i in 0..CONTAINER_WORDS { + c.words[i] = da.words[i] ^ db.words[i]; + } + c + }) + } +} + +impl Default for SparseContainer { + fn default() -> Self { + Self::zero() + } +} + +impl PartialEq for SparseContainer { + fn eq(&self, other: &Self) -> bool { + self.bitmap == other.bitmap && self.words == other.words + } +} + +impl Eq for SparseContainer {} + +// ============================================================================ +// PACKED AXES: Three sparse containers in one content Container +// ============================================================================ + +/// Maximum content words available for sparse axis data. +/// 128 total words - 6 bitmap words (2 per axis) = 122. +pub const MAX_AXIS_CONTENT_WORDS: usize = 122; + +/// Pack three sparse axes into one Container. 
+/// +/// Layout: [X_bmp(2)] [X_words(Nx)] [Y_bmp(2)] [Y_words(Ny)] [Z_bmp(2)] [Z_words(Nz)] [pad] +pub fn pack_axes( + x: &SparseContainer, + y: &SparseContainer, + z: &SparseContainer, +) -> Result<(Container, AxisDescriptors), SpoError> { + let total = x.word_count() + y.word_count() + z.word_count() + 6; + if total > CONTAINER_WORDS { + return Err(SpoError::AxesOverflow { + total, + max: CONTAINER_WORDS, + }); + } + + let mut container = Container::zero(); + let mut offset = 0usize; + + // X axis + let x_offset = offset; + container.words[offset] = x.bitmap[0]; + container.words[offset + 1] = x.bitmap[1]; + offset += 2; + for &w in &x.words { + container.words[offset] = w; + offset += 1; + } + + // Y axis + let y_offset = offset; + container.words[offset] = y.bitmap[0]; + container.words[offset + 1] = y.bitmap[1]; + offset += 2; + for &w in &y.words { + container.words[offset] = w; + offset += 1; + } + + // Z axis + let z_offset = offset; + container.words[offset] = z.bitmap[0]; + container.words[offset + 1] = z.bitmap[1]; + offset += 2; + for &w in &z.words { + container.words[offset] = w; + offset += 1; + } + + let desc = AxisDescriptors { + x_offset: x_offset as u16, + x_count: x.word_count() as u16, + y_offset: y_offset as u16, + y_count: y.word_count() as u16, + z_offset: z_offset as u16, + z_count: z.word_count() as u16, + total_words: offset as u16, + flags: 0, + }; + + Ok((container, desc)) +} + +/// Unpack content Container into three sparse axes using descriptors. 
+pub fn unpack_axes( + content: &Container, + desc: &AxisDescriptors, +) -> Result<(SparseContainer, SparseContainer, SparseContainer), SpoError> { + let x = unpack_one_axis(content, desc.x_offset as usize, desc.x_count as usize)?; + let y = unpack_one_axis(content, desc.y_offset as usize, desc.y_count as usize)?; + let z = unpack_one_axis(content, desc.z_offset as usize, desc.z_count as usize)?; + Ok((x, y, z)) +} + +fn unpack_one_axis( + content: &Container, + offset: usize, + count: usize, +) -> Result { + let bitmap = [content.words[offset], content.words[offset + 1]]; + let words: Vec = content.words[offset + 2..offset + 2 + count].to_vec(); + SparseContainer::new(bitmap, words) +} + +/// Axis layout descriptors (stored in meta W34-W35). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct AxisDescriptors { + pub x_offset: u16, + pub x_count: u16, + pub y_offset: u16, + pub y_count: u16, + pub z_offset: u16, + pub z_count: u16, + pub total_words: u16, + pub flags: u16, +} + +impl AxisDescriptors { + /// Pack into two u64 words (meta W34, W35). + pub fn to_words(&self) -> [u64; 2] { + let w34 = (self.x_offset as u64) + | ((self.x_count as u64) << 16) + | ((self.y_offset as u64) << 32) + | ((self.y_count as u64) << 48); + let w35 = (self.z_offset as u64) + | ((self.z_count as u64) << 16) + | ((self.total_words as u64) << 32) + | ((self.flags as u64) << 48); + [w34, w35] + } + + /// Unpack from two u64 words. + pub fn from_words(words: &[u64; 2]) -> Self { + Self { + x_offset: (words[0] & 0xFFFF) as u16, + x_count: ((words[0] >> 16) & 0xFFFF) as u16, + y_offset: ((words[0] >> 32) & 0xFFFF) as u16, + y_count: ((words[0] >> 48) & 0xFFFF) as u16, + z_offset: (words[1] & 0xFFFF) as u16, + z_count: ((words[1] >> 16) & 0xFFFF) as u16, + total_words: ((words[1] >> 32) & 0xFFFF) as u16, + flags: ((words[1] >> 48) & 0xFFFF) as u16, + } + } + + /// Is overflow flag set? 
(axes stored in linked records instead) + pub fn is_overflow(&self) -> bool { + self.flags & 1 != 0 + } + + /// Is this a meta-awareness record? + pub fn is_meta_awareness(&self) -> bool { + self.flags & 2 != 0 + } +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sparse_roundtrip_lossless() { + let dense = Container::random(42); + let sparse = SparseContainer::from_dense(&dense); + assert_eq!(sparse.to_dense(), dense); + } + + #[test] + fn test_sparse_zero_is_empty() { + let sparse = SparseContainer::from_dense(&Container::zero()); + assert_eq!(sparse.word_count(), 0); + assert_eq!(sparse.bitmap, [0, 0]); + } + + #[test] + fn test_sparse_bitmap_consistency() { + let sparse = SparseContainer::from_dense(&Container::random(99)); + let ones = sparse.bitmap[0].count_ones() + sparse.bitmap[1].count_ones(); + assert_eq!(ones as usize, sparse.words.len()); + } + + #[test] + fn test_sparse_hamming_equivalence() { + let a = Container::random(1); + let b = Container::random(2); + let sa = SparseContainer::from_dense(&a); + let sb = SparseContainer::from_dense(&b); + assert_eq!( + SparseContainer::hamming_sparse(&sa, &sb), + a.hamming(&b) + ); + } + + #[test] + fn test_sparse_hamming_zero() { + let a = Container::random(42); + let sa = SparseContainer::from_dense(&a); + assert_eq!(SparseContainer::hamming_sparse(&sa, &sa), 0); + } + + #[test] + fn test_sparse_density() { + // Random container should have ~100% density (all words non-zero) + let sparse = SparseContainer::from_dense(&Container::random(42)); + assert!(sparse.density() > 0.9); + + // Zero container should have 0% density + let zero = SparseContainer::from_dense(&Container::zero()); + assert_eq!(zero.density(), 0.0); + } + + #[test] + fn test_pack_unpack_roundtrip() { + let x = 
SparseContainer::from_dense(&Container::random(1)); + let y = SparseContainer::from_dense(&Container::random(2)); + let z = SparseContainer::from_dense(&Container::random(3)); + + // Full random containers are too dense to pack (128 words each) + // Use sparse ones instead + let mut sx = SparseContainer::zero(); + let mut sy = SparseContainer::zero(); + let mut sz = SparseContainer::zero(); + + // Create ~30% density containers + for i in 0..38 { + sx.bitmap[0] |= 1u64 << i; + sx.words.push(0xDEAD_0000 + i as u64); + sy.bitmap[0] |= 1u64 << (i + 1); + sy.words.push(0xBEEF_0000 + i as u64); + sz.bitmap[1] |= 1u64 << i; + sz.words.push(0xCAFE_0000 + i as u64); + } + + let (packed, desc) = pack_axes(&sx, &sy, &sz).unwrap(); + let (ux, uy, uz) = unpack_axes(&packed, &desc).unwrap(); + + assert_eq!(sx.bitmap, ux.bitmap); + assert_eq!(sx.words, ux.words); + assert_eq!(sy.bitmap, uy.bitmap); + assert_eq!(sy.words, uy.words); + assert_eq!(sz.bitmap, uz.bitmap); + assert_eq!(sz.words, uz.words); + } + + #[test] + fn test_pack_overflow_detection() { + // Three fully-dense containers won't fit + let full = SparseContainer::from_dense(&Container::random(1)); + let result = pack_axes(&full, &full, &full); + assert!(result.is_err()); + } + + #[test] + fn test_axis_descriptor_roundtrip() { + let desc = AxisDescriptors { + x_offset: 0, + x_count: 38, + y_offset: 40, + y_count: 38, + z_offset: 80, + z_count: 38, + total_words: 120, + flags: 0b10, // is_meta_awareness + }; + let words = desc.to_words(); + let restored = AxisDescriptors::from_words(&words); + assert_eq!(desc, restored); + assert!(restored.is_meta_awareness()); + assert!(!restored.is_overflow()); + } +} diff --git a/src/graph/spo/store.rs b/src/graph/spo/store.rs new file mode 100644 index 0000000..1b68ba3 --- /dev/null +++ b/src/graph/spo/store.rs @@ -0,0 +1,434 @@ +//! SpoStore — Three-axis content-addressable graph store. +//! +//! Forward: "What does Jan know?" → scan X+Y → return Z matches +//! 
//! Reverse: "Who knows Ada?" → scan Z+Y → return X matches
//! Relation: "How are Jan and Ada related?" → scan X+Z → return Y matches
//! Causal: "What does this feed?" → scan Z→X chain links

use std::collections::BTreeMap;

use ladybug_contract::container::Container;
use ladybug_contract::nars::TruthValue;
use ladybug_contract::record::CogRecord;

use super::scent::NibbleScent;
use super::sparse::{unpack_axes, AxisDescriptors, SparseContainer, SpoError};

// ============================================================================
// QUERY TYPES
// ============================================================================

/// A query hit with DN, distance, and which axis matched.
#[derive(Clone, Debug)]
pub struct QueryHit {
    /// DN address of the matching record (meta word 0).
    pub dn: u64,
    /// Hamming-based distance for the matched axis/combination (lower = closer).
    pub distance: u32,
    /// Which axis (or pair of axes) produced this hit.
    pub axis: QueryAxis,
}

/// Which axis (or combination) a query matched on.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum QueryAxis {
    X, // Subject
    Y, // Predicate
    Z, // Object
    XY, // Forward query (Subject + Predicate)
    YZ, // Reverse query (Predicate + Object)
    XZ, // Relation query (Subject + Object)
}

// ============================================================================
// SPO STORE
// ============================================================================

/// Three-axis content-addressable graph store.
///
/// POC uses BTreeMap. Production replaces with LanceDB columnar store.
pub struct SpoStore {
    /// Records keyed by DN address (meta word 0).
    records: BTreeMap<u64, CogRecord>,
}

impl SpoStore {
    /// Create an empty store.
    pub fn new() -> Self {
        Self {
            records: BTreeMap::new(),
        }
    }

    /// Insert a record, rejecting duplicate DN addresses.
    ///
    /// FIX: the original did `contains_key` followed by `insert` — two map
    /// lookups. The entry API does the duplicate check and insertion with one.
    pub fn insert(&mut self, record: CogRecord) -> Result<(), SpoError> {
        use std::collections::btree_map::Entry;

        let dn = record.meta.words[0]; // W0 = DN address
        match self.records.entry(dn) {
            Entry::Occupied(_) => Err(SpoError::DuplicateDn { dn }),
            Entry::Vacant(slot) => {
                slot.insert(record);
                Ok(())
            }
        }
    }

    /// Look up a record by DN address.
    pub fn get(&self, dn: u64) -> Option<&CogRecord> {
        self.records.get(&dn)
    }

    /// Number of stored records.
    pub fn len(&self) -> usize {
        self.records.len()
    }

    /// True when no records are stored.
    pub fn is_empty(&self) -> bool {
        self.records.is_empty()
    }

    // ========================================================================
    // THREE-AXIS QUERIES
    // ========================================================================

    /// Forward: "What does {src} {verb}?" → scan X+Y, return Z matches.
    ///
    /// Returns hits sorted by ascending combined distance.
    pub fn query_forward(
        &self,
        src_fp: &Container,
        verb_fp: &Container,
        radius: u32,
    ) -> Vec<QueryHit> {
        let src_sparse = SparseContainer::from_dense(src_fp);
        let verb_sparse = SparseContainer::from_dense(verb_fp);
        let mut hits = Vec::new();

        for (&dn, record) in &self.records {
            // Records with corrupt descriptors are silently skipped.
            if let Ok((x, y, _z)) = self.unpack_record(record) {
                let dx = SparseContainer::hamming_sparse(&src_sparse, &x);
                let dy = SparseContainer::hamming_sparse(&verb_sparse, &y);
                // Combined distance: both subject and predicate must match
                let combined = dx.saturating_add(dy) / 2;
                if combined <= radius {
                    hits.push(QueryHit {
                        dn,
                        distance: combined,
                        axis: QueryAxis::XY,
                    });
                }
            }
        }

        hits.sort_by_key(|h| h.distance);
        hits
    }

    /// Reverse: "Who {verb}s {tgt}?" → scan Z+Y, return X matches.
+ pub fn query_reverse( + &self, + tgt_fp: &Container, + verb_fp: &Container, + radius: u32, + ) -> Vec { + let tgt_sparse = SparseContainer::from_dense(tgt_fp); + let verb_sparse = SparseContainer::from_dense(verb_fp); + let mut hits = Vec::new(); + + for (&dn, record) in &self.records { + if let Ok((_x, y, z)) = self.unpack_record(record) { + let dz = SparseContainer::hamming_sparse(&tgt_sparse, &z); + let dy = SparseContainer::hamming_sparse(&verb_sparse, &y); + let combined = dz.saturating_add(dy) / 2; + if combined <= radius { + hits.push(QueryHit { + dn, + distance: combined, + axis: QueryAxis::YZ, + }); + } + } + } + + hits.sort_by_key(|h| h.distance); + hits + } + + /// Relation: "How are and related?" → scan X+Z, return Y matches. + pub fn query_relation( + &self, + src_fp: &Container, + tgt_fp: &Container, + radius: u32, + ) -> Vec { + let src_sparse = SparseContainer::from_dense(src_fp); + let tgt_sparse = SparseContainer::from_dense(tgt_fp); + let mut hits = Vec::new(); + + for (&dn, record) in &self.records { + if let Ok((x, _y, z)) = self.unpack_record(record) { + let dx = SparseContainer::hamming_sparse(&src_sparse, &x); + let dz = SparseContainer::hamming_sparse(&tgt_sparse, &z); + let combined = dx.saturating_add(dz) / 2; + if combined <= radius { + hits.push(QueryHit { + dn, + distance: combined, + axis: QueryAxis::XZ, + }); + } + } + } + + hits.sort_by_key(|h| h.distance); + hits + } + + /// Content: match against any axis. 
    /// Returns the single best-matching axis per record; hits sorted by
    /// ascending distance.
    pub fn query_content(
        &self,
        query: &Container,
        radius: u32,
    ) -> Vec<QueryHit> {
        let q_sparse = SparseContainer::from_dense(query);
        let mut hits = Vec::new();

        for (&dn, record) in &self.records {
            if let Ok((x, y, z)) = self.unpack_record(record) {
                let dx = SparseContainer::hamming_sparse(&q_sparse, &x);
                let dy = SparseContainer::hamming_sparse(&q_sparse, &y);
                let dz = SparseContainer::hamming_sparse(&q_sparse, &z);

                // Tie-break order is deliberate: on equal distances X wins
                // over Y, and Y wins over Z.
                let (best_dist, best_axis) = if dx <= dy && dx <= dz {
                    (dx, QueryAxis::X)
                } else if dy <= dz {
                    (dy, QueryAxis::Y)
                } else {
                    (dz, QueryAxis::Z)
                };

                if best_dist <= radius {
                    hits.push(QueryHit {
                        dn,
                        distance: best_dist,
                        axis: best_axis,
                    });
                }
            }
        }

        hits.sort_by_key(|h| h.distance);
        hits
    }

    // ========================================================================
    // CAUSAL CHAIN DISCOVERY
    // ========================================================================

    /// Find records whose X axis resonates with `record`'s Z axis.
    /// These are causal successors: things this record feeds into.
    ///
    /// `record` itself is excluded (matched by DN). Returns an empty Vec if
    /// `record`'s own axes cannot be unpacked.
    pub fn causal_successors(
        &self,
        record: &CogRecord,
        radius: u32,
    ) -> Vec<QueryHit> {
        if let Ok((_x, _y, z)) = self.unpack_record(record) {
            let source_dn = record.meta.words[0];
            let mut hits = Vec::new();

            for (&dn, other) in &self.records {
                if dn == source_dn { continue; } // skip self
                if let Ok((ox, _oy, _oz)) = self.unpack_record(other) {
                    let dist = SparseContainer::hamming_sparse(&z, &ox);
                    if dist <= radius {
                        hits.push(QueryHit {
                            dn,
                            distance: dist,
                            axis: QueryAxis::X,
                        });
                    }
                }
            }

            hits.sort_by_key(|h| h.distance);
            hits
        } else {
            Vec::new()
        }
    }

    /// Find records whose Z axis resonates with `record`'s X axis.
    /// These are causal predecessors: things that feed into this record.
    /// Mirror of `causal_successors`: compares this record's X against every
    /// other record's Z. Self is excluded by DN; hits sorted by distance.
    pub fn causal_predecessors(
        &self,
        record: &CogRecord,
        radius: u32,
    ) -> Vec<QueryHit> {
        if let Ok((x, _y, _z)) = self.unpack_record(record) {
            let source_dn = record.meta.words[0];
            let mut hits = Vec::new();

            for (&dn, other) in &self.records {
                if dn == source_dn { continue; }
                if let Ok((_ox, _oy, oz)) = self.unpack_record(other) {
                    let dist = SparseContainer::hamming_sparse(&x, &oz);
                    if dist <= radius {
                        hits.push(QueryHit {
                            dn,
                            distance: dist,
                            axis: QueryAxis::Z,
                        });
                    }
                }
            }

            hits.sort_by_key(|h| h.distance);
            hits
        } else {
            Vec::new()
        }
    }

    /// Walk a causal chain forward from `start`, max `depth` hops.
    /// Returns one Vec per hop level.
    ///
    /// NOTE(review): there is no visited set or per-level dedup, so cycles or
    /// shared successors can re-appear and the frontier can grow with
    /// branching factor ^ depth — confirm this is acceptable for the POC.
    pub fn walk_chain_forward(
        &self,
        start: &CogRecord,
        radius: u32,
        depth: usize,
    ) -> Vec<Vec<QueryHit>> {
        let mut chain = Vec::new();
        let mut current = vec![start.meta.words[0]]; // start DNs

        for _ in 0..depth {
            let mut level_hits = Vec::new();
            for &dn in &current {
                if let Some(record) = self.get(dn) {
                    let successors = self.causal_successors(record, radius);
                    level_hits.extend(successors);
                }
            }
            // Dead end: stop early rather than pushing empty levels.
            if level_hits.is_empty() { break; }
            current = level_hits.iter().map(|h| h.dn).collect();
            chain.push(level_hits);
        }

        chain
    }

    /// Compute chain coherence: product of normalized link coherences.
    ///
    /// coherence_per_link = 1.0 - (hamming(Z_i, X_{i+1}) / 8192.0)
    /// chain_coherence = product of all link coherences
    ///
    /// Returns 1.0 for chains shorter than two records; 0.0 if any DN is
    /// missing or any record fails to unpack. (8192 presumably is the
    /// container's bit width, making each link coherence fall in [0, 1] —
    /// TODO confirm against Container's size.)
    pub fn chain_coherence(&self, dns: &[u64]) -> f32 {
        if dns.len() < 2 { return 1.0; }

        let mut coherence = 1.0f32;
        // Each adjacent pair (i, i+1) contributes one Z→X link.
        for window in dns.windows(2) {
            let a = match self.get(window[0]) { Some(r) => r, None => return 0.0 };
            let b = match self.get(window[1]) { Some(r) => r, None => return 0.0 };

            let (_, _, z_a) = match self.unpack_record(a) { Ok(v) => v, Err(_) => return 0.0 };
            let (x_b, _, _) = match self.unpack_record(b) { Ok(v) => v, Err(_) => return 0.0 };

            let dist = SparseContainer::hamming_sparse(&z_a, &x_b);
            let link_coherence = 1.0 - (dist as f32 / 8192.0);
            coherence *= link_coherence;
        }

        coherence
    }

    // ========================================================================
    // NARS CHAIN DEDUCTION
    // ========================================================================

    /// Deduction along a causal chain with coherence-weighted confidence.
    ///
    /// f_chain = f₁ × f₂ × ... × fₙ
    /// c_chain = c₁ × c₂ × ... × cₙ × coherence₁₂ × coherence₂₃ × ...
    /// Any missing DN or unpack failure collapses the whole result to
    /// `TruthValue::unknown()`. A single-record "chain" is just that
    /// record's own truth value.
    pub fn chain_deduction(&self, dns: &[u64]) -> TruthValue {
        if dns.is_empty() { return TruthValue::unknown(); }
        if dns.len() == 1 {
            return self.get(dns[0])
                .map(|r| self.read_nars(r))
                .unwrap_or(TruthValue::unknown());
        }

        let mut f_chain = 1.0f32;
        let mut c_chain = 1.0f32;

        for (i, &dn) in dns.iter().enumerate() {
            let record = match self.get(dn) { Some(r) => r, None => return TruthValue::unknown() };
            let nars = self.read_nars(record);
            f_chain *= nars.frequency;
            c_chain *= nars.confidence;

            // Multiply by coherence factor for each Z→X link
            if i + 1 < dns.len() {
                let next = match self.get(dns[i + 1]) { Some(r) => r, None => return TruthValue::unknown() };
                let (_, _, z) = match self.unpack_record(record) { Ok(v) => v, Err(_) => return TruthValue::unknown() };
                let (x, _, _) = match self.unpack_record(next) { Ok(v) => v, Err(_) => return TruthValue::unknown() };
                let dist = SparseContainer::hamming_sparse(&z, &x);
                let link_coh = 1.0 - (dist as f32 / 8192.0);
                c_chain *= link_coh;
            }
        }

        // Clamp defensively; products of values in [0, 1] stay in range, but
        // link_coh is derived from a division and clamping costs nothing.
        TruthValue::new(
            f_chain.clamp(0.0, 1.0),
            c_chain.clamp(0.0, 1.0),
        )
    }

    // ========================================================================
    // SCENT PRE-FILTER
    // ========================================================================

    /// Filter records by scent distance before expensive Hamming scan.
    /// Returns the DNs whose stored scent is within `max_distance` of the
    /// query scent; callers then run full axis scans only on these.
    pub fn scent_prefilter(
        &self,
        query_scent: &NibbleScent,
        max_distance: u32,
    ) -> Vec<u64> {
        self.records.iter()
            .filter_map(|(&dn, record)| {
                let record_scent = self.read_scent(record);
                if record_scent.distance(query_scent) <= max_distance {
                    Some(dn)
                } else {
                    None
                }
            })
            .collect()
    }

    // ========================================================================
    // INTERNAL HELPERS
    // ========================================================================

    /// Decode a record's three sparse axes from its content container,
    /// driven by the descriptors stored in its meta words.
    fn unpack_record(
        &self,
        record: &CogRecord,
    ) -> Result<(SparseContainer, SparseContainer, SparseContainer), SpoError> {
        let desc = self.read_axis_descriptors(record);
        unpack_axes(&record.content, &desc)
    }

    /// Axis descriptors live in meta W34-W35.
    fn read_axis_descriptors(&self, record: &CogRecord) -> AxisDescriptors {
        AxisDescriptors::from_words(&[record.meta.words[34], record.meta.words[35]])
    }

    /// NARS truth value: frequency in meta W4, confidence in meta W5, each
    /// stored as raw f32 bits in the low 32 bits; clamped on read.
    fn read_nars(&self, record: &CogRecord) -> TruthValue {
        let freq = f32::from_bits(record.meta.words[4] as u32);
        let conf = f32::from_bits(record.meta.words[5] as u32);
        TruthValue::new(
            freq.clamp(0.0, 1.0),
            conf.clamp(0.0, 1.0),
        )
    }

    /// Nibble scent occupies meta W12-W17 (48 bytes; see NibbleScent contract).
    fn read_scent(&self, record: &CogRecord) -> NibbleScent {
        NibbleScent::from_words(&[
            record.meta.words[12],
            record.meta.words[13],
            record.meta.words[14],
            record.meta.words[15],
            record.meta.words[16],
            record.meta.words[17],
        ])
    }
}

impl Default for SpoStore {
    fn default() -> Self {
        Self::new()
    }
}
diff --git a/src/graph/spo/tests.rs b/src/graph/spo/tests.rs
new file mode 100644
index 0000000..8c644f6
--- /dev/null
+++ b/src/graph/spo/tests.rs
//! SPO 3D — Six Ironclad Tests
//!
//! Test 1: Node round-trip (build → insert → retrieve → verify)
//! Test 2: Forward query (Jan KNOWS Ada → find Ada from Jan)
//! Test 3: Reverse query (Jan KNOWS Ada → find Jan from Ada, NO extra index)
//! Test 4: CAM content lookup (100 nodes → find by fingerprint)
//!
//! Test 5: NARS reasoning (deduction + revision along chain)
//! Test 6: Causal chain coherence (Z→X resonance + meta convergence)

#[cfg(test)]
mod tests {
    use ladybug_contract::container::Container;
    use ladybug_contract::nars::TruthValue;

    // These would be imported from the spo module once wired into the crate
    use crate::graph::spo::sparse::{SparseContainer, unpack_axes, AxisDescriptors};
    // NOTE(review): NibbleScent is imported but not exercised by any of the
    // six tests below — confirm whether a scent-prefilter test is planned.
    use crate::graph::spo::scent::NibbleScent;
    use crate::graph::spo::store::SpoStore;
    use crate::graph::spo::builder::{SpoBuilder, label_fp, dn_hash};

    // ========================================================================
    // TEST 1: NODE ROUND-TRIP
    // ========================================================================

    #[test]
    fn test_1_node_roundtrip() {
        let lbl_person = label_fp("Person");
        let key_name = label_fp("name");
        let val_jan = label_fp("Jan");
        let key_age = label_fp("age");
        let val_42 = label_fp("42");

        // Build node: Jan {Person, name: "Jan", age: 42}
        let record = SpoBuilder::build_node(
            dn_hash("jan"),
            &[&lbl_person],
            &[(&key_name, &val_jan), (&key_age, &val_42)],
            TruthValue::new(1.0, 0.9),
        ).unwrap();

        // Insert into store
        let mut store = SpoStore::new();
        store.insert(record.clone()).unwrap();

        // Retrieve
        let retrieved = store.get(dn_hash("jan")).unwrap();

        // Verify DN matches
        assert_eq!(retrieved.meta.words[0], dn_hash("jan"));

        // Unpack and verify X axis is Hamming-close to the original
        let desc = AxisDescriptors::from_words(&[
            retrieved.meta.words[34],
            retrieved.meta.words[35],
        ]);
        let (x, y, z) = unpack_axes(&retrieved.content, &desc).unwrap();

        // Node's Y axis should be empty (no verb)
        assert_eq!(y.word_count(), 0, "Node Y axis must be empty");

        // X and Z should be identical
        assert_eq!(x, z, "Node X and Z must be identical (self-referential)");

        // X axis should contain the bundled labels+properties
        let x_dense = x.to_dense();
        assert!(!x_dense.is_zero(), "X axis must not be zero");

        // Hamming to self should be 0 (also re-checks sparse round-trip)
        assert_eq!(
            SparseContainer::hamming_sparse(&x, &SparseContainer::from_dense(&x_dense)),
            0
        );
    }

    // ========================================================================
    // TEST 2: FORWARD QUERY
    // ========================================================================

    #[test]
    fn test_2_forward_query() {
        let mut store = SpoStore::new();

        // Create entities
        let jan_fp = label_fp("Jan");
        let ada_fp = label_fp("Ada");
        let knows_fp = label_fp("KNOWS");

        let jan_node = SpoBuilder::build_node(
            dn_hash("jan"), &[&label_fp("Person")],
            &[(&label_fp("name"), &jan_fp)],
            TruthValue::new(1.0, 0.9),
        ).unwrap();

        let ada_node = SpoBuilder::build_node(
            dn_hash("ada"), &[&label_fp("Person")],
            &[(&label_fp("name"), &ada_fp)],
            TruthValue::new(1.0, 0.9),
        ).unwrap();

        // Create edge: Jan KNOWS Ada
        let edge = SpoBuilder::build_edge(
            dn_hash("jan_knows_ada"),
            &jan_fp,
            &knows_fp,
            &ada_fp,
            TruthValue::new(0.8, 0.9),
        ).unwrap();

        store.insert(jan_node).unwrap();
        store.insert(ada_node).unwrap();
        store.insert(edge).unwrap();

        // Forward query: "What does Jan know?"
        // (4000 presumably chosen relative to the 8192-bit container — half
        // the maximum Hamming distance; confirm with builder's noise level.)
        let hits = store.query_forward(&jan_fp, &knows_fp, 4000);
        assert!(
            !hits.is_empty(),
            "Forward query must find at least one result"
        );

        // The edge record should be in the results
        let found_edge = hits.iter().any(|h| h.dn == dn_hash("jan_knows_ada"));
        assert!(found_edge, "Forward query must find the edge record");
    }

    // ========================================================================
    // TEST 3: REVERSE QUERY (NO EXTRA INDEX)
    // ========================================================================

    #[test]
    fn test_3_reverse_query() {
        let mut store = SpoStore::new();

        let jan_fp = label_fp("Jan");
        let ada_fp = label_fp("Ada");
        let knows_fp = label_fp("KNOWS");

        let jan_node = SpoBuilder::build_node(
            dn_hash("jan"), &[&label_fp("Person")],
            &[(&label_fp("name"), &jan_fp)],
            TruthValue::new(1.0, 0.9),
        ).unwrap();

        let ada_node = SpoBuilder::build_node(
            dn_hash("ada"), &[&label_fp("Person")],
            &[(&label_fp("name"), &ada_fp)],
            TruthValue::new(1.0, 0.9),
        ).unwrap();

        let edge = SpoBuilder::build_edge(
            dn_hash("jan_knows_ada"),
            &jan_fp,
            &knows_fp,
            &ada_fp,
            TruthValue::new(0.8, 0.9),
        ).unwrap();

        store.insert(jan_node).unwrap();
        store.insert(ada_node).unwrap();
        store.insert(edge).unwrap();

        // Reverse query: "Who knows Ada?" — scanning Z+Y, no reverse index!
        let hits = store.query_reverse(&ada_fp, &knows_fp, 4000);
        assert!(
            !hits.is_empty(),
            "Reverse query must find results WITHOUT any extra index"
        );

        let found_edge = hits.iter().any(|h| h.dn == dn_hash("jan_knows_ada"));
        assert!(found_edge, "Reverse query must find the edge record");
    }

    // ========================================================================
    // TEST 4: CAM CONTENT LOOKUP
    // ========================================================================

    #[test]
    fn test_4_cam_content_lookup() {
        let mut store = SpoStore::new();

        // Insert 100 nodes with different names
        for i in 0..100 {
            let name = format!("entity_{}", i);
            let name_fp = label_fp(&name);
            let node = SpoBuilder::build_node(
                dn_hash(&name),
                &[&label_fp("Thing")],
                &[(&label_fp("name"), &name_fp)],
                TruthValue::new(1.0, 0.9),
            ).unwrap();
            store.insert(node).unwrap();
        }

        assert_eq!(store.len(), 100);

        // Query for a specific entity by its content fingerprint
        let target_fp = label_fp("entity_42");
        let hits = store.query_content(&target_fp, 3500);

        assert!(
            !hits.is_empty(),
            "Content lookup must find results in 100-node store"
        );

        // The closest hit should be entity_42 (hits are sorted by distance)
        let best = &hits[0];
        assert_eq!(
            best.dn,
            dn_hash("entity_42"),
            "Best content match should be entity_42"
        );
    }

    // ========================================================================
    // TEST 5: NARS REASONING
    // ========================================================================

    #[test]
    fn test_5_nars_reasoning() {
        let mut store = SpoStore::new();

        let jan_fp = label_fp("Jan");
        let rust_fp = label_fp("Rust");
        let cam_fp = label_fp("CAM");
        let knows_fp = label_fp("KNOWS");
        let helps_fp = label_fp("HELPS");

        // "Jan knows Rust" <0.8, 0.9>
        let e1 = SpoBuilder::build_edge(
            dn_hash("e1"), &jan_fp, &knows_fp, &rust_fp,
            TruthValue::new(0.8, 0.9),
        ).unwrap();

        // "Rust helps CAM" <0.7, 0.8>
        let e2 = SpoBuilder::build_edge(
            dn_hash("e2"), &rust_fp, &helps_fp, &cam_fp,
            TruthValue::new(0.7, 0.8),
        ).unwrap();

        store.insert(e1).unwrap();
        store.insert(e2).unwrap();

        // Chain deduction: Jan → Rust → CAM
        let chain_tv = store.chain_deduction(&[dn_hash("e1"), dn_hash("e2")]);

        // Frequency: 0.8 × 0.7 = 0.56
        assert!(
            (chain_tv.frequency - 0.56).abs() < 0.01,
            "Chain frequency should be ~0.56, got {}",
            chain_tv.frequency
        );

        // Confidence: 0.9 × 0.8 × coherence_factor < 0.72
        assert!(
            chain_tv.confidence <= 0.72,
            "Chain confidence should be ≤ 0.72, got {}",
            chain_tv.confidence
        );
        assert!(
            chain_tv.confidence > 0.0,
            "Chain confidence should be positive"
        );
    }

    // ========================================================================
    // TEST 6: CAUSAL CHAIN COHERENCE
    // ========================================================================

    #[test]
    fn test_6_causal_chain_coherence() {
        let mut store = SpoStore::new();

        // Create a chain where Record 1's Z resonates with Record 2's X.
        // We use the SAME fingerprint for the shared entity (Rust)
        // so the Z→X link should have low Hamming distance.

        let jan_fp = label_fp("Jan");
        let rust_fp = label_fp("Rust");
        let cam_fp = label_fp("CAM");
        let knows_fp = label_fp("KNOWS");
        let enables_fp = label_fp("ENABLES");

        let e1 = SpoBuilder::build_edge(
            dn_hash("chain_e1"), &jan_fp, &knows_fp, &rust_fp,
            TruthValue::new(0.8, 0.9),
        ).unwrap();

        let e2 = SpoBuilder::build_edge(
            dn_hash("chain_e2"), &rust_fp, &enables_fp, &cam_fp,
            TruthValue::new(0.7, 0.8),
        ).unwrap();

        store.insert(e1.clone()).unwrap();
        store.insert(e2.clone()).unwrap();

        // Test chain coherence
        let coherence = store.chain_coherence(&[dn_hash("chain_e1"), dn_hash("chain_e2")]);
        assert!(
            coherence > 0.0,
            "Chain coherence should be positive, got {}",
            coherence
        );

        // Test causal successor discovery
        // NOTE(review): `successors` is computed but never asserted on — the
        // commentary below explains why; consider at least a no-panic check.
        let successors = store.causal_successors(&e1, 4096);
        // At minimum: some records' X axes should be within range
        // (The Z of e1 is BIND(rust, permute(knows,2)) which may not exactly
        // match the X of e2 which is BIND(rust, permute(enables,1)),
        // but both contain rust_fp so they should be closer than random)

        // Test meta-awareness construction
        let meta = SpoBuilder::build_meta_awareness(
            dn_hash("meta_chain"),
            &[&e1, &e2],
            coherence,
        ).unwrap();

        // Verify meta-awareness flag
        let desc = AxisDescriptors::from_words(&[
            meta.meta.words[34],
            meta.meta.words[35],
        ]);
        assert!(desc.is_meta_awareness(), "Meta record must have awareness flag");

        // Verify meta-record has populated axes
        let (mx, my, mz) = unpack_axes(&meta.content, &desc).unwrap();
        assert!(mx.word_count() > 0, "Meta X should be populated");
        assert!(my.word_count() > 0, "Meta Y should be populated");
        assert!(mz.word_count() > 0, "Meta Z should be populated");

        // The meta-record's Z should be a BUNDLE of the chain's Z axes,
        // which means it should be somewhat close to the original content.
        // This is the convergence test — the tsunami prediction.
        let mz_dense = mz.to_dense();
        let original_z_dense = {
            let d = AxisDescriptors::from_words(&[e1.meta.words[34], e1.meta.words[35]]);
            unpack_axes(&e1.content, &d).unwrap().2.to_dense()
        };

        let convergence_dist = mz_dense.hamming(&original_z_dense);
        // Meta-bundled Z won't be identical to e1's Z (it's a bundle of BOTH)
        // but it should be closer than random (~4096)
        assert!(
            convergence_dist < 4096,
            "Meta Z should converge toward original content, got distance {}",
            convergence_dist
        );
    }
}