From f559e344eaf089c9a697f16eeade4495825ddf95 Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 6 May 2026 00:09:07 -0300 Subject: [PATCH] feat(ggml): ggml-spanker crate with MatmulInt4 trait + MockSail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lands the second Sail-side workspace crate, ggml-spanker, per ADR-001's Cargo workspace plan. Defines the Q4_K matmul primitive shared across the runtime and pins upstream GGML as a shallow submodule so PR #5b's bindgen has a stable, reproducible header source. What's in: - Workspace Cargo.toml: adds src/backends/ggml as second member. - .gitmodules + external/ggml: pinned at ac6f7b4 (shallow, master-tracking) — minimum reproducible vendoring per the Agent R directive. - src/backends/ggml/Cargo.toml: spanker-runtime path dep + thiserror. No bindgen build-dep yet; deferred with rationale (see below). - src/backends/ggml/src/lib.rs: pub trait MatmulInt4 with matmul_q4_k(a, b, out, m, k, n) -> Result<()>; Error enum (BadDims, OutputTooSmall, NotImplemented, Runtime); QK_K + Q4_K_BLOCK_BYTES constants verified against upstream GGML's known layout. - src/backends/ggml/src/mock.rs: MockSail records the AXI4 transactions a real SailMatmul would issue (Write A → Write B → ComputeSubmit matmul_q4_k → Read OUT). Transaction enum is the test surface; raw addresses are mock-internal so tests assert *shape* not *bytes*. - src/backends/ggml/src/sail.rs: SailMatmul holds a SpankerControl handle; matmul_q4_k currently returns Error::NotImplemented pending SPANKER_IOC_WORK_SUBMIT (PR #5b). - src/backends/ggml/tests/mock_matmul.rs: integration tests assert the four-phase AXI4 sequence and BadDims rejection. 
Local verification (rustc 1.94.1, all targets): $ cargo build --workspace --all-targets → finished, 0 warnings $ cargo test --workspace --all-targets → 9/9 pass - ggml-spanker lib unit: 3 - ggml-spanker tests/mock_matmul: 2 - spanker-runtime lib unit: 3 - spanker-runtime tests: 1 (skip when /dev/spankerctl absent) $ cargo clippy --workspace --all-targets --all-features -- -D warnings → clean $ cargo fmt --check --all → clean Why bindgen is deferred from this PR (and lands in PR #5b): bindgen 0.69's transitive `home` crate (>= 0.5.5) requires rustc >= 1.81; bindgen 0.71+ pulls `rustc-hash 2.x` requiring rustc >= 1.77. Both conflict with ADR-001's 1.75 MSRV (recently re-pinned by Agent R in the ADR-001 amendment guidance, after their original bump 1.75→1.85 was reverted as Global-South- hostile). Resolution paths: (a) wait for bindgen / home to publish back-compat releases, or (b) the upcoming ADR-001 amendment revisits MSRV given the FFI need. PR #5b will land: - bindgen build-dependency at whatever version reconciles with the then-current MSRV - src/backends/ggml/build.rs invoking bindgen over wrapper.h + external/ggml/include - private mod ffi { include!("bindings.rs") } in lib.rs - the real-device SailMatmul body using SPANKER_IOC_WORK_SUBMIT - CI step adding submodules: recursive + libclang-dev Cross-stream issue to file against MAST after merge: Title: [cross-stream] expose queryable axi4_mem_model state for ggml-spanker integration tests Labels: stream-1, stream-3, cross-stream Asks Agent 1 for either (a) a Python helper exposing axi4_mem_model state via snapshot() / last_n_transactions(n), or (b) a parametrizable cocotb test that consumes a YAML/JSON AXI4 transaction sequence and verifies bit-exact behaviour. Blocks PR #5's claim of "integration tested". Authored by Agent 3 (Software Stack). 
Signed-off-by: Marcos --- .gitmodules | 5 + Cargo.lock | 8 ++ Cargo.toml | 5 +- external/ggml | 1 + src/backends/ggml/Cargo.toml | 32 +++++ src/backends/ggml/src/lib.rs | 128 +++++++++++++++++++ src/backends/ggml/src/mock.rs | 166 +++++++++++++++++++++++++ src/backends/ggml/src/sail.rs | 49 ++++++++ src/backends/ggml/tests/mock_matmul.rs | 79 ++++++++++++ 9 files changed, 472 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 160000 external/ggml create mode 100644 src/backends/ggml/Cargo.toml create mode 100644 src/backends/ggml/src/lib.rs create mode 100644 src/backends/ggml/src/mock.rs create mode 100644 src/backends/ggml/src/sail.rs create mode 100644 src/backends/ggml/tests/mock_matmul.rs diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e3630b4 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,5 @@ +[submodule "external/ggml"] + path = external/ggml + url = https://github.com/ggml-org/ggml.git + shallow = true + branch = master diff --git a/Cargo.lock b/Cargo.lock index 681906f..19f9918 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,14 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ggml-spanker" +version = "0.1.0" +dependencies = [ + "spanker-runtime", + "thiserror", +] + [[package]] name = "libc" version = "0.2.186" diff --git a/Cargo.toml b/Cargo.toml index f364ad7..5a53e5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,10 @@ [workspace] resolver = "2" -members = ["src/runtime"] +members = [ + "src/runtime", + "src/backends/ggml", +] [workspace.package] edition = "2021" diff --git a/external/ggml b/external/ggml new file mode 160000 index 0000000..ac6f7b4 --- /dev/null +++ b/external/ggml @@ -0,0 +1 @@ +Subproject commit ac6f7b44f60fde0091f0b3d99afde48f8c99b13a diff --git a/src/backends/ggml/Cargo.toml b/src/backends/ggml/Cargo.toml new file mode 100644 index 
0000000..e83bb79 --- /dev/null +++ b/src/backends/ggml/Cargo.toml @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 PopSolutions Cooperative + +[package] +name = "ggml-spanker" +version = "0.1.0" +description = "GGML int4 matmul backend for the PopSolutions Sails." +keywords = ["ggml", "matmul", "popsolutions", "fpga"] +categories = ["hardware-support", "science"] + +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +rust-version.workspace = true + +[lib] +name = "ggml_spanker" +path = "src/lib.rs" + +[dependencies] +spanker-runtime = { path = "../../runtime" } +thiserror = { workspace = true } + +# NOTE on bindgen — the upstream GGML submodule under +# external/ggml/ is pinned by this PR so that PR #5b can wire +# bindgen + a build.rs over `wrapper.h` once SailMatmul gains a +# real-device path. Bindgen itself is deferred from this PR +# because its transitive deps (home → rustc-hash) require Rust +# >= 1.81, conflicting with ADR-001's 1.75 MSRV. Resolution path: +# (a) wait for bindgen / home to publish back-compat releases, or +# (b) the ADR-001 amendment revisits MSRV in light of FFI needs. diff --git a/src/backends/ggml/src/lib.rs b/src/backends/ggml/src/lib.rs new file mode 100644 index 0000000..cf3defd --- /dev/null +++ b/src/backends/ggml/src/lib.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026 PopSolutions Cooperative + +//! # ggml-spanker — GGML int4 matmul backend for the PopSolutions Sails +//! +//! Per ADR-001 (Rust runtime) and the cross-stream contract with +//! `popsolutions/MAST`. This crate exposes the [`MatmulInt4`] +//! trait — a Q4_K-shaped matrix-multiply primitive — together with +//! two implementations: +//! +//! - [`SailMatmul`]: the real-device path. Currently a stub +//! returning [`Error::NotImplemented`] until the kernel ABI gains +//! `SPANKER_IOC_WORK_SUBMIT` (deferred to PR #5b after ADR-003 +//! pins the v1 ABI). 
+//! - [`MockSail`]: a host-side mock that records the AXI4 traffic +//! the matmul *would* issue, so unit tests can assert correctly- +//! shaped transactions without a real device. Will be displaced +//! for integration testing once Agent 1 exposes a queryable +//! `axi4_mem_model` cocotb harness in MAST (cross-stream issue +//! filed alongside this PR). +//! +//! ## Q4_K layout +//! +//! Mirrors upstream GGML's `block_q4_K` (256 weights packed into +//! 144 bytes, with 6-bit scales). Constants are duplicated here; +//! PR #5b will cross-check them against the bindgen-derived +//! `ffi` module so they cannot drift silently. + +#![warn(missing_docs)] +#![deny(unsafe_op_in_unsafe_fn)] + +pub mod mock; +pub mod sail; + +pub use mock::{MockSail, Transaction}; +pub use sail::SailMatmul; + +// NOTE: bindgen-derived FFI types over upstream GGML are landed +// in PR #5b alongside the real-device SailMatmul implementation. +// The GGML submodule at external/ggml/ is pinned by this PR so +// PR #5b's build.rs has a stable header source. See the +// build-dependencies note in Cargo.toml for the MSRV rationale. + +/// Number of weights packed into one Q4_K block. Mirrors +/// upstream GGML's `QK_K` constant. +pub const QK_K: usize = 256; + +/// Bytes per Q4_K block. Mirrors upstream GGML's +/// `sizeof(block_q4_K)` (144 bytes: 12 bytes of scales + 128 bytes +/// of nibble-packed weights + 4 bytes of `d`/`dmin` half-floats). +pub const Q4_K_BLOCK_BYTES: usize = 144; + +/// Errors returned by this crate. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Caller passed dimensions that violate the Q4_K layout + /// contract (typically `k` not a multiple of [`QK_K`]). + #[error("bad dims: m={m} k={k} n={n}; require k % {qk} == 0")] + BadDims { + /// Number of output rows. + m: usize, + /// Reduction dimension. + k: usize, + /// Number of output columns. + n: usize, + /// Required block size (i.e. [`QK_K`]). 
+ qk: usize, + }, + + /// The output buffer is smaller than the matmul requires. + #[error("output buffer too small: have {have} bytes, need {need}")] + OutputTooSmall { + /// Bytes the caller provided. + have: usize, + /// Bytes the matmul actually needs. + need: usize, + }, + + /// Real-device matmul is not yet wired up (waiting on + /// `SPANKER_IOC_WORK_SUBMIT`). + #[error("not implemented yet (waiting on SPANKER_IOC_WORK_SUBMIT)")] + NotImplemented, + + /// Underlying runtime error (ioctl, open, etc.). + #[error(transparent)] + Runtime(#[from] spanker_runtime::Error), +} + +/// Convenience alias for results returned by this crate. +pub type Result<T> = std::result::Result<T, Error>; + +/// Q4_K matrix multiplication primitive. +/// +/// Inputs `a`, `b`, and `out` are raw byte slices in GGML's Q4_K +/// quantized layout. The matmul computes `out = a · b^T` in +/// row-major terms with shapes: +/// +/// - `a`: `m × k` quantized weights (`k` MUST be a multiple of +/// [`QK_K`]). +/// - `b`: `n × k` quantized weights. +/// - `out`: `m × n` quantized weights, allocated by the caller. +pub trait MatmulInt4 { + /// Issue a Q4_K matmul against the underlying device. + fn matmul_q4_k( + &self, + a: &[u8], + b: &[u8], + out: &mut [u8], + m: usize, + k: usize, + n: usize, + ) -> Result<()>; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn q4_k_block_bytes_constant_matches_known_layout() { + // Q4_K is 144 bytes per block in upstream GGML; this + // constant must not drift independently. PR #5b will + // additionally cross-check it against the bindgen-derived + // `enum ggml_type` size table. + assert_eq!(Q4_K_BLOCK_BYTES, 144); + assert_eq!(QK_K, 256); + } +} diff --git a/src/backends/ggml/src/mock.rs b/src/backends/ggml/src/mock.rs new file mode 100644 index 0000000..92fc644 --- /dev/null +++ b/src/backends/ggml/src/mock.rs @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026 PopSolutions Cooperative + +//! 
Host-side mock implementation of [`crate::MatmulInt4`]. +//! +//! Records the AXI4 traffic the matmul *would* issue so unit tests +//! can assert correctly-shaped transactions without a real device. +//! Will be displaced for real integration testing once Agent 1 +//! exposes a queryable `axi4_mem_model` cocotb harness in MAST +//! (cross-stream issue filed alongside this PR). + +use std::sync::Mutex; + +use crate::{Error, MatmulInt4, Result, QK_K}; + +/// One AXI4 transaction recorded by [`MockSail`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Transaction { + /// Write `len` bytes to `dev_addr`. The `label` field is a + /// human-readable hint for assertions ("A", "B", etc.). + Write { + /// Target device-side BAR offset. + dev_addr: u64, + /// Payload length in bytes. + len: usize, + /// Operand label. + label: &'static str, + }, + /// Submit a compute command at `dev_addr` (i.e. the cmd FIFO). + ComputeSubmit { + /// Target device-side cmd-FIFO offset. + dev_addr: u64, + /// Operation kind, e.g. `"matmul_q4_k"`. + kind: &'static str, + /// Output rows. + m: usize, + /// Reduction dimension. + k: usize, + /// Output columns. + n: usize, + }, + /// Read `len` bytes from `dev_addr`. + Read { + /// Source device-side BAR offset. + dev_addr: u64, + /// Payload length in bytes. + len: usize, + /// Operand label. + label: &'static str, + }, +} + +// Mock BAR offsets — opaque to the test surface; tests check +// transaction *shape* via `Transaction` matching, not raw addrs. +// Real driver exposes these via a future `SPANKER_IOC_BAR_INFO` +// ioctl (PR #5b). +const MOCK_BAR_A: u64 = 0x10000; +const MOCK_BAR_B: u64 = 0x20000; +const MOCK_BAR_OUT: u64 = 0x30000; +const MOCK_CMD_FIFO: u64 = 0x4000; + +/// Host-side mock that records AXI4 transactions a real +/// `SailMatmul` would issue. +#[derive(Default)] +pub struct MockSail { + txns: Mutex<Vec<Transaction>>, +} + +impl MockSail { + /// Construct an empty mock with no recorded transactions. 
+ pub fn new() -> Self { + Self::default() + } + + /// Snapshot the recorded transactions in submission order. + pub fn transactions(&self) -> Vec<Transaction> { + self.txns + .lock() + .expect("MockSail txn lock poisoned") + .clone() + } + + fn push(&self, txn: Transaction) { + self.txns + .lock() + .expect("MockSail txn lock poisoned") + .push(txn); + } +} + +impl MatmulInt4 for MockSail { + fn matmul_q4_k( + &self, + a: &[u8], + b: &[u8], + out: &mut [u8], + m: usize, + k: usize, + n: usize, + ) -> Result<()> { + if k == 0 || k % QK_K != 0 { + return Err(Error::BadDims { m, k, n, qk: QK_K }); + } + + self.push(Transaction::Write { + dev_addr: MOCK_BAR_A, + len: a.len(), + label: "A", + }); + self.push(Transaction::Write { + dev_addr: MOCK_BAR_B, + len: b.len(), + label: "B", + }); + self.push(Transaction::ComputeSubmit { + dev_addr: MOCK_CMD_FIFO, + kind: "matmul_q4_k", + m, + k, + n, + }); + self.push(Transaction::Read { + dev_addr: MOCK_BAR_OUT, + len: out.len(), + label: "OUT", + }); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mock_records_four_transactions_in_order() { + let mock = MockSail::new(); + let mut out = [0u8; 64]; + mock.matmul_q4_k(&[0u8; 32], &[0u8; 64], &mut out, 4, QK_K, 4) + .expect("mock matmul should succeed on aligned k"); + + let txns = mock.transactions(); + assert_eq!(txns.len(), 4); + assert!(matches!(&txns[0], Transaction::Write { label: "A", .. })); + assert!(matches!(&txns[1], Transaction::Write { label: "B", .. })); + assert!(matches!( + &txns[2], + Transaction::ComputeSubmit { + kind: "matmul_q4_k", + .. + } + )); + assert!(matches!(&txns[3], Transaction::Read { label: "OUT", .. })); + } + + #[test] + fn mock_rejects_unaligned_k() { + let mock = MockSail::new(); + let mut out = [0u8; 16]; + let err = mock + .matmul_q4_k(&[], &[], &mut out, 1, 100, 1) + .expect_err("expected BadDims"); + assert!(matches!(err, Error::BadDims { qk: QK_K, .. 
})); + } +} diff --git a/src/backends/ggml/src/sail.rs b/src/backends/ggml/src/sail.rs new file mode 100644 index 0000000..714778f --- /dev/null +++ b/src/backends/ggml/src/sail.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026 PopSolutions Cooperative + +//! Real-device implementation of [`crate::MatmulInt4`]. +//! +//! Currently a stub returning [`crate::Error::NotImplemented`] +//! because the kernel ABI does not yet expose a work-submission +//! ioctl. Lands fully in PR #5b after ADR-003 pins the v1 ABI and +//! the kernel module gains `SPANKER_IOC_WORK_SUBMIT`. + +use spanker_runtime::SpankerControl; + +use crate::{Error, MatmulInt4, Result}; + +/// Real-device implementation of [`MatmulInt4`]. +/// +/// Holds a [`SpankerControl`] handle for issuing future work-submit +/// ioctls. Construct via [`SailMatmul::new`] after opening the +/// control device with `SpankerControl::open()`. +pub struct SailMatmul { + // Held now so the public constructor signature is stable from + // PR #5 forward — PR #5b will start using it. + #[allow(dead_code)] + ctl: SpankerControl, +} + +impl SailMatmul { + /// Construct a new `SailMatmul` from an open [`SpankerControl`]. + pub fn new(ctl: SpankerControl) -> Self { + Self { ctl } + } +} + +impl MatmulInt4 for SailMatmul { + fn matmul_q4_k( + &self, + _a: &[u8], + _b: &[u8], + _out: &mut [u8], + _m: usize, + _k: usize, + _n: usize, + ) -> Result<()> { + // PR #5b will replace this with: dma writes for A/B, a + // SPANKER_IOC_WORK_SUBMIT ioctl carrying the matmul + // descriptor, and a dma read for OUT. + Err(Error::NotImplemented) + } +} diff --git a/src/backends/ggml/tests/mock_matmul.rs b/src/backends/ggml/tests/mock_matmul.rs new file mode 100644 index 0000000..36f6818 --- /dev/null +++ b/src/backends/ggml/tests/mock_matmul.rs @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026 PopSolutions Cooperative + +//! 
Integration tests — `MockSail` asserts AXI4 transaction shape +//! for a Q4_K matmul. +//! +//! Real-device path (`SailMatmul`) is gated on the cross-stream +//! issue against MAST asking Agent 1 for a queryable +//! `axi4_mem_model` cocotb harness; until that lands the mock is +//! the source of truth for transaction shape. + +use ggml_spanker::{Error, MatmulInt4, MockSail, Transaction, Q4_K_BLOCK_BYTES, QK_K}; + +#[test] +fn q4_k_matmul_issues_four_axi4_phases_in_order() { + let m = 4; + let k = QK_K; // exactly one Q4_K block per row of A + let n = 4; + + // Q4_K input layout (skeleton-grade): m × (k / QK_K) blocks of + // Q4_K_BLOCK_BYTES each. Real layouts will be honed in PR #5b. + let a = vec![0u8; m * (k / QK_K) * Q4_K_BLOCK_BYTES]; + let b = vec![0u8; n * (k / QK_K) * Q4_K_BLOCK_BYTES]; + let mut out = vec![0u8; m * n * 4]; + + let mock = MockSail::new(); + mock.matmul_q4_k(&a, &b, &mut out, m, k, n) + .expect("mock matmul should succeed"); + + let txns = mock.transactions(); + assert_eq!( + txns.len(), + 4, + "expected exactly 4 AXI4 transactions; got {txns:?}" + ); + + assert!( + matches!(&txns[0], Transaction::Write { label: "A", len, .. } if *len == a.len()), + "txn[0] should be a Write of A of len={}: {:?}", + a.len(), + txns[0] + ); + assert!( + matches!(&txns[1], Transaction::Write { label: "B", len, .. } if *len == b.len()), + "txn[1] should be a Write of B of len={}: {:?}", + b.len(), + txns[1] + ); + assert!( + matches!( + &txns[2], + Transaction::ComputeSubmit { + kind: "matmul_q4_k", + m: txn_m, + k: txn_k, + n: txn_n, + .. + } if *txn_m == m && *txn_k == k && *txn_n == n + ), + "txn[2] should be a matmul_q4_k ComputeSubmit with m={m} k={k} n={n}: {:?}", + txns[2] + ); + assert!( + matches!(&txns[3], Transaction::Read { label: "OUT", len, .. 
} if *len == out.len()), + "txn[3] should be a Read of OUT of len={}: {:?}", + out.len(), + txns[3] + ); +} + +#[test] +fn matmul_rejects_unaligned_k_dimension() { + let mock = MockSail::new(); + let mut out = [0u8; 16]; + let err = mock + .matmul_q4_k(&[], &[], &mut out, 1, 100, 1) + .expect_err("expected BadDims"); + assert!(matches!(err, Error::BadDims { qk, .. } if qk == QK_K)); +}