Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[submodule "external/ggml"]
path = external/ggml
url = https://github.com/ggml-org/ggml.git
shallow = true
branch = master
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@

[workspace]
resolver = "2"
members = ["src/runtime"]
members = [
"src/runtime",
"src/backends/ggml",
]

[workspace.package]
edition = "2021"
Expand Down
1 change: 1 addition & 0 deletions external/ggml
Submodule ggml added at ac6f7b
32 changes: 32 additions & 0 deletions src/backends/ggml/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2026 PopSolutions Cooperative

[package]
name = "ggml-spanker"
version = "0.1.0"
description = "GGML int4 matmul backend for the PopSolutions Sails."
keywords = ["ggml", "matmul", "popsolutions", "fpga"]
categories = ["hardware-support", "science"]

edition.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true
rust-version.workspace = true

[lib]
name = "ggml_spanker"
path = "src/lib.rs"

[dependencies]
spanker-runtime = { path = "../../runtime" }
thiserror = { workspace = true }

# NOTE on bindgen — the upstream GGML submodule under
# external/ggml/ is pinned by this PR so that PR #5b can wire
# bindgen + a build.rs over `wrapper.h` once SailMatmul gains a
# real-device path. Bindgen itself is deferred from this PR
# because its transitive deps (home → rustc-hash) require Rust
# >= 1.81, conflicting with ADR-001's 1.75 MSRV. Resolution path:
# (a) wait for bindgen / home to publish back-compat releases, or
# (b) the ADR-001 amendment revisits MSRV in light of FFI needs.
128 changes: 128 additions & 0 deletions src/backends/ggml/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 PopSolutions Cooperative

//! # ggml-spanker — GGML int4 matmul backend for the PopSolutions Sails
//!
//! Per ADR-001 (Rust runtime) and the cross-stream contract with
//! `popsolutions/MAST`. This crate exposes the [`MatmulInt4`]
//! trait — a Q4_K-shaped matrix-multiply primitive — together with
//! two implementations:
//!
//! - [`SailMatmul`]: the real-device path. Currently a stub
//! returning [`Error::NotImplemented`] until the kernel ABI gains
//! `SPANKER_IOC_WORK_SUBMIT` (deferred to PR #5b after ADR-003
//! pins the v1 ABI).
//! - [`MockSail`]: a host-side mock that records the AXI4 traffic
//! the matmul *would* issue, so unit tests can assert correctly-
//! shaped transactions without a real device. Will be displaced
//! for integration testing once Agent 1 exposes a queryable
//! `axi4_mem_model` cocotb harness in MAST (cross-stream issue
//! filed alongside this PR).
//!
//! ## Q4_K layout
//!
//! Mirrors upstream GGML's `block_q4_K` (256 weights packed into
//! 144 bytes, with 6-bit scales and mins). Constants are duplicated
//! here for now; PR #5b will verify them against `enum ggml_type`
//! exposed by the bindgen `ffi` module so they cannot drift silently.

#![warn(missing_docs)]
#![deny(unsafe_op_in_unsafe_fn)]

pub mod mock;
pub mod sail;

pub use mock::{MockSail, Transaction};
pub use sail::SailMatmul;

// NOTE: bindgen-derived FFI types over upstream GGML are landed
// in PR #5b alongside the real-device SailMatmul implementation.
// The GGML submodule at external/ggml/ is pinned by this PR so
// PR #5b's build.rs has a stable header source. See the
// bindgen note in Cargo.toml for the MSRV rationale.

/// Weights per Q4_K block; same value as upstream GGML's `QK_K`
/// constant.
pub const QK_K: usize = 256;

/// Size in bytes of one Q4_K block, matching upstream GGML's
/// `sizeof(block_q4_K)`.
///
/// Written as the sum of the block's parts so the 144-byte total
/// explains itself: 4 bytes of `d`/`dmin` half-floats, 12 bytes of
/// scales, and one nibble per weight (`QK_K / 2` = 128 bytes).
pub const Q4_K_BLOCK_BYTES: usize = 2 * 2 + 12 + QK_K / 2;

/// Errors returned by this crate.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// Caller passed dimensions that violate the Q4_K layout
/// contract (typically `k` not a multiple of [`QK_K`]).
#[error("bad dims: m={m} k={k} n={n}; require k % {qk} == 0")]
BadDims {
/// Number of output rows.
m: usize,
/// Reduction dimension.
k: usize,
/// Number of output columns.
n: usize,
/// Required block size (i.e. [`QK_K`]).
qk: usize,
},

/// The output buffer is smaller than the matmul requires.
#[error("output buffer too small: have {have} bytes, need {need}")]
OutputTooSmall {
/// Bytes the caller provided.
have: usize,
/// Bytes the matmul actually needs.
need: usize,
},

/// Real-device matmul is not yet wired up (waiting on
/// `SPANKER_IOC_WORK_SUBMIT`).
#[error("not implemented yet (waiting on SPANKER_IOC_WORK_SUBMIT)")]
NotImplemented,

/// Underlying runtime error (ioctl, open, etc.).
#[error(transparent)]
Runtime(#[from] spanker_runtime::Error),
}

/// Convenience alias for results returned by this crate: a
/// `std::result::Result` whose error type is fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Q4_K matrix multiplication primitive.
///
/// Inputs `a`, `b`, and `out` are raw byte slices in GGML's Q4_K
/// quantized layout. The matmul computes `out = a · b^T` in
/// row-major terms with shapes:
///
/// - `a`: `m × k` quantized weights (`k` MUST be a multiple of
///   [`QK_K`]).
/// - `b`: `k × n` quantized weights.
/// - `out`: `m × n` quantized weights, allocated by the caller.
///
/// NOTE(review): `a · b^T` with an `m × k` `a` implies `b` is laid
/// out as `n × k`, while the list above says `k × n`. One of the
/// two statements needs correcting — TODO confirm the intended
/// storage order of `b` before the real-device path lands.
pub trait MatmulInt4 {
/// Issue a Q4_K matmul against the underlying device.
///
/// # Arguments
///
/// - `a`, `b`: input operands as raw Q4_K bytes.
/// - `out`: caller-allocated destination buffer.
/// - `m`, `k`, `n`: output rows, reduction dimension, and output
///   columns respectively.
///
/// # Errors
///
/// Returns an [`Error`] when the request cannot be issued. For
/// example, [`MockSail`] returns [`Error::BadDims`] when `k` is
/// zero or not a multiple of [`QK_K`].
fn matmul_q4_k(
&self,
a: &[u8],
b: &[u8],
out: &mut [u8],
m: usize,
k: usize,
n: usize,
) -> Result<()>;
}

#[cfg(test)]
mod tests {
    use super::{Q4_K_BLOCK_BYTES, QK_K};

    /// Pins the duplicated Q4_K layout constants to the values used
    /// by upstream GGML so they cannot change unnoticed. PR #5b is
    /// expected to add a cross-check against the bindgen-derived
    /// `enum ggml_type` size table.
    #[test]
    fn q4_k_block_bytes_constant_matches_known_layout() {
        let pinned: [(usize, usize); 2] = [(Q4_K_BLOCK_BYTES, 144), (QK_K, 256)];
        for (actual, expected) in pinned {
            assert_eq!(actual, expected);
        }
    }
}
166 changes: 166 additions & 0 deletions src/backends/ggml/src/mock.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 PopSolutions Cooperative

//! Host-side mock implementation of [`crate::MatmulInt4`].
//!
//! Records the AXI4 traffic the matmul *would* issue so unit tests
//! can assert correctly-shaped transactions without a real device.
//! Will be displaced for real integration testing once Agent 1
//! exposes a queryable `axi4_mem_model` cocotb harness in MAST
//! (cross-stream issue filed alongside this PR).

use std::sync::Mutex;

use crate::{Error, MatmulInt4, Result, QK_K};

/// One AXI4 transaction recorded by [`MockSail`].
///
/// Variants carry only the metadata of a transfer (address, length,
/// dimensions); no payload bytes are stored. Values compare with
/// `==`, so tests can match on them or compare them directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Transaction {
/// Write `len` bytes to `dev_addr`. The `label` field is a
/// human-readable hint for assertions ("A", "B", etc.).
Write {
/// Target device-side BAR offset.
dev_addr: u64,
/// Payload length in bytes.
len: usize,
/// Operand label.
label: &'static str,
},
/// Submit a compute command at `dev_addr` (i.e. the cmd FIFO).
/// The `m`/`k`/`n` fields echo the dimensions the caller passed.
ComputeSubmit {
/// Target device-side cmd-FIFO offset.
dev_addr: u64,
/// Operation kind, e.g. `"matmul_q4_k"`.
kind: &'static str,
/// Output rows.
m: usize,
/// Reduction dimension.
k: usize,
/// Output columns.
n: usize,
},
/// Read `len` bytes from `dev_addr`. The `label` field is a
/// human-readable hint for assertions (e.g. "OUT").
Read {
/// Source device-side BAR offset.
dev_addr: u64,
/// Payload length in bytes.
len: usize,
/// Operand label.
label: &'static str,
},
}

// Mock device offsets. Tests are meant to assert on the *shape* of
// each `Transaction` (variant, label, kind), not on these raw
// addresses, so the values are opaque placeholders. The real
// driver is expected to expose them through a future
// `SPANKER_IOC_BAR_INFO` ioctl (PR #5b).

// Destination of the operand "A" write.
const MOCK_BAR_A: u64 = 0x0001_0000;
// Destination of the operand "B" write.
const MOCK_BAR_B: u64 = 0x0002_0000;
// Source of the "OUT" read-back.
const MOCK_BAR_OUT: u64 = 0x0003_0000;
// Command FIFO that receives the compute submission.
const MOCK_CMD_FIFO: u64 = 0x0000_4000;

/// Host-side mock that records AXI4 transactions a real
/// `SailMatmul` would issue.
///
/// The log lives behind a [`Mutex`], so recording works through a
/// shared `&self` reference. `Debug` is derived so the mock can be
/// printed in test diagnostics and embedded in other `Debug` types.
#[derive(Debug, Default)]
pub struct MockSail {
    /// Recorded transactions, oldest first.
    txns: Mutex<Vec<Transaction>>,
}

impl MockSail {
    /// Construct an empty mock with no recorded transactions.
    pub fn new() -> Self {
        Self::default()
    }

    /// Snapshot the recorded transactions in submission order.
    ///
    /// The returned vector is a copy; later calls on the mock do
    /// not modify it.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock is poisoned, i.e. another thread
    /// panicked while holding it.
    pub fn transactions(&self) -> Vec<Transaction> {
        self.txns
            .lock()
            .expect("MockSail txn lock poisoned")
            .clone()
    }

    /// Append `batch` to the log under a single lock acquisition.
    ///
    /// Recording a whole operation at once keeps its transactions
    /// contiguous: concurrent callers cannot interleave with it and
    /// [`MockSail::transactions`] never observes a partial sequence.
    fn record(&self, batch: impl IntoIterator<Item = Transaction>) {
        self.txns
            .lock()
            .expect("MockSail txn lock poisoned")
            .extend(batch);
    }
}

impl MatmulInt4 for MockSail {
    /// Record the traffic of one Q4_K matmul without computing
    /// anything.
    ///
    /// On success exactly four transactions are appended, in this
    /// order: write "A" (`a.len()` bytes), write "B" (`b.len()`
    /// bytes), a `"matmul_q4_k"` compute submission carrying
    /// `m`/`k`/`n`, and a read of "OUT" (`out.len()` bytes). The
    /// operand contents are never inspected and `out` is left
    /// untouched.
    ///
    /// # Errors
    ///
    /// Returns [`Error::BadDims`] when `k` is zero or not a
    /// multiple of [`QK_K`]; nothing is recorded in that case.
    fn matmul_q4_k(
        &self,
        a: &[u8],
        b: &[u8],
        out: &mut [u8],
        m: usize,
        k: usize,
        n: usize,
    ) -> Result<()> {
        if k == 0 || k % QK_K != 0 {
            return Err(Error::BadDims { m, k, n, qk: QK_K });
        }

        // Build the full sequence first, then commit it atomically.
        self.record([
            Transaction::Write {
                dev_addr: MOCK_BAR_A,
                len: a.len(),
                label: "A",
            },
            Transaction::Write {
                dev_addr: MOCK_BAR_B,
                len: b.len(),
                label: "B",
            },
            Transaction::ComputeSubmit {
                dev_addr: MOCK_CMD_FIFO,
                kind: "matmul_q4_k",
                m,
                k,
                n,
            },
            Transaction::Read {
                dev_addr: MOCK_BAR_OUT,
                len: out.len(),
                label: "OUT",
            },
        ]);

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A successful matmul must log write-A, write-B, the compute
    /// submission, and the OUT read-back, in that order.
    #[test]
    fn mock_records_four_transactions_in_order() {
        let sail = MockSail::new();
        let lhs = [0u8; 32];
        let rhs = [0u8; 64];
        let mut result = [0u8; 64];

        sail.matmul_q4_k(&lhs, &rhs, &mut result, 4, QK_K, 4)
            .expect("mock matmul should succeed on aligned k");

        let recorded = sail.transactions();
        assert_eq!(recorded.len(), 4);
        assert!(matches!(
            recorded.as_slice(),
            [
                Transaction::Write { label: "A", .. },
                Transaction::Write { label: "B", .. },
                Transaction::ComputeSubmit {
                    kind: "matmul_q4_k",
                    ..
                },
                Transaction::Read { label: "OUT", .. },
            ]
        ));
    }

    /// A reduction dimension that is not a multiple of `QK_K` must
    /// be rejected with `BadDims`.
    #[test]
    fn mock_rejects_unaligned_k() {
        let sail = MockSail::new();
        let mut scratch = [0u8; 16];

        let outcome = sail.matmul_q4_k(&[], &[], &mut scratch, 1, 100, 1);

        let err = outcome.expect_err("expected BadDims");
        assert!(matches!(err, Error::BadDims { qk: QK_K, .. }));
    }
}
Loading
Loading