From a353279030dd2a95ee91d29610086eaa3879c7c1 Mon Sep 17 00:00:00 2001 From: kleopasevan Date: Wed, 29 Apr 2026 19:43:00 +0700 Subject: [PATCH] feat(storage): add single-node SPDK lvol backend --- .gitignore | 1 + apps/agent/src/features/storage/iscsi.rs | 25 + apps/agent/src/features/storage/local_file.rs | 25 + apps/agent/src/features/storage/mod.rs | 1 + apps/agent/src/features/storage/routes.rs | 36 +- apps/agent/src/features/storage/spdk_lvol.rs | 514 ++++++++++++++++++ apps/agent/src/main.rs | 30 + .../manager/src/features/storage/agent_rpc.rs | 12 +- .../src/features/storage/backends/mod.rs | 1 + .../features/storage/backends/spdk_lvol.rs | 197 +++++++ apps/manager/src/features/storage/config.rs | 24 + apps/manager/src/features/storage/registry.rs | 12 + .../src/features/storage/rootfs_allocator.rs | 9 +- apps/manager/src/features/vms/service.rs | 65 ++- crates/nexus-storage/src/error.rs | 9 + crates/nexus-storage/src/host.rs | 4 + crates/nexus-storage/src/lib.rs | 3 + crates/nexus-storage/src/spdk.rs | 234 ++++++++ crates/nexus-storage/src/types.rs | 3 + docs/runbooks/spdk-lvol-smoke.md | 59 ++ .../superpowers/plans/2026-04-29-spdk-lvol.md | 101 ++++ .../specs/2026-04-29-spdk-raft-hci-design.md | 173 ++++++ scripts/spdk-dev-bootstrap.sh | 281 ++++++++++ scripts/spdk-lvol-smoke.sh | 33 ++ 24 files changed, 1814 insertions(+), 38 deletions(-) create mode 100644 apps/agent/src/features/storage/spdk_lvol.rs create mode 100644 apps/manager/src/features/storage/backends/spdk_lvol.rs create mode 100644 crates/nexus-storage/src/spdk.rs create mode 100644 docs/runbooks/spdk-lvol-smoke.md create mode 100644 docs/superpowers/plans/2026-04-29-spdk-lvol.md create mode 100644 docs/superpowers/specs/2026-04-29-spdk-raft-hci-design.md create mode 100755 scripts/spdk-dev-bootstrap.sh create mode 100755 scripts/spdk-lvol-smoke.sh diff --git a/.gitignore b/.gitignore index e7f9884..3d3e2fb 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,7 @@ scripts/airgap/output/ # Runtime data .data/ +.worktrees/ # Internal dev notes notes/ diff --git a/apps/agent/src/features/storage/iscsi.rs b/apps/agent/src/features/storage/iscsi.rs index 8f309eb..a50e881 100644 --- a/apps/agent/src/features/storage/iscsi.rs +++ b/apps/agent/src/features/storage/iscsi.rs @@ -166,6 +166,10 @@ impl HostBackend for IscsiHostBackend { Ok(()) } + async fn resize2fs(&self, attached: &AttachedPath) -> Result<(), StorageError> { + run_resize2fs(attached.path()).await + } + async fn read_snapshot( &self, snap: &VolumeSnapshotHandle, @@ -203,6 +207,27 @@ impl HostBackend for IscsiHostBackend { } } +async fn run_resize2fs(path: &Path) -> Result<(), StorageError> { + let _ = tokio::process::Command::new("e2fsck") + .args(["-f", "-y"]) + .arg(path) + .output() + .await?; + let out = tokio::process::Command::new("resize2fs") + .arg(path) + .output() + .await?; + if out.status.success() { + Ok(()) + } else { + Err(StorageError::InvalidLocator(format!( + "resize2fs {} failed: {}", + path.display(), + String::from_utf8_lossy(&out.stderr) + ))) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/apps/agent/src/features/storage/local_file.rs b/apps/agent/src/features/storage/local_file.rs index 1dfac38..e0caf82 100644 --- a/apps/agent/src/features/storage/local_file.rs +++ b/apps/agent/src/features/storage/local_file.rs @@ -48,6 +48,10 @@ impl HostBackend for LocalFileHostBackend { Ok(()) } + async fn resize2fs(&self, attached: &AttachedPath) -> Result<(), StorageError> { + run_resize2fs(attached.path()).await + } + async fn read_snapshot( &self, snap: &VolumeSnapshotHandle, @@ -58,6 +62,27 @@ impl HostBackend for LocalFileHostBackend { } } +async fn run_resize2fs(path: &Path) -> Result<(), StorageError> { + let _ = tokio::process::Command::new("e2fsck") + .args(["-f", "-y"]) + .arg(path) + .output() + .await?; + let out = tokio::process::Command::new("resize2fs") + .arg(path) + .output() + .await?; + if out.status.success() { + Ok(()) + } else { + Err(StorageError::InvalidLocator(format!( + "resize2fs {} failed: {}", + path.display(), + String::from_utf8_lossy(&out.stderr) + ))) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/apps/agent/src/features/storage/mod.rs b/apps/agent/src/features/storage/mod.rs index 8d12ecc..98a783f 100644 --- a/apps/agent/src/features/storage/mod.rs +++ b/apps/agent/src/features/storage/mod.rs @@ -4,3 +4,4 @@ pub mod local_file; pub mod registry; pub mod routes; pub mod s3; +pub mod spdk_lvol; diff --git a/apps/agent/src/features/storage/routes.rs b/apps/agent/src/features/storage/routes.rs index d99a26f..8720070 100644 --- a/apps/agent/src/features/storage/routes.rs +++ b/apps/agent/src/features/storage/routes.rs @@ -123,30 +123,26 @@ pub async fn populate( #[derive(Deserialize)] pub struct Resize2fsReq { + pub backend_kind: BackendKind, pub attached: AttachedPath, } -pub async fn resize2fs(Json(req): Json) -> impl IntoResponse { - let path = req.attached.path(); - let _ = tokio::process::Command::new("e2fsck") - .args(["-f", "-y"]) - .arg(path) - .output() - .await - .ok(); - let resize = tokio::process::Command::new("resize2fs") - .arg(path) - .output() - .await; - match resize { - Ok(o) if o.status.success() => { - (StatusCode::OK, Json(serde_json::json!({}))).into_response() +pub async fn resize2fs( + State(s): State>, + Json(req): Json, +) -> impl IntoResponse { + let backend = match s.registry.get(req.backend_kind) { + Some(b) => b, + None => { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "unsupported backend kind"})), + ) + .into_response() } - Ok(o) => ( - StatusCode::INTERNAL_SERVER_ERROR, - Json(serde_json::json!({"stderr": String::from_utf8_lossy(&o.stderr).to_string()})), - ) - .into_response(), + }; + match backend.resize2fs(&req.attached).await { + Ok(()) => (StatusCode::OK, Json(serde_json::json!({}))).into_response(), Err(e) => ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})), diff --git a/apps/agent/src/features/storage/spdk_lvol.rs b/apps/agent/src/features/storage/spdk_lvol.rs new file mode 100644 index 0000000..625f003 --- /dev/null +++ b/apps/agent/src/features/storage/spdk_lvol.rs @@ -0,0 +1,514 @@ +//! SPDK lvol host backend. +//! +//! Attach creates an SPDK vhost-blk controller for the lvol and returns the +//! Unix socket Firecracker must use as a vhost-user block device. + +use nexus_storage::{ + spdk_vhost_controller_name, AttachedPath, BackendKind, HostBackend, SpdkJsonRpcClient, + SpdkLvolLocator, StorageError, VolumeHandle, VolumeSnapshotHandle, +}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use tokio::io::AsyncWriteExt; +use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore}; + +#[derive(Clone)] +pub struct SpdkLvolHostBackend { + pub client: SpdkJsonRpcClient, + pub vhost_socket_dir: PathBuf, + #[cfg_attr(not(test), allow(dead_code))] + pub nbd_devices: Vec, + nbd_pool: Arc, + attached_lvols: Arc>>, +} + +impl SpdkLvolHostBackend { + #[cfg(test)] + pub fn new(rpc_socket: impl Into, vhost_socket_dir: impl Into) -> Self { + Self::with_import_nbd_device(rpc_socket, vhost_socket_dir, "/dev/nbd0") + } + + #[cfg(test)] + pub fn with_import_nbd_device( + rpc_socket: impl Into, + vhost_socket_dir: impl Into, + import_nbd_device: impl Into, + ) -> Self { + Self::with_nbd_devices(rpc_socket, vhost_socket_dir, vec![import_nbd_device.into()]) + } + + pub fn with_nbd_devices( + rpc_socket: impl Into, + vhost_socket_dir: impl Into, + nbd_devices: Vec, + ) -> Self { + let nbd_devices = if nbd_devices.is_empty() { + vec![PathBuf::from("/dev/nbd0")] + } else { + nbd_devices + }; + Self { + client: SpdkJsonRpcClient::new(rpc_socket), + vhost_socket_dir: vhost_socket_dir.into(), + nbd_pool: Arc::new(NbdDevicePool::new(nbd_devices.clone())), + nbd_devices, + attached_lvols: Arc::new(Mutex::new(HashMap::new())), + } + } + + fn socket_for_controller(&self, ctrlr: &str) -> PathBuf { + self.vhost_socket_dir.join(ctrlr) + } +} + +#[async_trait::async_trait] +impl HostBackend for SpdkLvolHostBackend { + fn kind(&self) -> BackendKind { + BackendKind::SpdkLvol + } + + async fn attach(&self, volume: &VolumeHandle) -> Result { + let locator = SpdkLvolLocator::from_locator_str(&volume.locator)?; + let ctrlr = spdk_vhost_controller_name(volume.volume_id); + self.client + .vhost_create_blk_controller(&ctrlr, &locator.lvol_uuid) + .await?; + let socket = self.socket_for_controller(&ctrlr); + self.attached_lvols + .lock() + .await + .insert(socket.clone(), locator.lvol_uuid); + Ok(AttachedPath::VhostUserSock(socket)) + } + + async fn detach( + &self, + volume: &VolumeHandle, + _attached: AttachedPath, + ) -> Result<(), StorageError> { + let ctrlr = spdk_vhost_controller_name(volume.volume_id); + let result = self.client.vhost_delete_controller(&ctrlr).await; + let socket = self.socket_for_controller(&ctrlr); + self.attached_lvols.lock().await.remove(&socket); + result + } + + async fn populate_streaming( + &self, + attached: &AttachedPath, + source: &Path, + target_size_bytes: u64, + ) -> Result<(), StorageError> { + let lvol_uuid = self.lvol_uuid_for_attachment(attached).await?; + let exported = self.export_lvol_to_nbd(&lvol_uuid).await?; + let copy_result = copy_image_to_nbd(source, exported.device(), target_size_bytes).await; + let stop_result = self.stop_nbd_export(&exported).await; + copy_result?; + stop_result + } + + async fn resize2fs(&self, attached: &AttachedPath) -> Result<(), StorageError> { + let lvol_uuid = self.lvol_uuid_for_attachment(attached).await?; + let exported = self.export_lvol_to_nbd(&lvol_uuid).await?; + let resize_result = run_resize2fs(exported.device()).await; + let stop_result = self.stop_nbd_export(&exported).await; + resize_result?; + stop_result + } + + async fn read_snapshot( + &self, + snap: &VolumeSnapshotHandle, + ) -> Result, StorageError> { + let locator = SpdkLvolLocator::from_locator_str(&snap.locator)?; + let exported = self.export_lvol_to_nbd(&locator.lvol_uuid).await?; + let file = match tokio::fs::File::open(exported.device()).await { + Ok(file) => file, + Err(err) => { + let _ = self.stop_nbd_export(&exported).await; + return Err(StorageError::Io(err)); + } + }; + Ok(Box::new(SpdkNbdSnapshotReader { + inner: file, + client: self.client.clone(), + export: Some(exported), + })) + } +} + +struct SpdkNbdSnapshotReader { + inner: tokio::fs::File, + client: SpdkJsonRpcClient, + export: Option, +} + +impl tokio::io::AsyncRead for SpdkNbdSnapshotReader { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> std::task::Poll> { + Pin::new(&mut self.inner).poll_read(cx, buf) + } +} + +impl Drop for SpdkNbdSnapshotReader { + fn drop(&mut self) { + let Some(export) = self.export.take() else { + return; + }; + let client = self.client.clone(); + let nbd_device = export.device().to_path_buf(); + tokio::spawn(async move { + let _ = client.nbd_stop_disk(&nbd_device).await; + let _ = wait_for_nbd_device_released(&nbd_device).await; + drop(export); + }); + } +} + +impl SpdkLvolHostBackend { + async fn lvol_uuid_for_attachment( + &self, + attached: &AttachedPath, + ) -> Result { + let AttachedPath::VhostUserSock(socket) = attached else { + return Err(StorageError::InvalidLocator( + "spdk_lvol requires a VhostUserSock attachment".into(), + )); + }; + self.attached_lvols + .lock() + .await + .get(socket) + .cloned() + .ok_or_else(|| { + StorageError::InvalidLocator(format!( + "no SPDK lvol is registered for vhost socket {}", + socket.display() + )) + }) + } + + async fn export_lvol_to_nbd(&self, lvol_uuid: &str) -> Result { + let lease = self.nbd_pool.acquire().await?; + let exported = self.client.nbd_start_disk(lvol_uuid, lease.path()).await?; + if let Err(err) = wait_for_nbd_device_size(&exported).await { + let _ = self.client.nbd_stop_disk(&exported).await; + return Err(err); + } + Ok(NbdExport { + device: exported, + _lease: lease, + }) + } + + async fn stop_nbd_export(&self, exported: &NbdExport) -> Result<(), StorageError> { + self.client.nbd_stop_disk(exported.device()).await?; + wait_for_nbd_device_released(exported.device()).await + } +} + +struct NbdExport { + device: PathBuf, + _lease: NbdLease, +} + +impl NbdExport { + fn device(&self) -> &Path { + &self.device + } +} + +struct NbdDevicePool { + available: std::sync::Mutex>, + semaphore: Arc, +} + +impl NbdDevicePool { + fn new(devices: Vec) -> Self { + let capacity = devices.len(); + Self { + available: std::sync::Mutex::new(devices), + semaphore: Arc::new(Semaphore::new(capacity)), + } + } + + async fn acquire(self: &Arc) -> Result { + let permit = self.semaphore.clone().acquire_owned().await.map_err(|e| { + StorageError::InvalidLocator(format!("SPDK NBD device pool closed: {e}")) + })?; + let path = self + .available + .lock() + .expect("NBD device pool mutex poisoned") + .pop() + .ok_or_else(|| StorageError::InvalidLocator("SPDK NBD pool exhausted".into()))?; + Ok(NbdLease { + path: Some(path), + pool: self.clone(), + _permit: Some(permit), + }) + } +} + +struct NbdLease { + path: Option, + pool: Arc, + _permit: Option, +} + +impl NbdLease { + fn path(&self) -> &Path { + self.path + .as_deref() + .expect("NBD lease path already released") + } +} + +impl Drop for NbdLease { + fn drop(&mut self) { + if let Some(path) = self.path.take() { + self.pool + .available + .lock() + .expect("NBD device pool mutex poisoned") + .push(path); + } + } +} + +async fn copy_image_to_nbd( + source: &Path, + nbd_device: &Path, + target_size_bytes: u64, +) -> Result<(), StorageError> { + let source_len = tokio::fs::metadata(source).await?.len(); + if source_len > target_size_bytes { + return Err(StorageError::InvalidLocator(format!( + "source image {} is larger than target SPDK lvol: {} > {} bytes", + source.display(), + source_len, + target_size_bytes + ))); + } + + let mut src = tokio::fs::File::open(source).await?; + let mut dst = tokio::fs::OpenOptions::new() + .write(true) + .open(nbd_device) + .await?; + tokio::io::copy(&mut src, &mut dst).await?; + dst.flush().await?; + dst.sync_all().await?; + Ok(()) +} + +async fn wait_for_nbd_device_size(nbd_device: &Path) -> Result<(), StorageError> { + for _ in 0..50 { + let output = tokio::process::Command::new("blockdev") + .arg("--getsize64") + .arg(nbd_device) + .output() + .await?; + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + if stdout.trim().parse::().unwrap_or(0) > 0 { + return Ok(()); + } + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + } + + Err(StorageError::backend(std::io::Error::other(format!( + "SPDK NBD device {} did not report a nonzero size", + nbd_device.display() + )))) +} + +async fn wait_for_nbd_device_released(nbd_device: &Path) -> Result<(), StorageError> { + for _ in 0..50 { + let output = tokio::process::Command::new("blockdev") + .arg("--getsize64") + .arg(nbd_device) + .output() + .await?; + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + if stdout.trim().parse::().unwrap_or(u64::MAX) == 0 { + return Ok(()); + } + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + } + + Err(StorageError::backend(std::io::Error::other(format!( + "SPDK NBD device {} did not release", + nbd_device.display() + )))) +} + +async fn run_resize2fs(path: &Path) -> Result<(), StorageError> { + let _ = tokio::process::Command::new("e2fsck") + .args(["-f", "-y"]) + .arg(path) + .output() + .await?; + let out = tokio::process::Command::new("resize2fs") + .arg(path) + .output() + .await?; + if out.status.success() { + Ok(()) + } else { + Err(StorageError::InvalidLocator(format!( + "resize2fs {} failed: {}", + path.display(), + String::from_utf8_lossy(&out.stderr) + ))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_storage::{BackendInstanceId, SpdkLvolLocator, VolumeHandle, VolumeSnapshotHandle}; + use tokio::io::AsyncReadExt; + use uuid::Uuid; + + #[test] + fn socket_path_uses_controller_name_under_configured_dir() { + let backend = SpdkLvolHostBackend::new("/run/spdk/rpc.sock", "/run/spdk/vhost"); + let p = backend.socket_for_controller("nq.abc"); + assert_eq!(p, PathBuf::from("/run/spdk/vhost/nq.abc")); + } + + #[test] + fn import_nbd_device_is_configurable() { + let backend = SpdkLvolHostBackend::with_import_nbd_device( + "/run/spdk/rpc.sock", + "/run/spdk/vhost", + "/dev/nbd7", + ); + assert_eq!(backend.nbd_devices, vec![PathBuf::from("/dev/nbd7")]); + } + + #[tokio::test] + async fn nbd_pool_hands_out_unique_devices_then_reuses() { + let pool = Arc::new(NbdDevicePool::new(vec![ + PathBuf::from("/dev/nbd0"), + PathBuf::from("/dev/nbd1"), + ])); + + let first = pool.acquire().await.unwrap(); + let second = pool.acquire().await.unwrap(); + assert_ne!(first.path(), second.path()); + let first_path = first.path().to_path_buf(); + drop(first); + + let third = pool.acquire().await.unwrap(); + assert_eq!(third.path(), first_path.as_path()); + } + + #[tokio::test] + #[ignore = "requires a running SPDK process, pre-created lvol store, and loaded nbd module"] + async fn spdk_lvol_real_smoke_create_import_snapshot_read_destroy() { + let rpc_socket = std::env::var("AGENT_SPDK_IT_RPC_SOCKET") + .or_else(|_| std::env::var("AGENT_SPDK_RPC_SOCKET")) + .expect("set AGENT_SPDK_IT_RPC_SOCKET or AGENT_SPDK_RPC_SOCKET"); + let lvs_name = std::env::var("AGENT_SPDK_IT_LVS_NAME").expect("set AGENT_SPDK_IT_LVS_NAME"); + let vhost_socket_dir = + std::env::var("AGENT_SPDK_IT_VHOST_SOCKET_DIR").unwrap_or_else(|_| "/var/tmp".into()); + let nbd_devices = std::env::var("AGENT_SPDK_IT_NBD_DEVICES") + .unwrap_or_else(|_| "/dev/nbd0,/dev/nbd1".into()) + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(PathBuf::from) + .collect::>(); + + let client = SpdkJsonRpcClient::new(&rpc_socket); + let backend = + SpdkLvolHostBackend::with_nbd_devices(&rpc_socket, vhost_socket_dir, nbd_devices); + + let volume_id = Uuid::new_v4(); + let volume_name = format!("nq-it-{}", volume_id.simple()); + let snapshot_id = Uuid::new_v4(); + let snapshot_name = format!("nq-it-snap-{}", snapshot_id.simple()); + let size_bytes = 16 * 1024 * 1024; + let lvol_uuid = client + .bdev_lvol_create(&lvs_name, &volume_name, size_bytes) + .await + .expect("create lvol"); + + let volume_locator = SpdkLvolLocator { + lvs_name: lvs_name.clone(), + lvol_name: volume_name.clone(), + lvol_uuid: lvol_uuid.clone(), + size_bytes, + }; + let volume = VolumeHandle { + volume_id, + backend_id: BackendInstanceId(Uuid::new_v4()), + backend_kind: BackendKind::SpdkLvol, + locator: volume_locator.to_locator_string().unwrap(), + size_bytes, + }; + + let result = async { + let dir = tempfile::tempdir().unwrap(); + let source = dir.path().join("source.raw"); + let payload = deterministic_payload(4 * 1024 * 1024); + tokio::fs::write(&source, &payload).await.unwrap(); + + let attached = backend.attach(&volume).await.expect("attach"); + backend + .populate_streaming(&attached, &source, size_bytes) + .await + .expect("populate via NBD"); + + let snap_uuid = client + .bdev_lvol_snapshot(&lvol_uuid, &snapshot_name) + .await + .expect("snapshot"); + let snap_locator = SpdkLvolLocator { + lvs_name: lvs_name.clone(), + lvol_name: snapshot_name.clone(), + lvol_uuid: snap_uuid.clone(), + size_bytes, + }; + let snap = VolumeSnapshotHandle { + snapshot_id, + source_volume_id: volume_id, + backend_id: volume.backend_id, + backend_kind: BackendKind::SpdkLvol, + locator: snap_locator.to_locator_string().unwrap(), + }; + + let mut reader = backend.read_snapshot(&snap).await.expect("read snapshot"); + let mut got = vec![0u8; payload.len()]; + reader.read_exact(&mut got).await.expect("read payload"); + assert_eq!(got, payload); + drop(reader); + + client + .bdev_lvol_delete(&snap_uuid) + .await + .expect("delete snapshot"); + backend.detach(&volume, attached).await.expect("detach"); + } + .await; + + let _ = client.bdev_lvol_delete(&lvol_uuid).await; + result + } + + fn deterministic_payload(len: usize) -> Vec { + (0..len) + .map(|i| ((i as u64 * 1103515245 + 12345) >> 16) as u8) + .collect() + } +} diff --git a/apps/agent/src/main.rs b/apps/agent/src/main.rs index 237b1dc..5d86535 100644 --- a/apps/agent/src/main.rs +++ b/apps/agent/src/main.rs @@ -29,6 +29,36 @@ async fn main() -> anyhow::Result<()> { let iscsi_host = std::sync::Arc::new(features::storage::iscsi::IscsiHostBackend); storage_registry.register_for(nexus_storage::BackendKind::Iscsi, iscsi_host.clone()); storage_registry.register_for(nexus_storage::BackendKind::TrueNasIscsi, iscsi_host); + if let Ok(rpc_socket) = std::env::var("AGENT_SPDK_RPC_SOCKET") { + let vhost_socket_dir = + std::env::var("AGENT_SPDK_VHOST_SOCKET_DIR").unwrap_or_else(|_| "/var/tmp".into()); + let nbd_devices = std::env::var("AGENT_SPDK_NBD_DEVICES") + .ok() + .map(|raw| { + raw.split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(std::path::PathBuf::from) + .collect::>() + }) + .filter(|devices| !devices.is_empty()) + .unwrap_or_else(|| { + vec![std::path::PathBuf::from( + std::env::var("AGENT_SPDK_IMPORT_NBD_DEVICE") + .unwrap_or_else(|_| "/dev/nbd0".into()), + )] + }); + storage_registry.register_for( + nexus_storage::BackendKind::SpdkLvol, + std::sync::Arc::new( + features::storage::spdk_lvol::SpdkLvolHostBackend::with_nbd_devices( + rpc_socket, + vhost_socket_dir, + nbd_devices, + ), + ), + ); + } let state = AppState { run_dir: std::env::var("FC_RUN_DIR").unwrap_or_else(|_| "/srv/fc".into()), bridge: std::env::var("FC_BRIDGE").unwrap_or_else(|_| "fcbr0".into()), diff --git a/apps/manager/src/features/storage/agent_rpc.rs b/apps/manager/src/features/storage/agent_rpc.rs index 95e776c..33d67ec 100644 --- a/apps/manager/src/features/storage/agent_rpc.rs +++ b/apps/manager/src/features/storage/agent_rpc.rs @@ -100,13 +100,21 @@ pub async fn agent_populate( #[derive(Serialize)] struct Resize2fsReq<'a> { + backend_kind: BackendKind, attached: &'a AttachedPath, } -pub async fn agent_resize2fs(host_addr: &str, attached: &AttachedPath) -> Result<()> { +pub async fn agent_resize2fs( + host_addr: &str, + backend_kind: BackendKind, + attached: &AttachedPath, +) -> Result<()> { let resp = Client::new() .post(agent_url(host_addr, "/v1/storage/resize2fs")) - .json(&Resize2fsReq { attached }) + .json(&Resize2fsReq { + backend_kind, + attached, + }) .send() .await .with_context(|| format!("POST /v1/storage/resize2fs to {host_addr}"))?; diff --git a/apps/manager/src/features/storage/backends/mod.rs b/apps/manager/src/features/storage/backends/mod.rs index 186886d..9f0fd4c 100644 --- a/apps/manager/src/features/storage/backends/mod.rs +++ b/apps/manager/src/features/storage/backends/mod.rs @@ -1,5 +1,6 @@ pub mod iscsi_generic; pub mod local_file; +pub mod spdk_lvol; pub mod truenas_iscsi; #[cfg(test)] diff --git a/apps/manager/src/features/storage/backends/spdk_lvol.rs b/apps/manager/src/features/storage/backends/spdk_lvol.rs new file mode 100644 index 0000000..a84d998 --- /dev/null +++ b/apps/manager/src/features/storage/backends/spdk_lvol.rs @@ -0,0 +1,197 @@ +//! SPDK logical-volume control-plane backend. +//! +//! This first slice provisions lvols and snapshots through SPDK JSON-RPC. Image +//! import is intentionally not implemented yet because a vhost-user socket is a +//! Firecracker transport, not a writable block path. + +use nexus_storage::{ + BackendInstanceId, BackendKind, Capabilities, ControlPlaneBackend, CreateOpts, + SpdkJsonRpcClient, SpdkLvolLocator, StorageError, VolumeHandle, VolumeSnapshotHandle, +}; +use serde::Deserialize; +use std::path::{Path, PathBuf}; +use uuid::Uuid; + +#[derive(Debug, Clone, Deserialize)] +pub struct SpdkLvolConfig { + pub rpc_socket: PathBuf, + pub lvs_name: String, + #[allow(dead_code)] + #[serde(default = "default_vhost_socket_dir")] + pub vhost_socket_dir: PathBuf, +} + +fn default_vhost_socket_dir() -> PathBuf { + PathBuf::from("/var/tmp") +} + +pub struct SpdkLvolControlPlaneBackend { + pub id: BackendInstanceId, + pub config: SpdkLvolConfig, + pub client: SpdkJsonRpcClient, +} + +impl SpdkLvolControlPlaneBackend { + pub fn new(id: BackendInstanceId, config: SpdkLvolConfig) -> Self { + let client = SpdkJsonRpcClient::new(config.rpc_socket.clone()); + Self { id, config, client } + } +} + +#[async_trait::async_trait] +impl ControlPlaneBackend for SpdkLvolControlPlaneBackend { + fn kind(&self) -> BackendKind { + BackendKind::SpdkLvol + } + + fn capabilities(&self) -> Capabilities { + Capabilities { + supports_native_snapshots: true, + supports_concurrent_attach: false, + supports_live_migration: false, + supports_clone_from_image: false, + } + } + + async fn provision(&self, opts: CreateOpts) -> Result { + let volume_id = Uuid::new_v4(); + let lvol_name = spdk_name(&opts.name, volume_id); + let lvol_uuid = self + .client + .bdev_lvol_create(&self.config.lvs_name, &lvol_name, opts.size_bytes) + .await?; + let locator = SpdkLvolLocator { + lvs_name: self.config.lvs_name.clone(), + lvol_name, + lvol_uuid, + size_bytes: opts.size_bytes, + }; + Ok(VolumeHandle { + volume_id, + backend_id: self.id, + backend_kind: BackendKind::SpdkLvol, + locator: locator.to_locator_string()?, + size_bytes: opts.size_bytes, + }) + } + + async fn destroy(&self, handle: VolumeHandle) -> Result<(), StorageError> { + let locator = SpdkLvolLocator::from_locator_str(&handle.locator)?; + self.client.bdev_lvol_delete(&locator.lvol_uuid).await + } + + async fn clone_from_image( + &self, + _source_image: &Path, + _opts: CreateOpts, + ) -> Result { + Err(StorageError::NotSupported( + "spdk_lvol clone_from_image needs an image import path (NBD or bdev copy)".into(), + )) + } + + async fn snapshot( + &self, + volume: &VolumeHandle, + name: &str, + ) -> Result { + let locator = SpdkLvolLocator::from_locator_str(&volume.locator)?; + let snapshot_id = Uuid::new_v4(); + let snapshot_name = spdk_name(name, snapshot_id); + let snapshot_uuid = self + .client + .bdev_lvol_snapshot(&locator.lvol_uuid, &snapshot_name) + .await?; + let snap_locator = SpdkLvolLocator { + lvs_name: locator.lvs_name, + lvol_name: snapshot_name, + lvol_uuid: snapshot_uuid, + size_bytes: locator.size_bytes, + }; + Ok(VolumeSnapshotHandle { + snapshot_id, + source_volume_id: volume.volume_id, + backend_id: self.id, + backend_kind: BackendKind::SpdkLvol, + locator: snap_locator.to_locator_string()?, + }) + } + + async fn clone_from_snapshot( + &self, + snap: &VolumeSnapshotHandle, + ) -> Result { + let snap_locator = SpdkLvolLocator::from_locator_str(&snap.locator)?; + let volume_id = Uuid::new_v4(); + let lvol_name = spdk_name("clone", volume_id); + let lvol_uuid = self + .client + .bdev_lvol_clone(&snap_locator.lvol_uuid, &lvol_name) + .await?; + let locator = SpdkLvolLocator { + lvs_name: snap_locator.lvs_name, + lvol_name, + lvol_uuid, + size_bytes: snap_locator.size_bytes, + }; + Ok(VolumeHandle { + volume_id, + backend_id: self.id, + backend_kind: BackendKind::SpdkLvol, + locator: locator.to_locator_string()?, + size_bytes: snap_locator.size_bytes, + }) + } + + async fn delete_snapshot(&self, snap: VolumeSnapshotHandle) -> Result<(), StorageError> { + let locator = SpdkLvolLocator::from_locator_str(&snap.locator)?; + self.client.bdev_lvol_delete(&locator.lvol_uuid).await + } +} + +fn spdk_name(prefix: &str, id: Uuid) -> String { + let mut cleaned = String::with_capacity(prefix.len()); + for ch in prefix.chars() { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '.' { + cleaned.push(ch); + } else { + cleaned.push('-'); + } + } + let cleaned = cleaned.trim_matches('-'); + if cleaned.is_empty() { + format!("nq-{}", id.simple()) + } else { + format!("nq-{cleaned}-{}", id.simple()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn spdk_name_is_sanitized_and_stable() { + let id = Uuid::parse_str("018f64ba-97aa-70d9-a7d2-6459256fd111").unwrap(); + assert_eq!( + spdk_name("rootfs / prod", id), + "nq-rootfs---prod-018f64ba97aa70d9a7d26459256fd111" + ); + } + + #[test] + fn capabilities_match_single_node_spdk_lvol() { + let backend = SpdkLvolControlPlaneBackend::new( + BackendInstanceId(Uuid::nil()), + SpdkLvolConfig { + rpc_socket: "/run/spdk/rpc.sock".into(), + lvs_name: "nexus".into(), + vhost_socket_dir: "/var/tmp".into(), + }, + ); + let caps = backend.capabilities(); + assert!(caps.supports_native_snapshots); + assert!(!caps.supports_clone_from_image); + assert!(!caps.supports_concurrent_attach); + } +} diff --git a/apps/manager/src/features/storage/config.rs b/apps/manager/src/features/storage/config.rs index 1fb3344..6b6ea55 100644 --- a/apps/manager/src/features/storage/config.rs +++ b/apps/manager/src/features/storage/config.rs @@ -77,6 +77,18 @@ pub fn validate(raw: RawBackendEntry) -> Result { supports_clone_from_image: false, } } + BackendKind::SpdkLvol => { + require_str(&raw.config, "rpc_socket") + .map_err(|e| anyhow!("backend '{}' (kind=spdk_lvol): {e}", raw.name))?; + require_str(&raw.config, "lvs_name") + .map_err(|e| anyhow!("backend '{}' (kind=spdk_lvol): {e}", raw.name))?; + Capabilities { + supports_native_snapshots: true, + supports_concurrent_attach: false, + supports_live_migration: false, + supports_clone_from_image: false, + } + } }; Ok(ValidatedBackend { @@ -142,6 +154,18 @@ mod tests { assert!(err.to_string().contains("target_iqn"), "got: {err}"); } + #[test] + fn spdk_lvol_requires_rpc_socket_and_lvs_name() { + let raw = RawBackendEntry { + name: "spdk".into(), + kind: BackendKind::SpdkLvol, + is_default: false, + config: serde_json::json!({"rpc_socket": "/run/spdk/rpc.sock"}), + }; + let err = validate(raw).unwrap_err(); + assert!(err.to_string().contains("lvs_name"), "got: {err}"); + } + /// T27: Malformed TrueNAS iSCSI entry parsed from TOML must fail validation /// with an error message naming BOTH the missing field and the backend name. #[test] diff --git a/apps/manager/src/features/storage/registry.rs b/apps/manager/src/features/storage/registry.rs index 6ec6ab1..fa8fa8b 100644 --- a/apps/manager/src/features/storage/registry.rs +++ b/apps/manager/src/features/storage/registry.rs @@ -117,6 +117,7 @@ fn build_backend(row: &StorageBackendRow) -> Result "local_file" => BackendKind::LocalFile, "iscsi" => BackendKind::Iscsi, "truenas_iscsi" => BackendKind::TrueNasIscsi, + "spdk_lvol" => BackendKind::SpdkLvol, other => { return Err(anyhow!("unknown backend kind '{other}'")); } @@ -155,6 +156,17 @@ fn build_backend(row: &StorageBackendRow) -> Result }, )) } + BackendKind::SpdkLvol => { + let cfg: crate::features::storage::backends::spdk_lvol::SpdkLvolConfig = + serde_json::from_value(row.config_json.clone()) + .with_context(|| format!("backend '{}' spdk_lvol config", row.name))?; + Ok(Arc::new( + crate::features::storage::backends::spdk_lvol::SpdkLvolControlPlaneBackend::new( + BackendInstanceId(row.id), + cfg, + ), + )) + } } } diff --git a/apps/manager/src/features/storage/rootfs_allocator.rs b/apps/manager/src/features/storage/rootfs_allocator.rs index ea56f29..4e2b409 100644 --- a/apps/manager/src/features/storage/rootfs_allocator.rs +++ b/apps/manager/src/features/storage/rootfs_allocator.rs @@ -81,9 +81,12 @@ pub async fn allocate_rootfs( match populate_result { Ok(()) => { if image_is_ext4_rootfs(source_image).await.unwrap_or(false) { - if let Err(e) = - crate::features::storage::agent_rpc::agent_resize2fs(host_addr, &attached) - .await + if let Err(e) = crate::features::storage::agent_rpc::agent_resize2fs( + host_addr, + h.backend_kind, + &attached, + ) + .await { tracing::warn!("resize2fs failed (non-fatal): {e:#}"); } diff --git a/apps/manager/src/features/vms/service.rs b/apps/manager/src/features/vms/service.rs index 864b971..c16847e 100644 --- a/apps/manager/src/features/vms/service.rs +++ b/apps/manager/src/features/vms/service.rs @@ -435,6 +435,7 @@ pub async fn create_from_snapshot( .context("stored mem_mib negative")?, kernel_path: source_vm.kernel_path.clone(), rootfs_path: source_vm.rootfs_path.clone(), + rootfs_is_vhost_user: false, rootfs_size_bytes: None, }; @@ -619,7 +620,10 @@ pub async fn create_from_snapshot( /// /// Falls back to `vm.rootfs_path` for legacy VMs that have no /// `volume_attachment` row, or whose backend_id is not in the registry. -async fn resolve_rootfs_attached_path(st: &AppState, vm: &super::repo::VmRow) -> Result { +async fn resolve_rootfs_attached_path( + st: &AppState, + vm: &super::repo::VmRow, +) -> Result<(String, bool)> { use nexus_storage::BackendKind; // Look up the rootfs volume row. The rootfs drive_id is "rootfs". @@ -639,18 +643,18 @@ async fn resolve_rootfs_attached_path(st: &AppState, vm: &super::repo::VmRow) -> let Some((volume_id, locator, backend_id)) = row else { // No volume_attachment row (legacy VM created before Plan 1) — fall // back to the stored rootfs_path. - return Ok(vm.rootfs_path.clone()); + return Ok((vm.rootfs_path.clone(), false)); }; let backend = match backend_id.and_then(|bid| st.registry.get(bid).cloned()) { Some(b) => b, - None => return Ok(vm.rootfs_path.clone()), + None => return Ok((vm.rootfs_path.clone(), false)), }; if backend.kind() == BackendKind::LocalFile { // LocalFile path is already a real filesystem path; no attach // round-trip needed. - return Ok(vm.rootfs_path.clone()); + return Ok((vm.rootfs_path.clone(), false)); } // Non-LocalFile: ask the agent to attach the volume and return the actual @@ -667,21 +671,27 @@ async fn resolve_rootfs_attached_path(st: &AppState, vm: &super::repo::VmRow) -> let attached = crate::features::storage::agent_rpc::agent_attach(&vm.host_addr, &volume_handle) .await .context("agent_attach during VM start")?; - Ok(attached.path().to_string_lossy().into_owned()) + let is_vhost_user = matches!(attached, nexus_storage::AttachedPath::VhostUserSock(_)); + Ok(( + attached.path().to_string_lossy().into_owned(), + is_vhost_user, + )) } pub async fn restart_vm(st: &AppState, vm: &super::repo::VmRow) -> Result<()> { let host = st.hosts.get(vm.host_id).await?; let paths = VmPaths::from_row(vm); ensure_allowed_path(st, &vm.kernel_path)?; - ensure_allowed_path(st, &vm.rootfs_path)?; // Resolve volume attachments through the registry. For non-LocalFile backends, // we need to call host.attach to log into the LUN and get the actual block // device path. For LocalFile, vm.rootfs_path is already correct. - let resolved_rootfs_path = resolve_rootfs_attached_path(st, vm) + let (resolved_rootfs_path, rootfs_is_vhost_user) = resolve_rootfs_attached_path(st, vm) .await .context("resolving rootfs attached path")?; + if !rootfs_is_vhost_user { + ensure_allowed_path(st, &resolved_rootfs_path)?; + } let spec = ResolvedVmSpec { name: vm.name.clone(), @@ -689,6 +699,7 @@ pub async fn restart_vm(st: &AppState, vm: &super::repo::VmRow) -> Result<()> { mem_mib: vm.mem_mib.try_into().context("stored mem_mib negative")?, kernel_path: vm.kernel_path.clone(), rootfs_path: resolved_rootfs_path, + rootfs_is_vhost_user, rootfs_size_bytes: None, }; @@ -1236,6 +1247,8 @@ struct ResolvedVmSpec { mem_mib: u32, kernel_path: String, rootfs_path: String, + #[cfg_attr(test, allow(dead_code))] + rootfs_is_vhost_user: bool, #[allow(dead_code)] rootfs_size_bytes: Option, } @@ -1267,6 +1280,7 @@ async fn resolve_vm_spec( mem_mib: req.mem_mib, kernel_path, rootfs_path, + rootfs_is_vhost_user: false, rootfs_size_bytes, }) } @@ -3385,6 +3399,30 @@ async fn spawn_firecracker(_: &AppState, _: &str, _: Uuid, _: &VmPaths) -> Resul Ok(()) } +#[cfg_attr(test, allow(dead_code))] +fn firecracker_drive_config( + drive_id: &str, + path_or_socket: &str, + is_root_device: bool, + is_read_only: bool, + is_vhost_user: bool, +) -> Value { + if is_vhost_user { + json!({ + "drive_id": drive_id, + "socket": path_or_socket, + "is_root_device": is_root_device + }) + } else { + json!({ + "drive_id": drive_id, + "path_on_host": path_or_socket, + "is_root_device": is_root_device, + "is_read_only": is_read_only + }) + } +} + #[cfg(not(test))] async fn configure_vm( st: &AppState, @@ -3430,12 +3468,13 @@ async fn configure_vm( info!(vm_id=%id, step="drives", rootfs_path=%spec.rootfs_path, "attaching rootfs drive"); http.put(format!("{base}/drives/rootfs{qs}")) - .json(&json!({ - "drive_id": "rootfs", - "path_on_host": spec.rootfs_path, - "is_root_device": true, - "is_read_only": false - })) + .json(&firecracker_drive_config( + "rootfs", + &spec.rootfs_path, + true, + false, + spec.rootfs_is_vhost_user, + )) .send() .await .context("drives request failed to send")? diff --git a/crates/nexus-storage/src/error.rs b/crates/nexus-storage/src/error.rs index 3b9e92c..b86206a 100644 --- a/crates/nexus-storage/src/error.rs +++ b/crates/nexus-storage/src/error.rs @@ -23,3 +23,12 @@ pub enum StorageError { #[error("backend error: {0}")] Backend(#[source] Box), } + +impl StorageError { + pub fn backend(err: E) -> Self + where + E: std::error::Error + Send + Sync + 'static, + { + Self::Backend(Box::new(err)) + } +} diff --git a/crates/nexus-storage/src/host.rs b/crates/nexus-storage/src/host.rs index fa2abc9..421674f 100644 --- a/crates/nexus-storage/src/host.rs +++ b/crates/nexus-storage/src/host.rs @@ -34,6 +34,10 @@ pub trait HostBackend: Send + Sync { target_size_bytes: u64, ) -> Result<(), StorageError>; + /// Filesystem-aware ext4 growth hook used only by rootfs allocation after + /// the caller has already identified the source image as ext4. + async fn resize2fs(&self, attached: &AttachedPath) -> Result<(), StorageError>; + /// Open a snapshot for reading. Returns a stream of bytes representing /// the volume contents at snapshot time. Used by the backup pipeline. /// diff --git a/crates/nexus-storage/src/lib.rs b/crates/nexus-storage/src/lib.rs index e41658a..9a76c8f 100644 --- a/crates/nexus-storage/src/lib.rs +++ b/crates/nexus-storage/src/lib.rs @@ -8,12 +8,14 @@ pub mod control_plane; pub mod error; pub mod handle; pub mod host; +pub mod spdk; pub mod types; pub use control_plane::ControlPlaneBackend; pub use error::StorageError; pub use handle::{AttachedPath, VolumeHandle, VolumeSnapshotHandle}; pub use host::HostBackend; +pub use spdk::{spdk_vhost_controller_name, SpdkJsonRpcClient, SpdkLvolLocator}; pub use types::{BackendInstanceId, BackendKind, Capabilities, CreateOpts}; #[cfg(test)] @@ -26,6 +28,7 @@ mod tests { BackendKind::LocalFile, BackendKind::Iscsi, BackendKind::TrueNasIscsi, + BackendKind::SpdkLvol, ]; for k in kinds { let json = serde_json::to_string(&k).unwrap(); diff --git a/crates/nexus-storage/src/spdk.rs b/crates/nexus-storage/src/spdk.rs new file mode 100644 index 0000000..a45c17d --- /dev/null +++ b/crates/nexus-storage/src/spdk.rs @@ -0,0 +1,234 @@ +use crate::error::StorageError; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::net::UnixStream; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SpdkLvolLocator { + pub lvs_name: String, + pub lvol_name: String, + pub lvol_uuid: String, + pub size_bytes: u64, +} + +impl SpdkLvolLocator { + pub fn to_locator_string(&self) -> Result { + serde_json::to_string(self).map_err(StorageError::backend) + } + + pub fn from_locator_str(s: &str) -> Result { + serde_json::from_str(s).map_err(|e| StorageError::InvalidLocator(e.to_string())) + } +} + +#[derive(Debug, Clone)] +pub struct SpdkJsonRpcClient { + socket: PathBuf, +} + +impl SpdkJsonRpcClient { + pub fn new(socket: impl Into) -> Self { + Self { + socket: socket.into(), + } + } + + pub fn socket(&self) -> &Path { + &self.socket + } + + pub async fn bdev_lvol_create( + &self, + lvs_name: &str, + lvol_name: &str, + size_bytes: u64, + ) -> Result { + let size_in_mib = size_bytes.div_ceil(1024 * 1024).max(1); + self.call( + "bdev_lvol_create", + json!({ + "lvs_name": lvs_name, + "lvol_name": lvol_name, + "size_in_mib": size_in_mib, + "thin_provision": true, + "clear_method": "unmap" + }), + ) + .await + } + + pub async fn bdev_lvol_delete(&self, lvol_name: &str) -> Result<(), StorageError> { + let _: Value = self + .call("bdev_lvol_delete", json!({ "name": lvol_name })) + .await?; + Ok(()) + } + + pub async fn bdev_lvol_snapshot( + &self, + lvol_name: &str, + snapshot_name: &str, + ) -> Result { + self.call( + "bdev_lvol_snapshot", + json!({ + "lvol_name": lvol_name, + "snapshot_name": snapshot_name + }), + ) + .await + } + + pub async fn bdev_lvol_clone( + &self, + snapshot_name: &str, + clone_name: &str, + ) -> Result { + self.call( + "bdev_lvol_clone", + json!({ + "snapshot_name": snapshot_name, + "clone_name": clone_name + }), + ) + .await + } + + pub async fn vhost_create_blk_controller( + &self, + ctrlr: &str, + dev_name: &str, + ) -> Result<(), StorageError> { + let _: Value = self + .call( + "vhost_create_blk_controller", + json!({ + "ctrlr": ctrlr, + "dev_name": dev_name + }), + ) + .await?; + Ok(()) + } + + pub async fn vhost_delete_controller(&self, ctrlr: &str) -> Result<(), StorageError> { + let _: Value = self + .call("vhost_delete_controller", json!({ "ctrlr": ctrlr })) + .await?; + Ok(()) + } + + pub async fn nbd_start_disk( + &self, + bdev_name: &str, + nbd_device: &Path, + ) -> Result { + let exported: String = self + .call( + "nbd_start_disk", + json!({ + "bdev_name": bdev_name, + "nbd_device": nbd_device + }), + ) + .await?; + Ok(PathBuf::from(exported)) + } + + pub async fn nbd_stop_disk(&self, nbd_device: &Path) -> Result<(), StorageError> { + let _: Value = self + .call("nbd_stop_disk", json!({ "nbd_device": nbd_device })) + .await?; + Ok(()) + } + + async fn call( + &self, + method: &str, + params: Value, + ) -> Result { + static NEXT_ID: AtomicU64 = AtomicU64::new(1); + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + let req = json!({ + "jsonrpc": "2.0", + "method": method, + "id": id, + "params": params + }); + + let stream = UnixStream::connect(&self.socket).await?; + let mut stream = BufReader::new(stream); + let mut line = serde_json::to_vec(&req).map_err(StorageError::backend)?; + line.push(b'\n'); + stream.get_mut().write_all(&line).await?; + stream.get_mut().flush().await?; + + let mut response = String::new(); + stream.read_line(&mut response).await?; + if response.trim().is_empty() { + return Err(StorageError::InvalidLocator(format!( + "empty SPDK JSON-RPC response for {method}" + ))); + } + + let response: SpdkRpcResponse = + serde_json::from_str(&response).map_err(StorageError::backend)?; + if let Some(error) = response.error { + return Err(StorageError::InvalidLocator(format!( + "SPDK {method} failed: code={} message={}", + error.code, error.message + ))); + } + response.result.ok_or_else(|| { + StorageError::InvalidLocator(format!("SPDK {method} response missing result")) + }) + } +} + +#[derive(Debug, Deserialize)] +struct SpdkRpcResponse { + result: Option, + error: Option, +} + +#[derive(Debug, Deserialize)] +struct SpdkRpcError { + code: i64, + message: String, +} + +pub fn spdk_vhost_controller_name(volume_id: uuid::Uuid) -> String { + format!("nq.{}", volume_id.simple()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn locator_round_trips_json() { + let locator = SpdkLvolLocator { + lvs_name: "nexus".into(), + lvol_name: "vol-a".into(), + lvol_uuid: "4f60".into(), + size_bytes: 4096, + }; + let encoded = locator.to_locator_string().unwrap(); + assert_eq!( + SpdkLvolLocator::from_locator_str(&encoded).unwrap(), + locator + ); + } + + #[test] + fn controller_name_is_stable_and_spdk_safe() { + let id = uuid::Uuid::parse_str("018f64ba-97aa-70d9-a7d2-6459256fd111").unwrap(); + assert_eq!( + spdk_vhost_controller_name(id), + "nq.018f64ba97aa70d9a7d26459256fd111" + ); + } +} diff --git a/crates/nexus-storage/src/types.rs b/crates/nexus-storage/src/types.rs index 22ebb78..a4123b3 100644 --- a/crates/nexus-storage/src/types.rs +++ b/crates/nexus-storage/src/types.rs @@ -29,6 +29,8 @@ pub enum BackendKind { Iscsi, #[serde(rename = "truenas_iscsi")] TrueNasIscsi, + #[serde(rename = "spdk_lvol")] + SpdkLvol, } impl BackendKind { @@ -37,6 +39,7 @@ impl BackendKind { BackendKind::LocalFile => "local_file", BackendKind::Iscsi => "iscsi", BackendKind::TrueNasIscsi => "truenas_iscsi", + BackendKind::SpdkLvol => "spdk_lvol", } } } diff --git a/docs/runbooks/spdk-lvol-smoke.md b/docs/runbooks/spdk-lvol-smoke.md new file mode 100644 index 0000000..50c1d5f --- /dev/null +++ b/docs/runbooks/spdk-lvol-smoke.md @@ -0,0 +1,59 @@ +# SPDK Lvol Smoke Test + +This runbook exercises the B-I SPDK lvol backend against a real local SPDK process. + +## Prerequisites + +- SPDK is running and exposes a JSON-RPC Unix socket, default `/run/spdk/rpc.sock`. +- An SPDK lvol store exists, default name `nexus`. +- Linux NBD devices exist and the module is loaded: + +```bash +sudo modprobe nbd nbds_max=8 +``` + +For a local development-only SPDK target backed by memory, run: + +```bash +./scripts/spdk-dev-bootstrap.sh +``` + +This builds SPDK under `.worktrees/spdk` if needed, starts `spdk_tgt` with a malloc bdev, creates an lvol store, and prints the matching smoke-test command. + +The bootstrap is a developer convenience, not production SPDK lifecycle management. It intentionally: + +- creates a Python virtualenv for SPDK build helpers (`meson`, `ninja`, `jinja2`, `pyelftools`, `tabulate`); +- passes DPDK `max_numa_nodes=1` for machines without `libnuma`; +- prunes optional SPDK build targets that require extra host packages or large memory pools (`bdev_aio`, iSCSI target pieces, and libaio-backed utility apps); +- starts `spdk_tgt` with a small memory/iobuf/bdev profile suitable for the smoke test; +- chmods the configured NBD devices so the Rust test can access them as the current user. + +On Arch, the bootstrap skips SPDK's `pkgdep.sh` by default to avoid accidental partial upgrades. If configure reports missing dependencies, either install them manually after a full system upgrade: + +```bash +sudo pacman -Syu +``` + +or explicitly allow SPDK's dependency script: + +```bash +SPDK_RUN_PKGDEP=1 ./scripts/spdk-dev-bootstrap.sh +``` + +## Run + +```bash +AGENT_SPDK_IT_RPC_SOCKET=/run/spdk/rpc.sock \ +AGENT_SPDK_IT_LVS_NAME=nexus \ +AGENT_SPDK_IT_NBD_DEVICES=/dev/nbd0,/dev/nbd1 \ +./scripts/spdk-lvol-smoke.sh +``` + +The test creates a temporary lvol, attaches it through the agent backend, imports deterministic bytes through NBD, snapshots the lvol, reads the snapshot through NBD, verifies the bytes, detaches, and deletes the lvol. + +## Notes + +- The test is ignored by default and is not part of normal CI. +- NBD devices in `AGENT_SPDK_IT_NBD_DEVICES` must be unused. +- `scripts/spdk-lvol-smoke.sh` does not start or configure SPDK; it assumes the operator or dev bootstrap has already created the lvstore. +- Stop the dev target with `sudo kill $(cat /tmp/nqrust-spdk-tgt.pid)` when finished. diff --git a/docs/superpowers/plans/2026-04-29-spdk-lvol.md b/docs/superpowers/plans/2026-04-29-spdk-lvol.md new file mode 100644 index 0000000..9333306 --- /dev/null +++ b/docs/superpowers/plans/2026-04-29-spdk-lvol.md @@ -0,0 +1,101 @@ +# SPDK Lvol Backend Implementation Plan + +**Status:** Implemented; PR verification complete +**Spec:** `docs/superpowers/specs/2026-04-29-spdk-raft-hci-design.md` +**Scope:** B-I single-node SPDK lvol backend foundation. B-II/B-III remain design/prototype work. + +## Task 1: Backend Kind + Shared SPDK Types + +- Add `BackendKind::SpdkLvol` serialized as `spdk_lvol`. +- Add `SpdkLvolLocator`. +- Add minimal SPDK JSON-RPC client over Unix socket. +- Add unit tests for locator round-trip and controller naming. + +Validation: + +```bash +cargo test -p nexus-storage spdk --lib +``` + +## Task 2: Manager Control-Plane Backend + +- Add `apps/manager/src/features/storage/backends/spdk_lvol.rs`. +- Implement `provision`, `destroy`, `snapshot`, `clone_from_snapshot`, `delete_snapshot`. +- Return `NotSupported` for `clone_from_image` until an image import path exists. +- Register the backend in manager registry. +- Extend TOML validation. + +Validation: + +```bash +cargo test -p manager storage::config::tests::spdk_lvol_requires_rpc_socket_and_lvs_name +``` + +## Task 3: Agent Host Backend + +- Add `apps/agent/src/features/storage/spdk_lvol.rs`. +- Register when `AGENT_SPDK_RPC_SOCKET` is set. +- `attach` creates `vhost_create_blk_controller` and returns `AttachedPath::VhostUserSock`. +- `detach` deletes the vhost controller. +- `populate_streaming` uses temporary SPDK NBD export to copy image bytes into the lvol. +- `read_snapshot` exports the snapshot lvol through NBD and stops the export when the reader is dropped. + +Validation: + +```bash +cargo test -p agent spdk_lvol +``` + +## Task 4: Firecracker Vhost-User Drive Config + +- Preserve `path_on_host` for files and block devices. +- Send `socket` for vhost-user block devices. +- Avoid validating SPDK locator JSON as a local filesystem path before attach. + +Validation: + +```bash +cargo check -p manager -p agent +``` + +## Task 5: Real SPDK Integration Smoke + +The current implementation uses a leased NBD device pool for image import, ext4 resize, and snapshot reads. The B-I branch includes a real-SPDK smoke harness and a development bootstrap script: + +- `scripts/spdk-dev-bootstrap.sh` builds a local SPDK checkout under `.worktrees/spdk`, starts a memory-backed `spdk_tgt`, creates the `nexus` lvstore, and prints the smoke command. +- `scripts/spdk-lvol-smoke.sh` runs the ignored agent smoke test against a live SPDK JSON-RPC socket. +- The smoke test creates an SPDK lvol, attaches it through the agent backend, imports deterministic bytes through NBD, snapshots the lvol, reads the snapshot through NBD, verifies the bytes, detaches, and deletes the lvol. + +Operational hardening still remains before production use: managed SPDK process lifecycle, persistent lvstore bootstrap, hugepage/vfio setup, and agent status for NBD pool capacity/usage. Until then, SPDK rootfs import and resize should be treated as experimental. Concurrency is bounded by `AGENT_SPDK_NBD_DEVICES`. + +### Real SPDK Smoke Test + +An ignored agent test is available for a host with SPDK already running, an lvol store already created, and the `nbd` module loaded: + +```bash +sudo modprobe nbd nbds_max=8 +AGENT_SPDK_IT_RPC_SOCKET=/run/spdk/rpc.sock \ +AGENT_SPDK_IT_LVS_NAME=nexus \ +AGENT_SPDK_IT_NBD_DEVICES=/dev/nbd0,/dev/nbd1 \ +cargo test -p agent spdk_lvol_real_smoke -- --ignored --nocapture +``` + +The test creates an SPDK lvol, attaches it through the agent backend, imports deterministic bytes through NBD, snapshots the lvol, reads the snapshot through NBD, verifies the bytes, detaches, and deletes the lvol. + +The same test can be run through `./scripts/spdk-lvol-smoke.sh`; see `docs/runbooks/spdk-lvol-smoke.md`. + +Validated on 2026-04-29 with: + +```bash +AGENT_SPDK_IT_RPC_SOCKET=/tmp/nqrust-spdk-rpc.sock \ +AGENT_SPDK_IT_LVS_NAME=nexus \ +AGENT_SPDK_IT_NBD_DEVICES=/dev/nbd0,/dev/nbd1 \ +./scripts/spdk-lvol-smoke.sh +``` + +## Non-Goals In This Plan + +- No Raft replication implementation. +- No dynamic membership. +- No automatic hugepage/vfio/SPDK daemon lifecycle. +- No claim of host-loss tolerance. diff --git a/docs/superpowers/specs/2026-04-29-spdk-raft-hci-design.md b/docs/superpowers/specs/2026-04-29-spdk-raft-hci-design.md new file mode 100644 index 0000000..e37c406 --- /dev/null +++ b/docs/superpowers/specs/2026-04-29-spdk-raft-hci-design.md @@ -0,0 +1,173 @@ +# NQRust-MicroVM: SPDK + Raft HCI Storage + +**Status:** Design +**Date:** 2026-04-29 +**Owner:** kleopasevan +**Scope:** Path B sub-project B. Defines the SPDK lvol performance backend and the later replicated block tier. This spec intentionally separates single-node SPDK from distributed replication. + +## Intent + +Move NQRust-MicroVM from pluggable storage plus backups into an HCI-style storage stack: + +1. **B-I: Single-node SPDK lvol backend** - fast local userspace block storage, native snapshots/clones, vhost-user-blk attachment to Firecracker. +2. **B-II: Raft-replicated block prototype and backend** - quorum-replicated writes across agent hosts, with correctness proven before production use. +3. **B-III: Cluster reconfiguration** - add/remove hosts, replica repair, rebalancing, decommissioning, and operator repair tooling. + +The project must not blur these phases. Single-node SPDK improves local I/O but has the same host-loss blast radius as LocalFile. The HCI guarantee only starts when B-II is correct under failure. + +## Current Foundation + +Already available on `main`: + +- `crates/nexus-storage` with split `ControlPlaneBackend` and `HostBackend` traits. +- Per-volume `backend_id`. +- `AttachedPath::VhostUserSock`, reserved for SPDK. +- Backup pipeline that can consume `HostBackend::read_snapshot`. +- Agent-side `supported_backend_kinds` handshake. + +## B-I: SPDK Lvol Backend + +### Backend Kind + +Add `BackendKind::SpdkLvol` serialized as `spdk_lvol`. + +### Manager TOML + +```toml +[[storage_backend]] +name = "spdk-local" +kind = "spdk_lvol" +is_default = false + +[storage_backend.config] +rpc_socket = "/run/spdk/rpc.sock" +lvs_name = "nexus" +vhost_socket_dir = "/var/tmp" +``` + +`rpc_socket` is the SPDK JSON-RPC Unix socket. `lvs_name` is the pre-created lvol store. `vhost_socket_dir` is where SPDK exposes vhost controllers. + +### Capabilities + +```text +supports_native_snapshots = true +supports_concurrent_attach = false +supports_live_migration = false +supports_clone_from_image = false initially +``` + +The initial backend cannot advertise `clone_from_image` until image import exists. A vhost-user socket is a Firecracker transport, not a writable block path, so the generic `populate_streaming` slow path cannot copy an image into SPDK by writing to the returned socket. + +### JSON-RPC Calls + +The first implementation uses SPDK JSON-RPC: + +- `bdev_lvol_create` +- `bdev_lvol_delete` +- `bdev_lvol_snapshot` +- `bdev_lvol_clone` +- `vhost_create_blk_controller` +- `vhost_delete_controller` + +The volume locator is JSON: + +```json +{ + "lvs_name": "nexus", + "lvol_name": "nq-rootfs-...", + "lvol_uuid": "...", + "size_bytes": 10737418240 +} +``` + +### Firecracker Integration + +Firecracker vhost-user block drives use `socket`, not `path_on_host`. + +For `AttachedPath::File` and `AttachedPath::BlockDevice`, manager sends: + +```json +{"drive_id":"rootfs","path_on_host":"/dev/...", "is_root_device":true,"is_read_only":false} +``` + +For `AttachedPath::VhostUserSock`, manager sends: + +```json +{"drive_id":"rootfs","socket":"/var/tmp/nq.", "is_root_device":true} +``` + +Read-only state is controlled by the backend-advertised virtio feature, not Firecracker's `is_read_only` field. + +### Image Import + +Image import into SPDK is not solved by vhost-user. The first implementation uses SPDK's Linux NBD export: + +1. `attach` creates the vhost controller and records `vhost socket -> lvol UUID` inside the agent backend. +2. `populate_streaming` starts `nbd_start_disk` for that lvol on a configured NBD device. +3. The agent writes image bytes to the NBD device and calls `sync_all`. +4. The agent always attempts `nbd_stop_disk` before returning. + +This requires the `nbd` kernel module to be loaded and one or more configured NBD devices. The agent reads `AGENT_SPDK_NBD_DEVICES` as a comma-separated pool, falling back to `AGENT_SPDK_IMPORT_NBD_DEVICE`, then `/dev/nbd0`. Each import, resize, or snapshot-read operation takes a lease from the pool and releases it only after `nbd_stop_disk`. + +Ext4 growth uses the same pattern. The manager's rootfs allocator detects the source image as ext4, then calls the agent resize route with the backend kind. LocalFile and iSCSI run `e2fsck`/`resize2fs` directly on their attached path; SPDK exports the lvol to NBD, runs `e2fsck`/`resize2fs` on the NBD device, and stops the export. + +Snapshot backup reads also use NBD in this first slice. `read_snapshot` parses the snapshot lvol locator, exports that lvol to the configured NBD device, opens it for reading, and stops the export when the reader is dropped. + +### Explicit B-I Gaps + +- NBD pool capacity directly limits concurrent SPDK imports, resizes, and snapshot backup reads on the agent. Operators must provision enough `/dev/nbdX` devices for expected concurrency. +- SPDK process lifecycle, hugepage setup, vfio binding, and lvstore creation are operational prerequisites and need installer/runbook support before production use. +- The development bootstrap script is intentionally not production lifecycle management. It builds a local SPDK checkout under `.worktrees/spdk`, starts a memory-backed target, and applies local build pruning so smoke tests can run on developer machines without the full SPDK dependency surface. + +## B-II: Raft Replication + +Do not implement B-II directly against production VM disks first. Build a fake-block prototype and chaos harness before using SPDK lvols. + +Required design decisions: + +- write unit size and alignment, +- log entry format and checksums, +- flush, FUA, and barrier semantics, +- leader fencing and stale leader prevention, +- idempotent replay after agent restart, +- read policy: leader-only first, follower reads only after leases are proven, +- log compaction and snapshot interaction, +- repair after missed writes, +- quorum loss behavior, +- corruption detection and operator-visible health. + +Minimum safety bar: + +- deterministic model tests for write ordering, +- crash/restart tests at every await boundary in write replication, +- partition tests: leader isolated, follower isolated, majority loss, +- disk-full and partial-write simulations, +- checksum mismatch tests, +- restore from backup after replica loss. + +## B-III: Reconfiguration + +B-III depends on B-II invariants. It adds: + +- host add/remove, +- replica placement, +- replica rebalancing, +- hot-spare promotion, +- decommission workflow, +- repair queue, +- status API and CLI. + +Membership changes must use Raft joint consensus or an equivalent safe transition. Never change replica sets by mutating DB rows outside the replicated protocol. + +## Success Criteria + +B-I succeeds when: + +- Manager can provision/destroy SPDK lvols. +- Manager can snapshot/clone/delete SPDK lvol snapshots. +- Agent can create/delete SPDK vhost-blk controllers and return `VhostUserSock`. +- VM start sends Firecracker `socket` for vhost-user rootfs. +- Agent can import image bytes and read snapshots through a leased NBD device, with NBD setup/release waits to avoid racing the kernel device. +- The real-SPDK smoke test passes against a live `spdk_tgt`. + +B-II succeeds only when chaos tests demonstrate correct behavior under crashes, partitions, disk-full, and replay. diff --git a/scripts/spdk-dev-bootstrap.sh b/scripts/spdk-dev-bootstrap.sh new file mode 100755 index 0000000..5d60e43 --- /dev/null +++ b/scripts/spdk-dev-bootstrap.sh @@ -0,0 +1,281 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SPDK_DIR="${SPDK_DIR:-${ROOT}/.worktrees/spdk}" +SPDK_VENV="${SPDK_VENV:-${ROOT}/.worktrees/spdk-venv}" +SPDK_REPO="${SPDK_REPO:-https://github.com/spdk/spdk.git}" +SPDK_RUN_PKGDEP="${SPDK_RUN_PKGDEP:-auto}" +RPC_SOCKET="${AGENT_SPDK_IT_RPC_SOCKET:-${AGENT_SPDK_RPC_SOCKET:-/tmp/nqrust-spdk-rpc.sock}}" +LVS_NAME="${AGENT_SPDK_IT_LVS_NAME:-${AGENT_SPDK_LVS_NAME:-nexus}}" +MALLOC_BDEV="${SPDK_MALLOC_BDEV:-Malloc0}" +MALLOC_SIZE_MB="${SPDK_MALLOC_SIZE_MB:-32}" +MALLOC_BLOCK_SIZE="${SPDK_MALLOC_BLOCK_SIZE:-512}" +NBD_DEVICES="${AGENT_SPDK_IT_NBD_DEVICES:-${AGENT_SPDK_NBD_DEVICES:-/dev/nbd0,/dev/nbd1}}" +PID_FILE="${SPDK_PID_FILE:-/tmp/nqrust-spdk-tgt.pid}" +LOG_FILE="${SPDK_LOG_FILE:-/tmp/nqrust-spdk-tgt.log}" +CONFIG_FILE="${SPDK_CONFIG_FILE:-/tmp/nqrust-spdk-tgt.json}" +MEM_SIZE_MB="${SPDK_MEM_SIZE_MB:-64}" +IOBUF_SMALL_POOL_COUNT="${SPDK_IOBUF_SMALL_POOL_COUNT:-512}" +IOBUF_LARGE_POOL_COUNT="${SPDK_IOBUF_LARGE_POOL_COUNT:-64}" +IOBUF_SMALL_BUFSIZE="${SPDK_IOBUF_SMALL_BUFSIZE:-4096}" +IOBUF_LARGE_BUFSIZE="${SPDK_IOBUF_LARGE_BUFSIZE:-8192}" +BDEV_IO_POOL_SIZE="${SPDK_BDEV_IO_POOL_SIZE:-64}" +BDEV_IO_CACHE_SIZE="${SPDK_BDEV_IO_CACHE_SIZE:-1}" +BDEV_IOBUF_SMALL_CACHE_SIZE="${SPDK_BDEV_IOBUF_SMALL_CACHE_SIZE:-1}" +BDEV_IOBUF_LARGE_CACHE_SIZE="${SPDK_BDEV_IOBUF_LARGE_CACHE_SIZE:-1}" + +need() { + command -v "$1" >/dev/null 2>&1 || { + echo "Missing required command: $1" >&2 + exit 2 + } +} + +need git +need sudo +need python3 +need gcc +need make +need pkg-config + +python_has_module() { + python3 - "$1" <<'PY' +import importlib.util +import sys +sys.exit(0 if importlib.util.find_spec(sys.argv[1]) else 1) +PY +} + +ensure_python_build_tools() { + local need_venv=0 + command -v meson >/dev/null 2>&1 || need_venv=1 + command -v ninja >/dev/null 2>&1 || need_venv=1 + python_has_module jinja2 || need_venv=1 + python_has_module elftools || need_venv=1 + python_has_module tabulate || need_venv=1 + + if [[ "${need_venv}" == 0 ]]; then + return + fi + + echo "Installing SPDK Python build helpers in ${SPDK_VENV}..." + python3 -m venv "${SPDK_VENV}" + "${SPDK_VENV}/bin/python" -m pip install --upgrade pip + "${SPDK_VENV}/bin/python" -m pip install meson ninja jinja2 pyelftools tabulate + export PATH="${SPDK_VENV}/bin:${PATH}" +} + +run_pkgdep=false +case "${SPDK_RUN_PKGDEP}" in + 1|true|yes) run_pkgdep=true ;; + 0|false|no) run_pkgdep=false ;; + auto) + # On Arch, pkgdep.sh may trigger a partial system upgrade if package + # databases are newer than installed base packages. Prefer building with + # already-installed deps and let configure report anything missing. + if [[ -r /etc/arch-release ]]; then + run_pkgdep=false + else + run_pkgdep=true + fi + ;; + *) + echo "Invalid SPDK_RUN_PKGDEP=${SPDK_RUN_PKGDEP}; use auto, 1, or 0." >&2 + exit 2 + ;; +esac + +mkdir -p "$(dirname "${SPDK_DIR}")" +if [[ ! -d "${SPDK_DIR}/.git" ]]; then + git clone --recursive "${SPDK_REPO}" "${SPDK_DIR}" +else + git -C "${SPDK_DIR}" submodule update --init --recursive +fi + +patch_spdk_dev_build() { + local marker="${SPDK_DIR}/.nqrust-spdk-dev-patch-v3" + ( + cd "${SPDK_DIR}" + # The local smoke target only needs malloc/lvol/nbd/vhost-blk. Prune + # optional modules/apps that pull in extra host deps or large mempools. + sed -i \ + -e '/^BLOCKDEV_MODULES_LIST += bdev_aio$/d' \ + -e '/^BLOCKDEV_MODULES_PRIVATE_LIBS += -laio$/d' \ + -e '/^INTR_BLOCKDEV_MODULES_LIST += bdev_aio$/d' \ + mk/spdk.modules.mk + sed -i \ + -e 's/^DIRS-y += aio ftl$/DIRS-y += ftl/' \ + -e '/^DIRS-y += aio$/d' \ + module/bdev/Makefile + sed -i \ + -e '/^DIRS-y += spdk_nvme_perf$/d' \ + -e '/^DIRS-y += spdk_dd$/d' \ + -e '/^DIRS-y += iscsi_tgt$/d' \ + app/Makefile + sed -i \ + -e 's/^DIRS-y += bdev accel scheduler iscsi nvmf scsi vmd sock iobuf keyring$/DIRS-y += bdev accel scheduler nvmf scsi vmd sock iobuf keyring/' \ + -e 's/^DIRS-$(CONFIG_VHOST) += vhost_blk vhost_scsi$/DIRS-$(CONFIG_VHOST) += vhost_blk/' \ + -e '/^DEPDIRS-iscsi := scsi$/d' \ + -e '/^DEPDIRS-vhost_scsi := scsi$/d' \ + module/event/subsystems/Makefile + sed -i \ + -e 's/ iscsi notify init/ notify init/' \ + lib/Makefile + sed -i \ + -e 's/^SPDK_LIB_LIST += event event_iscsi event_nvmf$/SPDK_LIB_LIST += event event_nvmf/' \ + -e 's/^SPDK_LIB_LIST += event_vhost_blk event_vhost_scsi$/SPDK_LIB_LIST += event_vhost_blk/' \ + app/spdk_tgt/Makefile + ) + if [[ ! -f "${marker}" ]]; then + rm -f "${SPDK_DIR}/build/bin/spdk_tgt" + : >"${marker}" + fi +} + +patch_spdk_dev_build + +if [[ ! -x "${SPDK_DIR}/build/bin/spdk_tgt" ]]; then + ensure_python_build_tools + if [[ "${run_pkgdep}" == true ]]; then + echo "Installing SPDK build dependencies through SPDK pkgdep script..." + if ! sudo "${SPDK_DIR}/scripts/pkgdep.sh"; then + echo + echo "SPDK pkgdep failed." >&2 + if [[ -r /etc/arch-release ]]; then + echo "On Arch this is often a partial-upgrade guard. Run:" >&2 + echo " sudo pacman -Syu" >&2 + echo "Then retry, or skip pkgdep with:" >&2 + echo " SPDK_RUN_PKGDEP=0 ./scripts/spdk-dev-bootstrap.sh" >&2 + fi + exit 1 + fi + else + echo "Skipping SPDK pkgdep script (SPDK_RUN_PKGDEP=${SPDK_RUN_PKGDEP})." + echo "If configure reports missing dependencies, either install them or run:" + echo " SPDK_RUN_PKGDEP=1 ./scripts/spdk-dev-bootstrap.sh" + fi + ( + cd "${SPDK_DIR}" + rm -rf dpdk/build dpdk/build-tmp + rm -f include/spdk_internal/rpc_autogen.h + ./configure \ + --disable-tests \ + --disable-unit-tests \ + --disable-examples \ + --max-numa-nodes=1 \ + --without-rdma \ + --without-rbd \ + --without-crypto \ + --without-fio \ + --without-idxd \ + --without-vfio-user \ + --without-fc \ + --without-daos \ + --without-aio-fsdev \ + --without-uring \ + --without-xnvme \ + --without-ublk \ + --without-usdt + patch_spdk_dev_build + make DPDKBUILD_FLAGS="-Dmax_numa_nodes=1" -j"$(nproc)" + ) +fi + +sudo modprobe nbd nbds_max=8 +IFS=',' read -r -a NBD_ARRAY <<< "${NBD_DEVICES}" +for dev in "${NBD_ARRAY[@]}"; do + dev="$(echo "${dev}" | xargs)" + [[ -z "${dev}" ]] && continue + if [[ -b "${dev}" ]]; then + sudo chmod 666 "${dev}" || true + fi +done +sudo sysctl -w vm.nr_hugepages="${SPDK_HUGEPAGES:-512}" >/dev/null + +cat >"${CONFIG_FILE}" </dev/null; then + echo "SPDK already running with pid $(cat "${PID_FILE}")" +else + sudo rm -f "${RPC_SOCKET}" "${PID_FILE}" "${LOG_FILE}" + sudo mkdir -p "$(dirname "${RPC_SOCKET}")" + echo "Starting spdk_tgt..." + sudo "${SPDK_DIR}/build/bin/spdk_tgt" \ + -r "${RPC_SOCKET}" \ + -c "${CONFIG_FILE}" \ + -m 0x1 \ + -s "${MEM_SIZE_MB}" \ + --num-trace-entries 0 \ + >"${LOG_FILE}" 2>&1 & + echo "$!" | sudo tee "${PID_FILE}" >/dev/null +fi + +for _ in $(seq 1 100); do + [[ -S "${RPC_SOCKET}" ]] && break + sleep 0.1 +done +if [[ ! -S "${RPC_SOCKET}" ]]; then + echo "SPDK RPC socket did not appear: ${RPC_SOCKET}" >&2 + echo "Log: ${LOG_FILE}" >&2 + exit 1 +fi +sudo chmod 666 "${RPC_SOCKET}" || true + +RPC="${SPDK_DIR}/scripts/rpc.py -s ${RPC_SOCKET}" + +if ! ${RPC} bdev_get_bdevs -b "${MALLOC_BDEV}" >/dev/null 2>&1; then + ${RPC} bdev_malloc_create "${MALLOC_SIZE_MB}" "${MALLOC_BLOCK_SIZE}" -b "${MALLOC_BDEV}" >/dev/null +fi + +if ! ${RPC} bdev_lvol_get_lvstores -l "${LVS_NAME}" >/dev/null 2>&1; then + ${RPC} bdev_lvol_create_lvstore "${MALLOC_BDEV}" "${LVS_NAME}" >/dev/null +fi + +echo +echo "SPDK dev target is ready." +echo "RPC socket: ${RPC_SOCKET}" +echo "Lvol store: ${LVS_NAME}" +echo +echo "Run:" +echo " AGENT_SPDK_IT_RPC_SOCKET=${RPC_SOCKET} \\" +echo " AGENT_SPDK_IT_LVS_NAME=${LVS_NAME} \\" +echo " AGENT_SPDK_IT_NBD_DEVICES=${NBD_DEVICES} \\" +echo " ./scripts/spdk-lvol-smoke.sh" +echo +echo "Stop later with:" +echo " sudo kill \$(cat ${PID_FILE})" diff --git a/scripts/spdk-lvol-smoke.sh b/scripts/spdk-lvol-smoke.sh new file mode 100755 index 0000000..5e7f291 --- /dev/null +++ b/scripts/spdk-lvol-smoke.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +RPC_SOCKET="${AGENT_SPDK_IT_RPC_SOCKET:-${AGENT_SPDK_RPC_SOCKET:-/run/spdk/rpc.sock}}" +LVS_NAME="${AGENT_SPDK_IT_LVS_NAME:-${AGENT_SPDK_LVS_NAME:-nexus}}" +NBD_DEVICES="${AGENT_SPDK_IT_NBD_DEVICES:-${AGENT_SPDK_NBD_DEVICES:-/dev/nbd0,/dev/nbd1}}" +VHOST_SOCKET_DIR="${AGENT_SPDK_IT_VHOST_SOCKET_DIR:-${AGENT_SPDK_VHOST_SOCKET_DIR:-/var/tmp}}" + +if [[ ! -S "${RPC_SOCKET}" ]]; then + echo "SPDK JSON-RPC socket not found: ${RPC_SOCKET}" >&2 + echo "Set AGENT_SPDK_IT_RPC_SOCKET or AGENT_SPDK_RPC_SOCKET." >&2 + exit 2 +fi + +IFS=',' read -r -a NBD_ARRAY <<< "${NBD_DEVICES}" +for dev in "${NBD_ARRAY[@]}"; do + dev="$(echo "${dev}" | xargs)" + [[ -z "${dev}" ]] && continue + if [[ ! -b "${dev}" ]]; then + echo "NBD device not found: ${dev}" >&2 + echo "Load the module first, for example: sudo modprobe nbd nbds_max=8" >&2 + exit 2 + fi +done + +cd "${ROOT}" +AGENT_SPDK_IT_RPC_SOCKET="${RPC_SOCKET}" \ +AGENT_SPDK_IT_LVS_NAME="${LVS_NAME}" \ +AGENT_SPDK_IT_NBD_DEVICES="${NBD_DEVICES}" \ +AGENT_SPDK_IT_VHOST_SOCKET_DIR="${VHOST_SOCKET_DIR}" \ +cargo test -p agent spdk_lvol_real_smoke -- --ignored --nocapture