From 48448fd3d54b991afa091db11185c7362f4756ce Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:23:06 +0900 Subject: [PATCH 01/48] types: centralize light client store keys --- modules/light-client/Cargo.toml | 1 - modules/light-client/src/context.rs | 30 ++++++++--------- modules/light-client/src/lib.rs | 1 - modules/light-client/src/path.rs | 40 ---------------------- modules/types/src/lib.rs | 5 +++ modules/types/src/store_key.rs | 52 +++++++++++++++++++++++++++++ 6 files changed, 70 insertions(+), 59 deletions(-) delete mode 100644 modules/light-client/src/path.rs create mode 100644 modules/types/src/store_key.rs diff --git a/modules/light-client/Cargo.toml b/modules/light-client/Cargo.toml index 573eb117..6975fc33 100644 --- a/modules/light-client/Cargo.toml +++ b/modules/light-client/Cargo.toml @@ -5,7 +5,6 @@ edition = "2021" [dependencies] flex-error = { version = "0.4.4", default-features = false } -derive_more = { version = "1.0", default-features = false, features = ["display"] } bincode = { version = "2.0.0-rc.3", default-features = false, features = ["serde", "alloc"] } ibc = { version = "0.29.0", default-features = false, features = ["serde"], optional = true } diff --git a/modules/light-client/src/context.rs b/modules/light-client/src/context.rs index 08dd5c72..eb335c72 100644 --- a/modules/light-client/src/context.rs +++ b/modules/light-client/src/context.rs @@ -1,9 +1,6 @@ use crate::types::{Any, ClientId, Height, Time}; -use crate::{ - errors::Error, - path::{ClientConsensusStatePath, ClientStatePath, ClientTypePath}, - prelude::*, -}; +use crate::{errors::Error, prelude::*}; +use lcp_types::store_key; use store::KVStore; pub trait HostContext { @@ -14,13 +11,13 @@ pub trait HostContext { pub trait ClientReader: KVStore { /// Returns `true` if the client exists in the store. 
fn client_exists(&self, client_id: &ClientId) -> bool { - self.get(format!("{}", ClientTypePath::new(client_id)).as_bytes()) + self.get(store_key::client_type(client_id.as_str()).as_bytes()) .is_some() } /// Returns the ClientType for the given identifier `client_id`. fn client_type(&self, client_id: &ClientId) -> Result { - let value = self.get(format!("{}", ClientTypePath::new(client_id)).as_bytes()); + let value = self.get(store_key::client_type(client_id.as_str()).as_bytes()); if let Some(value) = value { Ok(String::from_utf8(value).unwrap()) } else { @@ -30,7 +27,7 @@ pub trait ClientReader: KVStore { /// Returns the ClientState for the given identifier `client_id`. fn client_state(&self, client_id: &ClientId) -> Result { - let value = self.get(format!("{}", ClientStatePath::new(client_id)).as_bytes()); + let value = self.get(store_key::client_state(client_id.as_str()).as_bytes()); if let Some(value) = value { Ok( bincode::serde::decode_from_slice(&value, bincode::config::standard()) @@ -47,8 +44,8 @@ pub trait ClientReader: KVStore { /// /// Returns an error if no such state exists. 
fn consensus_state(&self, client_id: &ClientId, height: &Height) -> Result { - let path = ClientConsensusStatePath::new(client_id, height); - let value = match self.get(format!("{}", path).as_bytes()) { + let key = store_key::consensus_state(client_id.as_str(), height); + let value = match self.get(key.as_bytes()) { Some(value) => value, None => { return Err(Error::consensus_state_not_found(client_id.clone(), *height)); @@ -66,7 +63,7 @@ pub trait ClientKeeper: ClientReader { /// Called upon successful client creation fn store_client_type(&mut self, client_id: ClientId, client_type: String) -> Result<(), Error> { self.set( - format!("{}", ClientTypePath(client_id)).into_bytes(), + store_key::client_type_bytes(client_id.as_str()), client_type.into_bytes(), ); Ok(()) @@ -79,10 +76,7 @@ pub trait ClientKeeper: ClientReader { client_state: Any, ) -> Result<(), Error> { let bz = bincode::serde::encode_to_vec(&client_state, bincode::config::standard()).unwrap(); - self.set( - format!("{}", ClientStatePath::new(&client_id)).into_bytes(), - bz, - ); + self.set(store_key::client_state_bytes(client_id.as_str()), bz); Ok(()) } @@ -95,8 +89,10 @@ pub trait ClientKeeper: ClientReader { ) -> Result<(), Error> { let bz = bincode::serde::encode_to_vec(&consensus_state, bincode::config::standard()).unwrap(); - let path = ClientConsensusStatePath::new(&client_id, &height); - self.set(format!("{}", path).into_bytes(), bz); + self.set( + store_key::consensus_state_bytes(client_id.as_str(), &height), + bz, + ); Ok(()) } } diff --git a/modules/light-client/src/lib.rs b/modules/light-client/src/lib.rs index 44c36ad3..a18de8a6 100644 --- a/modules/light-client/src/lib.rs +++ b/modules/light-client/src/lib.rs @@ -38,5 +38,4 @@ mod context; mod errors; #[cfg(feature = "ibc")] pub mod ibc; -mod path; mod registry; diff --git a/modules/light-client/src/path.rs b/modules/light-client/src/path.rs deleted file mode 100644 index 1bac7b0a..00000000 --- a/modules/light-client/src/path.rs +++ 
/dev/null @@ -1,40 +0,0 @@ -use crate::types::{ClientId, Height}; -use derive_more::Display; - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Display)] -#[display("clients/{_0}/clientType")] -pub struct ClientTypePath(pub ClientId); - -impl ClientTypePath { - pub fn new(client_id: &ClientId) -> ClientTypePath { - ClientTypePath(client_id.clone()) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Display)] -#[display("clients/{_0}/clientState")] -pub struct ClientStatePath(pub ClientId); - -impl ClientStatePath { - pub fn new(client_id: &ClientId) -> ClientStatePath { - ClientStatePath(client_id.clone()) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Display)] -#[display("clients/{client_id}/consensusStates/{epoch}-{height}")] -pub struct ClientConsensusStatePath { - pub client_id: ClientId, - pub epoch: u64, - pub height: u64, -} - -impl ClientConsensusStatePath { - pub fn new(client_id: &ClientId, height: &Height) -> ClientConsensusStatePath { - ClientConsensusStatePath { - client_id: client_id.clone(), - epoch: height.revision_number(), - height: height.revision_height(), - } - } -} diff --git a/modules/types/src/lib.rs b/modules/types/src/lib.rs index 705f3d05..214d5360 100644 --- a/modules/types/src/lib.rs +++ b/modules/types/src/lib.rs @@ -10,6 +10,10 @@ pub use host::ClientId; /// re-export pub use lcp_proto as proto; pub use sgx::{EnclaveMetadata, Mrenclave}; +pub use store_key::{ + client_state as client_state_key, client_state_bytes, client_type as client_type_key, + client_type_bytes, consensus_state as consensus_state_key, consensus_state_bytes, +}; pub use time::{nanos_to_duration, Time, MAX_UNIX_TIMESTAMP_NANOS}; pub use transmuter::{deserialize_bytes, serialize_bytes, BytesTransmuter}; @@ -18,6 +22,7 @@ mod errors; mod height; mod host; mod sgx; +pub mod store_key; mod time; mod transmuter; diff --git a/modules/types/src/store_key.rs b/modules/types/src/store_key.rs new file mode 100644 index 
00000000..91194d7b --- /dev/null +++ b/modules/types/src/store_key.rs @@ -0,0 +1,52 @@ +use crate::{prelude::*, Height}; + +pub fn client_type(client_id: &str) -> String { + format!("clients/{client_id}/clientType") +} + +pub fn client_state(client_id: &str) -> String { + format!("clients/{client_id}/clientState") +} + +pub fn consensus_state(client_id: &str, height: &Height) -> String { + format!( + "clients/{}/consensusStates/{}-{}", + client_id, + height.revision_number(), + height.revision_height() + ) +} + +pub fn client_type_bytes(client_id: &str) -> Vec { + client_type(client_id).into_bytes() +} + +pub fn client_state_bytes(client_id: &str) -> Vec { + client_state(client_id).into_bytes() +} + +pub fn consensus_state_bytes(client_id: &str, height: &Height) -> Vec { + consensus_state(client_id, height).into_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_client_store_keys() { + let height = Height::new(1, 23); + assert_eq!( + client_type("07-tendermint-0"), + "clients/07-tendermint-0/clientType" + ); + assert_eq!( + client_state("07-tendermint-0"), + "clients/07-tendermint-0/clientState" + ); + assert_eq!( + consensus_state("07-tendermint-0", &height), + "clients/07-tendermint-0/consensusStates/1-23" + ); + } +} From 62cdaf3867380a0429e65dde4eb44a23a70cc97b Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:23:28 +0900 Subject: [PATCH 02/48] store: add speculative transaction overlays --- modules/store/src/lib.rs | 4 + modules/store/src/memory.rs | 14 ++- modules/store/src/overlay.rs | 101 +++++++++++++++++++ modules/store/src/rocksdb.rs | 163 +++++++++++++++++++++++++++++-- modules/store/src/transaction.rs | 16 ++- 5 files changed, 288 insertions(+), 10 deletions(-) create mode 100644 modules/store/src/overlay.rs diff --git a/modules/store/src/lib.rs b/modules/store/src/lib.rs index 582c39f3..cb585058 100644 --- a/modules/store/src/lib.rs +++ b/modules/store/src/lib.rs @@ -22,6 +22,7 @@ mod prelude { pub use 
crate::errors::{Error, Result}; pub use crate::store::{KVStore, TxId}; +use alloc::collections::BTreeMap; pub mod cache; mod errors; @@ -29,8 +30,11 @@ mod errors; pub mod host; #[cfg(feature = "std")] pub mod memory; +pub mod overlay; #[cfg(feature = "rocksdb")] pub mod rocksdb; mod store; #[cfg(feature = "std")] pub mod transaction; + +pub type WriteSet = BTreeMap, Option>>; diff --git a/modules/store/src/memory.rs b/modules/store/src/memory.rs index f6aee602..950d6ee5 100644 --- a/modules/store/src/memory.rs +++ b/modules/store/src/memory.rs @@ -1,7 +1,7 @@ use crate::prelude::*; use crate::store::TxId; use crate::transaction::{CommitStore, CreatedTx, Tx, TxAccessor}; -use crate::{KVStore, Result}; +use crate::{KVStore, Result, WriteSet}; use std::collections::HashMap; use std::sync::Mutex; @@ -55,6 +55,10 @@ impl CommitStore for MemStore { self.0.lock().unwrap().commit(tx) } + fn take_write_set(&mut self, tx: ::PreparedTx) -> Result { + self.0.lock().unwrap().take_write_set(tx) + } + fn rollback(&mut self, tx: ::PreparedTx) { self.0.lock().unwrap().rollback(tx) } @@ -142,6 +146,14 @@ impl CommitStore for InnerMemStore { Ok(()) } + fn take_write_set(&mut self, _tx: ::PreparedTx) -> Result { + assert!(self.running_tx_exists); + self.running_tx_exists = false; + let data = HashMap::, Option>>::default(); + let uncommitted_data = std::mem::replace(&mut self.uncommitted_data, data); + Ok(uncommitted_data.into_iter().collect()) + } + fn rollback(&mut self, _tx: ::PreparedTx) { assert!(self.running_tx_exists); self.running_tx_exists = false; diff --git a/modules/store/src/overlay.rs b/modules/store/src/overlay.rs new file mode 100644 index 00000000..8d9da5a4 --- /dev/null +++ b/modules/store/src/overlay.rs @@ -0,0 +1,101 @@ +use crate::prelude::*; +use crate::{KVStore, WriteSet}; + +/// `OverlayKVS` is a speculative view over a parent key-value store. +/// +/// Reads first consult the in-memory overlay and then fall back to the parent. 
+/// Writes are accumulated only in the overlay and never mutate the parent. +pub struct OverlayKVS { + parent: S, + overlay: WriteSet, +} + +impl OverlayKVS { + pub fn new(parent: S) -> Self { + Self { + parent, + overlay: WriteSet::default(), + } + } + + pub fn overlay(&self) -> &WriteSet { + &self.overlay + } + + pub fn into_parts(self) -> (S, WriteSet) { + (self.parent, self.overlay) + } +} + +impl KVStore for OverlayKVS { + fn set(&mut self, key: Vec, value: Vec) { + self.overlay.insert(key, Some(value)); + } + + fn get(&self, key: &[u8]) -> Option> { + match self.overlay.get(key) { + Some(Some(v)) => Some(v.clone()), + Some(None) => None, + None => self.parent.get(key), + } + } + + fn remove(&mut self, key: &[u8]) { + self.overlay.insert(key.to_vec(), None); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memory::MemStore; + + #[allow(non_snake_case)] + fn B(s: &str) -> Vec { + s.as_bytes().to_vec() + } + + #[test] + fn overlay_reads_parent_when_missing() { + let mut parent = MemStore::default(); + parent.set(B("k1"), B("v1")); + + let overlay = OverlayKVS::new(parent); + assert_eq!(overlay.get(&B("k1")), Some(B("v1"))); + assert_eq!(overlay.get(&B("missing")), None); + } + + #[test] + fn overlay_write_shadows_parent_without_mutating_it() { + let mut parent = MemStore::default(); + parent.set(B("k1"), B("v1")); + + let mut overlay = OverlayKVS::new(parent); + overlay.set(B("k1"), B("v2")); + overlay.set(B("k2"), B("v3")); + + assert_eq!(overlay.get(&B("k1")), Some(B("v2"))); + assert_eq!(overlay.get(&B("k2")), Some(B("v3"))); + + let (parent, writes) = overlay.into_parts(); + assert_eq!(parent.get(&B("k1")), Some(B("v1"))); + assert_eq!(parent.get(&B("k2")), None); + assert_eq!(writes.get(&B("k1")), Some(&Some(B("v2")))); + assert_eq!(writes.get(&B("k2")), Some(&Some(B("v3")))); + } + + #[test] + fn overlay_delete_masks_parent_value() { + let mut parent = MemStore::default(); + parent.set(B("k1"), B("v1")); + + let mut overlay = 
OverlayKVS::new(parent); + overlay.remove(&B("k1")); + + assert_eq!(overlay.get(&B("k1")), None); + + let (parent, writes) = overlay.into_parts(); + assert_eq!(parent.get(&B("k1")), Some(B("v1"))); + assert_eq!(writes.get(&B("k1")), Some(&None)); + } +} diff --git a/modules/store/src/rocksdb.rs b/modules/store/src/rocksdb.rs index 7b9e57b6..c77d1f0d 100644 --- a/modules/store/src/rocksdb.rs +++ b/modules/store/src/rocksdb.rs @@ -1,5 +1,6 @@ +use crate::overlay::OverlayKVS; use crate::transaction::{CommitStore, CreatedTx, Tx, TxAccessor, UpdateKey}; -use crate::{Error, KVStore, Result, TxId}; +use crate::{Error, KVStore, Result, TxId, WriteSet}; use core::marker::PhantomData; use log::*; use ouroboros::self_referencing; @@ -65,8 +66,7 @@ impl RocksDBStore { f: impl FnOnce(StoreTransaction) -> T, ) -> T { self.with_mut(|fields| { - if tx.is_update_tx() { - let update_key = tx.borrow_update_key().as_ref().unwrap(); + if let RocksDBTxKind::Update(update_key) = tx.borrow_kind() { let v = fields.mutex.get(update_key).expect("invariant violation"); if Rc::strong_count(v) == 2 { // "2" indicates `v` and an entry of `mutex` only exist @@ -157,6 +157,20 @@ impl CommitStore for RocksDBStore { }) } + fn create_speculative_transaction(&mut self) -> Result { + debug!("create speculative tx"); + self.with_mut(|fields| { + if matches!(fields.db, InnerDB::ReadOnlyDB(_)) { + return Err(Error::not_supported_operation( + "create_speculative_transaction is only available for writable RocksDB stores" + .to_string(), + )); + } + fields.latest_tx_id.safe_incr()?; + Ok(RocksDBTx::new_speculative_tx(*fields.latest_tx_id)) + }) + } + fn begin(&mut self, tx: &::PreparedTx) -> Result<()> { debug!("begin tx: {:?}", tx.get_id()); self.with_mut(|fields| { @@ -177,6 +191,12 @@ impl CommitStore for RocksDBStore { } .build(), ) + } else if tx.is_speculative_tx() { + StoreTransaction::Speculative(SpeculativeTransaction { + overlay: OverlayKVS::new(TransactionSnapshotKVS { + snapshot: db.snapshot(), + 
}), + }) } else { StoreTransaction::Read(ReadTransaction { snapshot: db.snapshot(), @@ -196,6 +216,17 @@ impl CommitStore for RocksDBStore { self.finalize_tx(tx, |stx| stx.commit()) } + fn take_write_set(&mut self, tx: ::PreparedTx) -> Result { + debug!("take write set: {:?}", tx.get_id()); + self.finalize_tx(tx, |stx| { + stx.into_overlay_writes().ok_or_else(|| { + Error::not_supported_operation( + "take_write_set is only available for speculative transactions".to_string(), + ) + }) + }) + } + fn rollback(&mut self, tx: ::PreparedTx) { debug!("rollback tx: {:?}", tx.get_id()); self.finalize_tx(tx, |stx| stx.rollback()) @@ -240,6 +271,7 @@ pub enum StoreTransaction<'a> { Read(ReadTransaction<'a>), Update(UpdateTransaction<'a>), ReadSnapshot(ReadSnapshot<'a>), + Speculative(SpeculativeTransaction<'a>), } #[allow(clippy::single_match)] @@ -257,6 +289,15 @@ impl<'a> StoreTransaction<'a> { _ => {} } } + + fn into_overlay_writes(self) -> Option { + match self { + StoreTransaction::Speculative(stx) => Some(stx.into_overlay_writes()), + StoreTransaction::Read(_) + | StoreTransaction::ReadSnapshot(_) + | StoreTransaction::Update(_) => None, + } + } } impl<'a> KVStore for StoreTransaction<'a> { @@ -265,6 +306,7 @@ impl<'a> KVStore for StoreTransaction<'a> { StoreTransaction::Read(stx) => stx.set(key, value), StoreTransaction::Update(stx) => stx.set(key, value), StoreTransaction::ReadSnapshot(stx) => stx.set(key, value), + StoreTransaction::Speculative(stx) => stx.set(key, value), } } @@ -273,6 +315,7 @@ impl<'a> KVStore for StoreTransaction<'a> { StoreTransaction::Read(stx) => stx.get(key), StoreTransaction::Update(stx) => stx.get(key), StoreTransaction::ReadSnapshot(stx) => stx.get(key), + StoreTransaction::Speculative(stx) => stx.get(key), } } @@ -281,6 +324,7 @@ impl<'a> KVStore for StoreTransaction<'a> { StoreTransaction::Read(stx) => stx.remove(key), StoreTransaction::Update(stx) => stx.remove(key), StoreTransaction::ReadSnapshot(stx) => stx.remove(key), + 
StoreTransaction::Speculative(stx) => stx.remove(key), } } } @@ -312,6 +356,35 @@ impl<'a> KVStore for ReadTransaction<'a> { } } +/// SpeculativeTransaction is an isolated writable view over a transaction snapshot. +/// +/// All read operations are performed against the overlay first and then the snapshot. +/// All write operations are accumulated into the overlay and can be extracted as a WriteSet. +pub struct SpeculativeTransaction<'a> { + overlay: OverlayKVS>, +} + +impl<'a> SpeculativeTransaction<'a> { + fn into_overlay_writes(self) -> WriteSet { + let (_, writes) = self.overlay.into_parts(); + writes + } +} + +impl<'a> KVStore for SpeculativeTransaction<'a> { + fn set(&mut self, key: Vec, value: Vec) { + self.overlay.set(key, value); + } + + fn get(&self, key: &[u8]) -> Option> { + self.overlay.get(key) + } + + fn remove(&mut self, key: &[u8]) { + self.overlay.remove(key); + } +} + /// UpdateTransaction is a `writable` transaction /// /// All read operations are performed based on a specific version of snapshot. @@ -378,11 +451,37 @@ impl<'a> KVStore for ReadSnapshot<'a> { } } +pub struct TransactionSnapshotKVS<'a> { + snapshot: SnapshotWithThreadMode<'a, TransactionDB>, +} + +impl<'a> KVStore for TransactionSnapshotKVS<'a> { + fn set(&mut self, _key: Vec, _value: Vec) { + unreachable!("TransactionSnapshotKVS is read-only") + } + + fn get(&self, key: &[u8]) -> Option> { + self.snapshot.get(key).unwrap() + } + + fn remove(&mut self, _key: &[u8]) { + unreachable!("TransactionSnapshotKVS is read-only") + } +} + +/// RocksDBTxKind describes the transaction mode without allowing invalid +/// read/update/speculative flag combinations. 
+pub enum RocksDBTxKind { + Read, + Update(UpdateKey), + Speculative, +} + /// RocksDBTx is a transaction handle corresponding to `StoreTransaction` #[self_referencing] pub struct RocksDBTx { pub id: TxId, - pub update_key: Option, + pub kind: RocksDBTxKind, pub mutex: Option>>, #[borrows(mutex)] #[covariant] @@ -410,7 +509,7 @@ impl CreatedTx for RocksDBTx { let fields = self.into_heads(); let tx = RocksDBTxBuilder { id: fields.id, - update_key: fields.update_key, + kind: fields.kind, mutex: fields.mutex, mutex_guard_builder: |m| { if update { @@ -436,7 +535,7 @@ impl RocksDBTx { pub fn new_read_tx(id: TxId) -> Self { RocksDBTxBuilder { id, - update_key: None, + kind: RocksDBTxKind::Read, mutex: None, mutex_guard_builder: |_| None, marker: Default::default(), @@ -447,7 +546,7 @@ impl RocksDBTx { pub fn new_update_tx(id: TxId, update_key: UpdateKey, mutex: Rc>) -> Self { RocksDBTxBuilder { id, - update_key: Some(update_key), + kind: RocksDBTxKind::Update(update_key), mutex: Some(mutex), mutex_guard_builder: |_| None, marker: Default::default(), @@ -455,8 +554,23 @@ impl RocksDBTx { .build() } + pub fn new_speculative_tx(id: TxId) -> Self { + RocksDBTxBuilder { + id, + kind: RocksDBTxKind::Speculative, + mutex: None, + mutex_guard_builder: |_| None, + marker: Default::default(), + } + .build() + } + pub fn is_update_tx(&self) -> bool { - self.borrow_update_key().is_some() + matches!(self.borrow_kind(), RocksDBTxKind::Update(_)) + } + + pub fn is_speculative_tx(&self) -> bool { + matches!(self.borrow_kind(), RocksDBTxKind::Speculative) } } @@ -555,6 +669,39 @@ mod tests { assert_eq!(store.borrow_mutex().len(), 0); assert!(store.get(&key(0)).eq(&None)); } + + // case6: extract speculative writes without mutating canonical DB + { + let tx = store.create_speculative_transaction().unwrap(); + let tx = tx.prepare().unwrap(); + store.begin(&tx).unwrap(); + store.tx_set(tx.get_id(), key(1), value(1)).unwrap(); + store.tx_remove(tx.get_id(), &key(0)).unwrap(); + + let writes 
= store.take_write_set(tx).unwrap(); + assert_eq!(writes.get(&key(1)), Some(&Some(value(1)))); + assert_eq!(writes.get(&key(0)), Some(&None)); + assert_eq!(store.get(&key(1)), None); + assert_eq!(store.get(&key(0)), None); + assert_eq!(store.borrow_mutex().len(), 0); + } + + // case7: ordinary read transactions keep their legacy buffer but do not expose write sets + { + let tx = store.create_transaction(None).unwrap(); + let tx = tx.prepare().unwrap(); + store.begin(&tx).unwrap(); + store.tx_set(tx.get_id(), key(2), value(2)).unwrap(); + assert_eq!(store.tx_get(tx.get_id(), &key(2)).unwrap(), Some(value(2))); + assert!(store.take_write_set(tx).is_err()); + assert_eq!(store.get(&key(2)), None); + } + + // case8: read-only stores do not support speculative write extraction + { + let mut read_only_store = RocksDBStore::open_read_only(tmp_dir.as_ref()); + assert!(read_only_store.create_speculative_transaction().is_err()); + } } #[test] diff --git a/modules/store/src/transaction.rs b/modules/store/src/transaction.rs index e882d620..dd944465 100644 --- a/modules/store/src/transaction.rs +++ b/modules/store/src/transaction.rs @@ -1,5 +1,5 @@ use crate::prelude::*; -use crate::{KVStore, Result, TxId}; +use crate::{KVStore, Result, TxId, WriteSet}; /// `UpdateKey` is a hint to the store to control concurrent transactions pub type UpdateKey = String; @@ -27,12 +27,26 @@ pub trait CommitStore: Sync + Send { /// if `update_key` is None, it is desired that the store controls a transaction as read-only fn create_transaction(&mut self, update_key: Option) -> Result; + /// `create_speculative_transaction` creates a transaction whose writes remain isolated + /// and can be extracted with `take_write_set`. 
+ fn create_speculative_transaction(&mut self) -> Result { + self.create_transaction(None) + } + /// `begin` begins the transaction fn begin(&mut self, tx: &::PreparedTx) -> Result<()>; /// `commit` consume the transaction handle to commit the changes fn commit(&mut self, tx: ::PreparedTx) -> Result<()>; + /// `take_write_set` consumes a speculative transaction and returns its isolated writes + /// without mutating the canonical store. + fn take_write_set(&mut self, _tx: ::PreparedTx) -> Result { + Err(crate::Error::not_supported_operation( + "take_write_set".to_string(), + )) + } + /// `rollback` consume the transaction handle to rollback the changes fn rollback(&mut self, tx: ::PreparedTx); } From e19a38e685af6c4294941685ae86c6f40f02b868 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:24:48 +0900 Subject: [PATCH 03/48] enclave-api: add speculative command support --- enclave-modules/host-api/src/api.rs | 2 +- modules/enclave-api/src/api.rs | 7 +- modules/enclave-api/src/api/command.rs | 104 +++++++++++++- modules/enclave-api/src/api/primitive.rs | 91 ++++++++++-- modules/enclave-api/src/enclave.rs | 172 ++++++++++++++++++++++- modules/enclave-api/src/lib.rs | 6 +- modules/enclave-api/src/memory.rs | 2 + modules/enclave-api/src/rocksdb.rs | 2 + 8 files changed, 360 insertions(+), 26 deletions(-) diff --git a/enclave-modules/host-api/src/api.rs b/enclave-modules/host-api/src/api.rs index 51391b68..c79393bc 100644 --- a/enclave-modules/host-api/src/api.rs +++ b/enclave-modules/host-api/src/api.rs @@ -92,7 +92,7 @@ mod tests { assert_eq!(res.unwrap(), CommandResult::Log); } { - let tx = TestEnv.begin_tx(None).unwrap(); + let tx = TestEnv.begin_tx(Some("test-client".to_string())).unwrap(); let res = execute_command( StoreCommand::Set(tx.get_id(), b"k0".to_vec(), b"v0".to_vec()).into(), ); diff --git a/modules/enclave-api/src/api.rs b/modules/enclave-api/src/api.rs index 36bc70bf..b5fc9488 100644 --- a/modules/enclave-api/src/api.rs +++ 
b/modules/enclave-api/src/api.rs @@ -1,5 +1,8 @@ -pub use command::EnclaveCommandAPI; -pub use primitive::EnclavePrimitiveAPI; +pub use command::{ + EnclaveCommandAPI, SpeculativeBaseState, SpeculativeEnclaveCommandAPI, + SpeculativeUpdateClientInput, SpeculativeUpdateClientResponse, +}; +pub use primitive::{EnclavePrimitiveAPI, SpeculativeEnclavePrimitiveAPI}; pub use proto::EnclaveProtoAPI; mod command; diff --git a/modules/enclave-api/src/api/command.rs b/modules/enclave-api/src/api/command.rs index 831cef1e..e2cf8c74 100644 --- a/modules/enclave-api/src/api/command.rs +++ b/modules/enclave-api/src/api/command.rs @@ -1,4 +1,4 @@ -use crate::{EnclavePrimitiveAPI, Result}; +use crate::{CommitStoreAccessor, EnclavePrimitiveAPI, Result, SpeculativeEnclavePrimitiveAPI}; use attestation_report::QEType; use ecall_commands::{ AggregateMessagesInput, AggregateMessagesResponse, Command, CommandResponse, @@ -8,7 +8,30 @@ use ecall_commands::{ QueryClientResponse, UpdateClientInput, UpdateClientResponse, VerifyMembershipInput, VerifyMembershipResponse, VerifyNonMembershipInput, VerifyNonMembershipResponse, }; -use store::transaction::CommitStore; +use lcp_types::{store_key, Any, Height}; +use log::debug; +use store::transaction::{CommitStore, TxAccessor}; +use store::TxId; +use store::WriteSet; + +#[derive(Debug)] +pub struct SpeculativeUpdateClientInput { + pub update: UpdateClientInput, + pub base_state: Option, +} + +#[derive(Debug, Clone)] +pub struct SpeculativeBaseState { + pub prev_height: Option, + pub client_state: Any, + pub consensus_state: Any, +} + +#[derive(Debug)] +pub struct SpeculativeUpdateClientResponse { + pub response: UpdateClientResponse, + pub write_set: WriteSet, +} pub trait EnclaveCommandAPI: EnclavePrimitiveAPI { /// generate_enclave_key generates a new key and perform remote attestation to generates an AVR @@ -45,12 +68,12 @@ pub trait EnclaveCommandAPI: EnclavePrimitiveAPI { /// update_client updates the ELC instance corresponding to client_id 
fn update_client(&self, input: UpdateClientInput) -> Result { - let update_key = Some(input.client_id.to_string()); + let update_key = input.client_id.to_string(); match self.execute_command( Command::LightClient(LightClientCommand::Execute( LightClientExecuteCommand::UpdateClient(input), )), - update_key, + Some(update_key), )? { CommandResponse::LightClient(LightClientResponse::UpdateClient(res)) => Ok(res), _ => unreachable!(), @@ -115,3 +138,76 @@ pub trait EnclaveCommandAPI: EnclavePrimitiveAPI { } } } + +pub trait SpeculativeEnclaveCommandAPI: + EnclaveCommandAPI + SpeculativeEnclavePrimitiveAPI +{ + /// speculative_update_client executes `UpdateClient` against an isolated host-side view and + /// returns both the response and the speculative write set for later stitching. + fn speculative_update_client( + &self, + input: SpeculativeUpdateClientInput, + ) -> Result + where + Self: Sized, + { + debug!( + "prepare speculative command with base state: has_base_state={}", + input.base_state.is_some() + ); + let client_id = input.update.client_id.to_string(); + let base_state = input.base_state; + + let cmd = Command::LightClient(LightClientCommand::Execute( + LightClientExecuteCommand::UpdateClient(input.update), + )); + let (res, write_set) = self.execute_command_speculatively_with_seed(cmd, |tx_id| { + if let Some(base_state) = base_state.as_ref() { + seed_speculative_base_state(self, tx_id, &client_id, base_state)?; + } + Ok(()) + })?; + + match res { + CommandResponse::LightClient(LightClientResponse::UpdateClient(response)) => { + Ok(SpeculativeUpdateClientResponse { + response, + write_set, + }) + } + _ => unreachable!(), + } + } +} + +fn seed_speculative_base_state( + enclave: &(impl CommitStoreAccessor + ?Sized), + tx_id: TxId, + client_id: &str, + base_state: &SpeculativeBaseState, +) -> Result<()> { + let client_state_key = store_key::client_state_bytes(client_id); + let client_state_value = + bincode::serde::encode_to_vec(&base_state.client_state, 
bincode::config::standard()) + .map_err(crate::errors::Error::bincode_encode)?; + enclave.use_mut_store(|store| store.tx_set(tx_id, client_state_key, client_state_value))?; + + // The client state is always seeded, but the consensus state is keyed by + // the predecessor height and is therefore only seeded when prev_height is + // present. Callers may still preserve explicit prev_state_id metadata above + // this layer; that identifier is validation metadata, not a store key. + if let Some(prev_height) = base_state.prev_height { + debug_assert!( + !base_state.consensus_state.type_url.is_empty(), + "seeded consensus state should carry a concrete type" + ); + let consensus_state_key = store_key::consensus_state_bytes(client_id, &prev_height); + let consensus_state_value = + bincode::serde::encode_to_vec(&base_state.consensus_state, bincode::config::standard()) + .map_err(crate::errors::Error::bincode_encode)?; + enclave.use_mut_store(|store| { + store.tx_set(tx_id, consensus_state_key, consensus_state_value) + })?; + } + Ok(()) +} diff --git a/modules/enclave-api/src/api/primitive.rs b/modules/enclave-api/src/api/primitive.rs index 646b9556..26ab0bc6 100644 --- a/modules/enclave-api/src/api/primitive.rs +++ b/modules/enclave-api/src/api/primitive.rs @@ -6,7 +6,9 @@ use ecall_commands::{Command, CommandContext, CommandResponse, ECallCommand, Enc use lcp_types::Time; use log::*; use sgx_types::{sgx_enclave_id_t, sgx_status_t}; -use store::transaction::{CommitStore, Tx}; +use store::transaction::{CommitStore, Tx, TxAccessor}; +use store::TxId; +use store::WriteSet; pub trait EnclavePrimitiveAPI: EnclaveInfo + HostStoreTxManager { /// execute_command runs a given command in the enclave @@ -15,20 +17,8 @@ pub trait EnclavePrimitiveAPI: EnclaveInfo + HostStoreTxManager< "prepare command: inner={:?} update_key={:?}", cmd, update_key ); - let current_timestamp = Time::now(); let tx = self.begin_tx(update_key)?; - - let cctx = match cmd.get_enclave_key() { - Some(addr) => { - 
let ski = self.get_key_manager().load(addr)?; - CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx.get_id()) - } - None => CommandContext::new(current_timestamp, None, tx.get_id()), - }; - - let ecmd = ECallCommand::new(cctx, cmd); - debug!("try to execute command: {:?}", ecmd); - match raw_execute_command(self.get_eid(), ecmd) { + match execute_prepared_command(self, cmd, tx.get_id()) { Ok(res) => { self.commit_tx(tx)?; debug!("execute_command succeeded: res={:?}", res); @@ -43,7 +33,78 @@ pub trait EnclavePrimitiveAPI: EnclaveInfo + HostStoreTxManager< } } -fn raw_execute_command(eid: sgx_enclave_id_t, cmd: ECallCommand) -> Result { +pub trait SpeculativeEnclavePrimitiveAPI: + EnclavePrimitiveAPI +{ + /// execute_command_speculatively runs a command against an isolated host-side view and returns + /// the response together with the speculative write set instead of committing it. + fn execute_command_speculatively(&self, cmd: Command) -> Result<(CommandResponse, WriteSet)> { + self.execute_command_speculatively_with_seed(cmd, |_| Ok(())) + } + + /// execute_command_speculatively_with_seed runs a command against an isolated host-side view + /// after giving the caller a chance to seed the speculative transaction. 
+ fn execute_command_speculatively_with_seed( + &self, + cmd: Command, + seed: impl FnOnce(TxId) -> Result<()>, + ) -> Result<(CommandResponse, WriteSet)> { + debug!("prepare speculative command: inner={:?}", cmd); + let tx = self.begin_speculative_tx()?; + if let Err(e) = seed(tx.get_id()) { + self.rollback_tx(tx); + return Err(e); + } + match execute_prepared_command(self, cmd, tx.get_id()) { + Ok(res) => { + let writes = self.use_mut_store(|store| store.take_write_set(tx))?; + debug!( + "execute_command_speculatively succeeded: res={:?} writes={}", + res, + writes.len() + ); + Ok((res, writes)) + } + Err(e) => { + self.rollback_tx(tx); + debug!("execute_command_speculatively failed: err={:?}", e); + Err(e) + } + } + } +} + +impl SpeculativeEnclavePrimitiveAPI for T +where + S: CommitStore + TxAccessor, + T: EnclavePrimitiveAPI, +{ +} + +/// execute_prepared_command runs a command against an already begun host-store transaction. +fn execute_prepared_command( + enclave: &(impl EnclaveInfo + ?Sized), + cmd: Command, + tx_id: TxId, +) -> Result { + let current_timestamp = Time::now(); + let cctx = match cmd.get_enclave_key() { + Some(addr) => { + let ski = enclave.get_key_manager().load(addr)?; + CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx_id) + } + None => CommandContext::new(current_timestamp, None, tx_id), + }; + + let ecmd = ECallCommand::new(cctx, cmd); + debug!("try to execute command: {:?}", ecmd); + enclave.with_ecall_permit(|| raw_execute_command(enclave.get_eid(), ecmd)) +} + +pub(crate) fn raw_execute_command( + eid: sgx_enclave_id_t, + cmd: ECallCommand, +) -> Result { let mut output_len = 0; let output_maxlen = 65536; let mut output_buf = Vec::with_capacity(output_maxlen); diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 609ffecd..ef8a0140 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -4,10 +4,11 @@ use lcp_types::EnclaveMetadata; use 
sgx_types::{sgx_enclave_id_t, SgxResult}; use sgx_urts::SgxEnclave; use std::path::PathBuf; -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, Condvar, Mutex, RwLock}; use std::{marker::PhantomData, ops::DerefMut}; use store::host::{HostStore, IntoCommitStore}; -use store::transaction::{CommitStore, CreatedTx, UpdateKey}; +use store::transaction::{CommitStore, CreatedTx, Tx, TxAccessor, UpdateKey}; +use store::WriteSet; /// `Enclave` keeps an enclave id and reference to the host environement pub struct Enclave { @@ -15,21 +16,56 @@ pub struct Enclave { pub(crate) key_manager: EnclaveKeyManager, pub(crate) store: Arc>, pub(crate) sgx_enclave: SgxEnclave, + pub(crate) ecall_gate: Arc, _marker: PhantomData, } +#[derive(Debug)] +pub(crate) struct ECallGate { + state: Mutex, + ready: Condvar, +} + +#[derive(Debug)] +struct ECallGateState { + available: usize, +} + +struct ECallPermitGuard<'a> { + gate: &'a ECallGate, +} + impl Enclave { + pub const DEFAULT_ECALL_CONCURRENCY: usize = 4; + pub fn new( path: impl Into, key_manager: EnclaveKeyManager, store: Arc>, sgx_enclave: SgxEnclave, + ) -> Self { + Self::new_with_ecall_concurrency( + path, + key_manager, + store, + sgx_enclave, + Self::DEFAULT_ECALL_CONCURRENCY, + ) + } + + pub fn new_with_ecall_concurrency( + path: impl Into, + key_manager: EnclaveKeyManager, + store: Arc>, + sgx_enclave: SgxEnclave, + ecall_concurrency: usize, ) -> Self { Enclave { path: path.into(), key_manager, store, sgx_enclave, + ecall_gate: Arc::new(ECallGate::new(ecall_concurrency)), _marker: PhantomData, } } @@ -39,10 +75,32 @@ impl Enclave { debug: bool, key_manager: EnclaveKeyManager, store: Arc>, + ) -> SgxResult { + Self::create_with_ecall_concurrency( + path, + debug, + key_manager, + store, + Self::DEFAULT_ECALL_CONCURRENCY, + ) + } + + pub fn create_with_ecall_concurrency( + path: impl Into, + debug: bool, + key_manager: EnclaveKeyManager, + store: Arc>, + ecall_concurrency: usize, ) -> SgxResult { let path = path.into(); let 
enclave = host::create_enclave(path.clone(), debug)?; - Ok(Self::new(path, key_manager, store, enclave)) + Ok(Self::new_with_ecall_concurrency( + path, + key_manager, + store, + enclave, + ecall_concurrency, + )) } pub fn destroy(self) { @@ -50,6 +108,39 @@ impl Enclave { } } +impl ECallGate { + fn new(permits: usize) -> Self { + Self { + state: Mutex::new(ECallGateState { + available: permits.max(1), + }), + ready: Condvar::new(), + } + } + + fn with_permit(&self, f: impl FnOnce() -> Result) -> Result { + let _permit = self.acquire(); + f() + } + + fn acquire(&self) -> ECallPermitGuard<'_> { + let mut state = self.state.lock().unwrap(); + while state.available == 0 { + state = self.ready.wait(state).unwrap(); + } + state.available -= 1; + ECallPermitGuard { gate: self } + } +} + +impl Drop for ECallPermitGuard<'_> { + fn drop(&mut self) { + let mut state = self.gate.state.lock().unwrap(); + state.available += 1; + self.gate.ready.notify_one(); + } +} + /// `EnclaveInfo` is an accessor to enclave information pub trait EnclaveInfo: Sync + Send { /// `get_eid` returns the enclave id @@ -60,6 +151,10 @@ pub trait EnclaveInfo: Sync + Send { fn is_debug(&self) -> bool; /// `get_key_manager` returns a key manager for Enclave Keys fn get_key_manager(&self) -> &EnclaveKeyManager; + /// `with_ecall_permit` guards entry into enclave ECALLs. 
+ fn with_ecall_permit(&self, f: impl FnOnce() -> Result) -> Result { + f() + } } impl EnclaveInfo for Enclave { @@ -79,6 +174,9 @@ impl EnclaveInfo for Enclave { fn get_key_manager(&self) -> &EnclaveKeyManager { &self.key_manager } + fn with_ecall_permit(&self, f: impl FnOnce() -> Result) -> Result { + self.ecall_gate.with_permit(f) + } } /// `HostStoreTxManager` is a transaction manager for the host store @@ -91,12 +189,41 @@ pub trait HostStoreTxManager: CommitStoreAccessor { Ok(tx) } + /// `begin_speculative_tx` creates a transaction whose writes remain isolated + /// from the canonical store until an explicit stitch/commit phase exists above this layer. + fn begin_speculative_tx(&self) -> Result<::PreparedTx> { + let tx = self.use_mut_store(|store| store.create_speculative_transaction())?; + let tx = tx.prepare()?; + self.use_mut_store(|store| store.begin(&tx))?; + Ok(tx) + } + /// `commit_tx` commits the changes in the transaction fn commit_tx(&self, tx: ::PreparedTx) -> Result<()> { self.use_mut_store(|store| store.commit(tx))?; Ok(()) } + /// `apply_write_set` applies a speculative write set to the canonical store under a + /// serialized update transaction keyed by `update_key`. 
+ fn apply_write_set(&self, update_key: UpdateKey, write_set: WriteSet) -> Result<()> + where + S: TxAccessor, + { + let tx = self.begin_tx(Some(update_key))?; + let tx_id = tx.get_id(); + for (key, value) in write_set { + if let Err(e) = self.use_mut_store(|store| match value { + Some(value) => store.tx_set(tx_id, key, value), + None => store.tx_remove(tx_id, &key), + }) { + self.rollback_tx(tx); + return Err(e.into()); + } + } + self.commit_tx(tx) + } + /// `rollback_tx` rollbacks the changes in the transaction fn rollback_tx(&self, tx: ::PreparedTx) { self.use_mut_store(|store| store.rollback(tx)); @@ -118,3 +245,42 @@ where store.deref_mut().apply(f) } } + +#[cfg(test)] +mod tests { + use super::ECallGate; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn ecall_gate_limits_concurrency() { + let gate = Arc::new(ECallGate::new(2)); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for _ in 0..6 { + let gate = gate.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + gate.with_permit(|| { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + Ok(()) + }) + .unwrap(); + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert_eq!(observed_max.load(Ordering::SeqCst), 2); + } +} diff --git a/modules/enclave-api/src/lib.rs b/modules/enclave-api/src/lib.rs index 4225408f..61b846ef 100644 --- a/modules/enclave-api/src/lib.rs +++ b/modules/enclave-api/src/lib.rs @@ -1,5 +1,9 @@ #![allow(clippy::result_large_err)] -pub use api::{EnclaveCommandAPI, EnclavePrimitiveAPI, EnclaveProtoAPI}; +pub use api::{ + EnclaveCommandAPI, EnclavePrimitiveAPI, EnclaveProtoAPI, 
SpeculativeBaseState, + SpeculativeEnclaveCommandAPI, SpeculativeEnclavePrimitiveAPI, SpeculativeUpdateClientInput, + SpeculativeUpdateClientResponse, +}; pub use enclave::{CommitStoreAccessor, Enclave, EnclaveInfo, HostStoreTxManager}; pub use errors::Error; use errors::Result; diff --git a/modules/enclave-api/src/memory.rs b/modules/enclave-api/src/memory.rs index 42832b01..5bb65907 100644 --- a/modules/enclave-api/src/memory.rs +++ b/modules/enclave-api/src/memory.rs @@ -1,9 +1,11 @@ use crate::{ enclave::HostStoreTxManager, Enclave, EnclaveCommandAPI, EnclavePrimitiveAPI, EnclaveProtoAPI, + SpeculativeEnclaveCommandAPI, }; use store::memory::MemStore; impl HostStoreTxManager for Enclave {} impl EnclavePrimitiveAPI for Enclave {} impl EnclaveCommandAPI for Enclave {} +impl SpeculativeEnclaveCommandAPI for Enclave {} impl EnclaveProtoAPI for Enclave {} diff --git a/modules/enclave-api/src/rocksdb.rs b/modules/enclave-api/src/rocksdb.rs index 0b25cf11..6f237417 100644 --- a/modules/enclave-api/src/rocksdb.rs +++ b/modules/enclave-api/src/rocksdb.rs @@ -1,9 +1,11 @@ use crate::{ enclave::HostStoreTxManager, Enclave, EnclaveCommandAPI, EnclavePrimitiveAPI, EnclaveProtoAPI, + SpeculativeEnclaveCommandAPI, }; use store::rocksdb::RocksDBStore; impl HostStoreTxManager for Enclave {} impl EnclavePrimitiveAPI for Enclave {} impl EnclaveCommandAPI for Enclave {} +impl SpeculativeEnclaveCommandAPI for Enclave {} impl EnclaveProtoAPI for Enclave {} From 478f836e7665cc08a96a885b19f9e77467958dd2 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:25:42 +0900 Subject: [PATCH 04/48] proto: add speculative update client stream RPC --- proto/definitions/lcp/service/elc/v1/tx.proto | 110 ++++ proto/src/prost/lcp.service.elc.v1.rs | 604 ++++++++++-------- 2 files changed, 447 insertions(+), 267 deletions(-) diff --git a/proto/definitions/lcp/service/elc/v1/tx.proto b/proto/definitions/lcp/service/elc/v1/tx.proto index 174e4a62..0f9a4106 100644 --- 
a/proto/definitions/lcp/service/elc/v1/tx.proto +++ b/proto/definitions/lcp/service/elc/v1/tx.proto @@ -19,6 +19,10 @@ service Msg { // UpdateClientStream defines a rpc handler method for MsgUpdateClient. rpc UpdateClientStream(stream MsgUpdateClientStreamChunk) returns (MsgUpdateClientResponse); + // SpeculativeUpdateClientBatchStream defines a client-streaming rpc + // handler method for explicit-state speculative UpdateClient batch execution. + rpc SpeculativeUpdateClientBatchStream(stream MsgSpeculativeUpdateClientBatchStreamChunk) returns (ExecuteSpeculativeUpdateClientBatchResponse); + // AggregateMessages defines a rpc handler method for MsgAggregateMessages rpc AggregateMessages(MsgAggregateMessages) returns (MsgAggregateMessagesResponse); @@ -155,3 +159,109 @@ message UpdateClientStreamInit { message UpdateClientStreamHeaderChunk { bytes data = 1; } + +message ExplicitStateRef { + // Expected state metadata before executing a speculative unit. When provided, + // prev_height and prev_state_id are checked against the transition observed + // from the enclave response. + ibc.core.client.v1.Height prev_height = 1; + bytes prev_state_id = 2; + // Client and consensus states to seed into the unit's isolated speculative + // transaction before executing UpdateClient. + google.protobuf.Any client_state = 3; + google.protobuf.Any consensus_state = 4; +} + +// MsgSpeculativeUpdateClientBatchStreamChunk transports one explicit-state +// speculative UpdateClient batch. A single stream starts with exactly one +// batch-level init message and then sends zero or more units in order: +// +// init, +// unit_init, unit_header_chunk..., unit_end, +// unit_init, unit_header_chunk..., unit_end, ... +// +// The first implementation does not interleave chunks from multiple units. +message MsgSpeculativeUpdateClientBatchStreamChunk { + oneof chunk { + // Declares the batch-level client_id. All following units in this RPC + // stream are executed for this client_id. 
+ SpeculativeUpdateClientBatchStreamInit init = 1; + // Starts one speculative UpdateClient unit and carries all unit metadata + // except the header bytes. + SpeculativeUpdateClientUnitInit unit_init = 2; + // Carries one bounded slice of the currently open unit's header bytes. + SpeculativeUpdateClientUnitHeaderChunk unit_header_chunk = 3; + // Closes the currently open unit. The receiver reconstructs a + // MsgUpdateClient from unit_init plus all accumulated header chunks. + SpeculativeUpdateClientUnitEnd unit_end = 4; + // Explicitly terminates the batch. EOF without this chunk is treated as a + // truncated stream and must not stitch a partial prefix. + SpeculativeUpdateClientBatchEnd batch_end = 5; + } +} + +message SpeculativeUpdateClientBatchStreamInit { + // Target client for the whole batch stream. Mixed-client batches are not + // supported; unit messages do not carry their own client_id. + string client_id = 1; +} + +// SpeculativeUpdateClientUnitInit starts a single speculative UpdateClient work +// unit. Units form a linear chain in stream order: the first unit starts from +// its explicit base_state, and each following unit is rebased onto the previous +// unit's observed post state before execution. +message SpeculativeUpdateClientUnitInit { + reserved 6; + + // Unique unit identifier within this batch. It is used for diagnostics and + // response correlation; it does not define execution dependencies. + string unit_id = 1; + // Type URL for the google.protobuf.Any header reconstructed from the + // following header chunks. + string type_url = 2; + bool include_state = 3; + bytes signer = 4; + // Explicit state used as the starting view for this unit's isolated + // speculative transaction. + ExplicitStateRef base_state = 5 [(gogoproto.nullable) = false]; +} + +message SpeculativeUpdateClientUnitHeaderChunk { + // Must match the currently open unit_id. This allows the receiver to detect + // misplaced chunks even though chunks are not interleaved. 
+ string unit_id = 1; + // Raw bytes appended to the reconstructed google.protobuf.Any header value. + bytes data = 2; +} + +message SpeculativeUpdateClientUnitEnd { + // Must match the currently open unit_id. + string unit_id = 1; +} + +message SpeculativeUpdateClientBatchEnd {} + +// ObservedStateTransition is decoded from the enclave UpdateClient response. +// It records the actual state transition used by a speculative unit, not merely +// caller-provided expectations. The service compares prev_* with base_state and +// the previous unit's post_* values before stitching. +message ObservedStateTransition { + ibc.core.client.v1.Height prev_height = 1; + bytes prev_state_id = 2; + ibc.core.client.v1.Height post_height = 3 [(gogoproto.nullable) = false]; + bytes post_state_id = 4; +} + +// Per-unit result returned only after the batch has been validated and its +// merged write set has been applied to the canonical store. +message StitchedSpeculativeUpdateClientUnitResult { + MsgUpdateClientResponse response = 1 [(gogoproto.nullable) = false]; + ObservedStateTransition observed_transition = 2 [(gogoproto.nullable) = false]; +} + +message ExecuteSpeculativeUpdateClientBatchResponse { + // The batch-level client_id from the request stream. + string client_id = 1; + // Unit results in the same order as accepted request units. + repeated StitchedSpeculativeUpdateClientUnitResult units = 2; +} diff --git a/proto/src/prost/lcp.service.elc.v1.rs b/proto/src/prost/lcp.service.elc.v1.rs index ff703eee..95cbb8a4 100644 --- a/proto/src/prost/lcp.service.elc.v1.rs +++ b/proto/src/prost/lcp.service.elc.v1.rs @@ -14,22 +14,18 @@ pub struct QueryClientResponse { pub found: bool, /// light client state #[prost(message, optional, tag = "2")] - pub client_state: ::core::option::Option< - super::super::super::super::google::protobuf::Any, - >, + pub client_state: ::core::option::Option, /// consensus state associated with the client that corresponds to a given /// height. 
#[prost(message, optional, tag = "3")] - pub consensus_state: ::core::option::Option< - super::super::super::super::google::protobuf::Any, - >, + pub consensus_state: ::core::option::Option, } /// Generated client implementations. #[cfg(feature = "client")] pub mod query_client { #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] - use tonic::codegen::*; use tonic::codegen::http::Uri; + use tonic::codegen::*; /// Query defines the ELC Query service. #[derive(Debug, Clone)] pub struct QueryClient { @@ -74,9 +70,8 @@ pub mod query_client { >::ResponseBody, >, >, - , - >>::Error: Into + Send + Sync, + >>::Error: + Into + Send + Sync, { QueryClient::new(InterceptedService::new(inner, interceptor)) } @@ -99,19 +94,14 @@ pub mod query_client { &mut self, request: impl tonic::IntoRequest, ) -> Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Query/Client", - ); + let path = http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Query/Client"); self.inner.unary(request.into_request(), path, codec).await } } @@ -149,10 +139,7 @@ pub mod query_server { send_compression_encodings: Default::default(), } } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService + pub fn with_interceptor(inner: T, interceptor: F) -> InterceptedService where F: tonic::service::Interceptor, { @@ -180,10 +167,7 @@ pub mod query_server { type Response = http::Response; type Error = std::convert::Infallible; type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut Context<'_>, - ) -> Poll> { + fn poll_ready(&mut self, _cx: &mut 
Context<'_>) -> Poll> { Poll::Ready(Ok(())) } fn call(&mut self, req: http::Request) -> Self::Future { @@ -192,13 +176,9 @@ pub mod query_server { "/lcp.service.elc.v1.Query/Client" => { #[allow(non_camel_case_types)] struct ClientSvc(pub Arc); - impl tonic::server::UnaryService - for ClientSvc { + impl tonic::server::UnaryService for ClientSvc { type Response = super::QueryClientResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, @@ -215,28 +195,23 @@ pub mod query_server { let inner = inner.0; let method = ClientSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.unary(method, req).await; Ok(res) }; Box::pin(fut) } - _ => { - Box::pin(async move { - Ok( - http::Response::builder() - .status(200) - .header("grpc-status", "12") - .header("content-type", "application/grpc") - .body(empty_body()) - .unwrap(), - ) - }) - } + _ => Box::pin(async move { + Ok(http::Response::builder() + .status(200) + .header("grpc-status", "12") + .header("content-type", "application/grpc") + .body(empty_body()) + .unwrap()) + }), } } } @@ -273,15 +248,11 @@ pub struct MsgCreateClient { pub client_id: ::prost::alloc::string::String, /// light client state #[prost(message, optional, tag = "2")] - pub client_state: ::core::option::Option< - super::super::super::super::google::protobuf::Any, - >, + pub client_state: ::core::option::Option, /// consensus state associated with the client that corresponds to a given /// height. 
#[prost(message, optional, tag = "3")] - pub consensus_state: ::core::option::Option< - super::super::super::super::google::protobuf::Any, - >, + pub consensus_state: ::core::option::Option, /// enclave key for signing #[prost(bytes = "vec", tag = "4")] pub signer: ::prost::alloc::vec::Vec, @@ -307,9 +278,7 @@ pub struct MsgUpdateClient { pub client_id: ::prost::alloc::string::String, /// header to update the light client #[prost(message, optional, tag = "2")] - pub header: ::core::option::Option< - super::super::super::super::google::protobuf::Any, - >, + pub header: ::core::option::Option, /// request to emit state #[prost(bool, tag = "3")] pub include_state: bool, @@ -360,9 +329,8 @@ pub struct MsgVerifyMembership { #[prost(bytes = "vec", tag = "4")] pub value: ::prost::alloc::vec::Vec, #[prost(message, optional, tag = "5")] - pub proof_height: ::core::option::Option< - super::super::super::super::ibc::core::client::v1::Height, - >, + pub proof_height: + ::core::option::Option, #[prost(bytes = "vec", tag = "6")] pub proof: ::prost::alloc::vec::Vec, /// enclave key for signing @@ -389,9 +357,8 @@ pub struct MsgVerifyNonMembership { #[prost(string, tag = "3")] pub path: ::prost::alloc::string::String, #[prost(message, optional, tag = "4")] - pub proof_height: ::core::option::Option< - super::super::super::super::ibc::core::client::v1::Height, - >, + pub proof_height: + ::core::option::Option, #[prost(bytes = "vec", tag = "5")] pub proof: ::prost::alloc::vec::Vec, /// enclave key for signing @@ -446,12 +413,129 @@ pub struct UpdateClientStreamHeaderChunk { #[prost(bytes = "vec", tag = "1")] pub data: ::prost::alloc::vec::Vec, } +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExplicitStateRef { + #[prost(message, optional, tag = "1")] + pub prev_height: + ::core::option::Option, + #[prost(bytes = "vec", tag = "2")] + pub prev_state_id: 
::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub client_state: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub consensus_state: ::core::option::Option, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MsgSpeculativeUpdateClientBatchStreamChunk { + #[prost( + oneof = "msg_speculative_update_client_batch_stream_chunk::Chunk", + tags = "1, 2, 3, 4, 5" + )] + pub chunk: ::core::option::Option, +} +/// Nested message and enum types in `MsgSpeculativeUpdateClientBatchStreamChunk`. +pub mod msg_speculative_update_client_batch_stream_chunk { + #[derive(::serde::Serialize, ::serde::Deserialize)] + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Chunk { + #[prost(message, tag = "1")] + Init(super::SpeculativeUpdateClientBatchStreamInit), + #[prost(message, tag = "2")] + UnitInit(super::SpeculativeUpdateClientUnitInit), + #[prost(message, tag = "3")] + UnitHeaderChunk(super::SpeculativeUpdateClientUnitHeaderChunk), + #[prost(message, tag = "4")] + UnitEnd(super::SpeculativeUpdateClientUnitEnd), + #[prost(message, tag = "5")] + BatchEnd(super::SpeculativeUpdateClientBatchEnd), + } +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SpeculativeUpdateClientBatchStreamInit { + #[prost(string, tag = "1")] + pub client_id: ::prost::alloc::string::String, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SpeculativeUpdateClientUnitInit { + #[prost(string, tag = "1")] + pub unit_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub type_url: ::prost::alloc::string::String, + #[prost(bool, tag = "3")] + pub include_state: bool, + 
#[prost(bytes = "vec", tag = "4")] + pub signer: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "5")] + pub base_state: ::core::option::Option, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SpeculativeUpdateClientUnitHeaderChunk { + #[prost(string, tag = "1")] + pub unit_id: ::prost::alloc::string::String, + #[prost(bytes = "vec", tag = "2")] + pub data: ::prost::alloc::vec::Vec, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SpeculativeUpdateClientUnitEnd { + #[prost(string, tag = "1")] + pub unit_id: ::prost::alloc::string::String, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SpeculativeUpdateClientBatchEnd {} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ObservedStateTransition { + #[prost(message, optional, tag = "1")] + pub prev_height: + ::core::option::Option, + #[prost(bytes = "vec", tag = "2")] + pub prev_state_id: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub post_height: + ::core::option::Option, + #[prost(bytes = "vec", tag = "4")] + pub post_state_id: ::prost::alloc::vec::Vec, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StitchedSpeculativeUpdateClientUnitResult { + #[prost(message, optional, tag = "1")] + pub response: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub observed_transition: ::core::option::Option, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] 
+#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecuteSpeculativeUpdateClientBatchResponse { + #[prost(string, tag = "1")] + pub client_id: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub units: ::prost::alloc::vec::Vec, +} /// Generated client implementations. #[cfg(feature = "client")] pub mod msg_client { #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)] - use tonic::codegen::*; use tonic::codegen::http::Uri; + use tonic::codegen::*; /// Msg defines the ELC Msg service. #[derive(Debug, Clone)] pub struct MsgClient { @@ -483,10 +567,7 @@ pub mod msg_client { let inner = tonic::client::Grpc::with_origin(inner, origin); Self { inner } } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> MsgClient> + pub fn with_interceptor(inner: T, interceptor: F) -> MsgClient> where F: tonic::service::Interceptor, T::ResponseBody: Default, @@ -496,9 +577,8 @@ pub mod msg_client { >::ResponseBody, >, >, - , - >>::Error: Into + Send + Sync, + >>::Error: + Into + Send + Sync, { MsgClient::new(InterceptedService::new(inner, interceptor)) } @@ -522,19 +602,14 @@ pub mod msg_client { &mut self, request: impl tonic::IntoRequest, ) -> Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/CreateClient", - ); + let path = http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/CreateClient"); self.inner.unary(request.into_request(), path, codec).await } /// UpdateClient defines a rpc handler method for MsgUpdateClient. 
@@ -542,40 +617,55 @@ pub mod msg_client { &mut self, request: impl tonic::IntoRequest, ) -> Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/UpdateClient", - ); + let path = http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/UpdateClient"); self.inner.unary(request.into_request(), path, codec).await } /// UpdateClientStream defines a rpc handler method for MsgUpdateClient. pub async fn update_client_stream( &mut self, - request: impl tonic::IntoStreamingRequest< - Message = super::MsgUpdateClientStreamChunk, - >, + request: impl tonic::IntoStreamingRequest, ) -> Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = + http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/UpdateClientStream"); self.inner - .ready() + .client_streaming(request.into_streaming_request(), path, codec) .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + } + /// SpeculativeUpdateClientBatchStream defines a client-streaming + /// rpc handler method for explicit-state speculative UpdateClient batch + /// execution. 
+ pub async fn speculative_update_client_batch_stream( + &mut self, + request: impl tonic::IntoStreamingRequest< + Message = super::MsgSpeculativeUpdateClientBatchStreamChunk, + >, + ) -> Result< + tonic::Response, + tonic::Status, + > { + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/UpdateClientStream", + "/lcp.service.elc.v1.Msg/SpeculativeUpdateClientBatchStream", ); self.inner .client_streaming(request.into_streaming_request(), path, codec) @@ -585,23 +675,16 @@ pub mod msg_client { pub async fn aggregate_messages( &mut self, request: impl tonic::IntoRequest, - ) -> Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + ) -> Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/AggregateMessages", - ); + let path = + http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/AggregateMessages"); self.inner.unary(request.into_request(), path, codec).await } /// VerifyMembership defines a rpc handler method for MsgVerifyMembership @@ -609,42 +692,31 @@ pub mod msg_client { &mut self, request: impl tonic::IntoRequest, ) -> Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; 
let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/VerifyMembership", - ); + let path = + http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/VerifyMembership"); self.inner.unary(request.into_request(), path, codec).await } /// VerifyNonMembership defines a rpc handler method for MsgVerifyNonMembership pub async fn verify_non_membership( &mut self, request: impl tonic::IntoRequest, - ) -> Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::new( - tonic::Code::Unknown, - format!("Service was not ready: {}", e.into()), - ) - })?; + ) -> Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/lcp.service.elc.v1.Msg/VerifyNonMembership", - ); + let path = + http::uri::PathAndQuery::from_static("/lcp.service.elc.v1.Msg/VerifyNonMembership"); self.inner.unary(request.into_request(), path, codec).await } } @@ -672,6 +744,18 @@ pub mod msg_server { &self, request: tonic::Request>, ) -> Result, tonic::Status>; + /// SpeculativeUpdateClientBatchStream defines a client-streaming + /// rpc handler method for explicit-state speculative UpdateClient batch + /// execution. + async fn speculative_update_client_batch_stream( + &self, + request: tonic::Request< + tonic::Streaming, + >, + ) -> Result< + tonic::Response, + tonic::Status, + >; /// AggregateMessages defines a rpc handler method for MsgAggregateMessages async fn aggregate_messages( &self, @@ -686,10 +770,7 @@ pub mod msg_server { async fn verify_non_membership( &self, request: tonic::Request, - ) -> Result< - tonic::Response, - tonic::Status, - >; + ) -> Result, tonic::Status>; } /// Msg defines the ELC Msg service. 
#[derive(Debug)] @@ -711,10 +792,7 @@ pub mod msg_server { send_compression_encodings: Default::default(), } } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService + pub fn with_interceptor(inner: T, interceptor: F) -> InterceptedService where F: tonic::service::Interceptor, { @@ -742,10 +820,7 @@ pub mod msg_server { type Response = http::Response; type Error = std::convert::Infallible; type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut Context<'_>, - ) -> Poll> { + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { Poll::Ready(Ok(())) } fn call(&mut self, req: http::Request) -> Self::Future { @@ -754,21 +829,15 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/CreateClient" => { #[allow(non_camel_case_types)] struct CreateClientSvc(pub Arc); - impl tonic::server::UnaryService - for CreateClientSvc { + impl tonic::server::UnaryService for CreateClientSvc { type Response = super::MsgCreateClientResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = self.0.clone(); - let fut = async move { - (*inner).create_client(request).await - }; + let fut = async move { (*inner).create_client(request).await }; Box::pin(fut) } } @@ -779,11 +848,10 @@ pub mod msg_server { let inner = inner.0; let method = CreateClientSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.unary(method, req).await; Ok(res) }; @@ -792,21 +860,15 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/UpdateClient" => { #[allow(non_camel_case_types)] struct UpdateClientSvc(pub Arc); - impl 
tonic::server::UnaryService - for UpdateClientSvc { + impl tonic::server::UnaryService for UpdateClientSvc { type Response = super::MsgUpdateClientResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = self.0.clone(); - let fut = async move { - (*inner).update_client(request).await - }; + let fut = async move { (*inner).update_client(request).await }; Box::pin(fut) } } @@ -817,11 +879,10 @@ pub mod msg_server { let inner = inner.0; let method = UpdateClientSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.unary(method, req).await; Ok(res) }; @@ -830,25 +891,60 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/UpdateClientStream" => { #[allow(non_camel_case_types)] struct UpdateClientStreamSvc(pub Arc); - impl< - T: Msg, - > tonic::server::ClientStreamingService< - super::MsgUpdateClientStreamChunk, - > for UpdateClientStreamSvc { + impl + tonic::server::ClientStreamingService + for UpdateClientStreamSvc + { type Response = super::MsgUpdateClientResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request< tonic::Streaming, >, + ) -> Self::Future { + let inner = self.0.clone(); + let fut = async move { (*inner).update_client_stream(request).await }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let inner = self.inner.clone(); + let fut = async move { + let inner = inner.0; + let method = 
UpdateClientStreamSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); + let res = grpc.client_streaming(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/lcp.service.elc.v1.Msg/SpeculativeUpdateClientBatchStream" => { + #[allow(non_camel_case_types)] + struct SpeculativeUpdateClientBatchStreamSvc(pub Arc); + impl + tonic::server::ClientStreamingService< + super::MsgSpeculativeUpdateClientBatchStreamChunk, + > for SpeculativeUpdateClientBatchStreamSvc + { + type Response = super::ExecuteSpeculativeUpdateClientBatchResponse; + type Future = BoxFuture, tonic::Status>; + fn call( + &mut self, + request: tonic::Request< + tonic::Streaming, + >, ) -> Self::Future { let inner = self.0.clone(); let fut = async move { - (*inner).update_client_stream(request).await + (*inner) + .speculative_update_client_batch_stream(request) + .await }; Box::pin(fut) } @@ -858,13 +954,12 @@ pub mod msg_server { let inner = self.inner.clone(); let fut = async move { let inner = inner.0; - let method = UpdateClientStreamSvc(inner); + let method = SpeculativeUpdateClientBatchStreamSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.client_streaming(method, req).await; Ok(res) }; @@ -873,21 +968,15 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/AggregateMessages" => { #[allow(non_camel_case_types)] struct AggregateMessagesSvc(pub Arc); - impl tonic::server::UnaryService - for AggregateMessagesSvc { + impl tonic::server::UnaryService for AggregateMessagesSvc { type Response = super::MsgAggregateMessagesResponse; - type Future = 
BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = self.0.clone(); - let fut = async move { - (*inner).aggregate_messages(request).await - }; + let fut = async move { (*inner).aggregate_messages(request).await }; Box::pin(fut) } } @@ -898,11 +987,10 @@ pub mod msg_server { let inner = inner.0; let method = AggregateMessagesSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.unary(method, req).await; Ok(res) }; @@ -911,21 +999,15 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/VerifyMembership" => { #[allow(non_camel_case_types)] struct VerifyMembershipSvc(pub Arc); - impl tonic::server::UnaryService - for VerifyMembershipSvc { + impl tonic::server::UnaryService for VerifyMembershipSvc { type Response = super::MsgVerifyMembershipResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = self.0.clone(); - let fut = async move { - (*inner).verify_membership(request).await - }; + let fut = async move { (*inner).verify_membership(request).await }; Box::pin(fut) } } @@ -936,11 +1018,10 @@ pub mod msg_server { let inner = inner.0; let method = VerifyMembershipSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = 
grpc.unary(method, req).await; Ok(res) }; @@ -949,23 +1030,17 @@ pub mod msg_server { "/lcp.service.elc.v1.Msg/VerifyNonMembership" => { #[allow(non_camel_case_types)] struct VerifyNonMembershipSvc(pub Arc); - impl< - T: Msg, - > tonic::server::UnaryService - for VerifyNonMembershipSvc { + impl tonic::server::UnaryService + for VerifyNonMembershipSvc + { type Response = super::MsgVerifyNonMembershipResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = self.0.clone(); - let fut = async move { - (*inner).verify_non_membership(request).await - }; + let fut = async move { (*inner).verify_non_membership(request).await }; Box::pin(fut) } } @@ -976,28 +1051,23 @@ pub mod msg_server { let inner = inner.0; let method = VerifyNonMembershipSvc(inner); let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ); + let mut grpc = tonic::server::Grpc::new(codec).apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ); let res = grpc.unary(method, req).await; Ok(res) }; Box::pin(fut) } - _ => { - Box::pin(async move { - Ok( - http::Response::builder() - .status(200) - .header("grpc-status", "12") - .header("content-type", "application/grpc") - .body(empty_body()) - .unwrap(), - ) - }) - } + _ => Box::pin(async move { + Ok(http::Response::builder() + .status(200) + .header("grpc-status", "12") + .header("content-type", "application/grpc") + .body(empty_body()) + .unwrap()) + }), } } } From 3823fdce166773e46a0ec50d38a7420f481c37e5 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:26:17 +0900 Subject: [PATCH 05/48] service: add speculative update client execution --- Cargo.lock | 9 +- modules/service/Cargo.toml | 10 + modules/service/src/elc.rs | 89 +- 
modules/service/src/lib.rs | 11 +- modules/service/src/service.rs | 54 +- modules/service/src/speculative/mod.rs | 16 + modules/service/src/speculative/permit.rs | 181 ++++ modules/service/src/speculative/rebase.rs | 74 ++ modules/service/src/speculative/scheduler.rs | 323 +++++++ modules/service/src/speculative/service.rs | 787 ++++++++++++++++++ modules/service/src/speculative/stream.rs | 638 ++++++++++++++ modules/service/src/speculative/types.rs | 117 +++ modules/service/src/speculative/validation.rs | 117 +++ 13 files changed, 2403 insertions(+), 23 deletions(-) create mode 100644 modules/service/src/speculative/mod.rs create mode 100644 modules/service/src/speculative/permit.rs create mode 100644 modules/service/src/speculative/rebase.rs create mode 100644 modules/service/src/speculative/scheduler.rs create mode 100644 modules/service/src/speculative/service.rs create mode 100644 modules/service/src/speculative/stream.rs create mode 100644 modules/service/src/speculative/types.rs create mode 100644 modules/service/src/speculative/validation.rs diff --git a/Cargo.lock b/Cargo.lock index 971061e5..f41589e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4724,7 +4724,6 @@ version = "0.1.0" dependencies = [ "bincode 2.0.0-rc.3", "commitments", - "derive_more 1.0.0", "flex-error", "ibc", "lcp-types", @@ -7760,11 +7759,19 @@ version = "0.1.0" dependencies = [ "anyhow", "attestation-report", + "bincode 2.0.0-rc.3", + "commitments", "crypto", + "ecall-commands", "enclave-api", + "hex", + "keymanager", "lcp-proto", "lcp-types", "log", + "serde", + "sgx_types", + "sha2 0.10.8", "store", "tokio", "tonic 0.9.2", diff --git a/modules/service/Cargo.toml b/modules/service/Cargo.toml index 9b4c4a3c..6b7c9eb4 100644 --- a/modules/service/Cargo.toml +++ b/modules/service/Cargo.toml @@ -9,6 +9,10 @@ tonic-reflection = { version = "0.9" } tokio = { version = "1.0", features = ["full"] } anyhow = { version = "1.0.56" } log = { version = "0.4.8" } +serde = { version = "1.0", features 
= ["derive"] } +bincode = { version = "=2.0.0-rc.3", default-features = false, features = ["serde", "alloc"] } +hex = { version = "0.4", default-features = false, features = ["alloc"] } +sha2 = { version = "0.10.8", default-features = false } lcp-types = { path = "../types" } crypto = { path = "../crypto" } @@ -16,3 +20,9 @@ enclave-api = { path = "../enclave-api" } lcp-proto = { path = "../../proto", default-features = false, features = ["server"] } store = { path = "../store", default-features = false } attestation-report = { path = "../attestation-report" } +commitments = { path = "../commitments" } +ecall-commands = { path = "../ecall-commands", features = ["std"] } + +[dev-dependencies] +keymanager = { path = "../keymanager" } +sgx_types = { rev = "v1.1.6", git = "https://github.com/apache/incubator-teaclave-sgx-sdk" } diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index db5df0e6..6cc69518 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -1,28 +1,35 @@ -use crate::service::AppService; -use enclave_api::EnclaveProtoAPI; +use crate::service::{AppService, ElcService}; +use crate::speculative::stream::{ + decode_speculative_batch_stream_init, encode_stitched_batch_result, + SpeculativeBatchStreamDecoder, +}; +use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; use lcp_proto::google::protobuf::Any; use lcp_proto::lcp::service::elc::v1::msg_update_client_stream_chunk::Chunk; use lcp_proto::lcp::service::elc::v1::{ - msg_server::Msg, query_server::Query, MsgAggregateMessages, MsgAggregateMessagesResponse, - MsgCreateClient, MsgCreateClientResponse, MsgUpdateClient, MsgUpdateClientResponse, + msg_server::Msg, query_server::Query, ExecuteSpeculativeUpdateClientBatchResponse, + MsgAggregateMessages, MsgAggregateMessagesResponse, MsgCreateClient, MsgCreateClientResponse, + MsgSpeculativeUpdateClientBatchStreamChunk, MsgUpdateClient, MsgUpdateClientResponse, MsgUpdateClientStreamChunk, MsgVerifyMembership, 
MsgVerifyMembershipResponse, MsgVerifyNonMembership, MsgVerifyNonMembershipResponse, QueryClientRequest, QueryClientResponse, }; -use store::transaction::CommitStore; +use log::debug; +use std::sync::mpsc; +use store::transaction::{CommitStore, TxAccessor}; use tonic::{Request, Response, Status, Streaming}; #[tonic::async_trait] -impl Msg for AppService +impl Msg for ElcService where - S: CommitStore + 'static, - E: EnclaveProtoAPI + 'static, + S: CommitStore + TxAccessor + Send + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static, { async fn create_client( &self, request: Request, ) -> Result, Status> { - match self.enclave.proto_create_client(request.into_inner()) { + match self.app.enclave.proto_create_client(request.into_inner()) { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -32,7 +39,7 @@ where &self, request: Request, ) -> Result, Status> { - match self.enclave.proto_update_client(request.into_inner()) { + match self.app.enclave.proto_update_client(request.into_inner()) { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -95,17 +102,66 @@ where }), }; - match self.enclave.proto_update_client(msg) { + match self.app.enclave.proto_update_client(msg) { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } } + async fn speculative_update_client_batch_stream( + &self, + request: Request>, + ) -> Result, Status> { + let mut stream = request.into_inner(); + let init = decode_speculative_batch_stream_init(&mut stream).await?; + let client_id = init.client_id; + let (tx, rx) = mpsc::channel(); + let app = self.app.clone(); + let speculative = self.speculative.clone(); + let scheduler_client_id = client_id.clone(); + let scheduler = tokio::task::spawn_blocking(move || { + speculative.execute_serialized_speculative_update_client_stream( + &app, + scheduler_client_id, + rx, + ) + }); + let mut decoder = 
SpeculativeBatchStreamDecoder::new(client_id.clone()); + let mut units = 0usize; + + while let Some(chunk_msg) = stream.message().await? { + if let Some(unit) = decoder.push_chunk(chunk_msg.chunk)? { + units += 1; + tx.send(unit).map_err(|_| { + Status::aborted("speculative batch scheduler stopped before stream ended") + })?; + } + } + decoder.finish()?; + drop(tx); + + debug!( + "received speculative update client batch stream: client_id={} units={}", + client_id, units + ); + let result = scheduler + .await + .map_err(|e| Status::aborted(format!("speculative batch worker failed: {e}")))?; + match result { + Ok(res) => Ok(Response::new(encode_stitched_batch_result(res))), + Err(e) => Err(Status::aborted(format!("{:?}: {}", e.kind, e.detail))), + } + } + async fn aggregate_messages( &self, request: Request, ) -> Result, Status> { - match self.enclave.proto_aggregate_messages(request.into_inner()) { + match self + .app + .enclave + .proto_aggregate_messages(request.into_inner()) + { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -115,7 +171,11 @@ where &self, request: Request, ) -> Result, Status> { - match self.enclave.proto_verify_membership(request.into_inner()) { + match self + .app + .enclave + .proto_verify_membership(request.into_inner()) + { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -126,6 +186,7 @@ where request: Request, ) -> Result, Status> { match self + .app .enclave .proto_verify_non_membership(request.into_inner()) { @@ -138,7 +199,7 @@ where #[tonic::async_trait] impl Query for AppService where - S: CommitStore + 'static, + S: CommitStore + TxAccessor + 'static, E: EnclaveProtoAPI + 'static, { async fn client( diff --git a/modules/service/src/lib.rs b/modules/service/src/lib.rs index 06b26470..4663a3ee 100644 --- a/modules/service/src/lib.rs +++ b/modules/service/src/lib.rs @@ -1,5 +1,14 @@ mod elc; mod enclave; mod service; +mod speculative; -pub use 
crate::service::{run_service, AppService}; +pub use crate::service::{run_service, AppService, ElcService}; +pub use crate::speculative::{ + ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, + SpeculativeBatchFailureKind, SpeculativeService, SpeculativeUpdateClientBatch, + SpeculativeUpdateClientBatchResult, SpeculativeUpdateClientRequest, + SpeculativeUpdateClientResult, StitchedUpdateClientBatchResult, StitchedUpdateClientResult, + MAX_SPECULATIVE_BATCH_HEADER_BYTES, MAX_SPECULATIVE_BATCH_UNITS, + MAX_SPECULATIVE_UNIT_HEADER_BYTES, +}; diff --git a/modules/service/src/service.rs b/modules/service/src/service.rs index c90eb233..b4c86f08 100644 --- a/modules/service/src/service.rs +++ b/modules/service/src/service.rs @@ -1,12 +1,13 @@ +use crate::speculative::SpeculativeService; use anyhow::Result; -use enclave_api::EnclaveProtoAPI; +use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; use lcp_proto::lcp::service::{ elc::v1::{msg_server::MsgServer as ELCMsgServer, query_server::QueryServer as ELCQueryServer}, enclave::v1::query_server::QueryServer as EnclaveQueryServer, }; use log::*; use std::{marker::PhantomData, net::SocketAddr, path::PathBuf, sync::Arc}; -use store::transaction::CommitStore; +use store::transaction::{CommitStore, TxAccessor}; use tokio::signal::unix::{signal, SignalKind}; use tonic::transport::Server; @@ -20,6 +21,15 @@ where _marker: PhantomData, } +pub struct ElcService +where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, +{ + pub(crate) app: AppService, + pub(crate) speculative: SpeculativeService, +} + impl Clone for AppService where S: CommitStore + 'static, @@ -34,6 +44,19 @@ where } } +impl Clone for ElcService +where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, +{ + fn clone(&self) -> Self { + Self { + app: self.app.clone(), + speculative: self.speculative.clone(), + } + } +} + impl 
AppService where S: CommitStore + 'static, @@ -48,14 +71,31 @@ where } } -pub async fn run_service(srv: AppService, addr: SocketAddr) -> Result<()> +impl ElcService +where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, +{ + pub fn new>( + home: P, + enclave: E, + speculative_concurrency_limit: usize, + ) -> Self { + let app = AppService::new(home, enclave); + let speculative = SpeculativeService::new(speculative_concurrency_limit); + Self { app, speculative } + } +} + +pub async fn run_service(srv: ElcService, addr: SocketAddr) -> Result<()> where - S: CommitStore, - E: EnclaveProtoAPI, + S: CommitStore + TxAccessor, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI, { + let app = srv.app.clone(); let elc_msg_srv = ELCMsgServer::new(srv.clone()); - let elc_query_srv = ELCQueryServer::new(srv.clone()); - let enclave_srv = EnclaveQueryServer::new(srv); + let elc_query_srv = ELCQueryServer::new(app.clone()); + let enclave_srv = EnclaveQueryServer::new(app); let reflection = tonic_reflection::server::Builder::configure() .register_encoded_file_descriptor_set(lcp_proto::FILE_DESCRIPTOR_SET) .build() diff --git a/modules/service/src/speculative/mod.rs b/modules/service/src/speculative/mod.rs new file mode 100644 index 00000000..ab77f932 --- /dev/null +++ b/modules/service/src/speculative/mod.rs @@ -0,0 +1,16 @@ +mod permit; +pub(crate) mod rebase; +pub(crate) mod scheduler; +mod service; +pub(crate) mod stream; +mod types; +pub(crate) mod validation; + +pub use service::SpeculativeService; +pub use types::{ + ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, + SpeculativeBatchFailureKind, SpeculativeUpdateClientBatch, SpeculativeUpdateClientBatchResult, + SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, StitchedUpdateClientBatchResult, + StitchedUpdateClientResult, MAX_SPECULATIVE_BATCH_HEADER_BYTES, MAX_SPECULATIVE_BATCH_UNITS, + MAX_SPECULATIVE_UNIT_HEADER_BYTES, +}; diff --git 
a/modules/service/src/speculative/permit.rs b/modules/service/src/speculative/permit.rs new file mode 100644 index 00000000..f4a9b916 --- /dev/null +++ b/modules/service/src/speculative/permit.rs @@ -0,0 +1,181 @@ +use enclave_api::Error as EnclaveError; +use std::collections::HashMap; +use std::sync::{Arc, Condvar, Mutex}; + +#[derive(Debug)] +pub(super) struct PermitGate { + state: Mutex, + ready: Condvar, +} + +#[derive(Debug, Default)] +pub(super) struct KeyLockMap { + locks: Mutex>>>, +} + +#[derive(Debug)] +struct PermitGateState { + available: usize, +} + +struct PermitGuard<'a> { + gate: &'a PermitGate, +} + +impl PermitGate { + pub(super) fn new(permits: usize) -> Self { + Self { + state: Mutex::new(PermitGateState { + available: permits.max(1), + }), + ready: Condvar::new(), + } + } + + #[allow(clippy::result_large_err)] + pub(super) fn with_permit( + &self, + f: impl FnOnce() -> std::result::Result, + ) -> std::result::Result { + let _permit = self.acquire(); + f() + } + + fn acquire(&self) -> PermitGuard<'_> { + let mut state = self.state.lock().unwrap(); + while state.available == 0 { + state = self.ready.wait(state).unwrap(); + } + state.available -= 1; + PermitGuard { gate: self } + } +} + +impl Drop for PermitGuard<'_> { + fn drop(&mut self) { + let mut state = self.gate.state.lock().unwrap(); + state.available += 1; + self.gate.ready.notify_one(); + } +} + +impl KeyLockMap { + pub(super) fn with_key_serialized(&self, key: &str, f: impl FnOnce() -> T) -> T { + let lock = { + let mut locks = self.locks.lock().unwrap(); + locks + .entry(key.to_string()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() + }; + let guard = lock.lock().unwrap(); + let result = f(); + drop(guard); + + let mut locks = self.locks.lock().unwrap(); + let should_remove = Arc::strong_count(&lock) == 2 + && locks + .get(key) + .map(|existing| Arc::ptr_eq(existing, &lock)) + .unwrap_or(false); + if should_remove { + locks.remove(key); + } + result + } +} + +#[cfg(test)] 
+mod tests { + use super::{KeyLockMap, PermitGate}; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn permit_gate_limits_concurrency() { + let gate = Arc::new(PermitGate::new(2)); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for _ in 0..6 { + let gate = gate.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + gate.with_permit(|| { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + Ok(()) + }) + .unwrap(); + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert_eq!(observed_max.load(Ordering::SeqCst), 2); + } + + #[test] + fn key_lock_map_serializes_same_key() { + let locks = Arc::new(KeyLockMap::default()); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for _ in 0..6 { + let locks = locks.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + locks.with_key_serialized("client-0", || { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + }); + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert_eq!(observed_max.load(Ordering::SeqCst), 1); + } + + #[test] + fn key_lock_map_allows_different_keys() { + let locks = Arc::new(KeyLockMap::default()); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for i in 0..6 { + let locks 
= locks.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + locks.with_key_serialized(&format!("client-{i}"), || { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + }); + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert!(observed_max.load(Ordering::SeqCst) > 1); + } +} diff --git a/modules/service/src/speculative/rebase.rs b/modules/service/src/speculative/rebase.rs new file mode 100644 index 00000000..88522b75 --- /dev/null +++ b/modules/service/src/speculative/rebase.rs @@ -0,0 +1,74 @@ +use super::types::{ + ExplicitStateRef, ObservedStateTransition, SpeculativeUpdateClientRequest, + SpeculativeUpdateClientResult, +}; +use lcp_types::{store_key, Any, Height}; +use log::warn; +use store::WriteSet; + +#[derive(Debug, Clone)] +pub(crate) struct DependencyRebaseState { + pub(crate) observed_transition: ObservedStateTransition, + pub(crate) client_state: Option, + pub(crate) consensus_state: Option, +} + +pub(crate) fn rebase_speculative_request( + mut req: SpeculativeUpdateClientRequest, + previous: &DependencyRebaseState, +) -> SpeculativeUpdateClientRequest { + // Always seed the base state from the previous result so the next unit + // observes its predecessor's post-state and write set. 
+ req.base_state = ExplicitStateRef { + prev_height: Some(previous.observed_transition.post_height), + prev_state_id: Some(previous.observed_transition.post_state_id.clone()), + client_state: previous.client_state.clone(), + consensus_state: previous.consensus_state.clone(), + }; + req +} + +pub(crate) fn build_dependency_rebase_state( + client_id: &str, + result: &SpeculativeUpdateClientResult, +) -> DependencyRebaseState { + DependencyRebaseState { + observed_transition: result.observed_transition.clone(), + client_state: extract_client_state_from_write_set(client_id, &result.write_set), + consensus_state: extract_consensus_state_from_write_set( + client_id, + result.observed_transition.post_height, + &result.write_set, + ), + } +} + +pub(crate) fn extract_client_state_from_write_set( + client_id: &str, + write_set: &WriteSet, +) -> Option { + let key = store_key::client_state_bytes(client_id); + decode_any_from_write_set("client_state", write_set.get(&key)?.as_ref()?) +} + +pub(crate) fn extract_consensus_state_from_write_set( + client_id: &str, + height: Height, + write_set: &WriteSet, +) -> Option { + let key = store_key::consensus_state_bytes(client_id, &height); + decode_any_from_write_set("consensus_state", write_set.get(&key)?.as_ref()?) 
+} + +fn decode_any_from_write_set(kind: &str, value: &[u8]) -> Option { + match bincode::serde::decode_from_slice(value, bincode::config::standard()) { + Ok((any, _)) => Some(any), + Err(e) => { + warn!( + "failed to decode {} from speculative write set: {}", + kind, e + ); + None + } + } +} diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs new file mode 100644 index 00000000..4134b2e9 --- /dev/null +++ b/modules/service/src/speculative/scheduler.rs @@ -0,0 +1,323 @@ +use super::rebase::{ + build_dependency_rebase_state, rebase_speculative_request, DependencyRebaseState, +}; +use super::service::SpeculativeService; +use super::types::{ + SpeculativeBatchFailure, SpeculativeBatchFailureKind, SpeculativeUpdateClientBatchResult, + SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, +}; +use super::validation::validate_next_linear_request; +use crate::service::AppService; +use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; +use log::info; +use sha2::Digest; +use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::sync::mpsc::Receiver; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread; +use store::transaction::{CommitStore, TxAccessor}; + +pub(crate) struct StreamingSpeculativeBatchResult { + pub(crate) requests: Vec, + pub(crate) results: SpeculativeUpdateClientBatchResult, +} + +fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(sha2::Sha256::digest(bytes)) +} + +fn speculative_request_header_digest( + req: &SpeculativeUpdateClientRequest, +) -> Option<(usize, String)> { + let header = req.update.header.as_ref()?; + Some((header.value.len(), sha256_hex(&header.value))) +} + +pub(crate) fn execute_speculative_update_client_stream( + speculative: &SpeculativeService, + app: &AppService, + client_id: String, + units: Receiver, +) -> core::result::Result +where + S: CommitStore + TxAccessor + Send + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 
'static, +{ + let max_parallelism = speculative.speculative_concurrency_limit(); + info!( + "execute speculative stream: client_id={} max_parallelism={}", + client_id, max_parallelism + ); + let shared = Arc::new(StreamingSchedulerShared { + state: Mutex::new(StreamingSchedulerState::new(client_id.clone())), + ready: Condvar::new(), + complete: Condvar::new(), + }); + + let failure = thread::scope(|scope| { + for _ in 0..max_parallelism { + let shared = shared.clone(); + scope.spawn(move || streaming_speculative_worker(speculative, app, shared)); + } + + for unit in units { + let mut state = shared.state.lock().unwrap(); + if state.failure.is_some() { + break; + } + if let Err(e) = state.enqueue(unit) { + state.failure = Some(e); + shared.ready.notify_all(); + shared.complete.notify_all(); + break; + } + shared.ready.notify_all(); + } + + let mut state = shared.state.lock().unwrap(); + state.closed = true; + shared.ready.notify_all(); + while state.failure.is_none() && state.has_unfinished_work() { + if state.has_unresolvable_pending_work() { + state.failure = Some(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::DependencyStateMismatch, + unit_id: state.pending.front().map(|(_, req)| req.unit_id.clone()), + detail: "speculative stream ended with unresolved linear dependencies" + .to_string(), + }); + shared.ready.notify_all(); + break; + } + state = shared.complete.wait(state).unwrap(); + } + info!( + "execute speculative stream complete: observed_max_in_flight={}", + state.observed_max_in_flight + ); + state.failure.clone() + }); + + if let Some(err) = failure { + return Err(err); + } + + let mut state = shared.state.lock().unwrap(); + let requests = (0..state.unit_count) + .map(|index| { + state + .request_by_index + .remove(&index) + .ok_or_else(|| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: None, + detail: format!("unit index {} missing from executed requests", index), + }) + }) + .collect::, 
_>>()?; + let units = (0..state.unit_count) + .map(|index| { + state + .result_by_index + .remove(&index) + .ok_or_else(|| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: requests.get(index).map(|req| req.unit_id.clone()), + detail: format!("unit index {} missing from execution results", index), + }) + }) + .collect::, _>>()?; + Ok(StreamingSpeculativeBatchResult { + requests, + results: SpeculativeUpdateClientBatchResult { client_id, units }, + }) +} + +// Shared synchronization wrapper for one streaming scheduler run. +// +// The scheduler state is protected by a single mutex so enqueue, promotion, +// completion, and failure transitions stay consistent across worker threads. +// `ready` wakes workers when executable units become available, while +// `complete` wakes the coordinator waiting for in-flight work to drain. +struct StreamingSchedulerShared { + state: Mutex, + ready: Condvar, + complete: Condvar, +} + +// Mutable state for one streaming speculative batch execution. +// +// Incoming units are assigned monotonically increasing stream indexes, then +// split into `ready` work that workers can execute immediately and `pending` +// work that must wait for the previous unit's rebase state. Completed units +// store their request/result by index so the final response can be rebuilt in +// input order, even if worker threads finish out of order. 
+struct StreamingSchedulerState {
+    client_id: String,
+    // Units a worker can pop and execute right away, tagged with their stream index.
+    ready: VecDeque<(usize, SpeculativeUpdateClientRequest)>,
+    // Units parked until the unit at `index - 1` has recorded its rebase state.
+    pending: VecDeque<(usize, SpeculativeUpdateClientRequest)>,
+    // Per-index outputs of completed units: the request as executed, its
+    // result, and the rebase state derived from that result.
+    request_by_index: BTreeMap,
+    result_by_index: BTreeMap,
+    rebase_state_by_index: BTreeMap,
+    // Passed to `validate_next_linear_request` on every enqueue.
+    seen_unit_ids: BTreeSet,
+    // Number of units accepted so far; also the index given to the next unit.
+    unit_count: usize,
+    // Units currently being executed by a worker.
+    in_flight: usize,
+    // High-water mark of `in_flight`.
+    observed_max_in_flight: usize,
+    // Set by the coordinator once it stops reading the input stream.
+    closed: bool,
+    // Once set, workers stop picking up units and the coordinator stops waiting.
+    failure: Option,
+}
+
+impl StreamingSchedulerState {
+    // Creates an empty, open scheduler state for `client_id`.
+    fn new(client_id: String) -> Self {
+        Self {
+            client_id,
+            ready: VecDeque::new(),
+            pending: VecDeque::new(),
+            request_by_index: BTreeMap::new(),
+            result_by_index: BTreeMap::new(),
+            rebase_state_by_index: BTreeMap::new(),
+            seen_unit_ids: BTreeSet::new(),
+            unit_count: 0,
+            in_flight: 0,
+            observed_max_in_flight: 0,
+            closed: false,
+            failure: None,
+        }
+    }
+
+    // True while any unit is executing, ready, or pending.
+    fn has_unfinished_work(&self) -> bool {
+        self.in_flight > 0 || !self.ready.is_empty() || !self.pending.is_empty()
+    }
+
+    // True when only pending units are left: nothing is executing or ready,
+    // so they can only make progress if more input arrives.
+    fn has_unresolvable_pending_work(&self) -> bool {
+        self.in_flight == 0 && self.ready.is_empty() && !self.pending.is_empty()
+    }
+
+    // Validates `req` as the next unit of the stream and queues it.
+    //
+    // `unit_count` is only advanced after `validate_next_linear_request`
+    // succeeds, so a rejected unit does not consume a stream index.
+    fn enqueue(
+        &mut self,
+        req: SpeculativeUpdateClientRequest,
+    ) -> core::result::Result<(), SpeculativeBatchFailure> {
+        let index = self.unit_count;
+        validate_next_linear_request(&self.client_id, index, &mut self.seen_unit_ids, &req)?;
+        self.unit_count += 1;
+        self.enqueue_ready_or_pending(index, req)
+    }
+
+    // Routes one unit to `ready` or `pending`.
+    //
+    // - The first unit, or any unit carrying a complete base-state payload,
+    //   is ready as-is.
+    // - Otherwise, if unit `index - 1` has recorded its rebase state, the
+    //   request is rebased onto it and becomes ready.
+    // - Otherwise the unit waits in `pending`.
+    //
+    // Always returns `Ok(())`.
+    fn enqueue_ready_or_pending(
+        &mut self,
+        index: usize,
+        req: SpeculativeUpdateClientRequest,
+    ) -> core::result::Result<(), SpeculativeBatchFailure> {
+        // `index == 0` is checked first, so `index - 1` below cannot underflow.
+        if index == 0 || req.base_state.has_complete_base_state_payload() {
+            self.ready.push_back((index, req));
+        } else if let Some(previous) = self.rebase_state_by_index.get(&(index - 1)) {
+            self.ready
+                .push_back((index, rebase_speculative_request(req, previous)));
+        } else {
+            self.pending.push_back((index, req));
+        }
+        Ok(())
+    }
+
+    // Records the request, rebase state, and result of the unit at `index`,
+    // then promotes any pending unit that was waiting on it.
+    fn complete_unit(
+        &mut self,
+        index: usize,
+        req: SpeculativeUpdateClientRequest,
+        result: SpeculativeUpdateClientResult,
+    ) -> core::result::Result<(), SpeculativeBatchFailure> {
+        self.request_by_index.insert(index, req);
+        self.rebase_state_by_index.insert(
+            index,
+            build_dependency_rebase_state(&self.client_id, &result),
+        );
+        self.result_by_index.insert(index, result);
+        self.promote_pending()
+    }
+
+    // Re-routes every pending unit through `enqueue_ready_or_pending`, so the
+    // ready/rebase/pending decision lives in exactly one place. Units whose
+    // predecessor has completed move to `ready`; the rest return to `pending`
+    // in their original order.
+    fn promote_pending(&mut self) -> core::result::Result<(), SpeculativeBatchFailure> {
+        // Take the queue first: `enqueue_ready_or_pending` pushes units that
+        // still have to wait back onto `self.pending`.
+        for (index, req) in std::mem::take(&mut self.pending) {
+            self.enqueue_ready_or_pending(index, req)?;
+        }
+        Ok(())
+    }
+}
+
+// Worker loop for one streaming scheduler run.
+//
+// Repeatedly takes one ready unit, executes it under a speculative request
+// permit, and records the outcome in the shared state. Returns as soon as a
+// failure is recorded, or when `ready` is empty, the stream is closed, and no
+// unit is in flight.
+fn streaming_speculative_worker(
+    speculative: &SpeculativeService,
+    app: &AppService,
+    shared: Arc,
+) where
+    S: CommitStore + TxAccessor + Send + 'static,
+    E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static,
+{
+    loop {
+        // The state lock is held only while choosing a unit; it is released
+        // at the end of this block, before the unit is executed.
+        let (index, req) = {
+            let mut state = shared.state.lock().unwrap();
+            loop {
+                if state.failure.is_some() {
+                    return;
+                }
+                if let Some((index, req)) = state.ready.pop_front() {
+                    state.in_flight += 1;
+                    state.observed_max_in_flight =
+                        state.observed_max_in_flight.max(state.in_flight);
+                    break (index, req);
+                }
+                // Nothing ready: an in-flight unit may still promote pending
+                // work, so only exit once none is left and input is closed.
+                if state.closed && state.in_flight == 0 {
+                    return;
+                }
+                state = shared.ready.wait(state).unwrap();
+            }
+        };
+
+        let unit_id = req.unit_id.clone();
+        let header_digest = speculative_request_header_digest(&req);
+        if let Some((header_bytes, header_sha256)) = header_digest.as_ref() {
+            info!(
+                "execute speculative update client unit: client_id={} unit_id={} header_bytes={} header_sha256={}",
+                req.update.client_id,
+                unit_id,
+                header_bytes,
+                header_sha256
+            );
+        }
+        let result = speculative
+            .with_speculative_request_permit(|| {
speculative.speculative_update_client(app, req.clone()) + }) + .map_err(|e| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, + unit_id: Some(unit_id), + detail: match header_digest { + Some((header_bytes, header_sha256)) => format!( + "{}; header_bytes={} header_sha256={}", + e, header_bytes, header_sha256 + ), + None => e.to_string(), + }, + }); + + let mut state = shared.state.lock().unwrap(); + state.in_flight -= 1; + match result { + Ok(result) => { + if let Err(e) = state.complete_unit(index, req, result) { + state.failure = Some(e); + } + } + Err(e) => { + state.failure = Some(e); + } + } + shared.ready.notify_all(); + shared.complete.notify_all(); + } +} diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs new file mode 100644 index 00000000..e26b97ae --- /dev/null +++ b/modules/service/src/speculative/service.rs @@ -0,0 +1,787 @@ +use super::permit::{KeyLockMap, PermitGate}; +#[cfg(test)] +use super::rebase::{ + extract_client_state_from_write_set, extract_consensus_state_from_write_set, + rebase_speculative_request, DependencyRebaseState, +}; +use super::scheduler::execute_speculative_update_client_stream; +use super::types::{ + ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, + SpeculativeBatchFailureKind, SpeculativeUpdateClientBatch, SpeculativeUpdateClientBatchResult, + SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, StitchedUpdateClientBatchResult, + StitchedUpdateClientResult, +}; +use super::validation::{ + validate_linear_batch_requests, validate_linear_transitions, +}; +use crate::service::AppService; +use commitments::ProxyMessage; +use enclave_api::{ + EnclaveProtoAPI, Error as EnclaveError, SpeculativeBaseState, SpeculativeEnclaveCommandAPI, + SpeculativeUpdateClientInput as EnclaveSpeculativeUpdateClientInput, +}; +#[cfg(test)] +use lcp_proto::lcp::service::elc::v1::{MsgUpdateClient, MsgUpdateClientResponse}; +use 
std::sync::mpsc::Receiver;
+use std::sync::Arc;
+use store::transaction::{CommitStore, TxAccessor};
+use store::WriteSet;
+
+/// Entry point for speculative update-client execution.
+///
+/// Holds the key lock map used by `with_client_serialized`, the effective
+/// concurrency limit, and the permit gate used by
+/// `with_speculative_request_permit`.
+pub struct SpeculativeService {
+    key_locks: Arc,
+    speculative_concurrency_limit: usize,
+    speculative_request_permits: Arc,
+}
+
+// Manual `Clone`: the `Arc` handles are cloned, so every clone shares the
+// same key locks and permit gate as the original.
+impl Clone for SpeculativeService {
+    fn clone(&self) -> Self {
+        Self {
+            key_locks: self.key_locks.clone(),
+            speculative_concurrency_limit: self.speculative_concurrency_limit,
+            speculative_request_permits: self.speculative_request_permits.clone(),
+        }
+    }
+}
+
+impl SpeculativeService {
+    /// Builds a service whose stored limit and permit gate are both sized
+    /// from `speculative_concurrency_limit`, clamped to at least 1.
+    pub fn new(speculative_concurrency_limit: usize) -> Self {
+        // Clamp once and use the same value for both fields, so the gate can
+        // never be sized 0 while `speculative_concurrency_limit()` reports 1.
+        // NOTE(review): `PermitGate` is defined elsewhere; whether a gate of
+        // 0 would block every request is not visible here - confirm.
+        let limit = speculative_concurrency_limit.max(1);
+        Self {
+            key_locks: Arc::new(KeyLockMap::default()),
+            speculative_concurrency_limit: limit,
+            speculative_request_permits: Arc::new(PermitGate::new(limit)),
+        }
+    }
+
+    /// Returns the concurrency limit stored by `new` (never less than 1).
+    pub fn speculative_concurrency_limit(&self) -> usize {
+        self.speculative_concurrency_limit
+    }
+
+    /// Runs `f` through the key lock map's `with_key_serialized`, keyed by
+    /// `client_id`, and returns its result.
+    pub fn with_client_serialized(&self, client_id: &str, f: impl FnOnce() -> T) -> T {
+        // Keep client-key serialization outside the speculative execution/stitch
+        // body so all canonical writes for one client are ordered.
+ self.key_locks.with_key_serialized(client_id, f) + } + + #[allow(clippy::result_large_err)] + pub fn with_speculative_request_permit( + &self, + f: impl FnOnce() -> std::result::Result, + ) -> std::result::Result { + self.speculative_request_permits.with_permit(f) + } + + #[allow(clippy::result_large_err)] + pub fn speculative_update_client( + &self, + app: &AppService, + req: SpeculativeUpdateClientRequest, + ) -> core::result::Result + where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, + { + let update = req.update.try_into()?; + let base_state = req.base_state.clone(); + let res = app + .enclave + .speculative_update_client(EnclaveSpeculativeUpdateClientInput { + update, + base_state: base_state_payload_from_ref(&base_state), + })?; + let observed_transition = decode_observed_transition(&res.response)?; + Ok(SpeculativeUpdateClientResult { + response: res.response.into(), + write_set: res.write_set, + base_state: req.base_state, + observed_transition, + }) + } + + pub fn stitch_speculative_update_client_batch( + &self, + app: &AppService, + batch: SpeculativeUpdateClientBatch, + results: SpeculativeUpdateClientBatchResult, + ) -> core::result::Result + where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, + { + validate_linear_batch_requests(&batch.client_id, &batch.units)?; + if batch.client_id != results.client_id { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::ResultClientMismatch, + unit_id: None, + detail: format!( + "batch result client_id mismatch: expected={} observed={}", + batch.client_id, results.client_id + ), + }); + } + if batch.units.len() != results.units.len() { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: None, + detail: format!( + "batch size mismatch: requests={} results={}", + batch.units.len(), + results.units.len() + ), + }); + } + 
validate_linear_transitions(&batch.units, &results.units)?; + + let mut merged_write_set = WriteSet::default(); + let mut units = Vec::with_capacity(batch.units.len()); + for (req, result) in batch.units.iter().zip(results.units.into_iter()) { + result + .validate_base_state() + .map_err(|e| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BaseStateMismatch, + unit_id: Some(req.unit_id.clone()), + detail: e.to_string(), + })?; + for (key, value) in result.write_set { + merged_write_set.insert(key, value); + } + units.push(StitchedUpdateClientResult { + response: result.response, + observed_transition: result.observed_transition, + }); + } + app.enclave + .apply_write_set(batch.client_id.clone(), merged_write_set) + .map_err(|e| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::StitchApplyFailed, + unit_id: None, + detail: e.to_string(), + })?; + + Ok(StitchedUpdateClientBatchResult { + client_id: batch.client_id, + units, + }) + } + + pub(crate) fn execute_serialized_speculative_update_client_stream( + &self, + app: &AppService, + client_id: String, + units: Receiver, + ) -> core::result::Result + where + S: CommitStore + TxAccessor + Send + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static, + { + self.with_client_serialized(&client_id.clone(), || { + let batch_result = + execute_speculative_update_client_stream(self, app, client_id.clone(), units)?; + let batch = SpeculativeUpdateClientBatch { + client_id, + units: batch_result.requests, + }; + self.stitch_speculative_update_client_batch(app, batch, batch_result.results) + }) + } +} + +fn base_state_payload_from_ref(base_state: &ExplicitStateRef) -> Option { + Some(SpeculativeBaseState { + prev_height: Some(base_state.prev_height?), + client_state: base_state.client_state.clone()?, + consensus_state: base_state.consensus_state.clone()?, + }) +} + +#[allow(clippy::result_large_err)] +fn decode_observed_transition( + response: 
&ecall_commands::UpdateClientResponse, +) -> core::result::Result { + match response.0.message()? { + ProxyMessage::UpdateState(msg) => Ok(ObservedStateTransition { + prev_height: msg.prev_height, + prev_state_id: msg.prev_state_id.map(|id| id.to_vec()), + post_height: msg.post_height, + post_state_id: msg.post_state_id.to_vec(), + }), + other => Err(enclave_api::Error::invalid_argument(format!( + "expected UpdateState proxy message, got {:?}", + other + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use commitments::{CommitmentProof, StateID, UpdateStateProxyMessage, ValidationContext}; + use ecall_commands::UpdateClientResponse as EnclaveUpdateClientResponse; + use enclave_api::{ + CommitStoreAccessor, EnclaveCommandAPI, EnclaveInfo, EnclavePrimitiveAPI, EnclaveProtoAPI, + HostStoreTxManager, SpeculativeEnclaveCommandAPI, + SpeculativeUpdateClientInput as EnclaveSpeculativeUpdateClientInput, + SpeculativeUpdateClientResponse as EnclaveSpeculativeUpdateClientResponse, + }; + use keymanager::EnclaveKeyManager; + use lcp_proto::google::protobuf::Any; + use lcp_types::{store_key, Height}; + use lcp_types::{EnclaveMetadata, Time}; + use sgx_types::{sgx_enclave_id_t, sgx_status_t}; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Mutex; + use std::thread; + use std::time::Duration; + use store::memory::MemStore; + + struct FakeEnclave { + store: Mutex, + key_manager: EnclaveKeyManager, + current_in_flight: AtomicUsize, + observed_max_in_flight: AtomicUsize, + delay: Duration, + } + + impl FakeEnclave { + fn new(delay: Duration) -> Self { + let key_manager_home = std::env::temp_dir().join(format!( + "lcp-fake-enclave-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("system time") + .as_nanos() + )); + std::fs::create_dir_all(&key_manager_home).expect("fake enclave key manager home"); + Self { + store: Mutex::new(MemStore::default()), + key_manager: EnclaveKeyManager::new(&key_manager_home) + 
.expect("fake enclave key manager"), + current_in_flight: AtomicUsize::new(0), + observed_max_in_flight: AtomicUsize::new(0), + delay, + } + } + + fn observed_max_in_flight(&self) -> usize { + self.observed_max_in_flight.load(Ordering::SeqCst) + } + } + + impl CommitStoreAccessor for FakeEnclave { + fn use_mut_store(&self, f: impl FnOnce(&mut MemStore) -> T) -> T { + let mut store = self.store.lock().unwrap(); + f(&mut store) + } + } + + impl HostStoreTxManager for FakeEnclave {} + + impl EnclaveInfo for FakeEnclave { + fn get_eid(&self) -> sgx_enclave_id_t { + 0 + } + + fn metadata(&self) -> core::result::Result { + unimplemented!("metadata is not used in explicit-state unit tests") + } + + fn is_debug(&self) -> bool { + false + } + + fn get_key_manager(&self) -> &EnclaveKeyManager { + &self.key_manager + } + } + + impl EnclavePrimitiveAPI for FakeEnclave {} + + impl EnclaveCommandAPI for FakeEnclave {} + + impl SpeculativeEnclaveCommandAPI for FakeEnclave { + fn speculative_update_client( + &self, + input: EnclaveSpeculativeUpdateClientInput, + ) -> core::result::Result + { + let idx = input.update.signer.0[19] as u64; + let current = self.current_in_flight.fetch_add(1, Ordering::SeqCst) + 1; + self.observed_max_in_flight + .fetch_max(current, Ordering::SeqCst); + std::thread::sleep(self.delay); + self.current_in_flight.fetch_sub(1, Ordering::SeqCst); + + let prev_height = (idx > 0).then(|| Height::new(0, 10 + idx)); + let prev_state_id = (idx > 0).then(|| { + let mut prev_state_id = [0u8; 32]; + prev_state_id[31] = idx as u8; + StateID::from(prev_state_id) + }); + let mut post_state_id = [0u8; 32]; + post_state_id[31] = (idx as u8) + 1; + let message = ProxyMessage::from(UpdateStateProxyMessage { + prev_height, + prev_state_id, + post_height: Height::new(0, 10 + idx + 1), + post_state_id: StateID::from(post_state_id), + timestamp: Time::unix_epoch(), + context: ValidationContext::Empty, + emitted_states: vec![], + }) + .to_bytes(); + + 
Ok(EnclaveSpeculativeUpdateClientResponse { + response: EnclaveUpdateClientResponse(CommitmentProof::new_with_no_signature( + message, + )), + write_set: vec![(vec![idx as u8], Some(vec![idx as u8]))] + .into_iter() + .collect(), + }) + } + } + + impl EnclaveProtoAPI for FakeEnclave {} + + fn mk_req( + unit_id: &str, + client_id: &str, + prev_height: Option, + prev_state_id: Option<&[u8]>, + ) -> SpeculativeUpdateClientRequest { + SpeculativeUpdateClientRequest { + unit_id: unit_id.to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![1], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height, + prev_state_id: prev_state_id.map(|v| v.to_vec()), + client_state: None, + consensus_state: None, + }, + } + } + + fn with_explicit_base_state_payload( + mut req: SpeculativeUpdateClientRequest, + ) -> SpeculativeUpdateClientRequest { + req.base_state.client_state = Some( + Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![1], + } + .into(), + ); + req.base_state.consensus_state = Some( + Any { + type_url: "/ibc.mock.ConsensusState".to_string(), + value: vec![2], + } + .into(), + ); + req + } + + fn mk_result( + prev_height: Option, + prev_state_id: Option<&[u8]>, + post_height: Height, + post_state_id: &[u8], + ) -> SpeculativeUpdateClientResult { + SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: ExplicitStateRef { + prev_height, + prev_state_id: prev_state_id.map(|v| v.to_vec()), + client_state: None, + consensus_state: None, + }, + observed_transition: ObservedStateTransition { + prev_height, + prev_state_id: prev_state_id.map(|v| v.to_vec()), + post_height, + post_state_id: post_state_id.to_vec(), + }, + } + } + + #[test] + fn validates_linear_state_transitions() { + let requests = vec![ + mk_req("unit-0000", "client", None, None), + mk_req( + 
"unit-0001", + "client", + Some(Height::new(0, 11)), + Some(b"post-0"), + ), + ]; + let results = vec![ + mk_result(None, None, Height::new(0, 11), b"post-0"), + mk_result( + Some(Height::new(0, 11)), + Some(b"post-0"), + Height::new(0, 12), + b"post-1", + ), + ]; + + assert!(validate_linear_transitions(&requests, &results).is_ok()); + } + + #[test] + fn validates_base_state_prev_height_only_when_provided() { + let mut result = mk_result( + Some(Height::new(0, 11)), + None, + Height::new(0, 12), + b"post-1", + ); + result.base_state.prev_height = None; + + result + .validate_base_state() + .expect("missing prev_height should accept observed height"); + } + + #[test] + fn rejects_base_state_prev_height_mismatch_when_provided() { + let mut result = mk_result( + Some(Height::new(0, 11)), + None, + Height::new(0, 12), + b"post-1", + ); + result.base_state.prev_height = Some(Height::new(0, 10)); + + let err = result.validate_base_state().unwrap_err(); + assert!(err.to_string().contains("base prev_height mismatch")); + } + + #[test] + fn rejects_linear_state_mismatch() { + let requests = vec![ + mk_req("unit-0000", "client", None, None), + mk_req( + "unit-0001", + "client", + Some(Height::new(0, 11)), + Some(b"wrong"), + ), + ]; + let results = vec![ + mk_result(None, None, Height::new(0, 11), b"post-0"), + mk_result( + Some(Height::new(0, 11)), + Some(b"wrong"), + Height::new(0, 12), + b"post-1", + ), + ]; + + let err = validate_linear_transitions(&requests, &results).unwrap_err(); + assert_eq!( + err.kind, + SpeculativeBatchFailureKind::DependencyStateMismatch + ); + assert_eq!(err.unit_id.as_deref(), Some("unit-0001")); + } + + #[test] + fn replaces_explicit_base_state_metadata_when_rebasing_previous_payloads() { + let req = mk_req( + "unit-0001", + "client", + Some(Height::new(0, 10)), + Some(b"stale"), + ); + let previous = DependencyRebaseState { + observed_transition: ObservedStateTransition { + prev_height: None, + prev_state_id: None, + post_height: 
Height::new(0, 11), + post_state_id: b"post-0".to_vec(), + }, + client_state: None, + consensus_state: None, + }; + + let rebased = rebase_speculative_request(req, &previous); + + assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!( + rebased.base_state.prev_state_id.as_deref(), + Some(b"post-0".as_slice()) + ); + } + + #[test] + fn fills_missing_base_state_metadata_from_previous_post_state() { + let req = mk_req("unit-0001", "client", None, None); + let previous = DependencyRebaseState { + observed_transition: ObservedStateTransition { + prev_height: None, + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: b"post-0".to_vec(), + }, + client_state: None, + consensus_state: None, + }; + + let rebased = rebase_speculative_request(req, &previous); + + assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!( + rebased.base_state.prev_state_id.as_deref(), + Some(b"post-0".as_slice()) + ); + } + + #[test] + fn seeds_previous_payloads_even_when_explicit_base_state_is_complete() { + let req = with_explicit_base_state_payload(mk_req( + "unit-0001", + "client", + Some(Height::new(0, 11)), + Some(b"post-0"), + )); + let previous = DependencyRebaseState { + observed_transition: ObservedStateTransition { + prev_height: None, + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: b"post-0".to_vec(), + }, + client_state: Some( + Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![3], + } + .into(), + ), + consensus_state: Some( + Any { + type_url: "/ibc.mock.ConsensusState".to_string(), + value: vec![4], + } + .into(), + ), + }; + + let rebased = rebase_speculative_request(req, &previous); + + assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!( + rebased.base_state.prev_state_id.as_deref(), + Some(b"post-0".as_slice()) + ); + assert!(rebased.base_state.client_state.is_some()); + assert!(rebased.base_state.consensus_state.is_some()); 
+ } + + #[test] + fn extracts_rebase_payloads_from_bincode_write_set() { + let client_id = "07-tendermint-0"; + let height = Height::new(0, 11); + let client_state = Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![1, 2, 3], + }; + let consensus_state = Any { + type_url: "/ibc.mock.ConsensusState".to_string(), + value: vec![4, 5, 6], + }; + let client_state_key = store_key::client_state_bytes(client_id); + let consensus_state_key = store_key::consensus_state_bytes(client_id, &height); + let mut write_set = WriteSet::default(); + write_set.insert( + client_state_key, + Some( + bincode::serde::encode_to_vec(&client_state, bincode::config::standard()) + .expect("encode client state"), + ), + ); + write_set.insert( + consensus_state_key, + Some( + bincode::serde::encode_to_vec(&consensus_state, bincode::config::standard()) + .expect("encode consensus state"), + ), + ); + + assert_eq!( + extract_client_state_from_write_set(client_id, &write_set), + Some(client_state.into()) + ); + assert_eq!( + extract_consensus_state_from_write_set(client_id, height, &write_set), + Some(consensus_state.into()) + ); + } + + #[test] + fn ignores_missing_or_malformed_rebase_payloads_from_write_set() { + let client_id = "07-tendermint-0"; + let height = Height::new(0, 11); + let client_state_key = store_key::client_state_bytes(client_id); + let consensus_state_key = store_key::consensus_state_bytes(client_id, &height); + let mut write_set = WriteSet::default(); + write_set.insert(client_state_key, Some(b"not-bincode-any".to_vec())); + write_set.insert(consensus_state_key, None); + + assert_eq!( + extract_client_state_from_write_set(client_id, &write_set), + None + ); + assert_eq!( + extract_consensus_state_from_write_set(client_id, height, &write_set), + None + ); + } + + #[test] + fn streaming_speculative_batch_executes_before_input_closes() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(100)); + let app = 
AppService::::new("test-home", enclave); + let service = SpeculativeService::new(2); + let (tx, rx) = std::sync::mpsc::sync_channel(2); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_serialized_speculative_update_client_stream( + &worker_app, + client_id_for_worker, + rx, + ) + }); + + tx.send(SpeculativeUpdateClientRequest { + unit_id: "unit-0000".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: vec![0; 20], + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![1], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, + }, + }) + .expect("send first unit"); + + for _ in 0..100 { + if app.enclave.observed_max_in_flight() >= 1 { + break; + } + thread::sleep(Duration::from_millis(5)); + } + assert!( + app.enclave.observed_max_in_flight() >= 1, + "expected first unit to start before input stream closes" + ); + + tx.send(SpeculativeUpdateClientRequest { + unit_id: "unit-0001".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: { + let mut signer = vec![0; 20]; + signer[19] = 1; + signer + }, + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![2], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, + }, + }) + .expect("send second unit"); + drop(tx); + + let result = handle + .join() + .expect("streaming worker thread") + .expect("streaming speculative batch"); + assert_eq!(result.units.len(), 2); + assert_eq!(app.enclave.observed_max_in_flight(), 1); + } + + #[test] + fn streaming_speculative_batch_parallelizes_complete_base_state_units() { + let client_id = "07-tendermint-0"; + let enclave = 
FakeEnclave::new(Duration::from_millis(100)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(3); + let (tx, rx) = std::sync::mpsc::sync_channel(3); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_serialized_speculative_update_client_stream( + &worker_app, + client_id_for_worker, + rx, + ) + }); + + let mut requests = vec![ + with_explicit_base_state_payload(mk_req("unit-0000", client_id, None, None)), + with_explicit_base_state_payload(mk_req( + "unit-0001", + client_id, + Some(Height::new(0, 11)), + None, + )), + with_explicit_base_state_payload(mk_req( + "unit-0002", + client_id, + Some(Height::new(0, 12)), + None, + )), + ]; + for (i, req) in requests.iter_mut().enumerate() { + req.update.signer = { + let mut signer = vec![0; 20]; + signer[19] = i as u8; + signer + }; + } + for req in requests { + tx.send(req).expect("send unit"); + } + drop(tx); + + let result = handle + .join() + .expect("streaming worker thread") + .expect("streaming speculative batch"); + assert_eq!(result.units.len(), 3); + assert!( + app.enclave.observed_max_in_flight() >= 2, + "expected complete base-state units to run concurrently, saw {}", + app.enclave.observed_max_in_flight() + ); + } +} diff --git a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs new file mode 100644 index 00000000..e61b5f8a --- /dev/null +++ b/modules/service/src/speculative/stream.rs @@ -0,0 +1,638 @@ +use crate::{ + ExplicitStateRef, ObservedStateTransition, SpeculativeUpdateClientRequest, + StitchedUpdateClientBatchResult, StitchedUpdateClientResult, + MAX_SPECULATIVE_BATCH_HEADER_BYTES, MAX_SPECULATIVE_UNIT_HEADER_BYTES, +}; +#[cfg(test)] +use crate::{SpeculativeUpdateClientBatch, MAX_SPECULATIVE_BATCH_UNITS}; +use lcp_proto::google::protobuf::Any; +use lcp_proto::lcp::service::elc::v1::{ + 
msg_speculative_update_client_batch_stream_chunk::Chunk as BatchChunk, + ExecuteSpeculativeUpdateClientBatchResponse, ExplicitStateRef as ProtoExplicitStateRef, + MsgSpeculativeUpdateClientBatchStreamChunk, MsgUpdateClient, + ObservedStateTransition as ProtoObservedStateTransition, + SpeculativeUpdateClientBatchStreamInit, SpeculativeUpdateClientUnitHeaderChunk, + SpeculativeUpdateClientUnitInit, + StitchedSpeculativeUpdateClientUnitResult as ProtoStitchedSpeculativeUpdateClientUnitResult, +}; +use lcp_types::Height; +use log::info; +use sha2::Digest; +use std::collections::HashSet; +use tonic::{Status, Streaming}; + +pub(crate) const MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES: usize = 4 * 1024 * 1024; + +fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(sha2::Sha256::digest(bytes)) +} + +#[derive(Debug)] +#[cfg(test)] +struct DecodedSpeculativeBatchRequest { + client_id: String, + units: Vec, +} + +struct OpenSpeculativeUnit { + init: SpeculativeUpdateClientUnitInit, + header_bytes: Vec, +} + +pub(crate) struct SpeculativeBatchStreamDecoder { + client_id: String, + #[cfg(test)] + units: Vec, + open_unit: Option, + seen_unit_ids: HashSet, + closed: bool, + total_header_bytes: usize, +} + +impl SpeculativeBatchStreamDecoder { + pub(crate) fn new(client_id: String) -> Self { + Self { + client_id, + #[cfg(test)] + units: Vec::new(), + open_unit: None, + seen_unit_ids: HashSet::new(), + closed: false, + total_header_bytes: 0, + } + } + + #[allow(clippy::result_large_err)] + pub(crate) fn push_chunk( + &mut self, + chunk: Option, + ) -> Result, Status> { + if self.closed { + return Err(Status::invalid_argument( + "speculative batch stream received chunk after batch_end", + )); + } + match chunk { + Some(BatchChunk::Init(_)) => Err(Status::invalid_argument( + "Init must only appear as the first message", + )), + Some(BatchChunk::UnitInit(unit_init)) => { + if self.open_unit.is_some() { + return Err(Status::invalid_argument( + "speculative unit_init received before 
previous unit_end", + )); + } + if self.seen_unit_ids.contains(&unit_init.unit_id) { + return Err(Status::invalid_argument(format!( + "duplicate speculative unit_id: {}", + unit_init.unit_id + ))); + } + validate_speculative_unit_init(&unit_init)?; + self.open_unit = Some(OpenSpeculativeUnit { + init: unit_init, + header_bytes: Vec::new(), + }); + Ok(None) + } + Some(BatchChunk::UnitHeaderChunk(header_chunk)) => { + append_speculative_unit_header_chunk( + &mut self.open_unit, + header_chunk, + &mut self.total_header_bytes, + )?; + Ok(None) + } + Some(BatchChunk::UnitEnd(unit_end)) => { + let unit = close_speculative_unit( + &self.client_id, + self.open_unit.take(), + unit_end.unit_id, + )?; + if !self.seen_unit_ids.insert(unit.unit_id.clone()) { + return Err(Status::invalid_argument(format!( + "duplicate speculative unit_id: {}", + unit.unit_id + ))); + } + #[cfg(test)] + self.units.push(unit.clone()); + Ok(Some(unit)) + } + Some(BatchChunk::BatchEnd(_)) => { + if self.open_unit.is_some() { + return Err(Status::invalid_argument( + "speculative batch_end received while chunked unit is open", + )); + } + self.closed = true; + Ok(None) + } + None => Err(Status::invalid_argument("received empty chunk message")), + } + } + + #[allow(clippy::result_large_err)] + pub(crate) fn finish(&self) -> Result<(), Status> { + if self.open_unit.is_some() { + return Err(Status::invalid_argument( + "speculative batch stream ended while chunked unit is open", + )); + } + if !self.closed { + return Err(Status::invalid_argument( + "speculative batch stream ended without batch_end", + )); + } + Ok(()) + } +} + +pub(crate) async fn decode_speculative_batch_stream_init( + stream: &mut Streaming, +) -> Result { + match stream.message().await? 
{ + Some(chunk) => match chunk.chunk { + Some(BatchChunk::Init(init)) => Ok(init), + _ => Err(Status::invalid_argument( + "first message must be of type Init", + )), + }, + None => Err(Status::invalid_argument( + "expected Init message as the first message", + )), + } +} + +#[allow(clippy::result_large_err)] +fn validate_speculative_unit_init( + unit_init: &SpeculativeUpdateClientUnitInit, +) -> Result<(), Status> { + if unit_init.unit_id.is_empty() { + return Err(Status::invalid_argument( + "speculative unit_init requires unit_id", + )); + } + if unit_init.type_url.is_empty() { + return Err(Status::invalid_argument( + "speculative unit_init requires type_url", + )); + } + if unit_init.base_state.is_none() { + return Err(Status::invalid_argument( + "speculative unit_init requires base_state", + )); + } + Ok(()) +} + +#[allow(clippy::result_large_err)] +fn append_speculative_unit_header_chunk( + open_unit: &mut Option, + header_chunk: SpeculativeUpdateClientUnitHeaderChunk, + total_header_bytes: &mut usize, +) -> Result<(), Status> { + if header_chunk.data.is_empty() { + return Err(Status::invalid_argument( + "speculative unit_header_chunk data must not be empty", + )); + } + if header_chunk.data.len() > MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES { + return Err(Status::resource_exhausted(format!( + "speculative unit_header_chunk too large: bytes={} max={}", + header_chunk.data.len(), + MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES + ))); + } + + let Some(open) = open_unit.as_mut() else { + return Err(Status::invalid_argument( + "speculative unit_header_chunk received before unit_init", + )); + }; + if header_chunk.unit_id != open.init.unit_id { + return Err(Status::invalid_argument(format!( + "speculative unit_header_chunk unit_id mismatch: open={} chunk={}", + open.init.unit_id, header_chunk.unit_id + ))); + } + + let chunk_len = header_chunk.data.len(); + open.header_bytes.extend(header_chunk.data); + *total_header_bytes += chunk_len; + 
validate_speculative_unit_header_payload_len(&open.init.unit_id, open.header_bytes.len())?; + if *total_header_bytes > MAX_SPECULATIVE_BATCH_HEADER_BYTES { + return Err(Status::resource_exhausted(format!( + "speculative batch header payload too large: bytes={} max={}", + *total_header_bytes, MAX_SPECULATIVE_BATCH_HEADER_BYTES + ))); + } + Ok(()) +} + +#[allow(clippy::result_large_err)] +fn validate_speculative_unit_header_payload_len( + unit_id: &str, + header_bytes: usize, +) -> Result<(), Status> { + if header_bytes > MAX_SPECULATIVE_UNIT_HEADER_BYTES { + return Err(Status::resource_exhausted(format!( + "speculative unit header payload too large: unit_id={} bytes={} max={}", + unit_id, header_bytes, MAX_SPECULATIVE_UNIT_HEADER_BYTES + ))); + } + Ok(()) +} + +#[allow(clippy::result_large_err)] +fn close_speculative_unit( + client_id: &str, + open_unit: Option, + unit_id: String, +) -> Result { + let Some(open) = open_unit else { + return Err(Status::invalid_argument( + "speculative unit_end received before unit_init", + )); + }; + if unit_id != open.init.unit_id { + return Err(Status::invalid_argument(format!( + "speculative unit_end unit_id mismatch: open={} end={}", + open.init.unit_id, unit_id + ))); + } + if open.header_bytes.is_empty() { + return Err(Status::invalid_argument(format!( + "speculative unit header is empty: unit_id={}", + open.init.unit_id + ))); + } + info!( + "received speculative update client unit: client_id={} unit_id={} header_bytes={} header_sha256={}", + client_id, + open.init.unit_id, + open.header_bytes.len(), + sha256_hex(&open.header_bytes) + ); + + Ok(SpeculativeUpdateClientRequest { + unit_id: open.init.unit_id, + update: MsgUpdateClient { + client_id: client_id.to_string(), + header: Some(Any { + type_url: open.init.type_url, + value: open.header_bytes, + }), + include_state: open.init.include_state, + signer: open.init.signer, + }, + base_state: decode_explicit_state_ref(open.init.base_state)?, + }) +} + 
+#[allow(clippy::result_large_err)] +#[cfg(test)] +fn decode_speculative_batch( + request: DecodedSpeculativeBatchRequest, +) -> Result { + validate_speculative_batch_limits(&request)?; + Ok(SpeculativeUpdateClientBatch { + client_id: request.client_id, + units: request.units, + }) +} + +#[allow(clippy::result_large_err)] +#[cfg(test)] +fn validate_speculative_batch_limits( + request: &DecodedSpeculativeBatchRequest, +) -> Result<(), Status> { + if request.units.len() > MAX_SPECULATIVE_BATCH_UNITS { + return Err(Status::invalid_argument(format!( + "speculative batch too large: units={} max={}", + request.units.len(), + MAX_SPECULATIVE_BATCH_UNITS + ))); + } + for unit in &request.units { + let header_bytes = unit + .update + .header + .as_ref() + .map(|header| header.value.len()) + .unwrap_or_default(); + validate_speculative_unit_header_payload_len(&unit.unit_id, header_bytes)?; + } + Ok(()) +} + +#[allow(clippy::result_large_err)] +fn decode_explicit_state_ref( + base_state: Option, +) -> Result { + let base_state = + base_state.ok_or_else(|| Status::invalid_argument("missing speculative base_state"))?; + Ok(ExplicitStateRef { + prev_height: base_state.prev_height.map(Height::from), + prev_state_id: if base_state.prev_state_id.is_empty() { + None + } else { + Some(base_state.prev_state_id) + }, + client_state: base_state.client_state.map(Into::into), + consensus_state: base_state.consensus_state.map(Into::into), + }) +} + +pub(crate) fn encode_stitched_batch_result( + result: StitchedUpdateClientBatchResult, +) -> ExecuteSpeculativeUpdateClientBatchResponse { + ExecuteSpeculativeUpdateClientBatchResponse { + client_id: result.client_id, + units: result + .units + .into_iter() + .map(encode_stitched_unit_result) + .collect(), + } +} + +fn encode_stitched_unit_result( + result: StitchedUpdateClientResult, +) -> ProtoStitchedSpeculativeUpdateClientUnitResult { + ProtoStitchedSpeculativeUpdateClientUnitResult { + response: Some(result.response), + 
observed_transition: Some(encode_observed_transition(result.observed_transition)), + } +} + +fn encode_observed_transition(transition: ObservedStateTransition) -> ProtoObservedStateTransition { + ProtoObservedStateTransition { + prev_height: transition.prev_height.map(Into::into), + prev_state_id: transition.prev_state_id.unwrap_or_default(), + post_height: Some(transition.post_height.into()), + post_state_id: transition.post_state_id, + } +} + +#[cfg(test)] +mod tests { + use super::{ + decode_speculative_batch, validate_speculative_unit_header_payload_len, + DecodedSpeculativeBatchRequest, SpeculativeBatchStreamDecoder, + MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES, + }; + use crate::{ + ExplicitStateRef, SpeculativeUpdateClientRequest, MAX_SPECULATIVE_BATCH_UNITS, + MAX_SPECULATIVE_UNIT_HEADER_BYTES, + }; + use lcp_proto::google::protobuf::Any; + use lcp_proto::lcp::service::elc::v1::{ + msg_speculative_update_client_batch_stream_chunk::Chunk as BatchChunk, + ExplicitStateRef as ProtoExplicitStateRef, MsgUpdateClient, + SpeculativeUpdateClientBatchEnd, SpeculativeUpdateClientBatchStreamInit, + SpeculativeUpdateClientUnitEnd, SpeculativeUpdateClientUnitHeaderChunk, + SpeculativeUpdateClientUnitInit, + }; + use tonic::Code; + + fn make_unit(unit_id: usize, header_len: usize) -> SpeculativeUpdateClientRequest { + SpeculativeUpdateClientRequest { + unit_id: format!("unit-{unit_id:04}"), + update: MsgUpdateClient { + client_id: "client-0".to_string(), + header: Some(Any { + type_url: "/test.Header".to_string(), + value: vec![0u8; header_len], + }), + include_state: false, + signer: Vec::new(), + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, + }, + } + } + + fn make_unit_init(unit_id: &str) -> SpeculativeUpdateClientUnitInit { + SpeculativeUpdateClientUnitInit { + unit_id: unit_id.to_string(), + type_url: "/test.Header".to_string(), + include_state: false, + signer: Vec::new(), + base_state: 
Some(ProtoExplicitStateRef { + prev_height: None, + prev_state_id: Vec::new(), + client_state: None, + consensus_state: None, + }), + } + } + + #[allow(clippy::result_large_err)] + fn decode_stream_chunks( + chunks: impl IntoIterator, + ) -> Result { + let mut decoder = SpeculativeBatchStreamDecoder::new("client-0".to_string()); + for chunk in chunks { + decoder.push_chunk(Some(chunk))?; + } + decoder.finish()?; + Ok(DecodedSpeculativeBatchRequest { + client_id: decoder.client_id, + units: decoder.units, + }) + } + + fn assert_invalid_argument_contains(err: tonic::Status, expected_message: &str) { + assert_eq!(err.code(), Code::InvalidArgument); + assert!( + err.message().contains(expected_message), + "unexpected error message: {}", + err.message() + ); + } + + fn assert_resource_exhausted_contains(err: tonic::Status, expected_message: &str) { + assert_eq!(err.code(), Code::ResourceExhausted); + assert!( + err.message().contains(expected_message), + "unexpected error message: {}", + err.message() + ); + } + + #[test] + fn decode_speculative_batch_rejects_too_many_units() { + let request = DecodedSpeculativeBatchRequest { + client_id: "client-0".to_string(), + units: (0..=MAX_SPECULATIVE_BATCH_UNITS) + .map(|i| make_unit(i, 1)) + .collect(), + }; + let err = decode_speculative_batch(request).unwrap_err(); + assert!(err.message().contains("speculative batch too large")); + } + + #[test] + fn validate_speculative_unit_header_payload_len_rejects_excessive_payload() { + let err = validate_speculative_unit_header_payload_len( + "unit-0000", + MAX_SPECULATIVE_UNIT_HEADER_BYTES + 1, + ) + .unwrap_err(); + assert_resource_exhausted_contains(err, "speculative unit header payload too large"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_decodes_units() { + let request = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: 
b"abc".to_vec(), + }), + BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0000".to_string(), + }), + BatchChunk::BatchEnd(SpeculativeUpdateClientBatchEnd {}), + ]) + .unwrap(); + + assert_eq!(request.units.len(), 1); + assert_eq!(request.units[0].unit_id, "unit-0000"); + assert_eq!( + request.units[0].update.header.as_ref().unwrap().value, + b"abc" + ); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_second_init() { + let err = + decode_stream_chunks([BatchChunk::Init(SpeculativeUpdateClientBatchStreamInit { + client_id: "client-0".to_string(), + })]) + .unwrap_err(); + assert_invalid_argument_contains(err, "Init must only appear"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_nested_unit_init() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitInit(make_unit_init("unit-0001")), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "unit_init received before previous unit_end"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_header_chunk_before_unit_init() { + let err = decode_stream_chunks([BatchChunk::UnitHeaderChunk( + SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: b"abc".to_vec(), + }, + )]) + .unwrap_err(); + assert_invalid_argument_contains(err, "unit_header_chunk received before unit_init"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_chunk_unit_id_mismatch() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0001".to_string(), + data: b"abc".to_vec(), + }), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "unit_header_chunk unit_id mismatch"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_end_unit_id_mismatch() { + let err = decode_stream_chunks([ + 
BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: b"abc".to_vec(), + }), + BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0001".to_string(), + }), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "unit_end unit_id mismatch"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_oversized_chunk() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: vec![0u8; MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES + 1], + }), + ]) + .unwrap_err(); + assert_resource_exhausted_contains(err, "unit_header_chunk too large"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_eof_with_open_unit() { + let err = + decode_stream_chunks([BatchChunk::UnitInit(make_unit_init("unit-0000"))]).unwrap_err(); + assert_invalid_argument_contains(err, "stream ended while chunked unit is open"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_eof_without_batch_end() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: b"abc".to_vec(), + }), + BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0000".to_string(), + }), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "stream ended without batch_end"); + } + + #[test] + fn decode_speculative_batch_stream_chunks_rejects_empty_header() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0000".to_string(), + }), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "speculative unit header is empty"); + } + + 
#[test] + fn decode_speculative_batch_stream_chunks_rejects_duplicate_unit_id() { + let err = decode_stream_chunks([ + BatchChunk::UnitInit(make_unit_init("unit-0000")), + BatchChunk::UnitHeaderChunk(SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: b"abc".to_vec(), + }), + BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0000".to_string(), + }), + BatchChunk::UnitInit(make_unit_init("unit-0000")), + ]) + .unwrap_err(); + assert_invalid_argument_contains(err, "duplicate speculative unit_id"); + } +} diff --git a/modules/service/src/speculative/types.rs b/modules/service/src/speculative/types.rs new file mode 100644 index 00000000..02f6c348 --- /dev/null +++ b/modules/service/src/speculative/types.rs @@ -0,0 +1,117 @@ +use lcp_proto::lcp::service::elc::v1::{MsgUpdateClient, MsgUpdateClientResponse}; +use lcp_types::{Any, Height}; +use serde::{Deserialize, Serialize}; +use store::WriteSet; + +pub const MAX_SPECULATIVE_BATCH_UNITS: usize = 256; +pub const MAX_SPECULATIVE_BATCH_HEADER_BYTES: usize = 512 * 1024 * 1024; +pub const MAX_SPECULATIVE_UNIT_HEADER_BYTES: usize = 256 * 1024 * 1024; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExplicitStateRef { + pub prev_height: Option, + pub prev_state_id: Option>, + pub client_state: Option, + pub consensus_state: Option, +} + +impl ExplicitStateRef { + pub(crate) fn has_complete_base_state_payload(&self) -> bool { + self.prev_height.is_some() && self.client_state.is_some() && self.consensus_state.is_some() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ObservedStateTransition { + pub prev_height: Option, + pub prev_state_id: Option>, + pub post_height: Height, + pub post_state_id: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SpeculativeUpdateClientRequest { + pub unit_id: String, + pub update: MsgUpdateClient, + pub base_state: ExplicitStateRef, +} + +impl SpeculativeUpdateClientRequest { + pub fn 
update_key(&self) -> String { + self.update.client_id.clone() + } +} + +#[derive(Debug, Clone)] +pub struct SpeculativeUpdateClientResult { + pub response: MsgUpdateClientResponse, + pub write_set: WriteSet, + pub base_state: ExplicitStateRef, + pub observed_transition: ObservedStateTransition, +} + +impl SpeculativeUpdateClientResult { + #[allow(clippy::result_large_err)] + pub fn validate_base_state(&self) -> core::result::Result<(), enclave_api::Error> { + if self.base_state.prev_height.is_some() + && self.base_state.prev_height != self.observed_transition.prev_height + { + return Err(enclave_api::Error::invalid_argument(format!( + "base prev_height mismatch: expected={:?} observed={:?}", + self.base_state.prev_height, self.observed_transition.prev_height + ))); + } + if self.base_state.prev_state_id.is_some() + && self.base_state.prev_state_id != self.observed_transition.prev_state_id + { + return Err(enclave_api::Error::invalid_argument(format!( + "base prev_state_id mismatch: expected={:?} observed={:?}", + self.base_state.prev_state_id, self.observed_transition.prev_state_id + ))); + } + Ok(()) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StitchedUpdateClientResult { + pub response: MsgUpdateClientResponse, + pub observed_transition: ObservedStateTransition, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SpeculativeUpdateClientBatch { + pub client_id: String, + pub units: Vec, +} + +#[derive(Debug, Clone)] +pub struct SpeculativeUpdateClientBatchResult { + pub client_id: String, + pub units: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StitchedUpdateClientBatchResult { + pub client_id: String, + pub units: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum SpeculativeBatchFailureKind { + MixedClientId, + DuplicateUnitId, + DependencyStateMismatch, + SpeculativeExecutionFailed, + ResultClientMismatch, + BatchSizeMismatch, + BaseStateMismatch, + 
StitchApplyFailed, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SpeculativeBatchFailure { + pub kind: SpeculativeBatchFailureKind, + pub unit_id: Option, + pub detail: String, +} diff --git a/modules/service/src/speculative/validation.rs b/modules/service/src/speculative/validation.rs new file mode 100644 index 00000000..374c0944 --- /dev/null +++ b/modules/service/src/speculative/validation.rs @@ -0,0 +1,117 @@ +use super::types::{ + ObservedStateTransition, SpeculativeBatchFailure, SpeculativeBatchFailureKind, + SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, MAX_SPECULATIVE_BATCH_UNITS, + MAX_SPECULATIVE_UNIT_HEADER_BYTES, +}; +use std::collections::BTreeSet; + +// Validate all requests in an already materialized batch against the batch +// client ID. This is a whole-batch wrapper around per-unit admission checks and +// is mainly useful for non-streamed/test-assembled batches. +pub(crate) fn validate_linear_batch_requests( + client_id: &str, + units: &[SpeculativeUpdateClientRequest], +) -> core::result::Result<(), SpeculativeBatchFailure> { + let mut seen_unit_ids = BTreeSet::new(); + for (index, unit) in units.iter().enumerate() { + validate_next_linear_request(client_id, index, &mut seen_unit_ids, unit)?; + } + Ok(()) +} + +// Validate one request as it is admitted into a linear speculative batch. This +// check runs before speculative execution so mixed clients, duplicate unit IDs, +// oversized batches, and oversized unit headers are rejected before scheduler +// resources are spent on the unit. 
+pub(crate) fn validate_next_linear_request( + client_id: &str, + index: usize, + seen_unit_ids: &mut BTreeSet, + req: &SpeculativeUpdateClientRequest, +) -> core::result::Result<(), SpeculativeBatchFailure> { + if req.update_key() != client_id { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::MixedClientId, + unit_id: Some(req.unit_id.clone()), + detail: format!( + "mixed client_id batch is not allowed: batch={} unit={}", + client_id, + req.update_key() + ), + }); + } + if index >= MAX_SPECULATIVE_BATCH_UNITS { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: Some(req.unit_id.clone()), + detail: format!( + "speculative batch too large: units exceed {}", + MAX_SPECULATIVE_BATCH_UNITS + ), + }); + } + if !seen_unit_ids.insert(req.unit_id.clone()) { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::DuplicateUnitId, + unit_id: Some(req.unit_id.clone()), + detail: format!("duplicate unit_id in speculative batch: {}", req.unit_id), + }); + } + let header_len = req + .update + .header + .as_ref() + .map(|header| header.value.len()) + .unwrap_or_default(); + if header_len > MAX_SPECULATIVE_UNIT_HEADER_BYTES { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: Some(req.unit_id.clone()), + detail: "speculative unit header payload too large".to_string(), + }); + } + Ok(()) +} + +// Validate that the speculative execution results form a single linear chain +// in request order. This is run before stitching so a batch cannot merge write +// sets from results whose observed base/post states do not connect. 
+pub(crate) fn validate_linear_transitions( + requests: &[SpeculativeUpdateClientRequest], + results: &[SpeculativeUpdateClientResult], +) -> core::result::Result<(), SpeculativeBatchFailure> { + let mut previous = None; + for (req, result) in requests.iter().zip(results.iter()) { + validate_observed_transition_follows(&req.unit_id, previous, result)?; + previous = Some(&result.observed_transition); + } + Ok(()) +} + +// Ensure the current speculative result extends the previous unit's observed +// state transition. The first unit has no predecessor, but every following unit +// must report the previous unit's post state as its own base state before the +// batch can be stitched into one canonical write set. +fn validate_observed_transition_follows( + unit_id: &str, + previous: Option<&ObservedStateTransition>, + result: &SpeculativeUpdateClientResult, +) -> core::result::Result<(), SpeculativeBatchFailure> { + let Some(previous) = previous else { + return Ok(()); + }; + if result.observed_transition.prev_height != Some(previous.post_height) + || result.observed_transition.prev_state_id.as_deref() + != Some(previous.post_state_id.as_slice()) + { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::DependencyStateMismatch, + unit_id: Some(unit_id.to_string()), + detail: format!( + "unit {} base state does not match previous unit post state", + unit_id + ), + }); + } + Ok(()) +} From deb1db36b0081738feabcb962f88093908c2dbb3 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 15:26:44 +0900 Subject: [PATCH 06/48] app: configure speculative service concurrency --- app/src/commands/elc.rs | 4 +-- app/src/commands/service.rs | 58 ++++++++++++++++++++++++++++++------- app/src/enclave.rs | 26 ++++++++++++++++- enclave/Enclave.config.xml | 2 +- 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/app/src/commands/elc.rs b/app/src/commands/elc.rs index 1f3b8b53..9b84e70c 100644 --- a/app/src/commands/elc.rs +++ 
b/app/src/commands/elc.rs @@ -5,7 +5,7 @@ use crate::{ use anyhow::Result; use clap::Parser; use enclave_api::{Enclave, EnclaveProtoAPI}; -use host::store::transaction::CommitStore; +use host::store::transaction::{CommitStore, TxAccessor}; use serde::de::DeserializeOwned; use std::path::PathBuf; @@ -47,7 +47,7 @@ impl ELCOpts { impl ELCCmd { pub fn run(&self, opts: &Opts, enclave_loader: L) -> Result<()> where - S: CommitStore, + S: CommitStore + TxAccessor + 'static, Enclave: EnclaveProtoAPI, L: EnclaveLoader, { diff --git a/app/src/commands/service.rs b/app/src/commands/service.rs index 685882f4..ed14c88e 100644 --- a/app/src/commands/service.rs +++ b/app/src/commands/service.rs @@ -2,10 +2,10 @@ use crate::enclave::EnclaveLoader; use crate::opts::{EnclaveOpts, Opts}; use anyhow::Result; use clap::Parser; -use enclave_api::{Enclave, EnclaveInfo, EnclaveProtoAPI}; -use host::store::transaction::CommitStore; +use enclave_api::{Enclave, EnclaveInfo, EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; +use host::store::transaction::{CommitStore, TxAccessor}; use log::*; -use service::{run_service, AppService}; +use service::{run_service, ElcService}; use std::sync::Arc; use tokio::runtime::Builder; @@ -28,27 +28,51 @@ pub struct Start { help = "Address of the App service" )] pub address: String, - /// Worker thread number the tokio `Runtime` will use - /// This value is recommended to be less than or equal to TCS_NUM in Enclave config. + /// Worker thread number the tokio `Runtime` will use. + /// This does not control enclave ECALL/TCS concurrency. #[clap( long = "threads", help = "Worker thread number the tokio `Runtime` will use" )] pub threads: Option, + /// Maximum concurrent enclave ECALLs across serial and speculative paths. + /// Defaults to the current Enclave.config.xml TCS budget (4). 
+ #[clap( + long = "max-enclave-concurrency", + help = "Maximum concurrent enclave ECALLs" + )] + pub max_enclave_concurrency: Option, + /// Maximum concurrent speculative update-client requests. + /// Prefer a value less than or equal to --max-enclave-concurrency; excess + /// speculative workers will wait on the enclave ECALL gate. + #[clap( + long = "max-speculative-concurrency", + default_value_t = 1, + help = "Maximum concurrent speculative update-client requests" + )] + pub max_speculative_concurrency: usize, } impl ServiceCmd { pub fn run(&self, opts: &Opts, enclave_loader: L) -> Result<()> where - S: CommitStore + 'static, - Enclave: EnclaveProtoAPI, + S: CommitStore + TxAccessor + 'static, + Enclave: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI, L: EnclaveLoader, { match self { Self::Start(cmd) => { let addr = cmd.address.parse()?; - let enclave = - enclave_loader.load(opts, cmd.enclave.path.as_ref(), cmd.enclave.is_debug())?; + let enclave_parallelism = cmd + .max_enclave_concurrency + .unwrap_or(Enclave::::DEFAULT_ECALL_CONCURRENCY) + .max(1); + let enclave = enclave_loader.load_with_ecall_concurrency( + opts, + cmd.enclave.path.as_ref(), + cmd.enclave.is_debug(), + enclave_parallelism, + )?; let metadata = enclave.metadata()?; let mrenclave = metadata.mrenclave().to_hex_string(); let mut rb = Builder::new_multi_thread(); @@ -58,9 +82,21 @@ impl ServiceCmd { &mut rb }; let rt = Arc::new(rb.enable_all().build()?); - let srv = AppService::new(opts.get_home(), enclave); + let speculative_concurrency_limit = cmd.max_speculative_concurrency.max(1); + if speculative_concurrency_limit > enclave_parallelism { + warn!( + "max-speculative-concurrency ({}) is greater than max-enclave-concurrency ({}); speculative workers above the enclave limit will wait on the ECALL gate", + speculative_concurrency_limit, + enclave_parallelism + ); + } + let srv = ElcService::new(opts.get_home(), enclave, speculative_concurrency_limit); - info!("start service: addr={addr} 
mrenclave={mrenclave}"); + info!( + "start service: addr={addr} mrenclave={mrenclave} speculative_concurrency_limit={} enclave_parallelism={}", + speculative_concurrency_limit, + enclave_parallelism + ); rt.block_on(async { run_service(srv, addr).await }) } } diff --git a/app/src/enclave.rs b/app/src/enclave.rs index ad18e5f0..f5c3fbc5 100644 --- a/app/src/enclave.rs +++ b/app/src/enclave.rs @@ -7,6 +7,14 @@ use std::path::PathBuf; pub trait EnclaveLoader { fn load(&self, opts: &Opts, path: Option<&PathBuf>, debug: bool) -> Result>; + + fn load_with_ecall_concurrency( + &self, + opts: &Opts, + path: Option<&PathBuf>, + debug: bool, + ecall_concurrency: usize, + ) -> Result>; } #[derive(Debug)] @@ -17,6 +25,16 @@ where Enclave: EnclaveProtoAPI, { fn load(&self, opts: &Opts, path: Option<&PathBuf>, debug: bool) -> Result> { + self.load_with_ecall_concurrency(opts, path, debug, Enclave::::DEFAULT_ECALL_CONCURRENCY) + } + + fn load_with_ecall_concurrency( + &self, + opts: &Opts, + path: Option<&PathBuf>, + debug: bool, + ecall_concurrency: usize, + ) -> Result> { let path = if let Some(path) = path { path.clone() } else { @@ -24,7 +42,13 @@ where }; let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home)?; - match Enclave::create(&path, debug, km, env.store.clone()) { + match Enclave::create_with_ecall_concurrency( + &path, + debug, + km, + env.store.clone(), + ecall_concurrency, + ) { Ok(enclave) => Ok(enclave), Err(x) => { bail!( diff --git a/enclave/Enclave.config.xml b/enclave/Enclave.config.xml index 7c0a9e03..e49200a2 100644 --- a/enclave/Enclave.config.xml +++ b/enclave/Enclave.config.xml @@ -4,7 +4,7 @@ 0 0x40000 0x100000 - 2 + 4 0 0 0 From 4c395d84681bb285795cb8ec94327d3a6ed718f4 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 12 May 2026 16:09:00 +0900 Subject: [PATCH 07/48] Fix speculative service formatting --- modules/service/src/speculative/service.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index e26b97ae..4918abff 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -11,9 +11,7 @@ use super::types::{ SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, StitchedUpdateClientBatchResult, StitchedUpdateClientResult, }; -use super::validation::{ - validate_linear_batch_requests, validate_linear_transitions, -}; +use super::validation::{validate_linear_batch_requests, validate_linear_transitions}; use crate::service::AppService; use commitments::ProxyMessage; use enclave_api::{ From daad0e581dae081b18ffc57d155da1273393d72a Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Thu, 14 May 2026 10:08:26 +0900 Subject: [PATCH 08/48] service: bound resident speculative headers --- modules/service/src/elc.rs | 8 +- modules/service/src/speculative/rebase.rs | 7 +- modules/service/src/speculative/scheduler.rs | 55 ++-- modules/service/src/speculative/service.rs | 114 ++++---- modules/service/src/speculative/stream.rs | 259 +++++++++++++++++-- modules/service/src/speculative/types.rs | 8 + 6 files changed, 354 insertions(+), 97 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 6cc69518..3037a7a8 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -1,8 +1,9 @@ use crate::service::{AppService, ElcService}; use crate::speculative::stream::{ decode_speculative_batch_stream_init, encode_stitched_batch_result, - SpeculativeBatchStreamDecoder, + SpeculativeBatchStreamDecoder, SpeculativeHeaderMemoryBudget, }; +use crate::MAX_SPECULATIVE_BATCH_HEADER_BYTES; use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; use lcp_proto::google::protobuf::Any; use lcp_proto::lcp::service::elc::v1::msg_update_client_stream_chunk::Chunk; @@ -127,10 +128,13 @@ where ) }); let mut decoder = SpeculativeBatchStreamDecoder::new(client_id.clone()); + let 
header_memory_budget = + SpeculativeHeaderMemoryBudget::new(MAX_SPECULATIVE_BATCH_HEADER_BYTES); let mut units = 0usize; while let Some(chunk_msg) = stream.message().await? { - if let Some(unit) = decoder.push_chunk(chunk_msg.chunk)? { + let header_memory = header_memory_budget.reserve_for_chunk(&chunk_msg).await?; + if let Some(unit) = decoder.push_chunk(chunk_msg.chunk, header_memory)? { units += 1; tx.send(unit).map_err(|_| { Status::aborted("speculative batch scheduler stopped before stream ended") diff --git a/modules/service/src/speculative/rebase.rs b/modules/service/src/speculative/rebase.rs index 88522b75..86bab010 100644 --- a/modules/service/src/speculative/rebase.rs +++ b/modules/service/src/speculative/rebase.rs @@ -13,10 +13,10 @@ pub(crate) struct DependencyRebaseState { pub(crate) consensus_state: Option, } -pub(crate) fn rebase_speculative_request( - mut req: SpeculativeUpdateClientRequest, +pub(crate) fn rebase_speculative_request_in_place( + req: &mut SpeculativeUpdateClientRequest, previous: &DependencyRebaseState, -) -> SpeculativeUpdateClientRequest { +) { // Always seed the base state from the previous result so the next unit // observes its predecessor's post-state and write set. 
req.base_state = ExplicitStateRef { @@ -25,7 +25,6 @@ pub(crate) fn rebase_speculative_request( client_state: previous.client_state.clone(), consensus_state: previous.consensus_state.clone(), }; - req } pub(crate) fn build_dependency_rebase_state( diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 4134b2e9..4162c419 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -1,7 +1,8 @@ use super::rebase::{ - build_dependency_rebase_state, rebase_speculative_request, DependencyRebaseState, + build_dependency_rebase_state, rebase_speculative_request_in_place, DependencyRebaseState, }; use super::service::SpeculativeService; +use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ SpeculativeBatchFailure, SpeculativeBatchFailureKind, SpeculativeUpdateClientBatchResult, SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, @@ -37,7 +38,7 @@ pub(crate) fn execute_speculative_update_client_stream( speculative: &SpeculativeService, app: &AppService, client_id: String, - units: Receiver, + units: Receiver, ) -> core::result::Result where S: CommitStore + TxAccessor + Send + 'static, @@ -81,7 +82,10 @@ where if state.has_unresolvable_pending_work() { state.failure = Some(SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::DependencyStateMismatch, - unit_id: state.pending.front().map(|(_, req)| req.unit_id.clone()), + unit_id: state + .pending + .front() + .map(|(_, req)| req.request().unit_id.clone()), detail: "speculative stream ended with unresolved linear dependencies" .to_string(), }); @@ -153,8 +157,8 @@ struct StreamingSchedulerShared { // input order, even if worker threads finish out of order. 
struct StreamingSchedulerState { client_id: String, - ready: VecDeque<(usize, SpeculativeUpdateClientRequest)>, - pending: VecDeque<(usize, SpeculativeUpdateClientRequest)>, + ready: VecDeque<(usize, ResidentSpeculativeUpdateClientRequest)>, + pending: VecDeque<(usize, ResidentSpeculativeUpdateClientRequest)>, request_by_index: BTreeMap, result_by_index: BTreeMap, rebase_state_by_index: BTreeMap, @@ -194,10 +198,15 @@ impl StreamingSchedulerState { fn enqueue( &mut self, - req: SpeculativeUpdateClientRequest, + req: ResidentSpeculativeUpdateClientRequest, ) -> core::result::Result<(), SpeculativeBatchFailure> { let index = self.unit_count; - validate_next_linear_request(&self.client_id, index, &mut self.seen_unit_ids, &req)?; + validate_next_linear_request( + &self.client_id, + index, + &mut self.seen_unit_ids, + req.request(), + )?; self.unit_count += 1; self.enqueue_ready_or_pending(index, req) } @@ -205,13 +214,13 @@ impl StreamingSchedulerState { fn enqueue_ready_or_pending( &mut self, index: usize, - req: SpeculativeUpdateClientRequest, + mut req: ResidentSpeculativeUpdateClientRequest, ) -> core::result::Result<(), SpeculativeBatchFailure> { - if index == 0 || req.base_state.has_complete_base_state_payload() { + if index == 0 || req.request().base_state.has_complete_base_state_payload() { self.ready.push_back((index, req)); } else if let Some(previous) = self.rebase_state_by_index.get(&(index - 1)) { - self.ready - .push_back((index, rebase_speculative_request(req, previous))); + rebase_speculative_request_in_place(req.request_mut(), previous); + self.ready.push_back((index, req)); } else { self.pending.push_back((index, req)); } @@ -235,12 +244,12 @@ impl StreamingSchedulerState { fn promote_pending(&mut self) -> core::result::Result<(), SpeculativeBatchFailure> { let mut remaining = VecDeque::new(); - while let Some((index, req)) = self.pending.pop_front() { - if index == 0 || req.base_state.has_complete_base_state_payload() { + while let Some((index, mut 
req)) = self.pending.pop_front() { + if index == 0 || req.request().base_state.has_complete_base_state_payload() { self.ready.push_back((index, req)); } else if let Some(previous) = self.rebase_state_by_index.get(&(index - 1)) { - self.ready - .push_back((index, rebase_speculative_request(req, previous))); + rebase_speculative_request_in_place(req.request_mut(), previous); + self.ready.push_back((index, req)); } else { remaining.push_back((index, req)); } @@ -250,6 +259,12 @@ impl StreamingSchedulerState { } } +fn clear_request_header_payload(req: &mut SpeculativeUpdateClientRequest) { + if let Some(header) = req.update.header.as_mut() { + header.value.clear(); + } +} + fn streaming_speculative_worker( speculative: &SpeculativeService, app: &AppService, @@ -278,12 +293,12 @@ fn streaming_speculative_worker( } }; - let unit_id = req.unit_id.clone(); - let header_digest = speculative_request_header_digest(&req); + let unit_id = req.request().unit_id.clone(); + let header_digest = speculative_request_header_digest(req.request()); if let Some((header_bytes, header_sha256)) = header_digest.as_ref() { info!( "execute speculative update client unit: client_id={} unit_id={} header_bytes={} header_sha256={}", - req.update.client_id, + req.request().update.client_id, unit_id, header_bytes, header_sha256 @@ -291,7 +306,7 @@ fn streaming_speculative_worker( } let result = speculative .with_speculative_request_permit(|| { - speculative.speculative_update_client(app, req.clone()) + speculative.speculative_update_client(app, req.request().clone()) }) .map_err(|e| SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, @@ -309,6 +324,8 @@ fn streaming_speculative_worker( state.in_flight -= 1; match result { Ok(result) => { + let mut req = req.into_request(); + clear_request_header_payload(&mut req); if let Err(e) = state.complete_unit(index, req, result) { state.failure = Some(e); } diff --git a/modules/service/src/speculative/service.rs 
b/modules/service/src/speculative/service.rs index 4918abff..b328c723 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -2,9 +2,10 @@ use super::permit::{KeyLockMap, PermitGate}; #[cfg(test)] use super::rebase::{ extract_client_state_from_write_set, extract_consensus_state_from_write_set, - rebase_speculative_request, DependencyRebaseState, + rebase_speculative_request_in_place, DependencyRebaseState, }; use super::scheduler::execute_speculative_update_client_stream; +use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, SpeculativeBatchFailureKind, SpeculativeUpdateClientBatch, SpeculativeUpdateClientBatchResult, @@ -165,7 +166,7 @@ impl SpeculativeService { &self, app: &AppService, client_id: String, - units: Receiver, + units: Receiver, ) -> core::result::Result where S: CommitStore + TxAccessor + Send + 'static, @@ -493,7 +494,7 @@ mod tests { #[test] fn replaces_explicit_base_state_metadata_when_rebasing_previous_payloads() { - let req = mk_req( + let mut req = mk_req( "unit-0001", "client", Some(Height::new(0, 10)), @@ -510,18 +511,18 @@ mod tests { consensus_state: None, }; - let rebased = rebase_speculative_request(req, &previous); + rebase_speculative_request_in_place(&mut req, &previous); - assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); assert_eq!( - rebased.base_state.prev_state_id.as_deref(), + req.base_state.prev_state_id.as_deref(), Some(b"post-0".as_slice()) ); } #[test] fn fills_missing_base_state_metadata_from_previous_post_state() { - let req = mk_req("unit-0001", "client", None, None); + let mut req = mk_req("unit-0001", "client", None, None); let previous = DependencyRebaseState { observed_transition: ObservedStateTransition { prev_height: None, @@ -533,18 +534,18 @@ mod tests { consensus_state: None, }; - 
let rebased = rebase_speculative_request(req, &previous); + rebase_speculative_request_in_place(&mut req, &previous); - assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); assert_eq!( - rebased.base_state.prev_state_id.as_deref(), + req.base_state.prev_state_id.as_deref(), Some(b"post-0".as_slice()) ); } #[test] fn seeds_previous_payloads_even_when_explicit_base_state_is_complete() { - let req = with_explicit_base_state_payload(mk_req( + let mut req = with_explicit_base_state_payload(mk_req( "unit-0001", "client", Some(Height::new(0, 11)), @@ -573,15 +574,15 @@ mod tests { ), }; - let rebased = rebase_speculative_request(req, &previous); + rebase_speculative_request_in_place(&mut req, &previous); - assert_eq!(rebased.base_state.prev_height, Some(Height::new(0, 11))); + assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); assert_eq!( - rebased.base_state.prev_state_id.as_deref(), + req.base_state.prev_state_id.as_deref(), Some(b"post-0".as_slice()) ); - assert!(rebased.base_state.client_state.is_some()); - assert!(rebased.base_state.consensus_state.is_some()); + assert!(req.base_state.client_state.is_some()); + assert!(req.base_state.consensus_state.is_some()); } #[test] @@ -662,24 +663,26 @@ mod tests { ) }); - tx.send(SpeculativeUpdateClientRequest { - unit_id: "unit-0000".to_string(), - update: MsgUpdateClient { - client_id: client_id.to_string(), - signer: vec![0; 20], - header: Some(Any { - type_url: "/ibc.mock.Header".to_string(), - value: vec![1], - }), - ..Default::default() - }, - base_state: ExplicitStateRef { - prev_height: None, - prev_state_id: None, - client_state: None, - consensus_state: None, + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( + SpeculativeUpdateClientRequest { + unit_id: "unit-0000".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: vec![0; 20], + header: Some(Any { + type_url: 
"/ibc.mock.Header".to_string(), + value: vec![1], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, + }, }, - }) + )) .expect("send first unit"); for _ in 0..100 { @@ -693,28 +696,30 @@ mod tests { "expected first unit to start before input stream closes" ); - tx.send(SpeculativeUpdateClientRequest { - unit_id: "unit-0001".to_string(), - update: MsgUpdateClient { - client_id: client_id.to_string(), - signer: { - let mut signer = vec![0; 20]; - signer[19] = 1; - signer + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( + SpeculativeUpdateClientRequest { + unit_id: "unit-0001".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: { + let mut signer = vec![0; 20]; + signer[19] = 1; + signer + }, + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![2], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, }, - header: Some(Any { - type_url: "/ibc.mock.Header".to_string(), - value: vec![2], - }), - ..Default::default() - }, - base_state: ExplicitStateRef { - prev_height: None, - prev_state_id: None, - client_state: None, - consensus_state: None, }, - }) + )) .expect("send second unit"); drop(tx); @@ -767,7 +772,8 @@ mod tests { }; } for req in requests { - tx.send(req).expect("send unit"); + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(req)) + .expect("send unit"); } drop(tx); diff --git a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs index e61b5f8a..07e24b9a 100644 --- a/modules/service/src/speculative/stream.rs +++ b/modules/service/src/speculative/stream.rs @@ -1,7 +1,6 @@ use crate::{ ExplicitStateRef, ObservedStateTransition, SpeculativeUpdateClientRequest, - StitchedUpdateClientBatchResult, StitchedUpdateClientResult, - 
MAX_SPECULATIVE_BATCH_HEADER_BYTES, MAX_SPECULATIVE_UNIT_HEADER_BYTES, + StitchedUpdateClientBatchResult, StitchedUpdateClientResult, MAX_SPECULATIVE_UNIT_HEADER_BYTES, }; #[cfg(test)] use crate::{SpeculativeUpdateClientBatch, MAX_SPECULATIVE_BATCH_UNITS}; @@ -19,10 +18,179 @@ use lcp_types::Height; use log::info; use sha2::Digest; use std::collections::HashSet; +use std::sync::{Arc, Condvar, Mutex}; use tonic::{Status, Streaming}; pub(crate) const MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES: usize = 4 * 1024 * 1024; +/// Tracks the peak resident header payload bytes for one speculative batch +/// stream. Reservations are attached to decoded units and released when those +/// units are dropped after execution; this intentionally bounds in-memory +/// pressure instead of the total bytes carried by the whole stream. +#[derive(Clone, Debug)] +pub(crate) struct SpeculativeHeaderMemoryBudget { + inner: Arc, +} + +#[derive(Debug)] +struct SpeculativeHeaderMemoryBudgetInner { + max_bytes: usize, + state: Mutex, + available: Condvar, +} + +#[derive(Debug, Default)] +struct SpeculativeHeaderMemoryBudgetState { + used_bytes: usize, +} + +impl SpeculativeHeaderMemoryBudget { + pub(crate) fn new(max_bytes: usize) -> Self { + Self { + inner: Arc::new(SpeculativeHeaderMemoryBudgetInner { + max_bytes, + state: Mutex::new(SpeculativeHeaderMemoryBudgetState::default()), + available: Condvar::new(), + }), + } + } + + #[allow(clippy::result_large_err)] + pub(crate) async fn reserve_for_chunk( + &self, + chunk: &MsgSpeculativeUpdateClientBatchStreamChunk, + ) -> Result { + let bytes = match chunk.chunk.as_ref() { + Some(BatchChunk::UnitHeaderChunk(header_chunk)) => header_chunk.data.len(), + _ => 0, + }; + if bytes == 0 { + return Ok(SpeculativeHeaderMemoryReservation::empty()); + } + + let budget = self.clone(); + tokio::task::spawn_blocking(move || budget.reserve_blocking(bytes)) + .await + .map_err(|e| { + Status::aborted(format!( + "speculative header memory budget waiter failed: 
{e}" + )) + })? + } + + #[allow(clippy::result_large_err)] + fn reserve_blocking(&self, bytes: usize) -> Result { + if bytes > self.inner.max_bytes { + return Err(Status::resource_exhausted(format!( + "speculative resident header payload too large: bytes={} max={}", + bytes, self.inner.max_bytes + ))); + } + + let mut state = self.inner.state.lock().unwrap(); + while state.used_bytes + bytes > self.inner.max_bytes { + state = self.inner.available.wait(state).unwrap(); + } + state.used_bytes += bytes; + Ok(SpeculativeHeaderMemoryReservation { + budget: Some(self.clone()), + bytes, + }) + } + + fn release(&self, bytes: usize) { + if bytes == 0 { + return; + } + let mut state = self.inner.state.lock().unwrap(); + state.used_bytes = state.used_bytes.saturating_sub(bytes); + self.inner.available.notify_all(); + } + + #[cfg(test)] + fn used_bytes(&self) -> usize { + self.inner.state.lock().unwrap().used_bytes + } +} + +#[derive(Debug)] +pub(crate) struct SpeculativeHeaderMemoryReservation { + budget: Option, + bytes: usize, +} + +impl SpeculativeHeaderMemoryReservation { + pub(crate) fn empty() -> Self { + Self { + budget: None, + bytes: 0, + } + } + + fn merge(&mut self, mut other: Self) { + if other.bytes == 0 { + return; + } + if self.bytes == 0 { + self.budget = other.budget.take(); + self.bytes = other.bytes; + other.bytes = 0; + return; + } + debug_assert!( + match (&self.budget, &other.budget) { + (Some(left), Some(right)) => Arc::ptr_eq(&left.inner, &right.inner), + _ => false, + }, + "cannot merge header memory reservations from different budgets" + ); + self.bytes += other.bytes; + other.bytes = 0; + } +} + +impl Drop for SpeculativeHeaderMemoryReservation { + fn drop(&mut self) { + if let Some(budget) = self.budget.take() { + budget.release(self.bytes); + } + } +} + +pub(crate) struct ResidentSpeculativeUpdateClientRequest { + request: SpeculativeUpdateClientRequest, + _header_memory: SpeculativeHeaderMemoryReservation, +} + +impl 
ResidentSpeculativeUpdateClientRequest { + fn new( + request: SpeculativeUpdateClientRequest, + header_memory: SpeculativeHeaderMemoryReservation, + ) -> Self { + Self { + request, + _header_memory: header_memory, + } + } + + pub(crate) fn request(&self) -> &SpeculativeUpdateClientRequest { + &self.request + } + + pub(crate) fn request_mut(&mut self) -> &mut SpeculativeUpdateClientRequest { + &mut self.request + } + + pub(crate) fn into_request(self) -> SpeculativeUpdateClientRequest { + self.request + } + + #[cfg(test)] + pub(crate) fn unmetered(request: SpeculativeUpdateClientRequest) -> Self { + Self::new(request, SpeculativeHeaderMemoryReservation::empty()) + } +} + fn sha256_hex(bytes: &[u8]) -> String { hex::encode(sha2::Sha256::digest(bytes)) } @@ -37,6 +205,7 @@ struct DecodedSpeculativeBatchRequest { struct OpenSpeculativeUnit { init: SpeculativeUpdateClientUnitInit, header_bytes: Vec, + header_memory: SpeculativeHeaderMemoryReservation, } pub(crate) struct SpeculativeBatchStreamDecoder { @@ -66,7 +235,8 @@ impl SpeculativeBatchStreamDecoder { pub(crate) fn push_chunk( &mut self, chunk: Option, - ) -> Result, Status> { + header_memory: SpeculativeHeaderMemoryReservation, + ) -> Result, Status> { if self.closed { return Err(Status::invalid_argument( "speculative batch stream received chunk after batch_end", @@ -92,6 +262,7 @@ impl SpeculativeBatchStreamDecoder { self.open_unit = Some(OpenSpeculativeUnit { init: unit_init, header_bytes: Vec::new(), + header_memory: SpeculativeHeaderMemoryReservation::empty(), }); Ok(None) } @@ -100,6 +271,7 @@ impl SpeculativeBatchStreamDecoder { &mut self.open_unit, header_chunk, &mut self.total_header_bytes, + header_memory, )?; Ok(None) } @@ -109,14 +281,14 @@ impl SpeculativeBatchStreamDecoder { self.open_unit.take(), unit_end.unit_id, )?; - if !self.seen_unit_ids.insert(unit.unit_id.clone()) { + if !self.seen_unit_ids.insert(unit.request().unit_id.clone()) { return Err(Status::invalid_argument(format!( "duplicate 
speculative unit_id: {}", - unit.unit_id + unit.request().unit_id ))); } #[cfg(test)] - self.units.push(unit.clone()); + self.units.push(unit.request().clone()); Ok(Some(unit)) } Some(BatchChunk::BatchEnd(_)) => { @@ -191,6 +363,7 @@ fn append_speculative_unit_header_chunk( open_unit: &mut Option, header_chunk: SpeculativeUpdateClientUnitHeaderChunk, total_header_bytes: &mut usize, + header_memory: SpeculativeHeaderMemoryReservation, ) -> Result<(), Status> { if header_chunk.data.is_empty() { return Err(Status::invalid_argument( @@ -220,13 +393,8 @@ fn append_speculative_unit_header_chunk( let chunk_len = header_chunk.data.len(); open.header_bytes.extend(header_chunk.data); *total_header_bytes += chunk_len; + open.header_memory.merge(header_memory); validate_speculative_unit_header_payload_len(&open.init.unit_id, open.header_bytes.len())?; - if *total_header_bytes > MAX_SPECULATIVE_BATCH_HEADER_BYTES { - return Err(Status::resource_exhausted(format!( - "speculative batch header payload too large: bytes={} max={}", - *total_header_bytes, MAX_SPECULATIVE_BATCH_HEADER_BYTES - ))); - } Ok(()) } @@ -249,7 +417,7 @@ fn close_speculative_unit( client_id: &str, open_unit: Option, unit_id: String, -) -> Result { +) -> Result { let Some(open) = open_unit else { return Err(Status::invalid_argument( "speculative unit_end received before unit_init", @@ -275,7 +443,7 @@ fn close_speculative_unit( sha256_hex(&open.header_bytes) ); - Ok(SpeculativeUpdateClientRequest { + let request = SpeculativeUpdateClientRequest { unit_id: open.init.unit_id, update: MsgUpdateClient { client_id: client_id.to_string(), @@ -287,7 +455,11 @@ fn close_speculative_unit( signer: open.init.signer, }, base_state: decode_explicit_state_ref(open.init.base_state)?, - }) + }; + Ok(ResidentSpeculativeUpdateClientRequest::new( + request, + open.header_memory, + )) } #[allow(clippy::result_large_err)] @@ -380,6 +552,7 @@ mod tests { use super::{ decode_speculative_batch, 
validate_speculative_unit_header_payload_len, DecodedSpeculativeBatchRequest, SpeculativeBatchStreamDecoder, + SpeculativeHeaderMemoryBudget, SpeculativeHeaderMemoryReservation, MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES, }; use crate::{ @@ -389,8 +562,8 @@ mod tests { use lcp_proto::google::protobuf::Any; use lcp_proto::lcp::service::elc::v1::{ msg_speculative_update_client_batch_stream_chunk::Chunk as BatchChunk, - ExplicitStateRef as ProtoExplicitStateRef, MsgUpdateClient, - SpeculativeUpdateClientBatchEnd, SpeculativeUpdateClientBatchStreamInit, + ExplicitStateRef as ProtoExplicitStateRef, MsgSpeculativeUpdateClientBatchStreamChunk, + MsgUpdateClient, SpeculativeUpdateClientBatchEnd, SpeculativeUpdateClientBatchStreamInit, SpeculativeUpdateClientUnitEnd, SpeculativeUpdateClientUnitHeaderChunk, SpeculativeUpdateClientUnitInit, }; @@ -438,7 +611,7 @@ mod tests { ) -> Result { let mut decoder = SpeculativeBatchStreamDecoder::new("client-0".to_string()); for chunk in chunks { - decoder.push_chunk(Some(chunk))?; + decoder.push_chunk(Some(chunk), SpeculativeHeaderMemoryReservation::empty())?; } decoder.finish()?; Ok(DecodedSpeculativeBatchRequest { @@ -465,6 +638,56 @@ mod tests { ); } + fn header_chunk_msg( + unit_id: &str, + data: Vec, + ) -> MsgSpeculativeUpdateClientBatchStreamChunk { + MsgSpeculativeUpdateClientBatchStreamChunk { + chunk: Some(BatchChunk::UnitHeaderChunk( + SpeculativeUpdateClientUnitHeaderChunk { + unit_id: unit_id.to_string(), + data, + }, + )), + } + } + + #[test] + fn header_memory_reservation_is_held_by_decoded_unit_until_drop() { + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime"); + let budget = SpeculativeHeaderMemoryBudget::new(10); + let mut decoder = SpeculativeBatchStreamDecoder::new("client-0".to_string()); + + decoder + .push_chunk( + Some(BatchChunk::UnitInit(make_unit_init("unit-0000"))), + SpeculativeHeaderMemoryReservation::empty(), + ) + .expect("unit init"); + let chunk_msg = header_chunk_msg("unit-0000", 
b"abc".to_vec()); + let header_memory = runtime + .block_on(budget.reserve_for_chunk(&chunk_msg)) + .expect("header memory"); + assert_eq!(budget.used_bytes(), 3); + decoder + .push_chunk(chunk_msg.chunk, header_memory) + .expect("header chunk"); + assert_eq!(budget.used_bytes(), 3); + + let unit = decoder + .push_chunk( + Some(BatchChunk::UnitEnd(SpeculativeUpdateClientUnitEnd { + unit_id: "unit-0000".to_string(), + })), + SpeculativeHeaderMemoryReservation::empty(), + ) + .expect("unit end") + .expect("decoded unit"); + assert_eq!(budget.used_bytes(), 3); + drop(unit); + assert_eq!(budget.used_bytes(), 0); + } + #[test] fn decode_speculative_batch_rejects_too_many_units() { let request = DecodedSpeculativeBatchRequest { diff --git a/modules/service/src/speculative/types.rs b/modules/service/src/speculative/types.rs index 02f6c348..3f9e6574 100644 --- a/modules/service/src/speculative/types.rs +++ b/modules/service/src/speculative/types.rs @@ -4,7 +4,15 @@ use serde::{Deserialize, Serialize}; use store::WriteSet; pub const MAX_SPECULATIVE_BATCH_UNITS: usize = 256; + +/// Maximum speculative update-client header bytes that one streaming RPC may +/// keep resident at the same time. This is a peak in-memory budget, not a +/// cumulative per-stream payload limit: completed units clear their header +/// payloads and release their reservation before the stream continues reading +/// more data. pub const MAX_SPECULATIVE_BATCH_HEADER_BYTES: usize = 512 * 1024 * 1024; + +/// Maximum header payload accepted for a single speculative update-client unit. 
pub const MAX_SPECULATIVE_UNIT_HEADER_BYTES: usize = 256 * 1024 * 1024; #[derive(Debug, Clone, Serialize, Deserialize)] From 3eaae1bdd2478fadc6a1cc5795d2071cba00be2e Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Thu, 14 May 2026 10:18:54 +0900 Subject: [PATCH 09/48] service: defer speculative header digest --- modules/service/src/speculative/scheduler.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 4162c419..d1ae1d69 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -27,6 +27,10 @@ fn sha256_hex(bytes: &[u8]) -> String { hex::encode(sha2::Sha256::digest(bytes)) } +fn speculative_request_header_len(req: &SpeculativeUpdateClientRequest) -> Option { + Some(req.update.header.as_ref()?.value.len()) +} + fn speculative_request_header_digest( req: &SpeculativeUpdateClientRequest, ) -> Option<(usize, String)> { @@ -294,14 +298,13 @@ fn streaming_speculative_worker( }; let unit_id = req.request().unit_id.clone(); - let header_digest = speculative_request_header_digest(req.request()); - if let Some((header_bytes, header_sha256)) = header_digest.as_ref() { + let header_bytes = speculative_request_header_len(req.request()); + if let Some(header_bytes) = header_bytes { info!( - "execute speculative update client unit: client_id={} unit_id={} header_bytes={} header_sha256={}", + "execute speculative update client unit: client_id={} unit_id={} header_bytes={}", req.request().update.client_id, unit_id, - header_bytes, - header_sha256 + header_bytes ); } let result = speculative @@ -311,7 +314,7 @@ fn streaming_speculative_worker( .map_err(|e| SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, unit_id: Some(unit_id), - detail: match header_digest { + detail: match speculative_request_header_digest(req.request()) { Some((header_bytes, 
header_sha256)) => format!( "{}; header_bytes={} header_sha256={}", e, header_bytes, header_sha256 From 98ca9600d60776d89565351c5cd956303936fdbe Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 22 May 2026 18:19:33 +0900 Subject: [PATCH 10/48] Reject incomplete non-leading speculative base states --- modules/service/src/speculative/mod.rs | 1 - modules/service/src/speculative/rebase.rs | 73 ----- modules/service/src/speculative/scheduler.rs | 83 +----- modules/service/src/speculative/service.rs | 253 ++++++------------ modules/service/src/speculative/stream.rs | 4 - modules/service/src/speculative/validation.rs | 10 + proto/definitions/lcp/service/elc/v1/tx.proto | 7 +- 7 files changed, 113 insertions(+), 318 deletions(-) delete mode 100644 modules/service/src/speculative/rebase.rs diff --git a/modules/service/src/speculative/mod.rs b/modules/service/src/speculative/mod.rs index ab77f932..6d46f31a 100644 --- a/modules/service/src/speculative/mod.rs +++ b/modules/service/src/speculative/mod.rs @@ -1,5 +1,4 @@ mod permit; -pub(crate) mod rebase; pub(crate) mod scheduler; mod service; pub(crate) mod stream; diff --git a/modules/service/src/speculative/rebase.rs b/modules/service/src/speculative/rebase.rs deleted file mode 100644 index 86bab010..00000000 --- a/modules/service/src/speculative/rebase.rs +++ /dev/null @@ -1,73 +0,0 @@ -use super::types::{ - ExplicitStateRef, ObservedStateTransition, SpeculativeUpdateClientRequest, - SpeculativeUpdateClientResult, -}; -use lcp_types::{store_key, Any, Height}; -use log::warn; -use store::WriteSet; - -#[derive(Debug, Clone)] -pub(crate) struct DependencyRebaseState { - pub(crate) observed_transition: ObservedStateTransition, - pub(crate) client_state: Option, - pub(crate) consensus_state: Option, -} - -pub(crate) fn rebase_speculative_request_in_place( - req: &mut SpeculativeUpdateClientRequest, - previous: &DependencyRebaseState, -) { - // Always seed the base state from the previous result so the next unit - // 
observes its predecessor's post-state and write set. - req.base_state = ExplicitStateRef { - prev_height: Some(previous.observed_transition.post_height), - prev_state_id: Some(previous.observed_transition.post_state_id.clone()), - client_state: previous.client_state.clone(), - consensus_state: previous.consensus_state.clone(), - }; -} - -pub(crate) fn build_dependency_rebase_state( - client_id: &str, - result: &SpeculativeUpdateClientResult, -) -> DependencyRebaseState { - DependencyRebaseState { - observed_transition: result.observed_transition.clone(), - client_state: extract_client_state_from_write_set(client_id, &result.write_set), - consensus_state: extract_consensus_state_from_write_set( - client_id, - result.observed_transition.post_height, - &result.write_set, - ), - } -} - -pub(crate) fn extract_client_state_from_write_set( - client_id: &str, - write_set: &WriteSet, -) -> Option { - let key = store_key::client_state_bytes(client_id); - decode_any_from_write_set("client_state", write_set.get(&key)?.as_ref()?) -} - -pub(crate) fn extract_consensus_state_from_write_set( - client_id: &str, - height: Height, - write_set: &WriteSet, -) -> Option { - let key = store_key::consensus_state_bytes(client_id, &height); - decode_any_from_write_set("consensus_state", write_set.get(&key)?.as_ref()?) 
-} - -fn decode_any_from_write_set(kind: &str, value: &[u8]) -> Option { - match bincode::serde::decode_from_slice(value, bincode::config::standard()) { - Ok((any, _)) => Some(any), - Err(e) => { - warn!( - "failed to decode {} from speculative write set: {}", - kind, e - ); - None - } - } -} diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index d1ae1d69..d80e98e1 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -1,6 +1,3 @@ -use super::rebase::{ - build_dependency_rebase_state, rebase_speculative_request_in_place, DependencyRebaseState, -}; use super::service::SpeculativeService; use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ @@ -83,19 +80,6 @@ where state.closed = true; shared.ready.notify_all(); while state.failure.is_none() && state.has_unfinished_work() { - if state.has_unresolvable_pending_work() { - state.failure = Some(SpeculativeBatchFailure { - kind: SpeculativeBatchFailureKind::DependencyStateMismatch, - unit_id: state - .pending - .front() - .map(|(_, req)| req.request().unit_id.clone()), - detail: "speculative stream ended with unresolved linear dependencies" - .to_string(), - }); - shared.ready.notify_all(); - break; - } state = shared.complete.wait(state).unwrap(); } info!( @@ -142,8 +126,7 @@ where // Shared synchronization wrapper for one streaming scheduler run. // -// The scheduler state is protected by a single mutex so enqueue, promotion, -// completion, and failure transitions stay consistent across worker threads. +// The scheduler state is protected by a single mutex so enqueue, completion, and failure transitions stay consistent across worker threads. // `ready` wakes workers when executable units become available, while // `complete` wakes the coordinator waiting for in-flight work to drain. 
struct StreamingSchedulerShared { @@ -154,18 +137,16 @@ struct StreamingSchedulerShared { // Mutable state for one streaming speculative batch execution. // -// Incoming units are assigned monotonically increasing stream indexes, then -// split into `ready` work that workers can execute immediately and `pending` -// work that must wait for the previous unit's rebase state. Completed units -// store their request/result by index so the final response can be rebuilt in -// input order, even if worker threads finish out of order. +// Incoming units are assigned monotonically increasing stream indexes. The +// first unit may execute with an incomplete base state, but non-leading units +// are admitted only when they carry complete base-state payloads. Completed +// units store their request/result by index so the final response can be +// rebuilt in input order, even if worker threads finish out of order. struct StreamingSchedulerState { client_id: String, ready: VecDeque<(usize, ResidentSpeculativeUpdateClientRequest)>, - pending: VecDeque<(usize, ResidentSpeculativeUpdateClientRequest)>, request_by_index: BTreeMap, result_by_index: BTreeMap, - rebase_state_by_index: BTreeMap, seen_unit_ids: BTreeSet, unit_count: usize, in_flight: usize, @@ -179,10 +160,8 @@ impl StreamingSchedulerState { Self { client_id, ready: VecDeque::new(), - pending: VecDeque::new(), request_by_index: BTreeMap::new(), result_by_index: BTreeMap::new(), - rebase_state_by_index: BTreeMap::new(), seen_unit_ids: BTreeSet::new(), unit_count: 0, in_flight: 0, @@ -193,11 +172,7 @@ impl StreamingSchedulerState { } fn has_unfinished_work(&self) -> bool { - self.in_flight > 0 || !self.ready.is_empty() || !self.pending.is_empty() - } - - fn has_unresolvable_pending_work(&self) -> bool { - self.in_flight == 0 && self.ready.is_empty() && !self.pending.is_empty() + self.in_flight > 0 || !self.ready.is_empty() } fn enqueue( @@ -212,22 +187,7 @@ impl StreamingSchedulerState { req.request(), )?; self.unit_count 
+= 1; - self.enqueue_ready_or_pending(index, req) - } - - fn enqueue_ready_or_pending( - &mut self, - index: usize, - mut req: ResidentSpeculativeUpdateClientRequest, - ) -> core::result::Result<(), SpeculativeBatchFailure> { - if index == 0 || req.request().base_state.has_complete_base_state_payload() { - self.ready.push_back((index, req)); - } else if let Some(previous) = self.rebase_state_by_index.get(&(index - 1)) { - rebase_speculative_request_in_place(req.request_mut(), previous); - self.ready.push_back((index, req)); - } else { - self.pending.push_back((index, req)); - } + self.ready.push_back((index, req)); Ok(()) } @@ -236,30 +196,9 @@ impl StreamingSchedulerState { index: usize, req: SpeculativeUpdateClientRequest, result: SpeculativeUpdateClientResult, - ) -> core::result::Result<(), SpeculativeBatchFailure> { + ) { self.request_by_index.insert(index, req); - self.rebase_state_by_index.insert( - index, - build_dependency_rebase_state(&self.client_id, &result), - ); self.result_by_index.insert(index, result); - self.promote_pending() - } - - fn promote_pending(&mut self) -> core::result::Result<(), SpeculativeBatchFailure> { - let mut remaining = VecDeque::new(); - while let Some((index, mut req)) = self.pending.pop_front() { - if index == 0 || req.request().base_state.has_complete_base_state_payload() { - self.ready.push_back((index, req)); - } else if let Some(previous) = self.rebase_state_by_index.get(&(index - 1)) { - rebase_speculative_request_in_place(req.request_mut(), previous); - self.ready.push_back((index, req)); - } else { - remaining.push_back((index, req)); - } - } - self.pending = remaining; - Ok(()) } } @@ -329,9 +268,7 @@ fn streaming_speculative_worker( Ok(result) => { let mut req = req.into_request(); clear_request_header_payload(&mut req); - if let Err(e) = state.complete_unit(index, req, result) { - state.failure = Some(e); - } + state.complete_unit(index, req, result); } Err(e) => { state.failure = Some(e); diff --git 
a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index b328c723..d4db6fc2 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -1,9 +1,4 @@ use super::permit::{KeyLockMap, PermitGate}; -#[cfg(test)] -use super::rebase::{ - extract_client_state_from_write_set, extract_consensus_state_from_write_set, - rebase_speculative_request_in_place, DependencyRebaseState, -}; use super::scheduler::execute_speculative_update_client_stream; use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ @@ -223,7 +218,7 @@ mod tests { }; use keymanager::EnclaveKeyManager; use lcp_proto::google::protobuf::Any; - use lcp_types::{store_key, Height}; + use lcp_types::Height; use lcp_types::{EnclaveMetadata, Time}; use sgx_types::{sgx_enclave_id_t, sgx_status_t}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -492,165 +487,12 @@ mod tests { assert_eq!(err.unit_id.as_deref(), Some("unit-0001")); } - #[test] - fn replaces_explicit_base_state_metadata_when_rebasing_previous_payloads() { - let mut req = mk_req( - "unit-0001", - "client", - Some(Height::new(0, 10)), - Some(b"stale"), - ); - let previous = DependencyRebaseState { - observed_transition: ObservedStateTransition { - prev_height: None, - prev_state_id: None, - post_height: Height::new(0, 11), - post_state_id: b"post-0".to_vec(), - }, - client_state: None, - consensus_state: None, - }; - - rebase_speculative_request_in_place(&mut req, &previous); - - assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); - assert_eq!( - req.base_state.prev_state_id.as_deref(), - Some(b"post-0".as_slice()) - ); - } - - #[test] - fn fills_missing_base_state_metadata_from_previous_post_state() { - let mut req = mk_req("unit-0001", "client", None, None); - let previous = DependencyRebaseState { - observed_transition: ObservedStateTransition { - prev_height: None, - prev_state_id: None, - post_height: Height::new(0, 11), - 
post_state_id: b"post-0".to_vec(), - }, - client_state: None, - consensus_state: None, - }; - - rebase_speculative_request_in_place(&mut req, &previous); - - assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); - assert_eq!( - req.base_state.prev_state_id.as_deref(), - Some(b"post-0".as_slice()) - ); - } - - #[test] - fn seeds_previous_payloads_even_when_explicit_base_state_is_complete() { - let mut req = with_explicit_base_state_payload(mk_req( - "unit-0001", - "client", - Some(Height::new(0, 11)), - Some(b"post-0"), - )); - let previous = DependencyRebaseState { - observed_transition: ObservedStateTransition { - prev_height: None, - prev_state_id: None, - post_height: Height::new(0, 11), - post_state_id: b"post-0".to_vec(), - }, - client_state: Some( - Any { - type_url: "/ibc.mock.ClientState".to_string(), - value: vec![3], - } - .into(), - ), - consensus_state: Some( - Any { - type_url: "/ibc.mock.ConsensusState".to_string(), - value: vec![4], - } - .into(), - ), - }; - - rebase_speculative_request_in_place(&mut req, &previous); - - assert_eq!(req.base_state.prev_height, Some(Height::new(0, 11))); - assert_eq!( - req.base_state.prev_state_id.as_deref(), - Some(b"post-0".as_slice()) - ); - assert!(req.base_state.client_state.is_some()); - assert!(req.base_state.consensus_state.is_some()); - } - - #[test] - fn extracts_rebase_payloads_from_bincode_write_set() { - let client_id = "07-tendermint-0"; - let height = Height::new(0, 11); - let client_state = Any { - type_url: "/ibc.mock.ClientState".to_string(), - value: vec![1, 2, 3], - }; - let consensus_state = Any { - type_url: "/ibc.mock.ConsensusState".to_string(), - value: vec![4, 5, 6], - }; - let client_state_key = store_key::client_state_bytes(client_id); - let consensus_state_key = store_key::consensus_state_bytes(client_id, &height); - let mut write_set = WriteSet::default(); - write_set.insert( - client_state_key, - Some( - bincode::serde::encode_to_vec(&client_state, 
bincode::config::standard()) - .expect("encode client state"), - ), - ); - write_set.insert( - consensus_state_key, - Some( - bincode::serde::encode_to_vec(&consensus_state, bincode::config::standard()) - .expect("encode consensus state"), - ), - ); - - assert_eq!( - extract_client_state_from_write_set(client_id, &write_set), - Some(client_state.into()) - ); - assert_eq!( - extract_consensus_state_from_write_set(client_id, height, &write_set), - Some(consensus_state.into()) - ); - } - - #[test] - fn ignores_missing_or_malformed_rebase_payloads_from_write_set() { - let client_id = "07-tendermint-0"; - let height = Height::new(0, 11); - let client_state_key = store_key::client_state_bytes(client_id); - let consensus_state_key = store_key::consensus_state_bytes(client_id, &height); - let mut write_set = WriteSet::default(); - write_set.insert(client_state_key, Some(b"not-bincode-any".to_vec())); - write_set.insert(consensus_state_key, None); - - assert_eq!( - extract_client_state_from_write_set(client_id, &write_set), - None - ); - assert_eq!( - extract_consensus_state_from_write_set(client_id, height, &write_set), - None - ); - } - #[test] fn streaming_speculative_batch_executes_before_input_closes() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(100)); let app = AppService::::new("test-home", enclave); - let service = SpeculativeService::new(2); + let service = SpeculativeService::new(1); let (tx, rx) = std::sync::mpsc::sync_channel(2); let worker_service = service.clone(); let worker_app = app.clone(); @@ -697,7 +539,7 @@ mod tests { ); tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - SpeculativeUpdateClientRequest { + with_explicit_base_state_payload(SpeculativeUpdateClientRequest { unit_id: "unit-0001".to_string(), update: MsgUpdateClient { client_id: client_id.to_string(), @@ -713,12 +555,16 @@ mod tests { ..Default::default() }, base_state: ExplicitStateRef { - prev_height: None, - prev_state_id: None, + 
prev_height: Some(Height::new(0, 11)), + prev_state_id: Some({ + let mut prev_state_id = vec![0; 32]; + prev_state_id[31] = 1; + prev_state_id + }), client_state: None, consensus_state: None, }, - }, + }), )) .expect("send second unit"); drop(tx); @@ -731,6 +577,85 @@ mod tests { assert_eq!(app.enclave.observed_max_in_flight(), 1); } + #[test] + fn streaming_speculative_batch_rejects_incomplete_non_leading_base_state() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(2); + let (tx, rx) = std::sync::mpsc::sync_channel(2); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_serialized_speculative_update_client_stream( + &worker_app, + client_id_for_worker, + rx, + ) + }); + + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( + SpeculativeUpdateClientRequest { + unit_id: "unit-0000".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: vec![0; 20], + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![1], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: None, + prev_state_id: None, + client_state: None, + consensus_state: None, + }, + }, + )) + .expect("send first unit"); + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( + SpeculativeUpdateClientRequest { + unit_id: "unit-0001".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: { + let mut signer = vec![0; 20]; + signer[19] = 1; + signer + }, + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![2], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: Some(Height::new(0, 11)), + prev_state_id: Some(vec![1]), + client_state: None, + 
consensus_state: None, + }, + }, + )) + .expect("send second unit"); + drop(tx); + + let err = handle + .join() + .expect("streaming worker thread") + .expect_err("incomplete non-leading base state should fail"); + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0001")); + assert!( + err.detail.contains("complete base_state payload"), + "unexpected error detail: {}", + err.detail + ); + } + #[test] fn streaming_speculative_batch_parallelizes_complete_base_state_units() { let client_id = "07-tendermint-0"; diff --git a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs index 07e24b9a..d45bed68 100644 --- a/modules/service/src/speculative/stream.rs +++ b/modules/service/src/speculative/stream.rs @@ -177,10 +177,6 @@ impl ResidentSpeculativeUpdateClientRequest { &self.request } - pub(crate) fn request_mut(&mut self) -> &mut SpeculativeUpdateClientRequest { - &mut self.request - } - pub(crate) fn into_request(self) -> SpeculativeUpdateClientRequest { self.request } diff --git a/modules/service/src/speculative/validation.rs b/modules/service/src/speculative/validation.rs index 374c0944..d85efd54 100644 --- a/modules/service/src/speculative/validation.rs +++ b/modules/service/src/speculative/validation.rs @@ -57,6 +57,16 @@ pub(crate) fn validate_next_linear_request( detail: format!("duplicate unit_id in speculative batch: {}", req.unit_id), }); } + if index > 0 && !req.base_state.has_complete_base_state_payload() { + return Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BaseStateMismatch, + unit_id: Some(req.unit_id.clone()), + detail: format!( + "non-leading speculative unit requires complete base_state payload: unit_id={}", + req.unit_id + ), + }); + } let header_len = req .update .header diff --git a/proto/definitions/lcp/service/elc/v1/tx.proto b/proto/definitions/lcp/service/elc/v1/tx.proto index 0f9a4106..ca27454e 100644 --- 
a/proto/definitions/lcp/service/elc/v1/tx.proto +++ b/proto/definitions/lcp/service/elc/v1/tx.proto @@ -207,9 +207,10 @@ message SpeculativeUpdateClientBatchStreamInit { } // SpeculativeUpdateClientUnitInit starts a single speculative UpdateClient work -// unit. Units form a linear chain in stream order: the first unit starts from -// its explicit base_state, and each following unit is rebased onto the previous -// unit's observed post state before execution. +// unit. Units form a linear chain in stream order: the first unit may start +// from an incomplete base_state and read the canonical store, but every +// following unit must provide complete base_state payloads so it can execute +// without being rebased from the previous unit's write set. message SpeculativeUpdateClientUnitInit { reserved 6; From 9de88e31a338ebb4323b467e0e0612c84fa99313 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 22 May 2026 18:22:41 +0900 Subject: [PATCH 11/48] Require complete speculative base states --- modules/service/src/speculative/scheduler.rs | 3 +- modules/service/src/speculative/service.rs | 69 ++++++++----------- modules/service/src/speculative/validation.rs | 4 +- proto/definitions/lcp/service/elc/v1/tx.proto | 6 +- 4 files changed, 34 insertions(+), 48 deletions(-) diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index d80e98e1..f19bed2b 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -137,8 +137,7 @@ struct StreamingSchedulerShared { // Mutable state for one streaming speculative batch execution. // -// Incoming units are assigned monotonically increasing stream indexes. The -// first unit may execute with an incomplete base state, but non-leading units +// Incoming units are assigned monotonically increasing stream indexes. Units // are admitted only when they carry complete base-state payloads. 
Completed // units store their request/result by index so the final response can be // rebuilt in input order, even if worker threads finish out of order. diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index d4db6fc2..7440b3f8 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -80,7 +80,7 @@ impl SpeculativeService { .enclave .speculative_update_client(EnclaveSpeculativeUpdateClientInput { update, - base_state: base_state_payload_from_ref(&base_state), + base_state: Some(base_state_payload_from_ref(&base_state)), })?; let observed_transition = decode_observed_transition(&res.response)?; Ok(SpeculativeUpdateClientResult { @@ -179,12 +179,18 @@ impl SpeculativeService { } } -fn base_state_payload_from_ref(base_state: &ExplicitStateRef) -> Option { - Some(SpeculativeBaseState { - prev_height: Some(base_state.prev_height?), - client_state: base_state.client_state.clone()?, - consensus_state: base_state.consensus_state.clone()?, - }) +fn base_state_payload_from_ref(base_state: &ExplicitStateRef) -> SpeculativeBaseState { + SpeculativeBaseState { + prev_height: base_state.prev_height, + client_state: base_state + .client_state + .clone() + .expect("validated speculative base_state client_state"), + consensus_state: base_state + .consensus_state + .clone() + .expect("validated speculative base_state consensus_state"), + } } #[allow(clippy::result_large_err)] @@ -304,7 +310,10 @@ mod tests { std::thread::sleep(self.delay); self.current_in_flight.fetch_sub(1, Ordering::SeqCst); - let prev_height = (idx > 0).then(|| Height::new(0, 10 + idx)); + let prev_height = input + .base_state + .as_ref() + .and_then(|base_state| base_state.prev_height); let prev_state_id = (idx > 0).then(|| { let mut prev_state_id = [0u8; 32]; prev_state_id[31] = idx as u8; @@ -506,7 +515,7 @@ mod tests { }); tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - 
SpeculativeUpdateClientRequest { + with_explicit_base_state_payload(SpeculativeUpdateClientRequest { unit_id: "unit-0000".to_string(), update: MsgUpdateClient { client_id: client_id.to_string(), @@ -518,12 +527,12 @@ mod tests { ..Default::default() }, base_state: ExplicitStateRef { - prev_height: None, + prev_height: Some(Height::new(0, 10)), prev_state_id: None, client_state: None, consensus_state: None, }, - }, + }), )) .expect("send first unit"); @@ -578,7 +587,7 @@ mod tests { } #[test] - fn streaming_speculative_batch_rejects_incomplete_non_leading_base_state() { + fn streaming_speculative_batch_rejects_incomplete_base_state() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); @@ -616,39 +625,14 @@ mod tests { }, )) .expect("send first unit"); - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - SpeculativeUpdateClientRequest { - unit_id: "unit-0001".to_string(), - update: MsgUpdateClient { - client_id: client_id.to_string(), - signer: { - let mut signer = vec![0; 20]; - signer[19] = 1; - signer - }, - header: Some(Any { - type_url: "/ibc.mock.Header".to_string(), - value: vec![2], - }), - ..Default::default() - }, - base_state: ExplicitStateRef { - prev_height: Some(Height::new(0, 11)), - prev_state_id: Some(vec![1]), - client_state: None, - consensus_state: None, - }, - }, - )) - .expect("send second unit"); drop(tx); let err = handle .join() .expect("streaming worker thread") - .expect_err("incomplete non-leading base state should fail"); + .expect_err("incomplete base state should fail"); assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); - assert_eq!(err.unit_id.as_deref(), Some("unit-0001")); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); assert!( err.detail.contains("complete base_state payload"), "unexpected error detail: {}", @@ -675,7 +659,12 @@ mod tests { }); let mut requests = vec![ - 
with_explicit_base_state_payload(mk_req("unit-0000", client_id, None, None)), + with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )), with_explicit_base_state_payload(mk_req( "unit-0001", client_id, diff --git a/modules/service/src/speculative/validation.rs b/modules/service/src/speculative/validation.rs index d85efd54..dff8119c 100644 --- a/modules/service/src/speculative/validation.rs +++ b/modules/service/src/speculative/validation.rs @@ -57,12 +57,12 @@ pub(crate) fn validate_next_linear_request( detail: format!("duplicate unit_id in speculative batch: {}", req.unit_id), }); } - if index > 0 && !req.base_state.has_complete_base_state_payload() { + if !req.base_state.has_complete_base_state_payload() { return Err(SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::BaseStateMismatch, unit_id: Some(req.unit_id.clone()), detail: format!( - "non-leading speculative unit requires complete base_state payload: unit_id={}", + "speculative unit requires complete base_state payload: unit_id={}", req.unit_id ), }); diff --git a/proto/definitions/lcp/service/elc/v1/tx.proto b/proto/definitions/lcp/service/elc/v1/tx.proto index ca27454e..9e6d5787 100644 --- a/proto/definitions/lcp/service/elc/v1/tx.proto +++ b/proto/definitions/lcp/service/elc/v1/tx.proto @@ -207,10 +207,8 @@ message SpeculativeUpdateClientBatchStreamInit { } // SpeculativeUpdateClientUnitInit starts a single speculative UpdateClient work -// unit. Units form a linear chain in stream order: the first unit may start -// from an incomplete base_state and read the canonical store, but every -// following unit must provide complete base_state payloads so it can execute -// without being rebased from the previous unit's write set. +// unit. Every unit must provide complete base_state payloads so it can execute +// from an explicit isolated state without reading the canonical store. 
message SpeculativeUpdateClientUnitInit { reserved 6; From 6885c7526a296fe86892f621c30ed1a184b59526 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 22 May 2026 18:31:59 +0900 Subject: [PATCH 12/48] Require seeded speculative base state --- modules/enclave-api/src/api/command.rs | 41 ++++++++-------------- modules/service/src/speculative/service.rs | 26 +++----------- modules/service/src/speculative/types.rs | 4 +-- 3 files changed, 20 insertions(+), 51 deletions(-) diff --git a/modules/enclave-api/src/api/command.rs b/modules/enclave-api/src/api/command.rs index e2cf8c74..e42d1142 100644 --- a/modules/enclave-api/src/api/command.rs +++ b/modules/enclave-api/src/api/command.rs @@ -17,12 +17,12 @@ use store::WriteSet; #[derive(Debug)] pub struct SpeculativeUpdateClientInput { pub update: UpdateClientInput, - pub base_state: Option, + pub base_state: SpeculativeBaseState, } #[derive(Debug, Clone)] pub struct SpeculativeBaseState { - pub prev_height: Option, + pub prev_height: Height, pub client_state: Any, pub consensus_state: Any, } @@ -151,10 +151,7 @@ pub trait SpeculativeEnclaveCommandAPI: where Self: Sized, { - debug!( - "prepare speculative command with base state: has_base_state={}", - input.base_state.is_some() - ); + debug!("prepare speculative command with base state"); let client_id = input.update.client_id.to_string(); let base_state = input.base_state; @@ -162,10 +159,7 @@ pub trait SpeculativeEnclaveCommandAPI: LightClientExecuteCommand::UpdateClient(input.update), )); let (res, write_set) = self.execute_command_speculatively_with_seed(cmd, |tx_id| { - if let Some(base_state) = base_state.as_ref() { - seed_speculative_base_state(self, tx_id, &client_id, base_state)?; - } - Ok(()) + seed_speculative_base_state(self, tx_id, &client_id, &base_state) })?; match res { @@ -192,22 +186,15 @@ fn seed_speculative_base_state( .map_err(crate::errors::Error::bincode_encode)?; enclave.use_mut_store(|store| store.tx_set(tx_id, client_state_key, 
client_state_value))?; - // The client state is always seeded, but the consensus state is keyed by - // the predecessor height and is therefore only seeded when prev_height is - // present. Callers may still preserve explicit prev_state_id metadata above - // this layer; that identifier is validation metadata, not a store key. - if let Some(prev_height) = base_state.prev_height { - debug_assert!( - !base_state.consensus_state.type_url.is_empty(), - "seeded consensus state should carry a concrete type" - ); - let consensus_state_key = store_key::consensus_state_bytes(client_id, &prev_height); - let consensus_state_value = - bincode::serde::encode_to_vec(&base_state.consensus_state, bincode::config::standard()) - .map_err(crate::errors::Error::bincode_encode)?; - enclave.use_mut_store(|store| { - store.tx_set(tx_id, consensus_state_key, consensus_state_value) - })?; - } + debug_assert!( + !base_state.consensus_state.type_url.is_empty(), + "seeded consensus state should carry a concrete type" + ); + let consensus_state_key = store_key::consensus_state_bytes(client_id, &base_state.prev_height); + let consensus_state_value = + bincode::serde::encode_to_vec(&base_state.consensus_state, bincode::config::standard()) + .map_err(crate::errors::Error::bincode_encode)?; + enclave + .use_mut_store(|store| store.tx_set(tx_id, consensus_state_key, consensus_state_value))?; Ok(()) } diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 7440b3f8..3cfe1651 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -80,7 +80,7 @@ impl SpeculativeService { .enclave .speculative_update_client(EnclaveSpeculativeUpdateClientInput { update, - base_state: Some(base_state_payload_from_ref(&base_state)), + base_state: base_state_payload_from_ref(&base_state), })?; let observed_transition = decode_observed_transition(&res.response)?; Ok(SpeculativeUpdateClientResult { @@ -181,7 +181,9 @@ impl 
SpeculativeService { fn base_state_payload_from_ref(base_state: &ExplicitStateRef) -> SpeculativeBaseState { SpeculativeBaseState { - prev_height: base_state.prev_height, + prev_height: base_state + .prev_height + .expect("validated speculative base_state prev_height"), client_state: base_state .client_state .clone() @@ -310,10 +312,7 @@ mod tests { std::thread::sleep(self.delay); self.current_in_flight.fetch_sub(1, Ordering::SeqCst); - let prev_height = input - .base_state - .as_ref() - .and_then(|base_state| base_state.prev_height); + let prev_height = Some(input.base_state.prev_height); let prev_state_id = (idx > 0).then(|| { let mut prev_state_id = [0u8; 32]; prev_state_id[31] = idx as u8; @@ -438,21 +437,6 @@ mod tests { assert!(validate_linear_transitions(&requests, &results).is_ok()); } - #[test] - fn validates_base_state_prev_height_only_when_provided() { - let mut result = mk_result( - Some(Height::new(0, 11)), - None, - Height::new(0, 12), - b"post-1", - ); - result.base_state.prev_height = None; - - result - .validate_base_state() - .expect("missing prev_height should accept observed height"); - } - #[test] fn rejects_base_state_prev_height_mismatch_when_provided() { let mut result = mk_result( diff --git a/modules/service/src/speculative/types.rs b/modules/service/src/speculative/types.rs index 3f9e6574..efef21cf 100644 --- a/modules/service/src/speculative/types.rs +++ b/modules/service/src/speculative/types.rs @@ -61,9 +61,7 @@ pub struct SpeculativeUpdateClientResult { impl SpeculativeUpdateClientResult { #[allow(clippy::result_large_err)] pub fn validate_base_state(&self) -> core::result::Result<(), enclave_api::Error> { - if self.base_state.prev_height.is_some() - && self.base_state.prev_height != self.observed_transition.prev_height - { + if self.base_state.prev_height != self.observed_transition.prev_height { return Err(enclave_api::Error::invalid_argument(format!( "base prev_height mismatch: expected={:?} observed={:?}", 
self.base_state.prev_height, self.observed_transition.prev_height From d2ae4dc934923757fd2a39fc01eacb5ac8137e2c Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 29 May 2026 14:49:35 +0900 Subject: [PATCH 13/48] Serialize update client with speculative batches Serial gRPC UpdateClient (unary + streaming) is serialized with speculative batches by the same KeyLockMap[client_id]. Calls to proto_update_client outside the service path (for example app/src/commands/elc.rs) are outside this guarantee. --- modules/service/src/elc.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 3037a7a8..842fc7f6 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -40,7 +40,12 @@ where &self, request: Request, ) -> Result, Status> { - match self.app.enclave.proto_update_client(request.into_inner()) { + let msg = request.into_inner(); + let client_id = msg.client_id.clone(); + match self + .speculative + .with_client_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) + { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -103,7 +108,11 @@ where }), }; - match self.app.enclave.proto_update_client(msg) { + let client_id = msg.client_id.clone(); + match self + .speculative + .with_client_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) + { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } From 0ab323d7a8df1fc1f25179f681e15f73da21ab5d Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 29 May 2026 15:48:38 +0900 Subject: [PATCH 14/48] Return effective speculative write sets --- modules/enclave-api/src/api/command.rs | 106 ++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 12 deletions(-) diff --git a/modules/enclave-api/src/api/command.rs b/modules/enclave-api/src/api/command.rs index e42d1142..594e177a 100644 --- 
a/modules/enclave-api/src/api/command.rs +++ b/modules/enclave-api/src/api/command.rs @@ -30,6 +30,9 @@ pub struct SpeculativeBaseState { #[derive(Debug)] pub struct SpeculativeUpdateClientResponse { pub response: UpdateClientResponse, + /// Effective write set for canonical apply. Entries whose `(key, value)` + /// match the seeded base state have been removed, so applying this write set + /// reflects only what speculative UpdateClient actually computed. pub write_set: WriteSet, } @@ -155,18 +158,20 @@ pub trait SpeculativeEnclaveCommandAPI: let client_id = input.update.client_id.to_string(); let base_state = input.base_state; + let seed_writes = compute_seed_write_set(&client_id, &base_state)?; let cmd = Command::LightClient(LightClientCommand::Execute( LightClientExecuteCommand::UpdateClient(input.update), )); - let (res, write_set) = self.execute_command_speculatively_with_seed(cmd, |tx_id| { - seed_speculative_base_state(self, tx_id, &client_id, &base_state) + let (res, raw_write_set) = self.execute_command_speculatively_with_seed(cmd, |tx_id| { + apply_seed_write_set(self, tx_id, &seed_writes) })?; + let effective_write_set = filter_seed_writes(raw_write_set, &seed_writes); match res { CommandResponse::LightClient(LightClientResponse::UpdateClient(response)) => { Ok(SpeculativeUpdateClientResponse { response, - write_set, + write_set: effective_write_set, }) } _ => unreachable!(), @@ -174,17 +179,11 @@ pub trait SpeculativeEnclaveCommandAPI: } } -fn seed_speculative_base_state( - enclave: &(impl CommitStoreAccessor + ?Sized), - tx_id: TxId, - client_id: &str, - base_state: &SpeculativeBaseState, -) -> Result<()> { +fn compute_seed_write_set(client_id: &str, base_state: &SpeculativeBaseState) -> Result { let client_state_key = store_key::client_state_bytes(client_id); let client_state_value = bincode::serde::encode_to_vec(&base_state.client_state, bincode::config::standard()) .map_err(crate::errors::Error::bincode_encode)?; - enclave.use_mut_store(|store| 
store.tx_set(tx_id, client_state_key, client_state_value))?; debug_assert!( !base_state.consensus_state.type_url.is_empty(), @@ -194,7 +193,90 @@ fn seed_speculative_base_state( let consensus_state_value = bincode::serde::encode_to_vec(&base_state.consensus_state, bincode::config::standard()) .map_err(crate::errors::Error::bincode_encode)?; - enclave - .use_mut_store(|store| store.tx_set(tx_id, consensus_state_key, consensus_state_value))?; + + Ok([ + (client_state_key, Some(client_state_value)), + (consensus_state_key, Some(consensus_state_value)), + ] + .into_iter() + .collect()) +} + +fn apply_seed_write_set( + enclave: &(impl CommitStoreAccessor + ?Sized), + tx_id: TxId, + seed_writes: &WriteSet, +) -> Result<()> { + for (key, value) in seed_writes { + match value { + Some(value) => { + enclave.use_mut_store(|store| store.tx_set(tx_id, key.clone(), value.clone()))? + } + None => enclave.use_mut_store(|store| store.tx_remove(tx_id, key))?, + } + } Ok(()) } + +fn filter_seed_writes(write_set: WriteSet, seed_writes: &WriteSet) -> WriteSet { + write_set + .into_iter() + .filter(|(key, value)| seed_writes.get(key) != Some(value)) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn any(type_url: &str, value: &[u8]) -> Any { + Any::new(type_url.to_string(), value.to_vec()) + } + + fn base_state() -> SpeculativeBaseState { + SpeculativeBaseState { + prev_height: Height::new(0, 10), + client_state: any("/ibc.mock.ClientState", b"client-10"), + consensus_state: any("/ibc.mock.ConsensusState", b"consensus-10"), + } + } + + #[test] + fn speculative_update_client_excludes_seeded_consensus_state_from_write_set() { + let client_id = "07-tendermint-0"; + let seed_writes = compute_seed_write_set(client_id, &base_state()).unwrap(); + let consensus_state_key = store_key::consensus_state_bytes(client_id, &Height::new(0, 10)); + + let effective_write_set = filter_seed_writes(seed_writes.clone(), &seed_writes); + + assert!( + 
!effective_write_set.contains_key(&consensus_state_key), + "seeded consensus_state(prev_height) must not be returned as an effective write" + ); + } + + #[test] + fn speculative_update_client_keeps_computed_client_state_even_if_seed_provided() { + let client_id = "07-tendermint-0"; + let seed_writes = compute_seed_write_set(client_id, &base_state()).unwrap(); + let client_state_key = store_key::client_state_bytes(client_id); + let computed_client_state_value = bincode::serde::encode_to_vec( + any("/ibc.mock.ClientState", b"client-11"), + bincode::config::standard(), + ) + .unwrap(); + let raw_write_set = [( + client_state_key.clone(), + Some(computed_client_state_value.clone()), + )] + .into_iter() + .collect(); + + let effective_write_set = filter_seed_writes(raw_write_set, &seed_writes); + + assert_eq!( + effective_write_set.get(&client_state_key), + Some(&Some(computed_client_state_value)) + ); + } +} From dee0da34e5b9d05335f1d81af4bdfb32d2260122 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 29 May 2026 15:53:31 +0900 Subject: [PATCH 15/48] Return errors for incomplete speculative base state --- modules/service/src/speculative/service.rs | 41 ++++++++++++++-------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 3cfe1651..93da3dfa 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -80,7 +80,7 @@ impl SpeculativeService { .enclave .speculative_update_client(EnclaveSpeculativeUpdateClientInput { update, - base_state: base_state_payload_from_ref(&base_state), + base_state: base_state_payload_from_ref(&base_state)?, })?; let observed_transition = decode_observed_transition(&res.response)?; Ok(SpeculativeUpdateClientResult { @@ -179,20 +179,31 @@ impl SpeculativeService { } } -fn base_state_payload_from_ref(base_state: &ExplicitStateRef) -> SpeculativeBaseState { - SpeculativeBaseState { - 
prev_height: base_state - .prev_height - .expect("validated speculative base_state prev_height"), - client_state: base_state - .client_state - .clone() - .expect("validated speculative base_state client_state"), - consensus_state: base_state - .consensus_state - .clone() - .expect("validated speculative base_state consensus_state"), - } +#[allow(clippy::result_large_err)] +fn base_state_payload_from_ref( + base_state: &ExplicitStateRef, +) -> core::result::Result { + let prev_height = base_state.prev_height.ok_or_else(|| { + enclave_api::Error::invalid_argument( + "speculative base_state prev_height must be provided".to_string(), + ) + })?; + let client_state = base_state.client_state.clone().ok_or_else(|| { + enclave_api::Error::invalid_argument( + "speculative base_state client_state must be provided".to_string(), + ) + })?; + let consensus_state = base_state.consensus_state.clone().ok_or_else(|| { + enclave_api::Error::invalid_argument( + "speculative base_state consensus_state must be provided".to_string(), + ) + })?; + + Ok(SpeculativeBaseState { + prev_height, + client_state, + consensus_state, + }) } #[allow(clippy::result_large_err)] From 9281dc54ef84932dc9c0275faf50bd3f169701c2 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 29 May 2026 16:26:01 +0900 Subject: [PATCH 16/48] Clarify speculative stream failure handling --- modules/service/src/elc.rs | 14 +++++++++++--- modules/service/src/speculative/permit.rs | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 842fc7f6..18a2e262 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -145,9 +145,17 @@ where let header_memory = header_memory_budget.reserve_for_chunk(&chunk_msg).await?; if let Some(unit) = decoder.push_chunk(chunk_msg.chunk, header_memory)? 
{ units += 1; - tx.send(unit).map_err(|_| { - Status::aborted("speculative batch scheduler stopped before stream ended") - })?; + if tx.send(unit).is_err() { + let result = scheduler.await.map_err(|e| { + Status::aborted(format!("speculative batch worker failed: {e}")) + })?; + return match result { + Ok(_) => Err(Status::aborted( + "speculative batch scheduler stopped before stream ended", + )), + Err(e) => Err(Status::aborted(format!("{:?}: {}", e.kind, e.detail))), + }; + } } } decoder.finish()?; diff --git a/modules/service/src/speculative/permit.rs b/modules/service/src/speculative/permit.rs index f4a9b916..c27319df 100644 --- a/modules/service/src/speculative/permit.rs +++ b/modules/service/src/speculative/permit.rs @@ -73,6 +73,8 @@ impl KeyLockMap { drop(guard); let mut locks = self.locks.lock().unwrap(); + // strong_count == 2 means only this local `lock` binding and the map + // entry still reference the mutex, so the idle key entry can be removed. let should_remove = Arc::strong_count(&lock) == 2 && locks .get(key) From e989df88912a50b4b696c9917096646fea55bfba Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 29 May 2026 16:44:43 +0900 Subject: [PATCH 17/48] Verify canonical base before speculative stitch --- modules/enclave-api/src/enclave.rs | 99 ++++++++++++-- modules/service/src/speculative/service.rs | 148 +++++++++++++++++---- 2 files changed, 215 insertions(+), 32 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index ef8a0140..763469a2 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -1,6 +1,6 @@ -use crate::errors::Result; +use crate::errors::{Error, Result}; use keymanager::EnclaveKeyManager; -use lcp_types::EnclaveMetadata; +use lcp_types::{store_key, Any, EnclaveMetadata, Height}; use sgx_types::{sgx_enclave_id_t, SgxResult}; use sgx_urts::SgxEnclave; use std::path::PathBuf; @@ -212,16 +212,99 @@ pub trait HostStoreTxManager: CommitStoreAccessor 
{ { let tx = self.begin_tx(Some(update_key))?; let tx_id = tx.get_id(); + if let Err(e) = self.apply_write_set_in_tx(tx_id, write_set) { + self.rollback_tx(tx); + return Err(e); + } + self.commit_tx(tx) + } + + /// `apply_write_set_with_expected_base` applies a speculative write set only if the + /// canonical store still matches the explicit base state that seeded the batch. + /// The check and apply run under the same serialized update transaction keyed by + /// `update_key`, so the canonical base cannot change between verification and commit. + fn apply_write_set_with_expected_base( + &self, + update_key: UpdateKey, + prev_height: Height, + client_state: &Any, + consensus_state: &Any, + write_set: WriteSet, + ) -> Result<()> + where + S: TxAccessor, + { + let tx = self.begin_tx(Some(update_key.clone()))?; + let tx_id = tx.get_id(); + if let Err(e) = self.verify_expected_base_state_in_tx( + tx_id, + &update_key, + &prev_height, + client_state, + consensus_state, + ) { + self.rollback_tx(tx); + return Err(e); + } + if let Err(e) = self.apply_write_set_in_tx(tx_id, write_set) { + self.rollback_tx(tx); + return Err(e); + } + self.commit_tx(tx) + } + + fn apply_write_set_in_tx(&self, tx_id: store::TxId, write_set: WriteSet) -> Result<()> + where + S: TxAccessor, + { for (key, value) in write_set { - if let Err(e) = self.use_mut_store(|store| match value { + self.use_mut_store(|store| match value { Some(value) => store.tx_set(tx_id, key, value), None => store.tx_remove(tx_id, &key), - }) { - self.rollback_tx(tx); - return Err(e.into()); - } + })?; } - self.commit_tx(tx) + Ok(()) + } + + fn verify_expected_base_state_in_tx( + &self, + tx_id: store::TxId, + client_id: &str, + prev_height: &Height, + client_state: &Any, + consensus_state: &Any, + ) -> Result<()> + where + S: TxAccessor, + { + let client_state_key = store_key::client_state_bytes(client_id); + let client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + 
.map_err(Error::bincode_encode)?; + let canonical_client_state = + self.use_mut_store(|store| store.tx_get(tx_id, &client_state_key))?; + if canonical_client_state.as_deref() != Some(client_state_value.as_slice()) { + return Err(Error::invalid_argument(format!( + "canonical speculative base client_state mismatch: client_id={}", + client_id + ))); + } + + let consensus_state_key = store_key::consensus_state_bytes(client_id, prev_height); + let consensus_state_value = + bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) + .map_err(Error::bincode_encode)?; + let canonical_consensus_state = + self.use_mut_store(|store| store.tx_get(tx_id, &consensus_state_key))?; + if canonical_consensus_state.as_deref() != Some(consensus_state_value.as_slice()) { + return Err(Error::invalid_argument(format!( + "canonical speculative base consensus_state mismatch: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } + Ok(()) } /// `rollback_tx` rollbacks the changes in the transaction diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 93da3dfa..0cfcbb31 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -124,6 +124,18 @@ impl SpeculativeService { }); } validate_linear_transitions(&batch.units, &results.units)?; + let first_unit = batch.units.first().ok_or_else(|| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: None, + detail: "speculative batch must contain at least one unit".to_string(), + })?; + let canonical_base = base_state_payload_from_ref(&first_unit.base_state).map_err(|e| { + SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BaseStateMismatch, + unit_id: Some(first_unit.unit_id.clone()), + detail: e.to_string(), + } + })?; let mut merged_write_set = WriteSet::default(); let mut units = 
Vec::with_capacity(batch.units.len()); @@ -144,10 +156,16 @@ impl SpeculativeService { }); } app.enclave - .apply_write_set(batch.client_id.clone(), merged_write_set) + .apply_write_set_with_expected_base( + batch.client_id.clone(), + canonical_base.prev_height, + &canonical_base.client_state, + &canonical_base.consensus_state, + merged_write_set, + ) .map_err(|e| SpeculativeBatchFailure { - kind: SpeculativeBatchFailureKind::StitchApplyFailed, - unit_id: None, + kind: SpeculativeBatchFailureKind::BaseStateMismatch, + unit_id: Some(first_unit.unit_id.clone()), detail: e.to_string(), })?; @@ -245,6 +263,7 @@ mod tests { use std::thread; use std::time::Duration; use store::memory::MemStore; + use store::KVStore; struct FakeEnclave { store: Mutex, @@ -400,6 +419,38 @@ mod tests { req } + fn seed_canonical_base_state( + app: &AppService, + client_id: &str, + base_state: &ExplicitStateRef, + ) { + let prev_height = base_state.prev_height.expect("test base prev_height"); + let client_state = base_state + .client_state + .as_ref() + .expect("test base client_state"); + let consensus_state = base_state + .consensus_state + .as_ref() + .expect("test base consensus_state"); + let client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .expect("encode client_state"); + let consensus_state_value = + bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) + .expect("encode consensus_state"); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::client_state_bytes(client_id), + client_state_value, + ); + store.set( + lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), + consensus_state_value, + ); + }); + } + fn mk_result( prev_height: Option, prev_state_id: Option<&[u8]>, @@ -491,6 +542,54 @@ mod tests { assert_eq!(err.unit_id.as_deref(), Some("unit-0001")); } + #[test] + fn stitch_rejects_first_base_state_that_differs_from_canonical_store() { + let client_id = 
"07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(Height::new(0, 10)), + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + let err = service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect_err("non-canonical first base state must be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail + .contains("canonical speculative base client_state mismatch"), + "unexpected error detail: {}", + err.detail + ); + } + #[test] fn streaming_speculative_batch_executes_before_input_closes() { let client_id = "07-tendermint-0"; @@ -509,27 +608,27 @@ mod tests { ) }); - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - with_explicit_base_state_payload(SpeculativeUpdateClientRequest { - unit_id: "unit-0000".to_string(), - update: MsgUpdateClient { - client_id: client_id.to_string(), - signer: vec![0; 20], - header: Some(Any { - type_url: "/ibc.mock.Header".to_string(), - value: vec![1], - }), - ..Default::default() - }, - base_state: ExplicitStateRef { - prev_height: Some(Height::new(0, 10)), - prev_state_id: None, - client_state: None, - consensus_state: None, - }, - }), - )) - .expect("send first unit"); + let first_req = 
with_explicit_base_state_payload(SpeculativeUpdateClientRequest { + unit_id: "unit-0000".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: vec![0; 20], + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![1], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: Some(Height::new(0, 10)), + prev_state_id: None, + client_state: None, + consensus_state: None, + }, + }); + seed_canonical_base_state(&app, client_id, &first_req.base_state); + tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(first_req)) + .expect("send first unit"); for _ in 0..100 { if app.enclave.observed_max_in_flight() >= 1 { @@ -680,6 +779,7 @@ mod tests { signer }; } + seed_canonical_base_state(&app, client_id, &requests[0].base_state); for req in requests { tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(req)) .expect("send unit"); From 9d6ea214580887efc21b888f8611ec8943e1dc83 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 1 Jun 2026 11:32:46 +0900 Subject: [PATCH 18/48] Simplify enclave concurrency constructors --- app/src/enclave.rs | 8 +------- modules/enclave-api/src/enclave.rs | 32 +----------------------------- tests/integration/src/lib.rs | 9 ++++++++- 3 files changed, 10 insertions(+), 39 deletions(-) diff --git a/app/src/enclave.rs b/app/src/enclave.rs index f5c3fbc5..0830853a 100644 --- a/app/src/enclave.rs +++ b/app/src/enclave.rs @@ -42,13 +42,7 @@ where }; let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home)?; - match Enclave::create_with_ecall_concurrency( - &path, - debug, - km, - env.store.clone(), - ecall_concurrency, - ) { + match Enclave::create(&path, debug, km, env.store.clone(), ecall_concurrency) { Ok(enclave) => Ok(enclave), Err(x) => { bail!( diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 763469a2..78ea588a 100644 --- a/modules/enclave-api/src/enclave.rs +++ 
b/modules/enclave-api/src/enclave.rs @@ -43,21 +43,6 @@ impl Enclave { key_manager: EnclaveKeyManager, store: Arc>, sgx_enclave: SgxEnclave, - ) -> Self { - Self::new_with_ecall_concurrency( - path, - key_manager, - store, - sgx_enclave, - Self::DEFAULT_ECALL_CONCURRENCY, - ) - } - - pub fn new_with_ecall_concurrency( - path: impl Into, - key_manager: EnclaveKeyManager, - store: Arc>, - sgx_enclave: SgxEnclave, ecall_concurrency: usize, ) -> Self { Enclave { @@ -75,26 +60,11 @@ impl Enclave { debug: bool, key_manager: EnclaveKeyManager, store: Arc>, - ) -> SgxResult { - Self::create_with_ecall_concurrency( - path, - debug, - key_manager, - store, - Self::DEFAULT_ECALL_CONCURRENCY, - ) - } - - pub fn create_with_ecall_concurrency( - path: impl Into, - debug: bool, - key_manager: EnclaveKeyManager, - store: Arc>, ecall_concurrency: usize, ) -> SgxResult { let path = path.into(); let enclave = host::create_enclave(path.clone(), debug)?; - Ok(Self::new_with_ecall_concurrency( + Ok(Self::new( path, key_manager, store, diff --git a/tests/integration/src/lib.rs b/tests/integration/src/lib.rs index 15a3ec91..1b6e4f2d 100644 --- a/tests/integration/src/lib.rs +++ b/tests/integration/src/lib.rs @@ -279,7 +279,14 @@ mod tests { let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home).unwrap(); - let enclave = Enclave::create(ENCLAVE_FILE, true, km, env.store.clone()).unwrap(); + let enclave = Enclave::create( + ENCLAVE_FILE, + true, + km, + env.store.clone(), + Enclave::<_>::DEFAULT_ECALL_CONCURRENCY, + ) + .unwrap(); test_remote_attestation(&enclave).unwrap(); From b242ae0a25bc4d2db8218205efd9795a6d7de0c9 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 1 Jun 2026 11:37:42 +0900 Subject: [PATCH 19/48] Use serial ECALLs for CLI enclave loads --- app/src/enclave.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/src/enclave.rs b/app/src/enclave.rs index 0830853a..0f4cd14e 100644 --- a/app/src/enclave.rs +++ 
b/app/src/enclave.rs @@ -25,7 +25,8 @@ where Enclave: EnclaveProtoAPI, { fn load(&self, opts: &Opts, path: Option<&PathBuf>, debug: bool) -> Result> { - self.load_with_ecall_concurrency(opts, path, debug, Enclave::::DEFAULT_ECALL_CONCURRENCY) + // One-shot CLI commands (attestation/enclave/elc) issue ECALLs serially. + self.load_with_ecall_concurrency(opts, path, debug, 1) } fn load_with_ecall_concurrency( From b83750a059f4ca2a5056bbc4d9b630bbd09aee15 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 1 Jun 2026 11:54:56 +0900 Subject: [PATCH 20/48] Make enclave concurrency explicit --- app/src/commands/service.rs | 11 +++++------ modules/enclave-api/src/enclave.rs | 2 -- tests/integration/src/lib.rs | 9 +-------- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/app/src/commands/service.rs b/app/src/commands/service.rs index ed14c88e..0dc35275 100644 --- a/app/src/commands/service.rs +++ b/app/src/commands/service.rs @@ -36,12 +36,14 @@ pub struct Start { )] pub threads: Option, /// Maximum concurrent enclave ECALLs across serial and speculative paths. - /// Defaults to the current Enclave.config.xml TCS budget (4). + /// Set this to match the loaded enclave's `TCSNum`; the default assumes a + /// conservative TCS budget of 4. #[clap( long = "max-enclave-concurrency", + default_value_t = 4, help = "Maximum concurrent enclave ECALLs" )] - pub max_enclave_concurrency: Option, + pub max_enclave_concurrency: usize, /// Maximum concurrent speculative update-client requests. /// Prefer a value less than or equal to --max-enclave-concurrency; excess /// speculative workers will wait on the enclave ECALL gate. 
@@ -63,10 +65,7 @@ impl ServiceCmd { match self { Self::Start(cmd) => { let addr = cmd.address.parse()?; - let enclave_parallelism = cmd - .max_enclave_concurrency - .unwrap_or(Enclave::::DEFAULT_ECALL_CONCURRENCY) - .max(1); + let enclave_parallelism = cmd.max_enclave_concurrency.max(1); let enclave = enclave_loader.load_with_ecall_concurrency( opts, cmd.enclave.path.as_ref(), diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 78ea588a..48f66d2d 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -36,8 +36,6 @@ struct ECallPermitGuard<'a> { } impl Enclave { - pub const DEFAULT_ECALL_CONCURRENCY: usize = 4; - pub fn new( path: impl Into, key_manager: EnclaveKeyManager, diff --git a/tests/integration/src/lib.rs b/tests/integration/src/lib.rs index 1b6e4f2d..f75721d5 100644 --- a/tests/integration/src/lib.rs +++ b/tests/integration/src/lib.rs @@ -279,14 +279,7 @@ mod tests { let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home).unwrap(); - let enclave = Enclave::create( - ENCLAVE_FILE, - true, - km, - env.store.clone(), - Enclave::<_>::DEFAULT_ECALL_CONCURRENCY, - ) - .unwrap(); + let enclave = Enclave::create(ENCLAVE_FILE, true, km, env.store.clone(), 1).unwrap(); test_remote_attestation(&enclave).unwrap(); From 30a061952125027841df0f56d46ff510f9abb516 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 1 Jun 2026 19:10:11 +0900 Subject: [PATCH 21/48] Move client update lock to ELC service --- modules/service/src/client_lock.rs | 100 +++++++++++++++++++++ modules/service/src/elc.rs | 15 ++-- modules/service/src/lib.rs | 1 + modules/service/src/service.rs | 22 ++++- modules/service/src/speculative/permit.rs | 94 +------------------ modules/service/src/speculative/service.rs | 36 +++----- 6 files changed, 142 insertions(+), 126 deletions(-) create mode 100644 modules/service/src/client_lock.rs diff --git a/modules/service/src/client_lock.rs 
b/modules/service/src/client_lock.rs new file mode 100644 index 00000000..6dd9bcd6 --- /dev/null +++ b/modules/service/src/client_lock.rs @@ -0,0 +1,100 @@ +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +#[derive(Debug, Default)] +pub(crate) struct ClientUpdateLocks { + locks: Mutex>>>, +} + +impl ClientUpdateLocks { + pub(crate) fn with_client_serialized(&self, client_id: &str, f: impl FnOnce() -> T) -> T { + let lock = { + let mut locks = self.locks.lock().unwrap(); + locks + .entry(client_id.to_string()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() + }; + let guard = lock.lock().unwrap(); + let result = f(); + drop(guard); + + let mut locks = self.locks.lock().unwrap(); + // strong_count == 2 means only this local `lock` binding and the map + // entry still reference the mutex, so the idle key entry can be removed. + let should_remove = Arc::strong_count(&lock) == 2 + && locks + .get(client_id) + .map(|existing| Arc::ptr_eq(existing, &lock)) + .unwrap_or(false); + if should_remove { + locks.remove(client_id); + } + result + } +} + +#[cfg(test)] +mod tests { + use super::ClientUpdateLocks; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn client_update_locks_serialize_same_client() { + let locks = Arc::new(ClientUpdateLocks::default()); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for _ in 0..6 { + let locks = locks.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + locks.with_client_serialized("client-0", || { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + }); + })); + } + + for handle in handles { + handle.join().unwrap(); + 
} + + assert_eq!(observed_max.load(Ordering::SeqCst), 1); + } + + #[test] + fn client_update_locks_allow_different_clients() { + let locks = Arc::new(ClientUpdateLocks::default()); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + + for i in 0..6 { + let locks = locks.clone(); + let in_flight = in_flight.clone(); + let observed_max = observed_max.clone(); + handles.push(thread::spawn(move || { + locks.with_client_serialized(&format!("client-{i}"), || { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(25)); + in_flight.fetch_sub(1, Ordering::SeqCst); + }); + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert!(observed_max.load(Ordering::SeqCst) > 1); + } +} diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 18a2e262..26244021 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -43,8 +43,7 @@ where let msg = request.into_inner(); let client_id = msg.client_id.clone(); match self - .speculative - .with_client_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) + .with_client_update_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), @@ -110,8 +109,7 @@ where let client_id = msg.client_id.clone(); match self - .speculative - .with_client_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) + .with_client_update_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), @@ -129,12 +127,11 @@ where let app = self.app.clone(); let speculative = self.speculative.clone(); let scheduler_client_id = client_id.clone(); + let service = self.clone(); let scheduler = 
tokio::task::spawn_blocking(move || { - speculative.execute_serialized_speculative_update_client_stream( - &app, - scheduler_client_id, - rx, - ) + service.with_client_update_serialized(&scheduler_client_id.clone(), || { + speculative.execute_speculative_update_client_stream(&app, scheduler_client_id, rx) + }) }); let mut decoder = SpeculativeBatchStreamDecoder::new(client_id.clone()); let header_memory_budget = diff --git a/modules/service/src/lib.rs b/modules/service/src/lib.rs index 4663a3ee..6dec822e 100644 --- a/modules/service/src/lib.rs +++ b/modules/service/src/lib.rs @@ -1,3 +1,4 @@ +mod client_lock; mod elc; mod enclave; mod service; diff --git a/modules/service/src/service.rs b/modules/service/src/service.rs index b4c86f08..2540a022 100644 --- a/modules/service/src/service.rs +++ b/modules/service/src/service.rs @@ -1,3 +1,4 @@ +use crate::client_lock::ClientUpdateLocks; use crate::speculative::SpeculativeService; use anyhow::Result; use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; @@ -28,6 +29,7 @@ where { pub(crate) app: AppService, pub(crate) speculative: SpeculativeService, + client_update_locks: Arc, } impl Clone for AppService @@ -53,6 +55,7 @@ where Self { app: self.app.clone(), speculative: self.speculative.clone(), + client_update_locks: self.client_update_locks.clone(), } } } @@ -83,7 +86,24 @@ where ) -> Self { let app = AppService::new(home, enclave); let speculative = SpeculativeService::new(speculative_concurrency_limit); - Self { app, speculative } + Self { + app, + speculative, + client_update_locks: Arc::new(ClientUpdateLocks::default()), + } + } + + pub(crate) fn with_client_update_serialized( + &self, + client_id: &str, + f: impl FnOnce() -> T, + ) -> T { + // This lock is intentionally owned by the ELC service, not by the + // speculative executor: it serializes all canonical UpdateClient writes + // for a client, including both ordinary gRPC updates and speculative + // batch stitch commits. 
+ self.client_update_locks + .with_client_serialized(client_id, f) } } diff --git a/modules/service/src/speculative/permit.rs b/modules/service/src/speculative/permit.rs index c27319df..267a81de 100644 --- a/modules/service/src/speculative/permit.rs +++ b/modules/service/src/speculative/permit.rs @@ -1,6 +1,5 @@ use enclave_api::Error as EnclaveError; -use std::collections::HashMap; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Condvar, Mutex}; #[derive(Debug)] pub(super) struct PermitGate { @@ -8,11 +7,6 @@ pub(super) struct PermitGate { ready: Condvar, } -#[derive(Debug, Default)] -pub(super) struct KeyLockMap { - locks: Mutex>>>, -} - #[derive(Debug)] struct PermitGateState { available: usize, @@ -59,37 +53,9 @@ impl Drop for PermitGuard<'_> { } } -impl KeyLockMap { - pub(super) fn with_key_serialized(&self, key: &str, f: impl FnOnce() -> T) -> T { - let lock = { - let mut locks = self.locks.lock().unwrap(); - locks - .entry(key.to_string()) - .or_insert_with(|| Arc::new(Mutex::new(()))) - .clone() - }; - let guard = lock.lock().unwrap(); - let result = f(); - drop(guard); - - let mut locks = self.locks.lock().unwrap(); - // strong_count == 2 means only this local `lock` binding and the map - // entry still reference the mutex, so the idle key entry can be removed. 
- let should_remove = Arc::strong_count(&lock) == 2 - && locks - .get(key) - .map(|existing| Arc::ptr_eq(existing, &lock)) - .unwrap_or(false); - if should_remove { - locks.remove(key); - } - result - } -} - #[cfg(test)] mod tests { - use super::{KeyLockMap, PermitGate}; + use super::PermitGate; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::thread; @@ -124,60 +90,4 @@ mod tests { assert_eq!(observed_max.load(Ordering::SeqCst), 2); } - - #[test] - fn key_lock_map_serializes_same_key() { - let locks = Arc::new(KeyLockMap::default()); - let in_flight = Arc::new(AtomicUsize::new(0)); - let observed_max = Arc::new(AtomicUsize::new(0)); - let mut handles = Vec::new(); - - for _ in 0..6 { - let locks = locks.clone(); - let in_flight = in_flight.clone(); - let observed_max = observed_max.clone(); - handles.push(thread::spawn(move || { - locks.with_key_serialized("client-0", || { - let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; - observed_max.fetch_max(current, Ordering::SeqCst); - thread::sleep(Duration::from_millis(25)); - in_flight.fetch_sub(1, Ordering::SeqCst); - }); - })); - } - - for handle in handles { - handle.join().unwrap(); - } - - assert_eq!(observed_max.load(Ordering::SeqCst), 1); - } - - #[test] - fn key_lock_map_allows_different_keys() { - let locks = Arc::new(KeyLockMap::default()); - let in_flight = Arc::new(AtomicUsize::new(0)); - let observed_max = Arc::new(AtomicUsize::new(0)); - let mut handles = Vec::new(); - - for i in 0..6 { - let locks = locks.clone(); - let in_flight = in_flight.clone(); - let observed_max = observed_max.clone(); - handles.push(thread::spawn(move || { - locks.with_key_serialized(&format!("client-{i}"), || { - let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; - observed_max.fetch_max(current, Ordering::SeqCst); - thread::sleep(Duration::from_millis(25)); - in_flight.fetch_sub(1, Ordering::SeqCst); - }); - })); - } - - for handle in handles { - handle.join().unwrap(); - } - - 
assert!(observed_max.load(Ordering::SeqCst) > 1); - } } diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 0cfcbb31..0d2654fa 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -1,5 +1,5 @@ -use super::permit::{KeyLockMap, PermitGate}; -use super::scheduler::execute_speculative_update_client_stream; +use super::permit::PermitGate; +use super::scheduler::execute_speculative_update_client_stream as execute_stream_scheduler; use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, @@ -22,7 +22,6 @@ use store::transaction::{CommitStore, TxAccessor}; use store::WriteSet; pub struct SpeculativeService { - key_locks: Arc, speculative_concurrency_limit: usize, speculative_request_permits: Arc, } @@ -30,7 +29,6 @@ pub struct SpeculativeService { impl Clone for SpeculativeService { fn clone(&self) -> Self { Self { - key_locks: self.key_locks.clone(), speculative_concurrency_limit: self.speculative_concurrency_limit, speculative_request_permits: self.speculative_request_permits.clone(), } @@ -40,7 +38,6 @@ impl Clone for SpeculativeService { impl SpeculativeService { pub fn new(speculative_concurrency_limit: usize) -> Self { Self { - key_locks: Arc::new(KeyLockMap::default()), speculative_concurrency_limit: speculative_concurrency_limit.max(1), speculative_request_permits: Arc::new(PermitGate::new(speculative_concurrency_limit)), } @@ -50,12 +47,6 @@ impl SpeculativeService { self.speculative_concurrency_limit } - pub fn with_client_serialized(&self, client_id: &str, f: impl FnOnce() -> T) -> T { - // Keep client-key serialization outside the speculative execution/stitch - // body so all canonical writes for one client are ordered. 
- self.key_locks.with_key_serialized(client_id, f) - } - #[allow(clippy::result_large_err)] pub fn with_speculative_request_permit( &self, @@ -175,7 +166,7 @@ impl SpeculativeService { }) } - pub(crate) fn execute_serialized_speculative_update_client_stream( + pub(crate) fn execute_speculative_update_client_stream( &self, app: &AppService, client_id: String, @@ -185,15 +176,12 @@ impl SpeculativeService { S: CommitStore + TxAccessor + Send + 'static, E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static, { - self.with_client_serialized(&client_id.clone(), || { - let batch_result = - execute_speculative_update_client_stream(self, app, client_id.clone(), units)?; - let batch = SpeculativeUpdateClientBatch { - client_id, - units: batch_result.requests, - }; - self.stitch_speculative_update_client_batch(app, batch, batch_result.results) - }) + let batch_result = execute_stream_scheduler(self, app, client_id.clone(), units)?; + let batch = SpeculativeUpdateClientBatch { + client_id, + units: batch_result.requests, + }; + self.stitch_speculative_update_client_batch(app, batch, batch_result.results) } } @@ -601,7 +589,7 @@ mod tests { let worker_app = app.clone(); let client_id_for_worker = client_id.to_string(); let handle = thread::spawn(move || { - worker_service.execute_serialized_speculative_update_client_stream( + worker_service.execute_speculative_update_client_stream( &worker_app, client_id_for_worker, rx, @@ -691,7 +679,7 @@ mod tests { let worker_app = app.clone(); let client_id_for_worker = client_id.to_string(); let handle = thread::spawn(move || { - worker_service.execute_serialized_speculative_update_client_stream( + worker_service.execute_speculative_update_client_stream( &worker_app, client_id_for_worker, rx, @@ -745,7 +733,7 @@ mod tests { let worker_app = app.clone(); let client_id_for_worker = client_id.to_string(); let handle = thread::spawn(move || { - worker_service.execute_serialized_speculative_update_client_stream( + 
worker_service.execute_speculative_update_client_stream( &worker_app, client_id_for_worker, rx, From be397eff68f0cd992781cc25fd88a77ca46623c2 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 1 Jun 2026 19:25:00 +0900 Subject: [PATCH 22/48] Run serial update clients on blocking pool --- modules/service/src/elc.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 26244021..408f4a5b 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -42,9 +42,15 @@ where ) -> Result, Status> { let msg = request.into_inner(); let client_id = msg.client_id.clone(); - match self - .with_client_update_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) - { + let service = self.clone(); + let result = tokio::task::spawn_blocking(move || { + service.with_client_update_serialized(&client_id, || { + service.app.enclave.proto_update_client(msg) + }) + }) + .await + .map_err(|e| Status::aborted(format!("update client worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -108,9 +114,15 @@ where }; let client_id = msg.client_id.clone(); - match self - .with_client_update_serialized(&client_id, || self.app.enclave.proto_update_client(msg)) - { + let service = self.clone(); + let result = tokio::task::spawn_blocking(move || { + service.with_client_update_serialized(&client_id, || { + service.app.enclave.proto_update_client(msg) + }) + }) + .await + .map_err(|e| Status::aborted(format!("update client stream worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } From ccf38b81d4a5baf425ed1658d3fbb496942c1fcf Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 2 Jun 2026 14:54:56 +0900 Subject: [PATCH 23/48] Clear speculative headers before releasing budget --- modules/service/src/speculative/scheduler.rs 
| 9 +-------- modules/service/src/speculative/stream.rs | 5 ++++- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index f19bed2b..6620f48e 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -201,12 +201,6 @@ impl StreamingSchedulerState { } } -fn clear_request_header_payload(req: &mut SpeculativeUpdateClientRequest) { - if let Some(header) = req.update.header.as_mut() { - header.value.clear(); - } -} - fn streaming_speculative_worker( speculative: &SpeculativeService, app: &AppService, @@ -265,8 +259,7 @@ fn streaming_speculative_worker( state.in_flight -= 1; match result { Ok(result) => { - let mut req = req.into_request(); - clear_request_header_payload(&mut req); + let req = req.into_request_without_header_payload(); state.complete_unit(index, req, result); } Err(e) => { diff --git a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs index d45bed68..43e62d5b 100644 --- a/modules/service/src/speculative/stream.rs +++ b/modules/service/src/speculative/stream.rs @@ -177,7 +177,10 @@ impl ResidentSpeculativeUpdateClientRequest { &self.request } - pub(crate) fn into_request(self) -> SpeculativeUpdateClientRequest { + pub(crate) fn into_request_without_header_payload(mut self) -> SpeculativeUpdateClientRequest { + if let Some(header) = self.request.update.header.as_mut() { + header.value.clear(); + } self.request } From 0e1df3d63e6424851c495228986ebe32a62bd4cd Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 2 Jun 2026 14:57:17 +0900 Subject: [PATCH 24/48] Reject empty speculative consensus state type --- modules/enclave-api/src/api/command.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/enclave-api/src/api/command.rs b/modules/enclave-api/src/api/command.rs index 594e177a..46026ce5 100644 --- 
a/modules/enclave-api/src/api/command.rs +++ b/modules/enclave-api/src/api/command.rs @@ -185,10 +185,11 @@ fn compute_seed_write_set(client_id: &str, base_state: &SpeculativeBaseState) -> bincode::serde::encode_to_vec(&base_state.client_state, bincode::config::standard()) .map_err(crate::errors::Error::bincode_encode)?; - debug_assert!( - !base_state.consensus_state.type_url.is_empty(), - "seeded consensus state should carry a concrete type" - ); + if base_state.consensus_state.type_url.is_empty() { + return Err(crate::errors::Error::invalid_argument( + "speculative base_state consensus_state type_url must not be empty".to_string(), + )); + } let consensus_state_key = store_key::consensus_state_bytes(client_id, &base_state.prev_height); let consensus_state_value = bincode::serde::encode_to_vec(&base_state.consensus_state, bincode::config::standard()) @@ -279,4 +280,21 @@ mod tests { Some(&Some(computed_client_state_value)) ); } + #[test] + fn speculative_update_client_rejects_empty_seeded_consensus_state_type_url() { + let client_id = "07-tendermint-0"; + let base_state = SpeculativeBaseState { + consensus_state: any("", b"consensus-10"), + ..base_state() + }; + + let err = compute_seed_write_set(client_id, &base_state) + .expect_err("empty consensus_state type_url must be rejected"); + + assert!( + err.to_string() + .contains("speculative base_state consensus_state type_url must not be empty"), + "unexpected error: {err}" + ); + } } From 8ad018637b2314276a53e845aad7ad4f4d277810 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 2 Jun 2026 15:00:24 +0900 Subject: [PATCH 25/48] Preserve first speculative scheduler failure --- modules/service/src/speculative/scheduler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 6620f48e..96b9f39a 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ 
-263,7 +263,7 @@ fn streaming_speculative_worker( state.complete_unit(index, req, result); } Err(e) => { - state.failure = Some(e); + state.failure.get_or_insert(e); } } shared.ready.notify_all(); From 8f3acd3a86cf1c05a544dc528765ccb655e87872 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 5 Jun 2026 16:16:57 +0900 Subject: [PATCH 26/48] service: validate speculative base by stored height --- modules/enclave-api/src/enclave.rs | 46 +++- modules/service/src/speculative/service.rs | 275 ++++++++++++++++++++- modules/types/src/lib.rs | 5 +- modules/types/src/store_key.rs | 17 ++ 4 files changed, 323 insertions(+), 20 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 48f66d2d..41eeb31b 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -188,9 +188,14 @@ pub trait HostStoreTxManager: CommitStoreAccessor { } /// `apply_write_set_with_expected_base` applies a speculative write set only if the - /// canonical store still matches the explicit base state that seeded the batch. + /// store already contains the explicit base state that seeded the batch at + /// `prev_height`. + /// /// The check and apply run under the same serialized update transaction keyed by - /// `update_key`, so the canonical base cannot change between verification and commit. + /// `update_key`, so the accepted base cannot change between verification and commit. + /// Historical client-state entries are preferred. The latest client-state key is + /// accepted as a bootstrap fallback for stores created before the historical index + /// existed, or for the first speculative batch whose base is still the latest state. 
fn apply_write_set_with_expected_base( &self, update_key: UpdateKey, @@ -245,17 +250,36 @@ pub trait HostStoreTxManager: CommitStoreAccessor { where S: TxAccessor, { - let client_state_key = store_key::client_state_bytes(client_id); + let historical_client_state_key = + store_key::client_state_at_height_bytes(client_id, prev_height); + let latest_client_state_key = store_key::client_state_bytes(client_id); let client_state_value = bincode::serde::encode_to_vec(client_state, bincode::config::standard()) .map_err(Error::bincode_encode)?; - let canonical_client_state = - self.use_mut_store(|store| store.tx_get(tx_id, &client_state_key))?; - if canonical_client_state.as_deref() != Some(client_state_value.as_slice()) { - return Err(Error::invalid_argument(format!( - "canonical speculative base client_state mismatch: client_id={}", - client_id - ))); + let stored_historical_client_state = + self.use_mut_store(|store| store.tx_get(tx_id, &historical_client_state_key))?; + let stored_latest_client_state = + self.use_mut_store(|store| store.tx_get(tx_id, &latest_client_state_key))?; + match stored_historical_client_state.as_deref() { + Some(stored) if stored == client_state_value.as_slice() => {} + Some(_) => { + return Err(Error::invalid_argument(format!( + "stored speculative base client_state mismatch: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } + None if stored_latest_client_state.as_deref() + == Some(client_state_value.as_slice()) => {} + None => { + return Err(Error::invalid_argument(format!( + "stored speculative base client_state mismatch: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } } let consensus_state_key = store_key::consensus_state_bytes(client_id, prev_height); @@ -266,7 +290,7 @@ pub trait HostStoreTxManager: CommitStoreAccessor { self.use_mut_store(|store| store.tx_get(tx_id, &consensus_state_key))?; if 
canonical_consensus_state.as_deref() != Some(consensus_state_value.as_slice()) { return Err(Error::invalid_argument(format!( - "canonical speculative base consensus_state mismatch: client_id={} height={}-{}", + "stored speculative base consensus_state mismatch: client_id={} height={}-{}", client_id, prev_height.revision_number(), prev_height.revision_height() diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 0d2654fa..1b8ce883 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -120,7 +120,7 @@ impl SpeculativeService { unit_id: None, detail: "speculative batch must contain at least one unit".to_string(), })?; - let canonical_base = base_state_payload_from_ref(&first_unit.base_state).map_err(|e| { + let first_base = base_state_payload_from_ref(&first_unit.base_state).map_err(|e| { SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::BaseStateMismatch, unit_id: Some(first_unit.unit_id.clone()), @@ -141,6 +141,16 @@ impl SpeculativeService { for (key, value) in result.write_set { merged_write_set.insert(key, value); } + insert_historical_base_state_write( + &mut merged_write_set, + &batch.client_id, + &req.base_state, + ) + .map_err(|e| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BaseStateMismatch, + unit_id: Some(req.unit_id.clone()), + detail: e.to_string(), + })?; units.push(StitchedUpdateClientResult { response: result.response, observed_transition: result.observed_transition, @@ -149,9 +159,9 @@ impl SpeculativeService { app.enclave .apply_write_set_with_expected_base( batch.client_id.clone(), - canonical_base.prev_height, - &canonical_base.client_state, - &canonical_base.consensus_state, + first_base.prev_height, + &first_base.client_state, + &first_base.consensus_state, merged_write_set, ) .map_err(|e| SpeculativeBatchFailure { @@ -185,6 +195,22 @@ impl SpeculativeService { } } +fn insert_historical_base_state_write( + 
write_set: &mut WriteSet, + client_id: &str, + base_state: &ExplicitStateRef, +) -> core::result::Result<(), EnclaveError> { + let base_state = base_state_payload_from_ref(base_state)?; + let client_state_value = + bincode::serde::encode_to_vec(&base_state.client_state, bincode::config::standard()) + .map_err(EnclaveError::bincode_encode)?; + write_set.insert( + lcp_types::store_key::client_state_at_height_bytes(client_id, &base_state.prev_height), + Some(client_state_value), + ); + Ok(()) +} + #[allow(clippy::result_large_err)] fn base_state_payload_from_ref( base_state: &ExplicitStateRef, @@ -430,6 +456,10 @@ mod tests { app.enclave.use_mut_store(|store| { store.set( lcp_types::store_key::client_state_bytes(client_id), + client_state_value.clone(), + ); + store.set( + lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), client_state_value, ); store.set( @@ -531,7 +561,7 @@ mod tests { } #[test] - fn stitch_rejects_first_base_state_that_differs_from_canonical_store() { + fn stitch_rejects_first_base_state_that_is_not_in_store() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); @@ -566,13 +596,244 @@ mod tests { units: vec![result], }, ) - .expect_err("non-canonical first base state must be rejected"); + .expect_err("unknown first base state must be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail + .contains("stored speculative base client_state mismatch"), + "unexpected error detail: {}", + err.detail + ); + } + + #[test] + fn stitch_accepts_historical_first_base_state_that_is_not_latest() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let req = 
with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let client_state = req + .base_state + .client_state + .as_ref() + .expect("test base client_state"); + let consensus_state = req + .base_state + .consensus_state + .as_ref() + .expect("test base consensus_state"); + let historical_client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .expect("encode historical client_state"); + let consensus_state_value = + bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) + .expect("encode consensus_state"); + let latest_client_state_value = bincode::serde::encode_to_vec( + &Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![9], + }, + bincode::config::standard(), + ) + .expect("encode latest client_state"); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::client_state_bytes(client_id), + latest_client_state_value, + ); + store.set( + lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), + historical_client_state_value, + ); + store.set( + lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), + consensus_state_value, + ); + }); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect("historical first base state should be accepted"); + 
} + + #[test] + fn stitch_accepts_latest_first_base_state_when_historical_is_absent() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let client_state = req + .base_state + .client_state + .as_ref() + .expect("test base client_state"); + let consensus_state = req + .base_state + .consensus_state + .as_ref() + .expect("test base consensus_state"); + let latest_client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .expect("encode latest client_state"); + let consensus_state_value = + bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) + .expect("encode consensus_state"); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::client_state_bytes(client_id), + latest_client_state_value, + ); + store.set( + lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), + consensus_state_value, + ); + }); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect("latest first base state should bootstrap when historical state is absent"); + } + + #[test] + fn 
stitch_rejects_mismatched_historical_first_base_state_even_if_latest_matches() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let client_state = req + .base_state + .client_state + .as_ref() + .expect("test base client_state"); + let consensus_state = req + .base_state + .consensus_state + .as_ref() + .expect("test base consensus_state"); + let latest_client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .expect("encode latest client_state"); + let mismatched_historical_client_state_value = bincode::serde::encode_to_vec( + &Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![9], + }, + bincode::config::standard(), + ) + .expect("encode historical client_state"); + let consensus_state_value = + bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) + .expect("encode consensus_state"); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::client_state_bytes(client_id), + latest_client_state_value, + ); + store.set( + lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), + mismatched_historical_client_state_value, + ); + store.set( + lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), + consensus_state_value, + ); + }); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: None, + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + let err = 
service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect_err("mismatched historical base state should be rejected"); assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); assert!( err.detail - .contains("canonical speculative base client_state mismatch"), + .contains("stored speculative base client_state mismatch"), "unexpected error detail: {}", err.detail ); diff --git a/modules/types/src/lib.rs b/modules/types/src/lib.rs index 214d5360..953a1de1 100644 --- a/modules/types/src/lib.rs +++ b/modules/types/src/lib.rs @@ -11,8 +11,9 @@ pub use host::ClientId; pub use lcp_proto as proto; pub use sgx::{EnclaveMetadata, Mrenclave}; pub use store_key::{ - client_state as client_state_key, client_state_bytes, client_type as client_type_key, - client_type_bytes, consensus_state as consensus_state_key, consensus_state_bytes, + client_state as client_state_key, client_state_at_height, client_state_at_height_bytes, + client_state_bytes, client_type as client_type_key, client_type_bytes, + consensus_state as consensus_state_key, consensus_state_bytes, }; pub use time::{nanos_to_duration, Time, MAX_UNIX_TIMESTAMP_NANOS}; pub use transmuter::{deserialize_bytes, serialize_bytes, BytesTransmuter}; diff --git a/modules/types/src/store_key.rs b/modules/types/src/store_key.rs index 91194d7b..8b905718 100644 --- a/modules/types/src/store_key.rs +++ b/modules/types/src/store_key.rs @@ -8,6 +8,15 @@ pub fn client_state(client_id: &str) -> String { format!("clients/{client_id}/clientState") } +pub fn client_state_at_height(client_id: &str, height: &Height) -> String { + format!( + "clients/{}/clientStates/{}-{}", + client_id, + height.revision_number(), + height.revision_height() + ) +} + pub fn 
consensus_state(client_id: &str, height: &Height) -> String { format!( "clients/{}/consensusStates/{}-{}", @@ -25,6 +34,10 @@ pub fn client_state_bytes(client_id: &str) -> Vec { client_state(client_id).into_bytes() } +pub fn client_state_at_height_bytes(client_id: &str, height: &Height) -> Vec { + client_state_at_height(client_id, height).into_bytes() +} + pub fn consensus_state_bytes(client_id: &str, height: &Height) -> Vec { consensus_state(client_id, height).into_bytes() } @@ -44,6 +57,10 @@ mod tests { client_state("07-tendermint-0"), "clients/07-tendermint-0/clientState" ); + assert_eq!( + client_state_at_height("07-tendermint-0", &height), + "clients/07-tendermint-0/clientStates/1-23" + ); assert_eq!( consensus_state("07-tendermint-0", &height), "clients/07-tendermint-0/consensusStates/1-23" From 6142089907577f4418f680d3e91c19e2b4233c82 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 5 Jun 2026 18:42:08 +0900 Subject: [PATCH 27/48] service: validate speculative base by stored state id --- .../src/light_client/init_client.rs | 4 +- .../src/light_client/update_client.rs | 4 +- modules/enclave-api/src/enclave.rs | 64 ++-- modules/light-client/src/context.rs | 18 ++ modules/service/src/speculative/service.rs | 281 ++++++++---------- modules/types/src/lib.rs | 6 +- modules/types/src/store_key.rs | 16 +- 7 files changed, 195 insertions(+), 198 deletions(-) diff --git a/enclave-modules/ecall-handler/src/light_client/init_client.rs b/enclave-modules/ecall-handler/src/light_client/init_client.rs index 6877c002..c899c994 100644 --- a/enclave-modules/ecall-handler/src/light_client/init_client.rs +++ b/enclave-modules/ecall-handler/src/light_client/init_client.rs @@ -6,7 +6,7 @@ use crypto::Signer; use ecall_commands::{InitClientInput, InitClientResponse, LightClientResponse}; use lcp_types::ClientId; use light_client::commitments::{prove_commitment, CommitmentProof}; -use light_client::{ClientKeeper, ClientReader, LightClientResolver}; +use 
light_client::{commitments::gen_state_id_from_any, ClientKeeper, ClientReader, LightClientResolver}; use store::KVStore; pub fn init_client( @@ -34,9 +34,11 @@ pub fn init_client( if ctx.client_exists(&client_id) { return Err(Error::client_already_exists(client_id.to_string())); } + let state_id = gen_state_id_from_any(&input.any_client_state, &input.any_consensus_state)?; ctx.store_client_type(client_id.clone(), client_type)?; ctx.store_any_client_state(client_id.clone(), input.any_client_state)?; ctx.store_any_consensus_state(client_id.clone(), res.height, input.any_consensus_state)?; + ctx.store_state_id(client_id.clone(), res.height, state_id)?; let proof = if res.prove { prove_commitment(ek, res.message)? diff --git a/enclave-modules/ecall-handler/src/light_client/update_client.rs b/enclave-modules/ecall-handler/src/light_client/update_client.rs index 12905ea2..1780f788 100644 --- a/enclave-modules/ecall-handler/src/light_client/update_client.rs +++ b/enclave-modules/ecall-handler/src/light_client/update_client.rs @@ -16,6 +16,7 @@ pub fn update_client( let ek = ctx.get_enclave_key(); match lc.update_client(ctx, input.client_id.clone(), input.any_header)? { UpdateClientResult::UpdateState(mut data) => { + let post_state_id = data.message.post_state_id; let message: ProxyMessage = { if input.include_state && data.message.emitted_states.is_empty() { data.message.emitted_states = @@ -26,10 +27,11 @@ pub fn update_client( ctx.store_any_client_state(input.client_id.clone(), data.new_any_client_state)?; ctx.store_any_consensus_state( - input.client_id, + input.client_id.clone(), data.height, data.new_any_consensus_state, )?; + ctx.store_state_id(input.client_id, data.height, post_state_id)?; let proof = if data.prove { prove_commitment(ek, message)? 
diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 41eeb31b..07330ba6 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -193,15 +193,18 @@ pub trait HostStoreTxManager: CommitStoreAccessor { /// /// The check and apply run under the same serialized update transaction keyed by /// `update_key`, so the accepted base cannot change between verification and commit. - /// Historical client-state entries are preferred. The latest client-state key is - /// accepted as a bootstrap fallback for stores created before the historical index - /// existed, or for the first speculative batch whose base is still the latest state. + /// The explicit base client state is not looked up by height. Instead, its + /// state ID must match the height-indexed state ID previously stored by a + /// successful serial/speculative update. This keeps the canonical store model + /// aligned with serial UpdateClient: latest client_state plus height-indexed + /// consensus_states and compact state_ids. 
fn apply_write_set_with_expected_base( &self, update_key: UpdateKey, prev_height: Height, client_state: &Any, consensus_state: &Any, + prev_state_id: Option<&[u8]>, write_set: WriteSet, ) -> Result<()> where @@ -215,6 +218,7 @@ pub trait HostStoreTxManager: CommitStoreAccessor { &prev_height, client_state, consensus_state, + prev_state_id, ) { self.rollback_tx(tx); return Err(e); @@ -244,44 +248,13 @@ pub trait HostStoreTxManager: CommitStoreAccessor { tx_id: store::TxId, client_id: &str, prev_height: &Height, - client_state: &Any, + _client_state: &Any, consensus_state: &Any, + prev_state_id: Option<&[u8]>, ) -> Result<()> where S: TxAccessor, { - let historical_client_state_key = - store_key::client_state_at_height_bytes(client_id, prev_height); - let latest_client_state_key = store_key::client_state_bytes(client_id); - let client_state_value = - bincode::serde::encode_to_vec(client_state, bincode::config::standard()) - .map_err(Error::bincode_encode)?; - let stored_historical_client_state = - self.use_mut_store(|store| store.tx_get(tx_id, &historical_client_state_key))?; - let stored_latest_client_state = - self.use_mut_store(|store| store.tx_get(tx_id, &latest_client_state_key))?; - match stored_historical_client_state.as_deref() { - Some(stored) if stored == client_state_value.as_slice() => {} - Some(_) => { - return Err(Error::invalid_argument(format!( - "stored speculative base client_state mismatch: client_id={} height={}-{}", - client_id, - prev_height.revision_number(), - prev_height.revision_height() - ))); - } - None if stored_latest_client_state.as_deref() - == Some(client_state_value.as_slice()) => {} - None => { - return Err(Error::invalid_argument(format!( - "stored speculative base client_state mismatch: client_id={} height={}-{}", - client_id, - prev_height.revision_number(), - prev_height.revision_height() - ))); - } - } - let consensus_state_key = store_key::consensus_state_bytes(client_id, prev_height); let consensus_state_value = 
bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) @@ -296,6 +269,25 @@ pub trait HostStoreTxManager: CommitStoreAccessor { prev_height.revision_height() ))); } + + let prev_state_id = prev_state_id.ok_or_else(|| { + Error::invalid_argument(format!( + "speculative update_client must provide prev_state_id: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + )) + })?; + let state_id_key = store_key::state_id_bytes(client_id, prev_height); + let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; + if stored_state_id.as_deref() != Some(prev_state_id) { + return Err(Error::invalid_argument(format!( + "stored speculative base state_id mismatch: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } Ok(()) } diff --git a/modules/light-client/src/context.rs b/modules/light-client/src/context.rs index eb335c72..59c04938 100644 --- a/modules/light-client/src/context.rs +++ b/modules/light-client/src/context.rs @@ -1,5 +1,6 @@ use crate::types::{Any, ClientId, Height, Time}; use crate::{errors::Error, prelude::*}; +use commitments::StateID; use lcp_types::store_key; use store::KVStore; @@ -95,6 +96,23 @@ pub trait ClientKeeper: ClientReader { ); Ok(()) } + + /// Called upon successful client creation and update to index the state ID + /// for the state at `height`. This keeps historical base validation compact: + /// client_state remains latest-only while consensus_state and state_id are + /// height-indexed. 
+ fn store_state_id( + &mut self, + client_id: ClientId, + height: Height, + state_id: StateID, + ) -> Result<(), Error> { + self.set( + store_key::state_id_bytes(client_id.as_str(), &height), + state_id.to_vec(), + ); + Ok(()) + } } pub trait HostClientReader: HostContext + ClientReader {} diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 1b8ce883..80e669d7 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -127,6 +127,10 @@ impl SpeculativeService { detail: e.to_string(), } })?; + let first_prev_state_id = results + .units + .first() + .and_then(|unit| unit.observed_transition.prev_state_id.clone()); let mut merged_write_set = WriteSet::default(); let mut units = Vec::with_capacity(batch.units.len()); @@ -141,16 +145,6 @@ impl SpeculativeService { for (key, value) in result.write_set { merged_write_set.insert(key, value); } - insert_historical_base_state_write( - &mut merged_write_set, - &batch.client_id, - &req.base_state, - ) - .map_err(|e| SpeculativeBatchFailure { - kind: SpeculativeBatchFailureKind::BaseStateMismatch, - unit_id: Some(req.unit_id.clone()), - detail: e.to_string(), - })?; units.push(StitchedUpdateClientResult { response: result.response, observed_transition: result.observed_transition, @@ -162,6 +156,7 @@ impl SpeculativeService { first_base.prev_height, &first_base.client_state, &first_base.consensus_state, + first_prev_state_id.as_deref(), merged_write_set, ) .map_err(|e| SpeculativeBatchFailure { @@ -195,22 +190,6 @@ impl SpeculativeService { } } -fn insert_historical_base_state_write( - write_set: &mut WriteSet, - client_id: &str, - base_state: &ExplicitStateRef, -) -> core::result::Result<(), EnclaveError> { - let base_state = base_state_payload_from_ref(base_state)?; - let client_state_value = - bincode::serde::encode_to_vec(&base_state.client_state, bincode::config::standard()) - .map_err(EnclaveError::bincode_encode)?; - 
write_set.insert( - lcp_types::store_key::client_state_at_height_bytes(client_id, &base_state.prev_height), - Some(client_state_value), - ); - Ok(()) -} - #[allow(clippy::result_large_err)] fn base_state_payload_from_ref( base_state: &ExplicitStateRef, @@ -259,7 +238,9 @@ fn decode_observed_transition( #[cfg(test)] mod tests { use super::*; - use commitments::{CommitmentProof, StateID, UpdateStateProxyMessage, ValidationContext}; + use commitments::{ + gen_state_id_from_any, CommitmentProof, StateID, UpdateStateProxyMessage, ValidationContext, + }; use ecall_commands::UpdateClientResponse as EnclaveUpdateClientResponse; use enclave_api::{ CommitStoreAccessor, EnclaveCommandAPI, EnclaveInfo, EnclavePrimitiveAPI, EnclaveProtoAPI, @@ -357,11 +338,19 @@ mod tests { self.current_in_flight.fetch_sub(1, Ordering::SeqCst); let prev_height = Some(input.base_state.prev_height); - let prev_state_id = (idx > 0).then(|| { + let prev_state_id = if idx == 0 { + Some( + gen_state_id_from_any( + &input.base_state.client_state, + &input.base_state.consensus_state, + ) + .expect("test prev_state_id"), + ) + } else { let mut prev_state_id = [0u8; 32]; prev_state_id[31] = idx as u8; - StateID::from(prev_state_id) - }); + Some(StateID::from(prev_state_id)) + }; let mut post_state_id = [0u8; 32]; post_state_id[31] = (idx as u8) + 1; let message = ProxyMessage::from(UpdateStateProxyMessage { @@ -453,22 +442,38 @@ mod tests { let consensus_state_value = bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) .expect("encode consensus_state"); + let state_id = state_id_for_base_state(base_state); app.enclave.use_mut_store(|store| { store.set( lcp_types::store_key::client_state_bytes(client_id), - client_state_value.clone(), - ); - store.set( - lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), client_state_value, ); store.set( lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), consensus_state_value, ); + store.set( + 
lcp_types::store_key::state_id_bytes(client_id, &prev_height), + state_id, + ); }); } + fn state_id_for_base_state(base_state: &ExplicitStateRef) -> Vec { + gen_state_id_from_any( + base_state + .client_state + .as_ref() + .expect("test base client_state"), + base_state + .consensus_state + .as_ref() + .expect("test base consensus_state"), + ) + .expect("compute test state_id") + .to_vec() + } + fn mk_result( prev_height: Option, prev_state_id: Option<&[u8]>, @@ -596,76 +601,41 @@ mod tests { units: vec![result], }, ) - .expect_err("unknown first base state must be rejected"); + .expect_err("unknown first base consensus state must be rejected"); assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); assert!( err.detail - .contains("stored speculative base client_state mismatch"), + .contains("stored speculative base consensus_state mismatch"), "unexpected error detail: {}", err.detail ); } #[test] - fn stitch_accepts_historical_first_base_state_that_is_not_latest() { + fn stitch_accepts_first_base_state_when_stored_consensus_and_state_id_match() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); let service = SpeculativeService::new(1); - let req = with_explicit_base_state_payload(mk_req( + let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", client_id, Some(Height::new(0, 10)), None, )); let prev_height = req.base_state.prev_height.expect("test base prev_height"); - let client_state = req - .base_state - .client_state - .as_ref() - .expect("test base client_state"); - let consensus_state = req - .base_state - .consensus_state - .as_ref() - .expect("test base consensus_state"); - let historical_client_state_value = - bincode::serde::encode_to_vec(client_state, bincode::config::standard()) - .expect("encode historical client_state"); - let consensus_state_value = - 
bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) - .expect("encode consensus_state"); - let latest_client_state_value = bincode::serde::encode_to_vec( - &Any { - type_url: "/ibc.mock.ClientState".to_string(), - value: vec![9], - }, - bincode::config::standard(), - ) - .expect("encode latest client_state"); - app.enclave.use_mut_store(|store| { - store.set( - lcp_types::store_key::client_state_bytes(client_id), - latest_client_state_value, - ); - store.set( - lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), - historical_client_state_value, - ); - store.set( - lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), - consensus_state_value, - ); - }); + let prev_state_id = state_id_for_base_state(&req.base_state); + req.base_state.prev_state_id = Some(prev_state_id.clone()); + seed_canonical_base_state(&app, client_id, &req.base_state); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), write_set: WriteSet::default(), base_state: req.base_state.clone(), observed_transition: ObservedStateTransition { prev_height: Some(prev_height), - prev_state_id: None, + prev_state_id: Some(prev_state_id), post_height: Height::new(0, 11), post_state_id: vec![1; 32], }, @@ -683,11 +653,11 @@ mod tests { units: vec![result], }, ) - .expect("historical first base state should be accepted"); + .expect("stored consensus state and matching state_id should be accepted"); } #[test] - fn stitch_accepts_latest_first_base_state_when_historical_is_absent() { + fn stitch_rejects_first_base_state_when_prev_state_id_is_missing() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); @@ -699,32 +669,7 @@ mod tests { None, )); let prev_height = req.base_state.prev_height.expect("test base prev_height"); - let client_state = req - .base_state - .client_state - .as_ref() - .expect("test base 
client_state"); - let consensus_state = req - .base_state - .consensus_state - .as_ref() - .expect("test base consensus_state"); - let latest_client_state_value = - bincode::serde::encode_to_vec(client_state, bincode::config::standard()) - .expect("encode latest client_state"); - let consensus_state_value = - bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) - .expect("encode consensus_state"); - app.enclave.use_mut_store(|store| { - store.set( - lcp_types::store_key::client_state_bytes(client_id), - latest_client_state_value, - ); - store.set( - lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), - consensus_state_value, - ); - }); + seed_canonical_base_state(&app, client_id, &req.base_state); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), write_set: WriteSet::default(), @@ -737,7 +682,7 @@ mod tests { }, }; - service + let err = service .stitch_speculative_update_client_batch( &app, SpeculativeUpdateClientBatch { @@ -749,59 +694,39 @@ mod tests { units: vec![result], }, ) - .expect("latest first base state should bootstrap when historical state is absent"); + .expect_err("missing first prev_state_id should be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail + .contains("speculative update_client must provide prev_state_id"), + "unexpected error detail: {}", + err.detail + ); } #[test] - fn stitch_rejects_mismatched_historical_first_base_state_even_if_latest_matches() { + fn stitch_rejects_first_base_state_when_stored_state_id_is_missing() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); let service = SpeculativeService::new(1); - let req = with_explicit_base_state_payload(mk_req( + let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", client_id, 
Some(Height::new(0, 10)), None, )); let prev_height = req.base_state.prev_height.expect("test base prev_height"); - let client_state = req - .base_state - .client_state - .as_ref() - .expect("test base client_state"); - let consensus_state = req - .base_state - .consensus_state - .as_ref() - .expect("test base consensus_state"); - let latest_client_state_value = - bincode::serde::encode_to_vec(client_state, bincode::config::standard()) - .expect("encode latest client_state"); - let mismatched_historical_client_state_value = bincode::serde::encode_to_vec( - &Any { - type_url: "/ibc.mock.ClientState".to_string(), - value: vec![9], - }, - bincode::config::standard(), - ) - .expect("encode historical client_state"); - let consensus_state_value = - bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) - .expect("encode consensus_state"); + let prev_state_id = state_id_for_base_state(&req.base_state); + req.base_state.prev_state_id = Some(prev_state_id.clone()); + seed_canonical_base_state(&app, client_id, &req.base_state); app.enclave.use_mut_store(|store| { - store.set( - lcp_types::store_key::client_state_bytes(client_id), - latest_client_state_value, - ); - store.set( - lcp_types::store_key::client_state_at_height_bytes(client_id, &prev_height), - mismatched_historical_client_state_value, - ); - store.set( - lcp_types::store_key::consensus_state_bytes(client_id, &prev_height), - consensus_state_value, - ); + store.remove(&lcp_types::store_key::state_id_bytes( + client_id, + &prev_height, + )); }); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), @@ -809,7 +734,57 @@ mod tests { base_state: req.base_state.clone(), observed_transition: ObservedStateTransition { prev_height: Some(prev_height), - prev_state_id: None, + prev_state_id: Some(prev_state_id), + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + let err = service + .stitch_speculative_update_client_batch( + &app, + 
SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect_err("missing stored state_id should be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail + .contains("stored speculative base state_id mismatch"), + "unexpected error detail: {}", + err.detail + ); + } + + #[test] + fn stitch_rejects_first_base_state_when_state_id_mismatch() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + seed_canonical_base_state(&app, client_id, &req.base_state); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: Some(vec![9; 32]), post_height: Height::new(0, 11), post_state_id: vec![1; 32], }, @@ -827,13 +802,13 @@ mod tests { units: vec![result], }, ) - .expect_err("mismatched historical base state should be rejected"); + .expect_err("mismatched first base state_id should be rejected"); assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); assert!( err.detail - .contains("stored speculative base client_state mismatch"), + .contains("stored speculative base state_id mismatch"), "unexpected error detail: {}", err.detail ); @@ -857,7 +832,7 @@ mod tests { ) }); - let first_req = 
with_explicit_base_state_payload(SpeculativeUpdateClientRequest { + let mut first_req = with_explicit_base_state_payload(SpeculativeUpdateClientRequest { unit_id: "unit-0000".to_string(), update: MsgUpdateClient { client_id: client_id.to_string(), @@ -875,6 +850,7 @@ mod tests { consensus_state: None, }, }); + first_req.base_state.prev_state_id = Some(state_id_for_base_state(&first_req.base_state)); seed_canonical_base_state(&app, client_id, &first_req.base_state); tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(first_req)) .expect("send first unit"); @@ -1027,6 +1003,13 @@ mod tests { signer[19] = i as u8; signer }; + if i == 0 { + req.base_state.prev_state_id = Some(state_id_for_base_state(&req.base_state)); + } else { + let mut prev_state_id = vec![0; 32]; + prev_state_id[31] = i as u8; + req.base_state.prev_state_id = Some(prev_state_id); + } } seed_canonical_base_state(&app, client_id, &requests[0].base_state); for req in requests { diff --git a/modules/types/src/lib.rs b/modules/types/src/lib.rs index 953a1de1..d7407ced 100644 --- a/modules/types/src/lib.rs +++ b/modules/types/src/lib.rs @@ -11,9 +11,9 @@ pub use host::ClientId; pub use lcp_proto as proto; pub use sgx::{EnclaveMetadata, Mrenclave}; pub use store_key::{ - client_state as client_state_key, client_state_at_height, client_state_at_height_bytes, - client_state_bytes, client_type as client_type_key, client_type_bytes, - consensus_state as consensus_state_key, consensus_state_bytes, + client_state as client_state_key, client_state_bytes, client_type as client_type_key, + client_type_bytes, consensus_state as consensus_state_key, consensus_state_bytes, state_id, + state_id_bytes, }; pub use time::{nanos_to_duration, Time, MAX_UNIX_TIMESTAMP_NANOS}; pub use transmuter::{deserialize_bytes, serialize_bytes, BytesTransmuter}; diff --git a/modules/types/src/store_key.rs b/modules/types/src/store_key.rs index 8b905718..b6183deb 100644 --- a/modules/types/src/store_key.rs +++ 
b/modules/types/src/store_key.rs @@ -8,9 +8,9 @@ pub fn client_state(client_id: &str) -> String { format!("clients/{client_id}/clientState") } -pub fn client_state_at_height(client_id: &str, height: &Height) -> String { +pub fn state_id(client_id: &str, height: &Height) -> String { format!( - "clients/{}/clientStates/{}-{}", + "clients/{}/stateIds/{}-{}", client_id, height.revision_number(), height.revision_height() @@ -34,8 +34,8 @@ pub fn client_state_bytes(client_id: &str) -> Vec { client_state(client_id).into_bytes() } -pub fn client_state_at_height_bytes(client_id: &str, height: &Height) -> Vec { - client_state_at_height(client_id, height).into_bytes() +pub fn state_id_bytes(client_id: &str, height: &Height) -> Vec { + state_id(client_id, height).into_bytes() } pub fn consensus_state_bytes(client_id: &str, height: &Height) -> Vec { @@ -57,13 +57,13 @@ mod tests { client_state("07-tendermint-0"), "clients/07-tendermint-0/clientState" ); - assert_eq!( - client_state_at_height("07-tendermint-0", &height), - "clients/07-tendermint-0/clientStates/1-23" - ); assert_eq!( consensus_state("07-tendermint-0", &height), "clients/07-tendermint-0/consensusStates/1-23" ); + assert_eq!( + state_id("07-tendermint-0", &height), + "clients/07-tendermint-0/stateIds/1-23" + ); } } From bfe621d4514d04af297f9a50caf64a27d0b2ff85 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 8 Jun 2026 10:50:50 +0900 Subject: [PATCH 28/48] service: store light-client state id on init --- .../ecall-handler/src/light_client/init_client.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/enclave-modules/ecall-handler/src/light_client/init_client.rs b/enclave-modules/ecall-handler/src/light_client/init_client.rs index c899c994..29023a31 100644 --- a/enclave-modules/ecall-handler/src/light_client/init_client.rs +++ b/enclave-modules/ecall-handler/src/light_client/init_client.rs @@ -5,8 +5,8 @@ use core::str::FromStr; use crypto::Signer; use 
ecall_commands::{InitClientInput, InitClientResponse, LightClientResponse}; use lcp_types::ClientId; -use light_client::commitments::{prove_commitment, CommitmentProof}; -use light_client::{commitments::gen_state_id_from_any, ClientKeeper, ClientReader, LightClientResolver}; +use light_client::commitments::{prove_commitment, CommitmentProof, ProxyMessage}; +use light_client::{ClientKeeper, ClientReader, LightClientResolver}; use store::KVStore; pub fn init_client( @@ -34,7 +34,15 @@ pub fn init_client( if ctx.client_exists(&client_id) { return Err(Error::client_already_exists(client_id.to_string())); } - let state_id = gen_state_id_from_any(&input.any_client_state, &input.any_consensus_state)?; + let state_id = match &res.message { + ProxyMessage::UpdateState(message) => message.post_state_id, + message => { + return Err(Error::invalid_argument(format!( + "create_client must return update-state message: actual_type={}", + message.message_type() + ))) + } + }; ctx.store_client_type(client_id.clone(), client_type)?; ctx.store_any_client_state(client_id.clone(), input.any_client_state)?; ctx.store_any_consensus_state(client_id.clone(), res.height, input.any_consensus_state)?; From 55ae24194f66c036ce215e46f176f5b0a5ae8e14 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 9 Jun 2026 15:07:58 +0900 Subject: [PATCH 29/48] Verify speculative base client state --- modules/enclave-api/src/enclave.rs | 21 +++++-- modules/service/src/speculative/service.rs | 71 +++++++++++++++++++++- 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 07330ba6..0ef539ac 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -1,4 +1,5 @@ use crate::errors::{Error, Result}; +use commitments::gen_state_id_from_any; use keymanager::EnclaveKeyManager; use lcp_types::{store_key, Any, EnclaveMetadata, Height}; use sgx_types::{sgx_enclave_id_t, SgxResult}; @@ -193,10 
+194,11 @@ pub trait HostStoreTxManager: CommitStoreAccessor { /// /// The check and apply run under the same serialized update transaction keyed by /// `update_key`, so the accepted base cannot change between verification and commit. - /// The explicit base client state is not looked up by height. Instead, its - /// state ID must match the height-indexed state ID previously stored by a - /// successful serial/speculative update. This keeps the canonical store model - /// aligned with serial UpdateClient: latest client_state plus height-indexed + /// The explicit base client state is not looked up by height. Instead, the + /// caller-supplied `(client_state, consensus_state)` pair must re-derive the + /// height-indexed state ID previously stored by a successful + /// serial/speculative update. This keeps the canonical store model aligned + /// with serial UpdateClient: latest client_state plus height-indexed /// consensus_states and compact state_ids. fn apply_write_set_with_expected_base( &self, @@ -248,7 +250,7 @@ pub trait HostStoreTxManager: CommitStoreAccessor { tx_id: store::TxId, client_id: &str, prev_height: &Height, - _client_state: &Any, + client_state: &Any, consensus_state: &Any, prev_state_id: Option<&[u8]>, ) -> Result<()> @@ -278,6 +280,15 @@ pub trait HostStoreTxManager: CommitStoreAccessor { prev_height.revision_height() )) })?; + let expected_prev_state_id = gen_state_id_from_any(client_state, consensus_state)?.to_vec(); + if expected_prev_state_id.as_slice() != prev_state_id { + return Err(Error::invalid_argument(format!( + "speculative base state_id does not match client_state/consensus_state: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } let state_id_key = store_key::state_id_bytes(client_id, prev_height); let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; if stored_state_id.as_deref() != Some(prev_state_id) { diff --git 
a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 80e669d7..6d22f466 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -770,21 +770,29 @@ mod tests { let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave); let service = SpeculativeService::new(1); - let req = with_explicit_base_state_payload(mk_req( + let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", client_id, Some(Height::new(0, 10)), None, )); let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let prev_state_id = state_id_for_base_state(&req.base_state); + req.base_state.prev_state_id = Some(prev_state_id.clone()); seed_canonical_base_state(&app, client_id, &req.base_state); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::state_id_bytes(client_id, &prev_height), + vec![9; 32], + ); + }); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), write_set: WriteSet::default(), base_state: req.base_state.clone(), observed_transition: ObservedStateTransition { prev_height: Some(prev_height), - prev_state_id: Some(vec![9; 32]), + prev_state_id: Some(prev_state_id), post_height: Height::new(0, 11), post_state_id: vec![1; 32], }, @@ -814,6 +822,65 @@ mod tests { ); } + #[test] + fn stitch_rejects_first_base_state_when_client_state_does_not_match_state_id() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let prev_state_id = state_id_for_base_state(&req.base_state); + seed_canonical_base_state(&app, client_id, 
&req.base_state); + req.base_state.prev_state_id = Some(prev_state_id.clone()); + req.base_state.client_state = Some( + Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![9], + } + .into(), + ); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: Some(prev_state_id), + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + let err = service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect_err("client_state inconsistent with state_id should be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail + .contains("speculative base state_id does not match client_state/consensus_state"), + "unexpected error detail: {}", + err.detail + ); + } + #[test] fn streaming_speculative_batch_executes_before_input_closes() { let client_id = "07-tendermint-0"; From 4f49954a3319eb98d5d014f786020f18db55e6f9 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 9 Jun 2026 15:10:41 +0900 Subject: [PATCH 30/48] Add idle timeout to speculative stream --- modules/service/src/elc.rs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 408f4a5b..2135dd39 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -15,11 +15,15 @@ use lcp_proto::lcp::service::elc::v1::{ MsgVerifyNonMembership, MsgVerifyNonMembershipResponse, QueryClientRequest, QueryClientResponse, }; -use log::debug; +use log::{debug, 
warn}; use std::sync::mpsc; +use std::time::Duration; use store::transaction::{CommitStore, TxAccessor}; +use tokio::time::timeout; use tonic::{Request, Response, Status, Streaming}; +const SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT: Duration = Duration::from_secs(60); + #[tonic::async_trait] impl Msg for ElcService where @@ -150,7 +154,28 @@ where SpeculativeHeaderMemoryBudget::new(MAX_SPECULATIVE_BATCH_HEADER_BYTES); let mut units = 0usize; - while let Some(chunk_msg) = stream.message().await? { + loop { + let chunk_msg = match timeout(SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT, stream.message()) + .await + { + Ok(result) => result?, + Err(_) => { + warn!( + "speculative update client batch stream idle timeout: client_id={} timeout_secs={}", + client_id, + SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT.as_secs() + ); + drop(tx); + let _ = scheduler.await; + return Err(Status::deadline_exceeded(format!( + "speculative update client batch stream idle timeout after {} seconds", + SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT.as_secs() + ))); + } + }; + let Some(chunk_msg) = chunk_msg else { + break; + }; let header_memory = header_memory_budget.reserve_for_chunk(&chunk_msg).await?; if let Some(unit) = decoder.push_chunk(chunk_msg.chunk, header_memory)? 
{ units += 1; From b46a2eb18de63110c1bbca12b418aebe0c2f715a Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 9 Jun 2026 15:25:49 +0900 Subject: [PATCH 31/48] Align memstore speculative write extraction --- modules/service/src/elc.rs | 3 + modules/store/src/memory.rs | 107 ++++++++++++++++++++++++++++++------ 2 files changed, 93 insertions(+), 17 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 2135dd39..fbaccd84 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -139,6 +139,9 @@ where let mut stream = request.into_inner(); let init = decode_speculative_batch_stream_init(&mut stream).await?; let client_id = init.client_id; + // This channel is intentionally unbounded: resident header bytes are + // bounded by `SpeculativeHeaderMemoryBudget`, which is the actual + // backpressure mechanism for large speculative batch inputs. let (tx, rx) = mpsc::channel(); let app = self.app.clone(); let speculative = self.speculative.clone(); diff --git a/modules/store/src/memory.rs b/modules/store/src/memory.rs index 950d6ee5..3e7983a4 100644 --- a/modules/store/src/memory.rs +++ b/modules/store/src/memory.rs @@ -47,6 +47,10 @@ impl CommitStore for MemStore { self.0.lock().unwrap().create_transaction(_update_key) } + fn create_speculative_transaction(&mut self) -> Result { + self.0.lock().unwrap().create_speculative_transaction() + } + fn begin(&mut self, tx: &::PreparedTx) -> Result<()> { self.0.lock().unwrap().begin(tx) } @@ -66,7 +70,7 @@ impl CommitStore for MemStore { #[derive(Default, Debug)] pub struct InnerMemStore { - running_tx_exists: bool, + running_tx_kind: Option, latest_tx_id: TxId, uncommitted_data: HashMap, Option>>, committed_data: HashMap, Vec>, @@ -74,7 +78,7 @@ pub struct InnerMemStore { impl KVStore for InnerMemStore { fn get(&self, key: &[u8]) -> Option> { - if self.running_tx_exists { + if self.running_tx_kind.is_some() { match self.uncommitted_data.get(key) { Some(v) => v.clone(), None 
=> self.committed_data.get(key).map(|v| v.to_vec()), @@ -85,7 +89,7 @@ impl KVStore for InnerMemStore { } fn set(&mut self, key: Vec, value: Vec) { - if self.running_tx_exists { + if self.running_tx_kind.is_some() { self.uncommitted_data.insert(key, Some(value)); } else { self.committed_data.insert(key, value); @@ -93,7 +97,7 @@ impl KVStore for InnerMemStore { } fn remove(&mut self, key: &[u8]) { - if self.running_tx_exists { + if self.running_tx_kind.is_some() { self.uncommitted_data.insert(key.to_vec(), None); } else { self.committed_data.remove(key); @@ -123,18 +127,29 @@ impl CommitStore for InnerMemStore { _update_key: Option, ) -> Result { self.latest_tx_id.safe_incr()?; - Ok(MemTx(self.latest_tx_id)) + Ok(MemTx { + id: self.latest_tx_id, + kind: MemTxKind::Regular, + }) + } + + fn create_speculative_transaction(&mut self) -> Result { + self.latest_tx_id.safe_incr()?; + Ok(MemTx { + id: self.latest_tx_id, + kind: MemTxKind::Speculative, + }) } - fn begin(&mut self, _tx: &::PreparedTx) -> Result<()> { - assert!(!self.running_tx_exists); - self.running_tx_exists = true; + fn begin(&mut self, tx: &::PreparedTx) -> Result<()> { + assert!(self.running_tx_kind.is_none()); + self.running_tx_kind = Some(tx.kind); Ok(()) } fn commit(&mut self, _tx: ::PreparedTx) -> Result<()> { - assert!(self.running_tx_exists); - self.running_tx_exists = false; + assert!(self.running_tx_kind.is_some()); + self.running_tx_kind = None; let data = HashMap::, Option>>::default(); let uncommitted_data = std::mem::replace(&mut self.uncommitted_data, data); for it in uncommitted_data { @@ -146,26 +161,40 @@ impl CommitStore for InnerMemStore { Ok(()) } - fn take_write_set(&mut self, _tx: ::PreparedTx) -> Result { - assert!(self.running_tx_exists); - self.running_tx_exists = false; + fn take_write_set(&mut self, tx: ::PreparedTx) -> Result { + assert!(self.running_tx_kind.is_some()); + self.running_tx_kind = None; let data = HashMap::, Option>>::default(); let uncommitted_data = 
std::mem::replace(&mut self.uncommitted_data, data); + if tx.kind != MemTxKind::Speculative { + return Err(crate::Error::not_supported_operation( + "take_write_set is only available for speculative transactions".to_string(), + )); + } Ok(uncommitted_data.into_iter().collect()) } fn rollback(&mut self, _tx: ::PreparedTx) { - assert!(self.running_tx_exists); - self.running_tx_exists = false; + assert!(self.running_tx_kind.is_some()); + self.running_tx_kind = None; self.uncommitted_data.clear(); } } -pub struct MemTx(TxId); +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum MemTxKind { + Regular, + Speculative, +} + +pub struct MemTx { + id: TxId, + kind: MemTxKind, +} impl Tx for MemTx { fn get_id(&self) -> TxId { - self.0 + self.id } } @@ -176,3 +205,47 @@ impl CreatedTx for MemTx { Ok(self) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn key(i: u8) -> Vec { + vec![i] + } + + fn value(i: u8) -> Vec { + vec![i + 10] + } + + #[test] + fn take_write_set_requires_speculative_transaction() { + let mut store = InnerMemStore::default(); + let tx = store.create_transaction(None).unwrap().prepare().unwrap(); + store.begin(&tx).unwrap(); + store.tx_set(tx.get_id(), key(1), value(1)).unwrap(); + + assert!(store.take_write_set(tx).is_err()); + assert_eq!(store.get(&key(1)), None); + } + + #[test] + fn take_write_set_extracts_speculative_writes_without_commit() { + let mut store = InnerMemStore::default(); + store.set(key(0), value(0)); + let tx = store + .create_speculative_transaction() + .unwrap() + .prepare() + .unwrap(); + store.begin(&tx).unwrap(); + store.tx_set(tx.get_id(), key(1), value(1)).unwrap(); + store.tx_remove(tx.get_id(), &key(0)).unwrap(); + + let writes = store.take_write_set(tx).unwrap(); + assert_eq!(writes.get(&key(1)), Some(&Some(value(1)))); + assert_eq!(writes.get(&key(0)), Some(&None)); + assert_eq!(store.get(&key(1)), None); + assert_eq!(store.get(&key(0)), Some(value(0))); + } +} From a7673df89a9f0cd85b2bee26f64b3eee6d51caee Mon Sep 17 
00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 9 Jun 2026 17:15:04 +0900 Subject: [PATCH 32/48] Reject stale speculative base client state --- modules/enclave-api/src/enclave.rs | 25 ++++-- modules/service/src/speculative/service.rs | 92 ++++++++++++++++++++++ 2 files changed, 112 insertions(+), 5 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 0ef539ac..79dfc93f 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -194,12 +194,12 @@ pub trait HostStoreTxManager: CommitStoreAccessor { /// /// The check and apply run under the same serialized update transaction keyed by /// `update_key`, so the accepted base cannot change between verification and commit. - /// The explicit base client state is not looked up by height. Instead, the - /// caller-supplied `(client_state, consensus_state)` pair must re-derive the + /// The explicit base client state must match the latest canonical + /// client_state. This prevents an old, historically valid base state from + /// overwriting a newer latest-only client_state. The caller-supplied + /// `(client_state, consensus_state)` pair must also re-derive the /// height-indexed state ID previously stored by a successful - /// serial/speculative update. This keeps the canonical store model aligned - /// with serial UpdateClient: latest client_state plus height-indexed - /// consensus_states and compact state_ids. + /// serial/speculative update. 
fn apply_write_set_with_expected_base( &self, update_key: UpdateKey, @@ -257,6 +257,21 @@ pub trait HostStoreTxManager: CommitStoreAccessor { where S: TxAccessor, { + let client_state_key = store_key::client_state_bytes(client_id); + let client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .map_err(Error::bincode_encode)?; + let canonical_client_state = + self.use_mut_store(|store| store.tx_get(tx_id, &client_state_key))?; + if canonical_client_state.as_deref() != Some(client_state_value.as_slice()) { + return Err(Error::invalid_argument(format!( + "stored speculative base client_state mismatch: client_id={} height={}-{}", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + } + let consensus_state_key = store_key::consensus_state_bytes(client_id, prev_height); let consensus_state_value = bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 6d22f466..c875b590 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -474,6 +474,22 @@ mod tests { .to_vec() } + fn set_canonical_client_state( + app: &AppService, + client_id: &str, + client_state: &Any, + ) { + let client_state_value = + bincode::serde::encode_to_vec(client_state, bincode::config::standard()) + .expect("encode client_state"); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::client_state_bytes(client_id), + client_state_value, + ); + }); + } + fn mk_result( prev_height: Option, prev_state_id: Option<&[u8]>, @@ -577,6 +593,14 @@ mod tests { Some(Height::new(0, 10)), None, )); + set_canonical_client_state( + &app, + client_id, + req.base_state + .client_state + .as_ref() + .expect("test base client_state"), + ); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), write_set: 
WriteSet::default(), @@ -845,6 +869,14 @@ mod tests { } .into(), ); + set_canonical_client_state( + &app, + client_id, + req.base_state + .client_state + .as_ref() + .expect("mutated client_state"), + ); let result = SpeculativeUpdateClientResult { response: MsgUpdateClientResponse::default(), write_set: WriteSet::default(), @@ -881,6 +913,66 @@ mod tests { ); } + #[test] + fn stitch_rejects_first_base_state_when_canonical_client_state_advanced() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + let prev_state_id = state_id_for_base_state(&req.base_state); + req.base_state.prev_state_id = Some(prev_state_id.clone()); + seed_canonical_base_state(&app, client_id, &req.base_state); + set_canonical_client_state( + &app, + client_id, + &Any { + type_url: "/ibc.mock.ClientState".to_string(), + value: vec![42], + }, + ); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: Some(prev_state_id), + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + let err = service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect_err("stale base client_state should be rejected"); + + assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); + assert_eq!(err.unit_id.as_deref(), 
Some("unit-0000")); + assert!( + err.detail + .contains("stored speculative base client_state mismatch"), + "unexpected error detail: {}", + err.detail + ); + } + #[test] fn streaming_speculative_batch_executes_before_input_closes() { let client_id = "07-tendermint-0"; From d9425c41fda1c9bae751c0bf05b20c2114d52b06 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Wed, 10 Jun 2026 11:25:20 +0900 Subject: [PATCH 33/48] Prevent partial speculative stream commits --- modules/service/src/elc.rs | 14 +- modules/service/src/speculative/scheduler.rs | 23 +++- modules/service/src/speculative/service.rs | 132 ++++++++++++++----- 3 files changed, 132 insertions(+), 37 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index fbaccd84..f9a01da5 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -1,4 +1,5 @@ use crate::service::{AppService, ElcService}; +use crate::speculative::scheduler::StreamingSpeculativeBatchInput; use crate::speculative::stream::{ decode_speculative_batch_stream_init, encode_stitched_batch_result, SpeculativeBatchStreamDecoder, SpeculativeHeaderMemoryBudget, @@ -182,7 +183,7 @@ where let header_memory = header_memory_budget.reserve_for_chunk(&chunk_msg).await?; if let Some(unit) = decoder.push_chunk(chunk_msg.chunk, header_memory)? 
{ units += 1; - if tx.send(unit).is_err() { + if tx.send(StreamingSpeculativeBatchInput::Unit(unit)).is_err() { let result = scheduler.await.map_err(|e| { Status::aborted(format!("speculative batch worker failed: {e}")) })?; @@ -196,6 +197,17 @@ where } } decoder.finish()?; + if tx.send(StreamingSpeculativeBatchInput::Complete).is_err() { + let result = scheduler + .await + .map_err(|e| Status::aborted(format!("speculative batch worker failed: {e}")))?; + return match result { + Ok(_) => Err(Status::aborted( + "speculative batch scheduler stopped before batch_end", + )), + Err(e) => Err(Status::aborted(format!("{:?}: {}", e.kind, e.detail))), + }; + } drop(tx); debug!( diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 96b9f39a..32c5e476 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -20,6 +20,11 @@ pub(crate) struct StreamingSpeculativeBatchResult { pub(crate) results: SpeculativeUpdateClientBatchResult, } +pub(crate) enum StreamingSpeculativeBatchInput { + Unit(ResidentSpeculativeUpdateClientRequest), + Complete, +} + fn sha256_hex(bytes: &[u8]) -> String { hex::encode(sha2::Sha256::digest(bytes)) } @@ -39,7 +44,7 @@ pub(crate) fn execute_speculative_update_client_stream( speculative: &SpeculativeService, app: &AppService, client_id: String, - units: Receiver, + inputs: Receiver, ) -> core::result::Result where S: CommitStore + TxAccessor + Send + 'static, @@ -62,7 +67,12 @@ where scope.spawn(move || streaming_speculative_worker(speculative, app, shared)); } - for unit in units { + let mut input_completed = false; + for input in inputs { + let StreamingSpeculativeBatchInput::Unit(unit) = input else { + input_completed = true; + break; + }; let mut state = shared.state.lock().unwrap(); if state.failure.is_some() { break; @@ -77,6 +87,15 @@ where } let mut state = shared.state.lock().unwrap(); + if !input_completed { + state + .failure + 
.get_or_insert_with(|| SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::BatchSizeMismatch, + unit_id: None, + detail: "speculative batch input stream closed before batch_end".to_string(), + }); + } state.closed = true; shared.ready.notify_all(); while state.failure.is_none() && state.has_unfinished_work() { diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index c875b590..6b3e7d0e 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -1,5 +1,9 @@ use super::permit::PermitGate; -use super::scheduler::execute_speculative_update_client_stream as execute_stream_scheduler; +use super::scheduler::{ + execute_speculative_update_client_stream as execute_stream_scheduler, + StreamingSpeculativeBatchInput, +}; +#[cfg(test)] use super::stream::ResidentSpeculativeUpdateClientRequest; use super::types::{ ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, @@ -175,7 +179,7 @@ impl SpeculativeService { &self, app: &AppService, client_id: String, - units: Receiver, + units: Receiver, ) -> core::result::Result where S: CommitStore + TxAccessor + Send + 'static, @@ -1011,8 +1015,10 @@ mod tests { }); first_req.base_state.prev_state_id = Some(state_id_for_base_state(&first_req.base_state)); seed_canonical_base_state(&app, client_id, &first_req.base_state); - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(first_req)) - .expect("send first unit"); + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(first_req), + )) + .expect("send first unit"); for _ in 0..100 { if app.enclave.observed_max_in_flight() >= 1 { @@ -1025,35 +1031,39 @@ mod tests { "expected first unit to start before input stream closes" ); - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - with_explicit_base_state_payload(SpeculativeUpdateClientRequest { - unit_id: "unit-0001".to_string(), - update: MsgUpdateClient { - 
client_id: client_id.to_string(), - signer: { - let mut signer = vec![0; 20]; - signer[19] = 1; - signer + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(with_explicit_base_state_payload( + SpeculativeUpdateClientRequest { + unit_id: "unit-0001".to_string(), + update: MsgUpdateClient { + client_id: client_id.to_string(), + signer: { + let mut signer = vec![0; 20]; + signer[19] = 1; + signer + }, + header: Some(Any { + type_url: "/ibc.mock.Header".to_string(), + value: vec![2], + }), + ..Default::default() + }, + base_state: ExplicitStateRef { + prev_height: Some(Height::new(0, 11)), + prev_state_id: Some({ + let mut prev_state_id = vec![0; 32]; + prev_state_id[31] = 1; + prev_state_id + }), + client_state: None, + consensus_state: None, }, - header: Some(Any { - type_url: "/ibc.mock.Header".to_string(), - value: vec![2], - }), - ..Default::default() - }, - base_state: ExplicitStateRef { - prev_height: Some(Height::new(0, 11)), - prev_state_id: Some({ - let mut prev_state_id = vec![0; 32]; - prev_state_id[31] = 1; - prev_state_id - }), - client_state: None, - consensus_state: None, }, - }), + )), )) .expect("send second unit"); + tx.send(StreamingSpeculativeBatchInput::Complete) + .expect("send batch complete"); drop(tx); let result = handle @@ -1064,6 +1074,56 @@ mod tests { assert_eq!(app.enclave.observed_max_in_flight(), 1); } + #[test] + fn streaming_speculative_batch_rejects_channel_close_without_complete() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let (tx, rx) = std::sync::mpsc::sync_channel(1); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_speculative_update_client_stream( + &worker_app, + client_id_for_worker, + rx, 
+ ) + }); + + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + req.base_state.prev_state_id = Some(state_id_for_base_state(&req.base_state)); + seed_canonical_base_state(&app, client_id, &req.base_state); + + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(req), + )) + .expect("send first unit"); + drop(tx); + + let err = handle + .join() + .expect("streaming worker thread") + .expect_err("missing batch completion should fail"); + assert_eq!(err.kind, SpeculativeBatchFailureKind::BatchSizeMismatch); + assert!( + err.detail.contains("closed before batch_end"), + "unexpected error detail: {}", + err.detail + ); + assert_eq!( + app.enclave.use_mut_store(|store| store.get(&[0])), + None, + "truncated stream must not apply speculative write set" + ); + } + #[test] fn streaming_speculative_batch_rejects_incomplete_base_state() { let client_id = "07-tendermint-0"; @@ -1082,8 +1142,8 @@ mod tests { ) }); - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered( - SpeculativeUpdateClientRequest { + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(SpeculativeUpdateClientRequest { unit_id: "unit-0000".to_string(), update: MsgUpdateClient { client_id: client_id.to_string(), @@ -1100,7 +1160,7 @@ mod tests { client_state: None, consensus_state: None, }, - }, + }), )) .expect("send first unit"); drop(tx); @@ -1172,9 +1232,13 @@ mod tests { } seed_canonical_base_state(&app, client_id, &requests[0].base_state); for req in requests { - tx.send(ResidentSpeculativeUpdateClientRequest::unmetered(req)) - .expect("send unit"); + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(req), + )) + .expect("send unit"); } + tx.send(StreamingSpeculativeBatchInput::Complete) + .expect("send batch complete"); drop(tx); let result = handle From 
485e3488221050a4ac0f4ca1d254acb10957d745 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Wed, 10 Jun 2026 11:41:07 +0900 Subject: [PATCH 34/48] Tighten speculative stream resource handling --- modules/enclave-api/src/api/primitive.rs | 24 ++-- modules/service/Cargo.toml | 2 +- modules/service/src/elc.rs | 20 +-- modules/service/src/speculative/service.rs | 134 ++++++++++++++++++++- modules/service/src/speculative/stream.rs | 43 ++++--- modules/store/src/memory.rs | 48 ++++++-- 6 files changed, 226 insertions(+), 45 deletions(-) diff --git a/modules/enclave-api/src/api/primitive.rs b/modules/enclave-api/src/api/primitive.rs index 26ab0bc6..1f6a37e9 100644 --- a/modules/enclave-api/src/api/primitive.rs +++ b/modules/enclave-api/src/api/primitive.rs @@ -87,18 +87,20 @@ fn execute_prepared_command( cmd: Command, tx_id: TxId, ) -> Result { - let current_timestamp = Time::now(); - let cctx = match cmd.get_enclave_key() { - Some(addr) => { - let ski = enclave.get_key_manager().load(addr)?; - CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx_id) - } - None => CommandContext::new(current_timestamp, None, tx_id), - }; + enclave.with_ecall_permit(|| { + let current_timestamp = Time::now(); + let cctx = match cmd.get_enclave_key() { + Some(addr) => { + let ski = enclave.get_key_manager().load(addr)?; + CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx_id) + } + None => CommandContext::new(current_timestamp, None, tx_id), + }; - let ecmd = ECallCommand::new(cctx, cmd); - debug!("try to execute command: {:?}", ecmd); - enclave.with_ecall_permit(|| raw_execute_command(enclave.get_eid(), ecmd)) + let ecmd = ECallCommand::new(cctx, cmd); + debug!("try to execute command: {:?}", ecmd); + raw_execute_command(enclave.get_eid(), ecmd) + }) } pub(crate) fn raw_execute_command( diff --git a/modules/service/Cargo.toml b/modules/service/Cargo.toml index 6b7c9eb4..562bd15f 100644 --- a/modules/service/Cargo.toml +++ b/modules/service/Cargo.toml @@ -10,7 +10,7 @@ 
tokio = { version = "1.0", features = ["full"] } anyhow = { version = "1.0.56" } log = { version = "0.4.8" } serde = { version = "1.0", features = ["derive"] } -bincode = { version = "=2.0.0-rc.3", default-features = false, features = ["serde", "alloc"] } +bincode = { version = "2.0.0-rc.3", default-features = false, features = ["serde", "alloc"] } hex = { version = "0.4", default-features = false, features = ["alloc"] } sha2 = { version = "0.10.8", default-features = false } diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index f9a01da5..d983d84e 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -2,9 +2,8 @@ use crate::service::{AppService, ElcService}; use crate::speculative::scheduler::StreamingSpeculativeBatchInput; use crate::speculative::stream::{ decode_speculative_batch_stream_init, encode_stitched_batch_result, - SpeculativeBatchStreamDecoder, SpeculativeHeaderMemoryBudget, + SpeculativeBatchStreamDecoder, }; -use crate::MAX_SPECULATIVE_BATCH_HEADER_BYTES; use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; use lcp_proto::google::protobuf::Any; use lcp_proto::lcp::service::elc::v1::msg_update_client_stream_chunk::Chunk; @@ -141,21 +140,26 @@ where let init = decode_speculative_batch_stream_init(&mut stream).await?; let client_id = init.client_id; // This channel is intentionally unbounded: resident header bytes are - // bounded by `SpeculativeHeaderMemoryBudget`, which is the actual - // backpressure mechanism for large speculative batch inputs. + // bounded by the service-global `SpeculativeHeaderMemoryBudget`, which + // is the actual backpressure mechanism for large speculative batch + // inputs across concurrent streams. 
let (tx, rx) = mpsc::channel(); let app = self.app.clone(); let speculative = self.speculative.clone(); let scheduler_client_id = client_id.clone(); let service = self.clone(); let scheduler = tokio::task::spawn_blocking(move || { - service.with_client_update_serialized(&scheduler_client_id.clone(), || { - speculative.execute_speculative_update_client_stream(&app, scheduler_client_id, rx) + let (batch, results) = speculative.execute_speculative_update_client_stream_batch( + &app, + scheduler_client_id.clone(), + rx, + )?; + service.with_client_update_serialized(&scheduler_client_id, || { + speculative.stitch_executed_speculative_update_client_stream(&app, batch, results) }) }); let mut decoder = SpeculativeBatchStreamDecoder::new(client_id.clone()); - let header_memory_budget = - SpeculativeHeaderMemoryBudget::new(MAX_SPECULATIVE_BATCH_HEADER_BYTES); + let header_memory_budget = self.speculative.header_memory_budget(); let mut units = 0usize; loop { diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index 6b3e7d0e..ce48a5f9 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -5,11 +5,12 @@ use super::scheduler::{ }; #[cfg(test)] use super::stream::ResidentSpeculativeUpdateClientRequest; +use super::stream::SpeculativeHeaderMemoryBudget; use super::types::{ ExplicitStateRef, ObservedStateTransition, SpeculativeBatchFailure, SpeculativeBatchFailureKind, SpeculativeUpdateClientBatch, SpeculativeUpdateClientBatchResult, SpeculativeUpdateClientRequest, SpeculativeUpdateClientResult, StitchedUpdateClientBatchResult, - StitchedUpdateClientResult, + StitchedUpdateClientResult, MAX_SPECULATIVE_BATCH_HEADER_BYTES, }; use super::validation::{validate_linear_batch_requests, validate_linear_transitions}; use crate::service::AppService; @@ -28,6 +29,7 @@ use store::WriteSet; pub struct SpeculativeService { speculative_concurrency_limit: usize, speculative_request_permits: 
Arc, + header_memory_budget: SpeculativeHeaderMemoryBudget, } impl Clone for SpeculativeService { @@ -35,6 +37,7 @@ impl Clone for SpeculativeService { Self { speculative_concurrency_limit: self.speculative_concurrency_limit, speculative_request_permits: self.speculative_request_permits.clone(), + header_memory_budget: self.header_memory_budget.clone(), } } } @@ -44,6 +47,9 @@ impl SpeculativeService { Self { speculative_concurrency_limit: speculative_concurrency_limit.max(1), speculative_request_permits: Arc::new(PermitGate::new(speculative_concurrency_limit)), + header_memory_budget: SpeculativeHeaderMemoryBudget::new( + MAX_SPECULATIVE_BATCH_HEADER_BYTES, + ), } } @@ -51,6 +57,10 @@ impl SpeculativeService { self.speculative_concurrency_limit } + pub(crate) fn header_memory_budget(&self) -> SpeculativeHeaderMemoryBudget { + self.header_memory_budget.clone() + } + #[allow(clippy::result_large_err)] pub fn with_speculative_request_permit( &self, @@ -175,12 +185,34 @@ impl SpeculativeService { }) } + #[cfg(test)] pub(crate) fn execute_speculative_update_client_stream( &self, app: &AppService, client_id: String, units: Receiver, ) -> core::result::Result + where + S: CommitStore + TxAccessor + Send + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static, + { + let (batch, results) = + self.execute_speculative_update_client_stream_batch(app, client_id, units)?; + self.stitch_speculative_update_client_batch(app, batch, results) + } + + pub(crate) fn execute_speculative_update_client_stream_batch( + &self, + app: &AppService, + client_id: String, + units: Receiver, + ) -> core::result::Result< + ( + SpeculativeUpdateClientBatch, + SpeculativeUpdateClientBatchResult, + ), + SpeculativeBatchFailure, + > where S: CommitStore + TxAccessor + Send + 'static, E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + Send + Sync + 'static, @@ -190,7 +222,20 @@ impl SpeculativeService { client_id, units: batch_result.requests, }; - 
self.stitch_speculative_update_client_batch(app, batch, batch_result.results) + Ok((batch, batch_result.results)) + } + + pub(crate) fn stitch_executed_speculative_update_client_stream( + &self, + app: &AppService, + batch: SpeculativeUpdateClientBatch, + results: SpeculativeUpdateClientBatchResult, + ) -> core::result::Result + where + S: CommitStore + TxAccessor + 'static, + E: EnclaveProtoAPI + SpeculativeEnclaveCommandAPI + 'static, + { + self.stitch_speculative_update_client_batch(app, batch, results) } } @@ -254,6 +299,10 @@ mod tests { }; use keymanager::EnclaveKeyManager; use lcp_proto::google::protobuf::Any; + use lcp_proto::lcp::service::elc::v1::{ + msg_speculative_update_client_batch_stream_chunk::Chunk as BatchChunk, + MsgSpeculativeUpdateClientBatchStreamChunk, SpeculativeUpdateClientUnitHeaderChunk, + }; use lcp_types::Height; use lcp_types::{EnclaveMetadata, Time}; use sgx_types::{sgx_enclave_id_t, sgx_status_t}; @@ -494,6 +543,30 @@ mod tests { }); } + #[test] + fn speculative_service_clones_share_header_memory_budget() { + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime"); + let service = SpeculativeService::new(1); + let cloned = service.clone(); + let left_budget = service.header_memory_budget(); + let right_budget = cloned.header_memory_budget(); + let chunk_msg = MsgSpeculativeUpdateClientBatchStreamChunk { + chunk: Some(BatchChunk::UnitHeaderChunk( + SpeculativeUpdateClientUnitHeaderChunk { + unit_id: "unit-0000".to_string(), + data: b"abc".to_vec(), + }, + )), + }; + + let reservation = runtime + .block_on(left_budget.reserve_for_chunk(&chunk_msg)) + .expect("header memory"); + assert_eq!(right_budget.used_bytes(), 3); + drop(reservation); + assert_eq!(right_budget.used_bytes(), 0); + } + fn mk_result( prev_height: Option, prev_state_id: Option<&[u8]>, @@ -1099,6 +1172,7 @@ mod tests { None, )); req.base_state.prev_state_id = Some(state_id_for_base_state(&req.base_state)); + req.update.signer = vec![0; 20]; 
seed_canonical_base_state(&app, client_id, &req.base_state); tx.send(StreamingSpeculativeBatchInput::Unit( @@ -1124,6 +1198,62 @@ mod tests { ); } + #[test] + fn streaming_speculative_batch_execution_does_not_apply_until_stitched() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave); + let service = SpeculativeService::new(1); + let (tx, rx) = std::sync::mpsc::sync_channel(2); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_speculative_update_client_stream_batch( + &worker_app, + client_id_for_worker, + rx, + ) + }); + + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + req.base_state.prev_state_id = Some(state_id_for_base_state(&req.base_state)); + req.update.signer = vec![0; 20]; + seed_canonical_base_state(&app, client_id, &req.base_state); + + tx.send(StreamingSpeculativeBatchInput::Unit( + ResidentSpeculativeUpdateClientRequest::unmetered(req), + )) + .expect("send first unit"); + tx.send(StreamingSpeculativeBatchInput::Complete) + .expect("send batch complete"); + drop(tx); + + let (batch, results) = handle + .join() + .expect("streaming worker thread") + .expect("streaming speculative batch execution"); + assert_eq!( + app.enclave.use_mut_store(|store| store.get(&[0])), + None, + "execution alone must not apply speculative write set" + ); + + service + .stitch_executed_speculative_update_client_stream(&app, batch, results) + .expect("stitch executed stream"); + assert_eq!( + app.enclave.use_mut_store(|store| store.get(&[0])), + Some(vec![0]), + "stitch should apply speculative write set" + ); + } + #[test] fn streaming_speculative_batch_rejects_incomplete_base_state() { let client_id = "07-tendermint-0"; diff --git 
a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs index 43e62d5b..07178950 100644 --- a/modules/service/src/speculative/stream.rs +++ b/modules/service/src/speculative/stream.rs @@ -15,7 +15,7 @@ use lcp_proto::lcp::service::elc::v1::{ StitchedSpeculativeUpdateClientUnitResult as ProtoStitchedSpeculativeUpdateClientUnitResult, }; use lcp_types::Height; -use log::info; +use log::debug; use sha2::Digest; use std::collections::HashSet; use std::sync::{Arc, Condvar, Mutex}; @@ -108,7 +108,7 @@ impl SpeculativeHeaderMemoryBudget { } #[cfg(test)] - fn used_bytes(&self) -> usize { + pub(crate) fn used_bytes(&self) -> usize { self.inner.state.lock().unwrap().used_bytes } } @@ -214,7 +214,6 @@ pub(crate) struct SpeculativeBatchStreamDecoder { open_unit: Option, seen_unit_ids: HashSet, closed: bool, - total_header_bytes: usize, } impl SpeculativeBatchStreamDecoder { @@ -226,7 +225,6 @@ impl SpeculativeBatchStreamDecoder { open_unit: None, seen_unit_ids: HashSet::new(), closed: false, - total_header_bytes: 0, } } @@ -269,7 +267,6 @@ impl SpeculativeBatchStreamDecoder { append_speculative_unit_header_chunk( &mut self.open_unit, header_chunk, - &mut self.total_header_bytes, header_memory, )?; Ok(None) @@ -324,7 +321,7 @@ pub(crate) async fn decode_speculative_batch_stream_init( ) -> Result { match stream.message().await? 
{ Some(chunk) => match chunk.chunk { - Some(BatchChunk::Init(init)) => Ok(init), + Some(BatchChunk::Init(init)) => validate_speculative_batch_stream_init(init), _ => Err(Status::invalid_argument( "first message must be of type Init", )), @@ -335,6 +332,18 @@ pub(crate) async fn decode_speculative_batch_stream_init( } } +#[allow(clippy::result_large_err)] +fn validate_speculative_batch_stream_init( + init: SpeculativeUpdateClientBatchStreamInit, +) -> Result { + if init.client_id.is_empty() { + return Err(Status::invalid_argument( + "speculative batch stream init requires client_id", + )); + } + Ok(init) +} + #[allow(clippy::result_large_err)] fn validate_speculative_unit_init( unit_init: &SpeculativeUpdateClientUnitInit, @@ -361,7 +370,6 @@ fn validate_speculative_unit_init( fn append_speculative_unit_header_chunk( open_unit: &mut Option, header_chunk: SpeculativeUpdateClientUnitHeaderChunk, - total_header_bytes: &mut usize, header_memory: SpeculativeHeaderMemoryReservation, ) -> Result<(), Status> { if header_chunk.data.is_empty() { @@ -389,9 +397,7 @@ fn append_speculative_unit_header_chunk( ))); } - let chunk_len = header_chunk.data.len(); open.header_bytes.extend(header_chunk.data); - *total_header_bytes += chunk_len; open.header_memory.merge(header_memory); validate_speculative_unit_header_payload_len(&open.init.unit_id, open.header_bytes.len())?; Ok(()) @@ -434,7 +440,7 @@ fn close_speculative_unit( open.init.unit_id ))); } - info!( + debug!( "received speculative update client unit: client_id={} unit_id={} header_bytes={} header_sha256={}", client_id, open.init.unit_id, @@ -549,10 +555,10 @@ fn encode_observed_transition(transition: ObservedStateTransition) -> ProtoObser #[cfg(test)] mod tests { use super::{ - decode_speculative_batch, validate_speculative_unit_header_payload_len, - DecodedSpeculativeBatchRequest, SpeculativeBatchStreamDecoder, - SpeculativeHeaderMemoryBudget, SpeculativeHeaderMemoryReservation, - MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES, + 
decode_speculative_batch, validate_speculative_batch_stream_init, + validate_speculative_unit_header_payload_len, DecodedSpeculativeBatchRequest, + SpeculativeBatchStreamDecoder, SpeculativeHeaderMemoryBudget, + SpeculativeHeaderMemoryReservation, MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES, }; use crate::{ ExplicitStateRef, SpeculativeUpdateClientRequest, MAX_SPECULATIVE_BATCH_UNITS, @@ -709,6 +715,15 @@ mod tests { assert_resource_exhausted_contains(err, "speculative unit header payload too large"); } + #[test] + fn validate_speculative_batch_stream_init_rejects_empty_client_id() { + let err = validate_speculative_batch_stream_init(SpeculativeUpdateClientBatchStreamInit { + client_id: String::new(), + }) + .unwrap_err(); + assert_invalid_argument_contains(err, "requires client_id"); + } + #[test] fn decode_speculative_batch_stream_chunks_decodes_units() { let request = decode_stream_chunks([ diff --git a/modules/store/src/memory.rs b/modules/store/src/memory.rs index 3e7983a4..64dc1be5 100644 --- a/modules/store/src/memory.rs +++ b/modules/store/src/memory.rs @@ -142,13 +142,21 @@ impl CommitStore for InnerMemStore { } fn begin(&mut self, tx: &::PreparedTx) -> Result<()> { - assert!(self.running_tx_kind.is_none()); + if self.running_tx_kind.is_some() { + return Err(crate::Error::begin_tx( + "MemStore supports only one running transaction".to_string(), + )); + } self.running_tx_kind = Some(tx.kind); Ok(()) } - fn commit(&mut self, _tx: ::PreparedTx) -> Result<()> { - assert!(self.running_tx_kind.is_some()); + fn commit(&mut self, tx: ::PreparedTx) -> Result<()> { + if self.running_tx_kind != Some(tx.kind) { + return Err(crate::Error::commit_tx( + "MemStore transaction kind mismatch or no running transaction".to_string(), + )); + } self.running_tx_kind = None; let data = HashMap::, Option>>::default(); let uncommitted_data = std::mem::replace(&mut self.uncommitted_data, data); @@ -162,20 +170,26 @@ impl CommitStore for InnerMemStore { } fn take_write_set(&mut self, 
tx: ::PreparedTx) -> Result { - assert!(self.running_tx_kind.is_some()); - self.running_tx_kind = None; - let data = HashMap::, Option>>::default(); - let uncommitted_data = std::mem::replace(&mut self.uncommitted_data, data); + if self.running_tx_kind != Some(tx.kind) { + return Err(crate::Error::commit_tx( + "MemStore transaction kind mismatch or no running transaction".to_string(), + )); + } if tx.kind != MemTxKind::Speculative { return Err(crate::Error::not_supported_operation( "take_write_set is only available for speculative transactions".to_string(), )); } + self.running_tx_kind = None; + let data = HashMap::, Option>>::default(); + let uncommitted_data = std::mem::replace(&mut self.uncommitted_data, data); Ok(uncommitted_data.into_iter().collect()) } - fn rollback(&mut self, _tx: ::PreparedTx) { - assert!(self.running_tx_kind.is_some()); + fn rollback(&mut self, tx: ::PreparedTx) { + if self.running_tx_kind != Some(tx.kind) { + return; + } self.running_tx_kind = None; self.uncommitted_data.clear(); } @@ -226,6 +240,22 @@ mod tests { store.tx_set(tx.get_id(), key(1), value(1)).unwrap(); assert!(store.take_write_set(tx).is_err()); + assert_eq!(store.get(&key(1)), Some(value(1))); + } + + #[test] + fn begin_rejects_overlapping_transactions_without_panicking() { + let mut store = InnerMemStore::default(); + let tx1 = store.create_transaction(None).unwrap().prepare().unwrap(); + let tx2 = store + .create_speculative_transaction() + .unwrap() + .prepare() + .unwrap(); + + store.begin(&tx1).unwrap(); + assert!(store.begin(&tx2).is_err()); + store.rollback(tx1); assert_eq!(store.get(&key(1)), None); } From 0630ad294de15740c0d5e3ee0efcb4c7e7a234bc Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Thu, 11 Jun 2026 15:35:48 +0900 Subject: [PATCH 35/48] service: pin gRPC ECALL dispatch to a fixed-size worker pool Add `EcallPool`, a long-lived pool of `--max-enclave-concurrency` OS threads dedicated to executing ECALLs. 
All gRPC handlers in `elc.rs` (`create_client`, `update_client`, `update_client_stream`, `aggregate_messages`, `verify_membership`, `verify_non_membership`, `client`) now dispatch their ECALLs onto the pool via `tokio::task::spawn_blocking(|| pool.run(|| ...))` instead of either calling the enclave directly from a tonic async handler (which would bind a TCS to every tokio worker that ever ECALLs) or going through the default `tokio::task::spawn_blocking` pool (which can grow to hundreds of cached threads, each binding its own TCS). Under `TCSPolicy=BIND` the Intel SGX SDK pins one TCS to each ECALL-issuing host thread for the thread's lifetime. The `with_ecall_permit` semaphore in `enclave-api` only bounds *concurrent* ECALLs, not the cumulative set of distinct threads that have ever ECALLed. Once that cumulative set exceeds `TCSNum`, any new ECALL-issuing thread fails with `SGX_ERROR_OUT_OF_TCS`. This is what was observed during a long-running Arbitrum activate-client run on bcc-dev even with `--max-enclave-concurrency=4` and `TCSNum=8`. By pinning ECALL execution to `EcallPool`'s `size` permanent workers, the cumulative TCS bindings created by the service stay at `size` for the entire process lifetime, restoring the invariant the existing CLI guidance assumed. Speculative scheduler's own `thread::scope` workers (`speculative/scheduler.rs`) are intentionally left as-is in this change. Their threads terminate at scope exit so the SDK destructor releases their TCS bindings on each batch; addressing that path (folding speculative dispatch into the same pool) is a follow-up so this PR stays small. CLI help text for `--max-enclave-concurrency` is updated: the prior wording recommended `cap == TCSNum`, which is unsafe under BIND once any unbounded ECALL-thread source exists. The new wording describes the flag as the pool size and recommends `cap < TCSNum` to leave headroom for the SDK runtime and the speculative path. 
Tests: - `cargo test -p service` (37 tests, includes 3 new `ecall_pool` tests asserting concurrency cap, result delivery, and that observed worker-thread ids are bounded by pool size). - `cargo check --workspace` is clean. --- app/src/commands/service.rs | 19 ++- modules/service/src/ecall_pool.rs | 171 +++++++++++++++++++++ modules/service/src/elc.rs | 81 +++++++--- modules/service/src/lib.rs | 1 + modules/service/src/service.rs | 14 +- modules/service/src/speculative/service.rs | 24 +-- 6 files changed, 268 insertions(+), 42 deletions(-) create mode 100644 modules/service/src/ecall_pool.rs diff --git a/app/src/commands/service.rs b/app/src/commands/service.rs index 0dc35275..49d1c1f4 100644 --- a/app/src/commands/service.rs +++ b/app/src/commands/service.rs @@ -35,13 +35,17 @@ pub struct Start { help = "Worker thread number the tokio `Runtime` will use" )] pub threads: Option, - /// Maximum concurrent enclave ECALLs across serial and speculative paths. - /// Set this to match the loaded enclave's `TCSNum`; the default assumes a - /// conservative TCS budget of 4. + /// Size of the dedicated ECALL worker pool that owns the set of OS + /// threads allowed to enter the enclave. Under `TCSPolicy=BIND` the + /// Intel SGX SDK pins one TCS to each ECALL-issuing thread for the + /// thread's lifetime, so this value also bounds the cumulative number + /// of TCS bindings created by the service. Set this to a value strictly + /// less than the enclave's `TCSNum` to leave headroom for the SDK + /// runtime and any speculative path that spawns ad-hoc workers. #[clap( long = "max-enclave-concurrency", default_value_t = 4, - help = "Maximum concurrent enclave ECALLs" + help = "Size of the dedicated ECALL worker pool" )] pub max_enclave_concurrency: usize, /// Maximum concurrent speculative update-client requests. 
@@ -89,7 +93,12 @@ impl ServiceCmd { enclave_parallelism ); } - let srv = ElcService::new(opts.get_home(), enclave, speculative_concurrency_limit); + let srv = ElcService::new( + opts.get_home(), + enclave, + speculative_concurrency_limit, + enclave_parallelism, + ); info!( "start service: addr={addr} mrenclave={mrenclave} speculative_concurrency_limit={} enclave_parallelism={}", diff --git a/modules/service/src/ecall_pool.rs b/modules/service/src/ecall_pool.rs new file mode 100644 index 00000000..bfb71d79 --- /dev/null +++ b/modules/service/src/ecall_pool.rs @@ -0,0 +1,171 @@ +use log::*; +use std::sync::mpsc::{channel, Sender}; +use std::sync::{Arc, Mutex}; +use std::thread::{self, JoinHandle}; + +type Job = Box; + +/// A fixed-size pool of long-lived OS threads dedicated to executing ECALLs. +/// +/// Under `TCSPolicy=BIND`, the Intel SGX SDK binds a TCS to each host thread +/// on its first ECALL and only releases the binding when the thread +/// terminates. Without an upper bound on the set of distinct threads that +/// ever ECALL, cumulative bindings can exceed `TCSNum` even when concurrent +/// ECALLs stay well below it, producing transient `SGX_ERROR_OUT_OF_TCS` +/// failures. +/// +/// `EcallPool` solves this by pinning ECALL execution to exactly `size` +/// permanent worker threads. Workers are spawned once at service start and +/// live for the entire process lifetime; their TCS bindings are therefore +/// stable at `size` and never accumulate. +pub struct EcallPool { + sender: Option>, + workers: Vec>, +} + +impl EcallPool { + /// Creates a pool with `size` permanent worker threads (`size.max(1)`). + /// Callers should set `size` equal to `--max-enclave-concurrency`. 
+ pub fn new(size: usize) -> Self { + let size = size.max(1); + let (sender, receiver) = channel::(); + let receiver = Arc::new(Mutex::new(receiver)); + let workers = (0..size) + .map(|i| { + let receiver = Arc::clone(&receiver); + thread::Builder::new() + .name(format!("ecall-{}", i)) + .spawn(move || ecall_worker_loop(i, receiver)) + .expect("failed to spawn ECALL pool worker") + }) + .collect(); + Self { + sender: Some(sender), + workers, + } + } + + /// Runs `f` on one of the pool's worker threads, blocking the caller + /// until the job completes. Each invocation acquires a worker slot. + pub fn run(&self, f: F) -> R + where + F: FnOnce() -> R + Send + 'static, + R: Send + 'static, + { + let sender = self + .sender + .as_ref() + .expect("ECALL pool used after shutdown"); + let (tx, rx) = channel(); + let job: Job = Box::new(move || { + let _ = tx.send(f()); + }); + sender + .send(job) + .expect("ECALL pool worker channel closed"); + rx.recv() + .expect("ECALL pool worker terminated before producing a result") + } +} + +impl Drop for EcallPool { + fn drop(&mut self) { + // Closing the sender lets each worker observe `Err` on `recv` and + // exit its loop. We then join every worker so SGX SDK destructors + // run before the surrounding process resources are torn down. + drop(self.sender.take()); + for worker in self.workers.drain(..) 
{ + if let Err(e) = worker.join() { + warn!("ECALL pool worker panicked at shutdown: {:?}", e); + } + } + } +} + +fn ecall_worker_loop(index: usize, receiver: Arc>>) { + debug!("ECALL worker {} started", index); + loop { + let job = { + let recv = receiver.lock().unwrap(); + recv.recv() + }; + match job { + Ok(job) => job(), + Err(_) => { + debug!("ECALL worker {} exiting (channel closed)", index); + return; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::EcallPool; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn pool_limits_concurrent_jobs_to_worker_count() { + let pool = Arc::new(EcallPool::new(2)); + let in_flight = Arc::new(AtomicUsize::new(0)); + let observed_max = Arc::new(AtomicUsize::new(0)); + let mut handles = Vec::new(); + for _ in 0..6 { + let pool = Arc::clone(&pool); + let in_flight = Arc::clone(&in_flight); + let observed_max = Arc::clone(&observed_max); + handles.push(thread::spawn(move || { + pool.run(move || { + let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; + observed_max.fetch_max(current, Ordering::SeqCst); + thread::sleep(Duration::from_millis(40)); + in_flight.fetch_sub(1, Ordering::SeqCst); + }); + })); + } + for handle in handles { + handle.join().unwrap(); + } + assert_eq!(observed_max.load(Ordering::SeqCst), 2); + } + + #[test] + fn pool_returns_job_result_to_caller() { + let pool = EcallPool::new(2); + let result = pool.run(|| 7 * 6); + assert_eq!(result, 42); + } + + #[test] + fn pool_workers_have_stable_thread_ids_across_jobs() { + // Verifies the "1 thread = 1 TCS forever" property under BIND policy: + // the set of OS thread ids that execute jobs is bounded by pool size. 
+ let pool = Arc::new(EcallPool::new(3)); + let observed = Arc::new(std::sync::Mutex::new( + std::collections::HashSet::::new(), + )); + let mut handles = Vec::new(); + for _ in 0..30 { + let pool = Arc::clone(&pool); + let observed = Arc::clone(&observed); + handles.push(thread::spawn(move || { + pool.run(move || { + observed.lock().unwrap().insert(thread::current().id()); + thread::sleep(Duration::from_millis(5)); + }); + })); + } + for handle in handles { + handle.join().unwrap(); + } + let set = observed.lock().unwrap(); + assert!( + set.len() <= 3, + "expected at most pool-size distinct worker threads, saw {}", + set.len() + ); + } +} diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index d983d84e..0d98d989 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -34,7 +34,15 @@ where &self, request: Request, ) -> Result, Status> { - match self.app.enclave.proto_create_client(request.into_inner()) { + let inner = request.into_inner(); + let app = self.app.clone(); + let result = tokio::task::spawn_blocking(move || { + app.ecall_pool + .run(move || app.enclave.proto_create_client(inner)) + }) + .await + .map_err(|e| Status::aborted(format!("create client worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -48,8 +56,13 @@ where let client_id = msg.client_id.clone(); let service = self.clone(); let result = tokio::task::spawn_blocking(move || { - service.with_client_update_serialized(&client_id, || { - service.app.enclave.proto_update_client(msg) + let pool = service.app.ecall_pool.clone(); + let enclave = service.app.enclave.clone(); + service.with_client_update_serialized(&client_id, move || { + // The blocking-pool thread holds the per-client lock; the + // actual ECALL runs on an EcallPool worker so cumulative + // TCS bindings stay bounded. 
+ pool.run(move || enclave.proto_update_client(msg)) }) }) .await @@ -120,8 +133,10 @@ where let client_id = msg.client_id.clone(); let service = self.clone(); let result = tokio::task::spawn_blocking(move || { - service.with_client_update_serialized(&client_id, || { - service.app.enclave.proto_update_client(msg) + let pool = service.app.ecall_pool.clone(); + let enclave = service.app.enclave.clone(); + service.with_client_update_serialized(&client_id, move || { + pool.run(move || enclave.proto_update_client(msg)) }) }) .await @@ -231,11 +246,15 @@ where &self, request: Request, ) -> Result, Status> { - match self - .app - .enclave - .proto_aggregate_messages(request.into_inner()) - { + let inner = request.into_inner(); + let app = self.app.clone(); + let result = tokio::task::spawn_blocking(move || { + app.ecall_pool + .run(move || app.enclave.proto_aggregate_messages(inner)) + }) + .await + .map_err(|e| Status::aborted(format!("aggregate messages worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -245,11 +264,15 @@ where &self, request: Request, ) -> Result, Status> { - match self - .app - .enclave - .proto_verify_membership(request.into_inner()) - { + let inner = request.into_inner(); + let app = self.app.clone(); + let result = tokio::task::spawn_blocking(move || { + app.ecall_pool + .run(move || app.enclave.proto_verify_membership(inner)) + }) + .await + .map_err(|e| Status::aborted(format!("verify membership worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -259,11 +282,15 @@ where &self, request: Request, ) -> Result, Status> { - match self - .app - .enclave - .proto_verify_non_membership(request.into_inner()) - { + let inner = request.into_inner(); + let app = self.app.clone(); + let result = tokio::task::spawn_blocking(move || { + app.ecall_pool + .run(move || 
app.enclave.proto_verify_non_membership(inner)) + }) + .await + .map_err(|e| Status::aborted(format!("verify non-membership worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } @@ -273,14 +300,22 @@ where #[tonic::async_trait] impl Query for AppService where - S: CommitStore + TxAccessor + 'static, - E: EnclaveProtoAPI + 'static, + S: CommitStore + TxAccessor + Send + 'static, + E: EnclaveProtoAPI + Send + Sync + 'static, { async fn client( &self, request: Request, ) -> Result, Status> { - match self.enclave.proto_query_client(request.into_inner()) { + let inner = request.into_inner(); + let app = self.clone(); + let result = tokio::task::spawn_blocking(move || { + app.ecall_pool + .run(move || app.enclave.proto_query_client(inner)) + }) + .await + .map_err(|e| Status::aborted(format!("query client worker failed: {e}")))?; + match result { Ok(res) => Ok(Response::new(res)), Err(e) => Err(Status::aborted(e.to_string())), } diff --git a/modules/service/src/lib.rs b/modules/service/src/lib.rs index 6dec822e..247de9bf 100644 --- a/modules/service/src/lib.rs +++ b/modules/service/src/lib.rs @@ -1,4 +1,5 @@ mod client_lock; +mod ecall_pool; mod elc; mod enclave; mod service; diff --git a/modules/service/src/service.rs b/modules/service/src/service.rs index 2540a022..376b3d14 100644 --- a/modules/service/src/service.rs +++ b/modules/service/src/service.rs @@ -1,4 +1,5 @@ use crate::client_lock::ClientUpdateLocks; +use crate::ecall_pool::EcallPool; use crate::speculative::SpeculativeService; use anyhow::Result; use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; @@ -19,6 +20,12 @@ where { pub(crate) home: PathBuf, pub(crate) enclave: Arc, + /// Long-lived pool that owns the set of OS threads allowed to ECALL. 
+ /// All ECALL-issuing call sites in the gRPC layer dispatch through this + /// pool to keep the cumulative set of distinct host threads that ever + /// enter the enclave bounded by `--max-enclave-concurrency`, which is + /// the invariant TCSPolicy=BIND requires. + pub(crate) ecall_pool: Arc, _marker: PhantomData, } @@ -41,6 +48,7 @@ where Self { home: self.home.clone(), enclave: self.enclave.clone(), + ecall_pool: self.ecall_pool.clone(), _marker: Default::default(), } } @@ -65,10 +73,11 @@ where S: CommitStore + 'static, E: EnclaveProtoAPI + 'static, { - pub fn new>(home: P, enclave: E) -> Self { + pub fn new>(home: P, enclave: E, ecall_concurrency: usize) -> Self { AppService { home: home.into(), enclave: Arc::new(enclave), + ecall_pool: Arc::new(EcallPool::new(ecall_concurrency)), _marker: Default::default(), } } @@ -83,8 +92,9 @@ where home: P, enclave: E, speculative_concurrency_limit: usize, + ecall_concurrency: usize, ) -> Self { - let app = AppService::new(home, enclave); + let app = AppService::new(home, enclave, ecall_concurrency); let speculative = SpeculativeService::new(speculative_concurrency_limit); Self { app, diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index ce48a5f9..fa06e910 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -662,7 +662,7 @@ mod tests { fn stitch_rejects_first_base_state_that_is_not_in_store() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -718,7 +718,7 @@ mod tests { fn stitch_accepts_first_base_state_when_stored_consensus_and_state_id_match() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = 
AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -761,7 +761,7 @@ mod tests { fn stitch_rejects_first_base_state_when_prev_state_id_is_missing() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -811,7 +811,7 @@ mod tests { fn stitch_rejects_first_base_state_when_stored_state_id_is_missing() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -869,7 +869,7 @@ mod tests { fn stitch_rejects_first_base_state_when_state_id_mismatch() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -927,7 +927,7 @@ mod tests { fn stitch_rejects_first_base_state_when_client_state_does_not_match_state_id() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -994,7 +994,7 @@ mod tests { fn stitch_rejects_first_base_state_when_canonical_client_state_advanced() { let client_id = 
"07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let mut req = with_explicit_base_state_payload(mk_req( "unit-0000", @@ -1054,7 +1054,7 @@ mod tests { fn streaming_speculative_batch_executes_before_input_closes() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(100)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let (tx, rx) = std::sync::mpsc::sync_channel(2); let worker_service = service.clone(); @@ -1151,7 +1151,7 @@ mod tests { fn streaming_speculative_batch_rejects_channel_close_without_complete() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let (tx, rx) = std::sync::mpsc::sync_channel(1); let worker_service = service.clone(); @@ -1202,7 +1202,7 @@ mod tests { fn streaming_speculative_batch_execution_does_not_apply_until_stitched() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(1); let (tx, rx) = std::sync::mpsc::sync_channel(2); let worker_service = service.clone(); @@ -1258,7 +1258,7 @@ mod tests { fn streaming_speculative_batch_rejects_incomplete_base_state() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(2); let (tx, rx) = std::sync::mpsc::sync_channel(2); 
let worker_service = service.clone(); @@ -1312,7 +1312,7 @@ mod tests { fn streaming_speculative_batch_parallelizes_complete_base_state_units() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(100)); - let app = AppService::::new("test-home", enclave); + let app = AppService::::new("test-home", enclave, 1); let service = SpeculativeService::new(3); let (tx, rx) = std::sync::mpsc::sync_channel(3); let worker_service = service.clone(); From c6593abd1f3eae93e23ccfc407793fbb66c0d214 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Thu, 11 Jun 2026 18:48:18 +0900 Subject: [PATCH 36/48] service: route speculative ECALLs through EcallPool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix in `9e75101` routed gRPC-handler ECALLs (update_client, update_client_stream, create_client, aggregate_messages, verify_*, client) through the long-lived `EcallPool` workers, but left the speculative scheduler's `thread::scope` workers in `modules/service/src/speculative/scheduler.rs` issuing their own ECALLs directly on those scoped threads. Under `TCSPolicy=BIND` each scope worker still bound a TCS for the duration of a batch, so peak TCS occupancy was `pool_size + scope_size` whenever a speculative batch overlapped with any non-speculative gRPC ECALL. The deployed configuration has `pool_size = scope_size = 4` and `TCSNum = 8`, which leaves no headroom for the SDK runtime and reintroduces `SGX_ERROR_OUT_OF_TCS` risk under service-mode peak load. This change hands off the per-unit ECALL inside the scope worker to `AppService.ecall_pool.run(...)`. The scope worker continues to dequeue work, hold the per-stream `speculative_request_permit`, and record the result; it just no longer enters the enclave on its own thread. The ECALL executes on one of the permanent `EcallPool` workers, whose TCS binding is stable for the lifetime of the LCP process. 
Peak TCS occupancy after this change: pool_size + 0 (scope workers no longer ECALL) = pool_size `--max-enclave-concurrency` (= `EcallPool` size) is now the single structural bound on host threads that ever enter the enclave, regardless of how many speculative scope workers exist at any moment. Test impact: `streaming_speculative_batch_parallelizes_complete_base_state_units` now requires the `AppService` ecall_concurrency to be at least the per-stream speculative cap, since the pool is the effective ECALL concurrency. Bumped the test fixture from `ecall_concurrency=1` to `=3` to match `SpeculativeService::new(3)`. All other speculative tests already pass at `ecall_concurrency=1` because they assert `observed_max_in_flight() >= 1` or `== 1`, which still holds. Tests: - `cargo test -p service` — 37 passed, 0 failed. - `cargo check --workspace --tests` — clean. --- modules/service/src/speculative/scheduler.rs | 15 ++++++++++++++- modules/service/src/speculative/service.rs | 5 ++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 32c5e476..6e5103e5 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -258,9 +258,22 @@ fn streaming_speculative_worker( header_bytes ); } + // Dispatch the actual ECALL onto the long-lived EcallPool worker. + // This scope thread holds the per-stream `speculative_request_permit` + // and `in_flight` slot, then blocks on `pool.run` waiting for the pool + // worker's result. The ECALL itself runs on the pool worker thread, + // whose TCS binding is stable across the lifetime of the LCP service + // process. The scope thread itself never enters the enclave and + // therefore does not contribute to TCS occupancy. 
+ let pool = app.ecall_pool.clone(); + let speculative_inner = speculative.clone(); + let app_inner = app.clone(); + let req_clone = req.request().clone(); let result = speculative .with_speculative_request_permit(|| { - speculative.speculative_update_client(app, req.request().clone()) + pool.run(move || { + speculative_inner.speculative_update_client(&app_inner, req_clone) + }) }) .map_err(|e| SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index fa06e910..fe9a3756 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -1312,7 +1312,10 @@ mod tests { fn streaming_speculative_batch_parallelizes_complete_base_state_units() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(100)); - let app = AppService::::new("test-home", enclave, 1); + // EcallPool size must be at least as large as the per-stream + // speculative cap or the pool becomes the effective bottleneck and + // the in-flight observation below will not exceed pool size. 
+ let app = AppService::::new("test-home", enclave, 3); let service = SpeculativeService::new(3); let (tx, rx) = std::sync::mpsc::sync_channel(3); let worker_service = service.clone(); From 56fcda17f15f11b91bc3663e67b646c522922d30 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Thu, 11 Jun 2026 18:55:38 +0900 Subject: [PATCH 37/48] enclave-api: remove ECallGate (single source of truth is EcallPool) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After `6afd485` routed both gRPC handler ECALLs and speculative scheduler ECALLs through `service::EcallPool`, the `ECallGate` semaphore in `modules/enclave-api/src/enclave.rs` became structurally redundant: the only host threads that ever issue an ECALL are the permanent EcallPool workers, whose count is fixed at the configured `--max-enclave-concurrency`. The host-side semaphore had nothing left to gate. Removed: - `ECallGate`, `ECallGateState`, `ECallPermitGuard`, and their tests in `modules/enclave-api/src/enclave.rs`. - `Enclave::new` / `Enclave::create` no longer take an `ecall_concurrency` argument. - `EnclaveInfo::with_ecall_permit` trait method. - `EnclaveLoader::load_with_ecall_concurrency` and its implementation; the remaining `EnclaveLoader::load` no longer needs an `ecall_concurrency` parameter. - The semaphore acquire/release call site in `modules/enclave-api/src/api/primitive.rs::execute_prepared_command`. Updated: - `app/src/commands/service.rs` switches to `enclave_loader.load(...)`, and the `--max-enclave-concurrency` CLI help text is rewritten to describe the flag as the EcallPool size only. - `tests/integration/src/lib.rs` drops the now-removed argument from `Enclave::create`. 
Concurrency control after this change: EcallPool size (--max-enclave-concurrency) = number of permanent host threads that ever ECALL = max concurrent ECALLs in flight = max cumulative TCS bindings under TCSPolicy=BIND Set this to a value at most `TCSNum`; the default 4 leaves room for the SDK runtime when `TCSNum=8`. Tests: - `cargo test -p service` — 37 passed. - `cargo test -p enclave-api` — 3 passed. - `cargo check --workspace` — clean. - The integration-test crate compile-passes; runtime test `tests::test_lcp` requires actual SGX hardware and is not affected by this change. --- app/src/commands/service.rs | 20 ++-- app/src/enclave.rs | 21 +---- modules/enclave-api/src/api/primitive.rs | 27 +++--- modules/enclave-api/src/enclave.rs | 114 ++--------------------- tests/integration/src/lib.rs | 2 +- 5 files changed, 34 insertions(+), 150 deletions(-) diff --git a/app/src/commands/service.rs b/app/src/commands/service.rs index 49d1c1f4..22f0c76c 100644 --- a/app/src/commands/service.rs +++ b/app/src/commands/service.rs @@ -35,13 +35,14 @@ pub struct Start { help = "Worker thread number the tokio `Runtime` will use" )] pub threads: Option, - /// Size of the dedicated ECALL worker pool that owns the set of OS - /// threads allowed to enter the enclave. Under `TCSPolicy=BIND` the - /// Intel SGX SDK pins one TCS to each ECALL-issuing thread for the - /// thread's lifetime, so this value also bounds the cumulative number - /// of TCS bindings created by the service. Set this to a value strictly - /// less than the enclave's `TCSNum` to leave headroom for the SDK - /// runtime and any speculative path that spawns ad-hoc workers. + /// Size of the dedicated ECALL worker pool. All enclave ECALL execution + /// flows through this pool — both serial gRPC handlers and speculative + /// scheduler workers — so this value is the single source of truth for + /// concurrent ECALL count and cumulative TCS bindings under + /// `TCSPolicy=BIND`. 
Set this to a value at most equal to the loaded + /// enclave's `TCSNum`; leaving at least one TCS for the SDK runtime + /// (i.e. `--max-enclave-concurrency = TCSNum - 1`) is the conservative + /// default. #[clap( long = "max-enclave-concurrency", default_value_t = 4, @@ -70,11 +71,10 @@ impl ServiceCmd { Self::Start(cmd) => { let addr = cmd.address.parse()?; let enclave_parallelism = cmd.max_enclave_concurrency.max(1); - let enclave = enclave_loader.load_with_ecall_concurrency( + let enclave = enclave_loader.load( opts, cmd.enclave.path.as_ref(), cmd.enclave.is_debug(), - enclave_parallelism, )?; let metadata = enclave.metadata()?; let mrenclave = metadata.mrenclave().to_hex_string(); @@ -88,7 +88,7 @@ impl ServiceCmd { let speculative_concurrency_limit = cmd.max_speculative_concurrency.max(1); if speculative_concurrency_limit > enclave_parallelism { warn!( - "max-speculative-concurrency ({}) is greater than max-enclave-concurrency ({}); speculative workers above the enclave limit will wait on the ECALL gate", + "max-speculative-concurrency ({}) is greater than max-enclave-concurrency ({}); speculative workers above the enclave limit will block waiting for an EcallPool slot", speculative_concurrency_limit, enclave_parallelism ); diff --git a/app/src/enclave.rs b/app/src/enclave.rs index 0f4cd14e..ad18e5f0 100644 --- a/app/src/enclave.rs +++ b/app/src/enclave.rs @@ -7,14 +7,6 @@ use std::path::PathBuf; pub trait EnclaveLoader { fn load(&self, opts: &Opts, path: Option<&PathBuf>, debug: bool) -> Result>; - - fn load_with_ecall_concurrency( - &self, - opts: &Opts, - path: Option<&PathBuf>, - debug: bool, - ecall_concurrency: usize, - ) -> Result>; } #[derive(Debug)] @@ -25,17 +17,6 @@ where Enclave: EnclaveProtoAPI, { fn load(&self, opts: &Opts, path: Option<&PathBuf>, debug: bool) -> Result> { - // One-shot CLI commands (attestation/enclave/elc) issue ECALLs serially. 
- self.load_with_ecall_concurrency(opts, path, debug, 1) - } - - fn load_with_ecall_concurrency( - &self, - opts: &Opts, - path: Option<&PathBuf>, - debug: bool, - ecall_concurrency: usize, - ) -> Result> { let path = if let Some(path) = path { path.clone() } else { @@ -43,7 +24,7 @@ where }; let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home)?; - match Enclave::create(&path, debug, km, env.store.clone(), ecall_concurrency) { + match Enclave::create(&path, debug, km, env.store.clone()) { Ok(enclave) => Ok(enclave), Err(x) => { bail!( diff --git a/modules/enclave-api/src/api/primitive.rs b/modules/enclave-api/src/api/primitive.rs index 1f6a37e9..29ed89eb 100644 --- a/modules/enclave-api/src/api/primitive.rs +++ b/modules/enclave-api/src/api/primitive.rs @@ -87,20 +87,21 @@ fn execute_prepared_command( cmd: Command, tx_id: TxId, ) -> Result { - enclave.with_ecall_permit(|| { - let current_timestamp = Time::now(); - let cctx = match cmd.get_enclave_key() { - Some(addr) => { - let ski = enclave.get_key_manager().load(addr)?; - CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx_id) - } - None => CommandContext::new(current_timestamp, None, tx_id), - }; + // Concurrency is now bounded structurally by `service::EcallPool`. The + // caller is expected to run on one of the pool's permanent ECALL + // workers, keeping TCSPolicy=BIND bindings within the pool size. 
+ let current_timestamp = Time::now(); + let cctx = match cmd.get_enclave_key() { + Some(addr) => { + let ski = enclave.get_key_manager().load(addr)?; + CommandContext::new(current_timestamp, Some(ski.sealed_ek), tx_id) + } + None => CommandContext::new(current_timestamp, None, tx_id), + }; - let ecmd = ECallCommand::new(cctx, cmd); - debug!("try to execute command: {:?}", ecmd); - raw_execute_command(enclave.get_eid(), ecmd) - }) + let ecmd = ECallCommand::new(cctx, cmd); + debug!("try to execute command: {:?}", ecmd); + raw_execute_command(enclave.get_eid(), ecmd) } pub(crate) fn raw_execute_command( diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 79dfc93f..5431d183 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -5,7 +5,7 @@ use lcp_types::{store_key, Any, EnclaveMetadata, Height}; use sgx_types::{sgx_enclave_id_t, SgxResult}; use sgx_urts::SgxEnclave; use std::path::PathBuf; -use std::sync::{Arc, Condvar, Mutex, RwLock}; +use std::sync::{Arc, RwLock}; use std::{marker::PhantomData, ops::DerefMut}; use store::host::{HostStore, IntoCommitStore}; use store::transaction::{CommitStore, CreatedTx, Tx, TxAccessor, UpdateKey}; @@ -17,39 +17,21 @@ pub struct Enclave { pub(crate) key_manager: EnclaveKeyManager, pub(crate) store: Arc>, pub(crate) sgx_enclave: SgxEnclave, - pub(crate) ecall_gate: Arc, _marker: PhantomData, } -#[derive(Debug)] -pub(crate) struct ECallGate { - state: Mutex, - ready: Condvar, -} - -#[derive(Debug)] -struct ECallGateState { - available: usize, -} - -struct ECallPermitGuard<'a> { - gate: &'a ECallGate, -} - impl Enclave { pub fn new( path: impl Into, key_manager: EnclaveKeyManager, store: Arc>, sgx_enclave: SgxEnclave, - ecall_concurrency: usize, ) -> Self { Enclave { path: path.into(), key_manager, store, sgx_enclave, - ecall_gate: Arc::new(ECallGate::new(ecall_concurrency)), _marker: PhantomData, } } @@ -59,17 +41,10 @@ impl Enclave { debug: bool, 
key_manager: EnclaveKeyManager, store: Arc>, - ecall_concurrency: usize, ) -> SgxResult { let path = path.into(); let enclave = host::create_enclave(path.clone(), debug)?; - Ok(Self::new( - path, - key_manager, - store, - enclave, - ecall_concurrency, - )) + Ok(Self::new(path, key_manager, store, enclave)) } pub fn destroy(self) { @@ -77,40 +52,12 @@ impl Enclave { } } -impl ECallGate { - fn new(permits: usize) -> Self { - Self { - state: Mutex::new(ECallGateState { - available: permits.max(1), - }), - ready: Condvar::new(), - } - } - - fn with_permit(&self, f: impl FnOnce() -> Result) -> Result { - let _permit = self.acquire(); - f() - } - - fn acquire(&self) -> ECallPermitGuard<'_> { - let mut state = self.state.lock().unwrap(); - while state.available == 0 { - state = self.ready.wait(state).unwrap(); - } - state.available -= 1; - ECallPermitGuard { gate: self } - } -} - -impl Drop for ECallPermitGuard<'_> { - fn drop(&mut self) { - let mut state = self.gate.state.lock().unwrap(); - state.available += 1; - self.gate.ready.notify_one(); - } -} - -/// `EnclaveInfo` is an accessor to enclave information +/// `EnclaveInfo` is an accessor to enclave information. +/// +/// Concurrency over `ecall_execute_command` is the caller's responsibility: +/// the LCP service routes all ECALLs through `service::EcallPool`, which pins +/// host threads that issue ECALLs to a fixed set so that cumulative TCS +/// bindings under `TCSPolicy=BIND` cannot exceed the pool size. pub trait EnclaveInfo: Sync + Send { /// `get_eid` returns the enclave id fn get_eid(&self) -> sgx_enclave_id_t; @@ -120,10 +67,6 @@ pub trait EnclaveInfo: Sync + Send { fn is_debug(&self) -> bool; /// `get_key_manager` returns a key manager for Enclave Keys fn get_key_manager(&self) -> &EnclaveKeyManager; - /// `with_ecall_permit` guards entry into enclave ECALLs. 
- fn with_ecall_permit(&self, f: impl FnOnce() -> Result) -> Result { - f() - } } impl EnclaveInfo for Enclave { @@ -143,9 +86,6 @@ impl EnclaveInfo for Enclave { fn get_key_manager(&self) -> &EnclaveKeyManager { &self.key_manager } - fn with_ecall_permit(&self, f: impl FnOnce() -> Result) -> Result { - self.ecall_gate.with_permit(f) - } } /// `HostStoreTxManager` is a transaction manager for the host store @@ -339,41 +279,3 @@ where } } -#[cfg(test)] -mod tests { - use super::ECallGate; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; - use std::thread; - use std::time::Duration; - - #[test] - fn ecall_gate_limits_concurrency() { - let gate = Arc::new(ECallGate::new(2)); - let in_flight = Arc::new(AtomicUsize::new(0)); - let observed_max = Arc::new(AtomicUsize::new(0)); - let mut handles = Vec::new(); - - for _ in 0..6 { - let gate = gate.clone(); - let in_flight = in_flight.clone(); - let observed_max = observed_max.clone(); - handles.push(thread::spawn(move || { - gate.with_permit(|| { - let current = in_flight.fetch_add(1, Ordering::SeqCst) + 1; - observed_max.fetch_max(current, Ordering::SeqCst); - thread::sleep(Duration::from_millis(25)); - in_flight.fetch_sub(1, Ordering::SeqCst); - Ok(()) - }) - .unwrap(); - })); - } - - for handle in handles { - handle.join().unwrap(); - } - - assert_eq!(observed_max.load(Ordering::SeqCst), 2); - } -} diff --git a/tests/integration/src/lib.rs b/tests/integration/src/lib.rs index f75721d5..15a3ec91 100644 --- a/tests/integration/src/lib.rs +++ b/tests/integration/src/lib.rs @@ -279,7 +279,7 @@ mod tests { let env = host::get_environment().unwrap(); let km = EnclaveKeyManager::new(&env.home).unwrap(); - let enclave = Enclave::create(ENCLAVE_FILE, true, km, env.store.clone(), 1).unwrap(); + let enclave = Enclave::create(ENCLAVE_FILE, true, km, env.store.clone()).unwrap(); test_remote_attestation(&enclave).unwrap(); From 4a3c45975261e9b18e824666b5ab06c3bef6622d Mon Sep 17 00:00:00 2001 From: Kiyoshi 
Nakao Date: Thu, 11 Jun 2026 19:37:01 +0900 Subject: [PATCH 38/48] enclave-api: verify speculative base against stored state_id only The stitch-phase base verification recomputed the expected prev_state_id with gen_state_id_from_any over the raw caller-supplied Anys. Light clients derive state IDs from a canonicalized client state (e.g. latest_height/frozen reset before hashing), and that canonicalization is ELC-specific and only available inside the enclave, so the recomputed hash never matches the enclave-observed prev_state_id for any ELC whose canonicalization is not the identity. This made every explicit-state speculative batch fail with BaseStateMismatch at the stitch phase. Drop the raw recompute and rely on the remaining checks: the supplied (client_state, consensus_state) bytes are pinned to the canonical store, and the enclave-observed prev_state_id must match the height-indexed state_id stored by the in-enclave light client at create/update time. --- modules/enclave-api/src/enclave.rs | 24 +++---- modules/service/src/speculative/service.rs | 80 ++++++++++++++++++++-- 2 files changed, 85 insertions(+), 19 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 5431d183..ee1aa8cf 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -1,5 +1,4 @@ use crate::errors::{Error, Result}; -use commitments::gen_state_id_from_any; use keymanager::EnclaveKeyManager; use lcp_types::{store_key, Any, EnclaveMetadata, Height}; use sgx_types::{sgx_enclave_id_t, SgxResult}; @@ -137,9 +136,9 @@ pub trait HostStoreTxManager: CommitStoreAccessor { /// The explicit base client state must match the latest canonical /// client_state. This prevents an old, historically valid base state from /// overwriting a newer latest-only client_state. 
The caller-supplied - /// `(client_state, consensus_state)` pair must also re-derive the - /// height-indexed state ID previously stored by a successful - /// serial/speculative update. + /// `prev_state_id` (observed in-enclave by the first speculative unit) + /// must also match the height-indexed state ID previously stored by a + /// successful create/serial/speculative update. fn apply_write_set_with_expected_base( &self, update_key: UpdateKey, @@ -235,15 +234,14 @@ pub trait HostStoreTxManager: CommitStoreAccessor { prev_height.revision_height() )) })?; - let expected_prev_state_id = gen_state_id_from_any(client_state, consensus_state)?.to_vec(); - if expected_prev_state_id.as_slice() != prev_state_id { - return Err(Error::invalid_argument(format!( - "speculative base state_id does not match client_state/consensus_state: client_id={} height={}-{}", - client_id, - prev_height.revision_number(), - prev_height.revision_height() - ))); - } + // Do not recompute the state ID from the supplied raw Anys here: light + // clients derive state IDs from a canonicalized client state (e.g. + // latest_height/frozen reset), and that canonicalization is + // ELC-specific and only available inside the enclave. The supplied + // base bytes are already pinned to the canonical store by the two + // checks above, and the stored state_id below was written by the + // in-enclave light client for exactly those bytes, so comparing the + // observed prev_state_id against the stored state_id closes the chain. 
let state_id_key = store_key::state_id_bytes(client_id, prev_height); let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; if stored_state_id.as_deref() != Some(prev_state_id) { diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index fe9a3756..f1ad0ebb 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -757,6 +757,69 @@ mod tests { .expect("stored consensus state and matching state_id should be accepted"); } + // Regression test: light clients derive state IDs from a + // canonicalized client state (e.g. latest_height/frozen reset before + // hashing), so the stored/observed state ID is generally NOT equal to + // gen_state_id_from_any over the raw supplied Anys. The stitch must not + // recompute the state ID from the raw base; it must accept the batch as + // long as the stored state_id and the enclave-observed prev_state_id + // agree, even when both differ from the raw-Any hash. + #[test] + fn stitch_accepts_first_base_state_when_state_id_uses_canonicalized_form() { + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new(Duration::from_millis(1)); + let app = AppService::::new("test-home", enclave, 1); + let service = SpeculativeService::new(1); + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + let prev_height = req.base_state.prev_height.expect("test base prev_height"); + // Simulate an ELC whose canonicalized state_id differs from the hash + // of the raw supplied Anys. 
+ let canonical_form_state_id = vec![7u8; 32]; + assert_ne!( + canonical_form_state_id, + state_id_for_base_state(&req.base_state), + "test requires a state_id that differs from the raw-Any hash" + ); + req.base_state.prev_state_id = Some(canonical_form_state_id.clone()); + seed_canonical_base_state(&app, client_id, &req.base_state); + app.enclave.use_mut_store(|store| { + store.set( + lcp_types::store_key::state_id_bytes(client_id, &prev_height), + canonical_form_state_id.clone(), + ); + }); + let result = SpeculativeUpdateClientResult { + response: MsgUpdateClientResponse::default(), + write_set: WriteSet::default(), + base_state: req.base_state.clone(), + observed_transition: ObservedStateTransition { + prev_height: Some(prev_height), + prev_state_id: Some(canonical_form_state_id), + post_height: Height::new(0, 11), + post_state_id: vec![1; 32], + }, + }; + + service + .stitch_speculative_update_client_batch( + &app, + SpeculativeUpdateClientBatch { + client_id: client_id.to_string(), + units: vec![req], + }, + SpeculativeUpdateClientBatchResult { + client_id: client_id.to_string(), + units: vec![result], + }, + ) + .expect("canonicalized-form state_id matching the stored state_id should be accepted"); + } + #[test] fn stitch_rejects_first_base_state_when_prev_state_id_is_missing() { let client_id = "07-tendermint-0"; @@ -924,7 +987,7 @@ mod tests { } #[test] - fn stitch_rejects_first_base_state_when_client_state_does_not_match_state_id() { + fn stitch_rejects_first_base_state_when_client_state_does_not_match_stored_state_id() { let client_id = "07-tendermint-0"; let enclave = FakeEnclave::new(Duration::from_millis(1)); let app = AppService::::new("test-home", enclave, 1); @@ -936,9 +999,12 @@ mod tests { None, )); let prev_height = req.base_state.prev_height.expect("test base prev_height"); - let prev_state_id = state_id_for_base_state(&req.base_state); + // Seed the canonical store from the original base, then mutate the + // supplied client_state. 
The stored state_id keeps reflecting the + // original pair while the unit (executing from the supplied base) + // observes the mutated pair's state_id, so the stored-state_id check + // must reject the stitch. seed_canonical_base_state(&app, client_id, &req.base_state); - req.base_state.prev_state_id = Some(prev_state_id.clone()); req.base_state.client_state = Some( Any { type_url: "/ibc.mock.ClientState".to_string(), @@ -946,6 +1012,8 @@ mod tests { } .into(), ); + let observed_prev_state_id = state_id_for_base_state(&req.base_state); + req.base_state.prev_state_id = Some(observed_prev_state_id.clone()); set_canonical_client_state( &app, client_id, @@ -960,7 +1028,7 @@ mod tests { base_state: req.base_state.clone(), observed_transition: ObservedStateTransition { prev_height: Some(prev_height), - prev_state_id: Some(prev_state_id), + prev_state_id: Some(observed_prev_state_id), post_height: Height::new(0, 11), post_state_id: vec![1; 32], }, @@ -978,13 +1046,13 @@ mod tests { units: vec![result], }, ) - .expect_err("client_state inconsistent with state_id should be rejected"); + .expect_err("client_state inconsistent with stored state_id should be rejected"); assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); assert!( err.detail - .contains("speculative base state_id does not match client_state/consensus_state"), + .contains("stored speculative base state_id mismatch"), "unexpected error detail: {}", err.detail ); From a1876999b9966c9b452b5ee8954e18f78bc71d49 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 10:31:18 +0900 Subject: [PATCH 39/48] fmt: apply cargo fmt to pre-existing formatting drift --- app/src/commands/service.rs | 7 ++----- modules/enclave-api/src/enclave.rs | 1 - modules/service/src/ecall_pool.rs | 10 ++++------ modules/service/src/speculative/scheduler.rs | 4 +--- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/app/src/commands/service.rs 
b/app/src/commands/service.rs index 22f0c76c..0df0c80c 100644 --- a/app/src/commands/service.rs +++ b/app/src/commands/service.rs @@ -71,11 +71,8 @@ impl ServiceCmd { Self::Start(cmd) => { let addr = cmd.address.parse()?; let enclave_parallelism = cmd.max_enclave_concurrency.max(1); - let enclave = enclave_loader.load( - opts, - cmd.enclave.path.as_ref(), - cmd.enclave.is_debug(), - )?; + let enclave = + enclave_loader.load(opts, cmd.enclave.path.as_ref(), cmd.enclave.is_debug())?; let metadata = enclave.metadata()?; let mrenclave = metadata.mrenclave().to_hex_string(); let mut rb = Builder::new_multi_thread(); diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index ee1aa8cf..60e6f0a5 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -276,4 +276,3 @@ where store.deref_mut().apply(f) } } - diff --git a/modules/service/src/ecall_pool.rs b/modules/service/src/ecall_pool.rs index bfb71d79..4667d56b 100644 --- a/modules/service/src/ecall_pool.rs +++ b/modules/service/src/ecall_pool.rs @@ -60,9 +60,7 @@ impl EcallPool { let job: Job = Box::new(move || { let _ = tx.send(f()); }); - sender - .send(job) - .expect("ECALL pool worker channel closed"); + sender.send(job).expect("ECALL pool worker channel closed"); rx.recv() .expect("ECALL pool worker terminated before producing a result") } @@ -144,9 +142,9 @@ mod tests { // Verifies the "1 thread = 1 TCS forever" property under BIND policy: // the set of OS thread ids that execute jobs is bounded by pool size. 
let pool = Arc::new(EcallPool::new(3)); - let observed = Arc::new(std::sync::Mutex::new( - std::collections::HashSet::::new(), - )); + let observed = Arc::new(std::sync::Mutex::new(std::collections::HashSet::< + thread::ThreadId, + >::new())); let mut handles = Vec::new(); for _ in 0..30 { let pool = Arc::clone(&pool); diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 6e5103e5..e2164187 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -271,9 +271,7 @@ fn streaming_speculative_worker( let req_clone = req.request().clone(); let result = speculative .with_speculative_request_permit(|| { - pool.run(move || { - speculative_inner.speculative_update_client(&app_inner, req_clone) - }) + pool.run(move || speculative_inner.speculative_update_client(&app_inner, req_clone)) }) .map_err(|e| SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, From 9189c57cf0151072f960b29b1074838d471ceaca Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 10:57:45 +0900 Subject: [PATCH 40/48] service: box streaming speculative batch input unit to satisfy clippy --- modules/service/src/elc.rs | 5 +++- modules/service/src/speculative/scheduler.rs | 4 ++-- modules/service/src/speculative/service.rs | 24 ++++++++++---------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 0d98d989..232f2301 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -202,7 +202,10 @@ where let header_memory = header_memory_budget.reserve_for_chunk(&chunk_msg).await?; if let Some(unit) = decoder.push_chunk(chunk_msg.chunk, header_memory)? 
{ units += 1; - if tx.send(StreamingSpeculativeBatchInput::Unit(unit)).is_err() { + if tx + .send(StreamingSpeculativeBatchInput::Unit(Box::new(unit))) + .is_err() + { let result = scheduler.await.map_err(|e| { Status::aborted(format!("speculative batch worker failed: {e}")) })?; diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index e2164187..3d275321 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -21,7 +21,7 @@ pub(crate) struct StreamingSpeculativeBatchResult { } pub(crate) enum StreamingSpeculativeBatchInput { - Unit(ResidentSpeculativeUpdateClientRequest), + Unit(Box), Complete, } @@ -77,7 +77,7 @@ where if state.failure.is_some() { break; } - if let Err(e) = state.enqueue(unit) { + if let Err(e) = state.enqueue(*unit) { state.failure = Some(e); shared.ready.notify_all(); shared.complete.notify_all(); diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index f1ad0ebb..bc7f99a5 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -1156,9 +1156,9 @@ mod tests { }); first_req.base_state.prev_state_id = Some(state_id_for_base_state(&first_req.base_state)); seed_canonical_base_state(&app, client_id, &first_req.base_state); - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(first_req), - )) + ))) .expect("send first unit"); for _ in 0..100 { @@ -1172,7 +1172,7 @@ mod tests { "expected first unit to start before input stream closes" ); - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(with_explicit_base_state_payload( SpeculativeUpdateClientRequest { unit_id: "unit-0001".to_string(), @@ -1201,7 +1201,7 @@ mod tests { }, }, )), - )) + ))) 
.expect("send second unit"); tx.send(StreamingSpeculativeBatchInput::Complete) .expect("send batch complete"); @@ -1243,9 +1243,9 @@ mod tests { req.update.signer = vec![0; 20]; seed_canonical_base_state(&app, client_id, &req.base_state); - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(req), - )) + ))) .expect("send first unit"); drop(tx); @@ -1294,9 +1294,9 @@ mod tests { req.update.signer = vec![0; 20]; seed_canonical_base_state(&app, client_id, &req.base_state); - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(req), - )) + ))) .expect("send first unit"); tx.send(StreamingSpeculativeBatchInput::Complete) .expect("send batch complete"); @@ -1340,7 +1340,7 @@ mod tests { ) }); - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(SpeculativeUpdateClientRequest { unit_id: "unit-0000".to_string(), update: MsgUpdateClient { @@ -1359,7 +1359,7 @@ mod tests { consensus_state: None, }, }), - )) + ))) .expect("send first unit"); drop(tx); @@ -1433,9 +1433,9 @@ mod tests { } seed_canonical_base_state(&app, client_id, &requests[0].base_state); for req in requests { - tx.send(StreamingSpeculativeBatchInput::Unit( + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( ResidentSpeculativeUpdateClientRequest::unmetered(req), - )) + ))) .expect("send unit"); } tx.send(StreamingSpeculativeBatchInput::Complete) From 9aa6643564129cb77a1b25f3c0dba1ea42dca8b1 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 13:48:28 +0900 Subject: [PATCH 41/48] service: prevent speculative stream hang on ECALL worker panic --- modules/service/src/ecall_pool.rs | 26 +++++- modules/service/src/speculative/scheduler.rs | 86 ++++++++++++++++---- 
modules/service/src/speculative/service.rs | 68 ++++++++++++++++ 3 files changed, 163 insertions(+), 17 deletions(-) diff --git a/modules/service/src/ecall_pool.rs b/modules/service/src/ecall_pool.rs index 4667d56b..1a930aa7 100644 --- a/modules/service/src/ecall_pool.rs +++ b/modules/service/src/ecall_pool.rs @@ -1,4 +1,5 @@ use log::*; +use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe}; use std::sync::mpsc::{channel, Sender}; use std::sync::{Arc, Mutex}; use std::thread::{self, JoinHandle}; @@ -47,6 +48,10 @@ impl EcallPool { /// Runs `f` on one of the pool's worker threads, blocking the caller /// until the job completes. Each invocation acquires a worker slot. + /// + /// If `f` panics, the panic is caught on the worker thread (keeping the + /// worker and its TCS binding alive) and resumed on the calling thread, + /// matching the observable behavior of a direct invocation. pub fn run(&self, f: F) -> R where F: FnOnce() -> R + Send + 'static, @@ -58,11 +63,16 @@ impl EcallPool { .expect("ECALL pool used after shutdown"); let (tx, rx) = channel(); let job: Job = Box::new(move || { - let _ = tx.send(f()); + let _ = tx.send(catch_unwind(AssertUnwindSafe(f))); }); sender.send(job).expect("ECALL pool worker channel closed"); - rx.recv() + match rx + .recv() .expect("ECALL pool worker terminated before producing a result") + { + Ok(result) => result, + Err(panic) => resume_unwind(panic), + } } } @@ -137,6 +147,18 @@ mod tests { assert_eq!(result, 42); } + #[test] + fn pool_survives_job_panic_and_propagates_it_to_caller() { + let pool = EcallPool::new(1); + let panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + pool.run::<_, ()>(|| panic!("job panic")) + })) + .expect_err("job panic should propagate to the caller"); + assert_eq!(panic.downcast_ref::<&str>(), Some(&"job panic")); + // The single worker must have survived the panic to serve this job. 
+ assert_eq!(pool.run(|| 7 * 6), 42); + } + #[test] fn pool_workers_have_stable_thread_ids_across_jobs() { // Verifies the "1 thread = 1 TCS forever" property under BIND policy: diff --git a/modules/service/src/speculative/scheduler.rs b/modules/service/src/speculative/scheduler.rs index 3d275321..538174b5 100644 --- a/modules/service/src/speculative/scheduler.rs +++ b/modules/service/src/speculative/scheduler.rs @@ -9,7 +9,9 @@ use crate::service::AppService; use enclave_api::{EnclaveProtoAPI, SpeculativeEnclaveCommandAPI}; use log::info; use sha2::Digest; +use std::any::Any; use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::panic::{catch_unwind, AssertUnwindSafe}; use std::sync::mpsc::Receiver; use std::sync::{Arc, Condvar, Mutex}; use std::thread; @@ -220,6 +222,42 @@ impl StreamingSchedulerState { } } +// RAII guard for one `in_flight` slot taken by a streaming worker. +// +// Dropping the guard returns the slot and wakes both the workers waiting on +// `ready` and the coordinator waiting on `complete`. Tying the decrement to +// `Drop` keeps slot accounting correct even if the worker unwinds while +// executing a unit; a leaked slot would leave the coordinator blocked on +// `complete` forever. +struct InFlightSlot<'a> { + shared: &'a StreamingSchedulerShared, +} + +impl Drop for InFlightSlot<'_> { + fn drop(&mut self) { + // Recover from mutex poisoning: this may run during unwinding, and + // panicking again here would abort the process. 
+ let mut state = self + .shared + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + state.in_flight -= 1; + self.shared.ready.notify_all(); + self.shared.complete.notify_all(); + } +} + +fn panic_payload_message(panic: &(dyn Any + Send)) -> String { + if let Some(msg) = panic.downcast_ref::<&str>() { + (*msg).to_string() + } else if let Some(msg) = panic.downcast_ref::() { + msg.clone() + } else { + "non-string panic payload".to_string() + } +} + fn streaming_speculative_worker( speculative: &SpeculativeService, app: &AppService, @@ -248,6 +286,10 @@ fn streaming_speculative_worker( } }; + // Hold the slot in an RAII guard so it is restored even if anything + // below unwinds; see InFlightSlot. + let in_flight_slot = InFlightSlot { shared: &shared }; + let unit_id = req.request().unit_id.clone(); let header_bytes = speculative_request_header_len(req.request()); if let Some(header_bytes) = header_bytes { @@ -269,11 +311,12 @@ fn streaming_speculative_worker( let speculative_inner = speculative.clone(); let app_inner = app.clone(); let req_clone = req.request().clone(); - let result = speculative - .with_speculative_request_permit(|| { + let result = match catch_unwind(AssertUnwindSafe(|| { + speculative.with_speculative_request_permit(|| { pool.run(move || speculative_inner.speculative_update_client(&app_inner, req_clone)) }) - .map_err(|e| SpeculativeBatchFailure { + })) { + Ok(executed) => executed.map_err(|e| SpeculativeBatchFailure { kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, unit_id: Some(unit_id), detail: match speculative_request_header_digest(req.request()) { @@ -283,20 +326,33 @@ fn streaming_speculative_worker( ), None => e.to_string(), }, - }); + }), + // A panic in the ECALL path is recorded as a unit failure instead + // of unwinding this scoped worker, which would poison the shared + // state and panic the surrounding `thread::scope`. 
+ Err(panic) => Err(SpeculativeBatchFailure { + kind: SpeculativeBatchFailureKind::SpeculativeExecutionFailed, + unit_id: Some(unit_id), + detail: format!( + "speculative execution panicked: {}", + panic_payload_message(panic.as_ref()) + ), + }), + }; - let mut state = shared.state.lock().unwrap(); - state.in_flight -= 1; - match result { - Ok(result) => { - let req = req.into_request_without_header_payload(); - state.complete_unit(index, req, result); - } - Err(e) => { - state.failure.get_or_insert(e); + { + let mut state = shared.state.lock().unwrap(); + match result { + Ok(result) => { + let req = req.into_request_without_header_payload(); + state.complete_unit(index, req, result); + } + Err(e) => { + state.failure.get_or_insert(e); + } } } - shared.ready.notify_all(); - shared.complete.notify_all(); + // Release the slot and wake waiters after the outcome is recorded. + drop(in_flight_slot); } } diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index bc7f99a5..a7985643 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -319,6 +319,7 @@ mod tests { current_in_flight: AtomicUsize, observed_max_in_flight: AtomicUsize, delay: Duration, + panic_on_signer_idx: Option, } impl FakeEnclave { @@ -338,6 +339,14 @@ mod tests { current_in_flight: AtomicUsize::new(0), observed_max_in_flight: AtomicUsize::new(0), delay, + panic_on_signer_idx: None, + } + } + + fn new_panicking_on(delay: Duration, signer_idx: u64) -> Self { + Self { + panic_on_signer_idx: Some(signer_idx), + ..Self::new(delay) } } @@ -384,6 +393,9 @@ mod tests { ) -> core::result::Result { let idx = input.update.signer.0[19] as u64; + if self.panic_on_signer_idx == Some(idx) { + panic!("injected speculative_update_client panic"); + } let current = self.current_in_flight.fetch_add(1, Ordering::SeqCst) + 1; self.observed_max_in_flight .fetch_max(current, Ordering::SeqCst); @@ -1266,6 +1278,62 @@ mod tests 
{ ); } + #[test] + fn streaming_speculative_batch_reports_worker_panic_as_failure() { + // Regression test: a panic inside the speculative ECALL path used to + // kill the EcallPool worker and leak the scheduler's `in_flight` + // slot, leaving the coordinator blocked on `complete` forever. The + // stream must instead finish with a SpeculativeExecutionFailed error. + let client_id = "07-tendermint-0"; + let enclave = FakeEnclave::new_panicking_on(Duration::from_millis(1), 0); + let app = AppService::::new("test-home", enclave, 1); + let service = SpeculativeService::new(1); + let (tx, rx) = std::sync::mpsc::sync_channel(1); + let worker_service = service.clone(); + let worker_app = app.clone(); + let client_id_for_worker = client_id.to_string(); + let handle = thread::spawn(move || { + worker_service.execute_speculative_update_client_stream( + &worker_app, + client_id_for_worker, + rx, + ) + }); + + let mut req = with_explicit_base_state_payload(mk_req( + "unit-0000", + client_id, + Some(Height::new(0, 10)), + None, + )); + req.base_state.prev_state_id = Some(state_id_for_base_state(&req.base_state)); + req.update.signer = vec![0; 20]; + seed_canonical_base_state(&app, client_id, &req.base_state); + + tx.send(StreamingSpeculativeBatchInput::Unit(Box::new( + ResidentSpeculativeUpdateClientRequest::unmetered(req), + ))) + .expect("send first unit"); + tx.send(StreamingSpeculativeBatchInput::Complete) + .expect("send batch complete"); + drop(tx); + + let err = handle + .join() + .expect("streaming worker thread") + .expect_err("panicking unit must surface as batch failure"); + assert_eq!( + err.kind, + SpeculativeBatchFailureKind::SpeculativeExecutionFailed + ); + assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + assert!( + err.detail.contains("panicked"), + "unexpected error detail: {}", + err.detail + ); + } + #[test] fn streaming_speculative_batch_execution_does_not_apply_until_stitched() { let client_id = "07-tendermint-0"; From 
586d6f5432e716ff70b3a74f2a6af18898ced8b4 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 14:00:37 +0900 Subject: [PATCH 42/48] service: recover per-client update lock from poisoning --- modules/service/src/client_lock.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/modules/service/src/client_lock.rs b/modules/service/src/client_lock.rs index 6dd9bcd6..47125749 100644 --- a/modules/service/src/client_lock.rs +++ b/modules/service/src/client_lock.rs @@ -15,7 +15,10 @@ impl ClientUpdateLocks { .or_insert_with(|| Arc::new(Mutex::new(()))) .clone() }; - let guard = lock.lock().unwrap(); + // Recover from poisoning: the guarded data is `()`, so a panic while + // the lock was held leaves no invariant to protect, and refusing the + // lock here would permanently block all updates for this client. + let guard = lock.lock().unwrap_or_else(|poisoned| poisoned.into_inner()); let result = f(); drop(guard); @@ -70,6 +73,20 @@ mod tests { assert_eq!(observed_max.load(Ordering::SeqCst), 1); } + #[test] + fn client_update_locks_recover_after_panicked_update() { + let locks = ClientUpdateLocks::default(); + let panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + locks.with_client_serialized("client-0", || { + panic!("injected update panic"); + }) + })); + assert!(panic.is_err()); + // A panicked update must not poison the per-client lock for good. 
+ let value = locks.with_client_serialized("client-0", || 42); + assert_eq!(value, 42); + } + #[test] fn client_update_locks_allow_different_clients() { let locks = Arc::new(ClientUpdateLocks::default()); From 813465cc333a7e4d77e0d93aa3a57b24bd328e0d Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 14:00:38 +0900 Subject: [PATCH 43/48] service: time out speculative header memory reservation waits --- modules/service/src/speculative/stream.rs | 50 ++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/modules/service/src/speculative/stream.rs b/modules/service/src/speculative/stream.rs index 07178950..51aaf740 100644 --- a/modules/service/src/speculative/stream.rs +++ b/modules/service/src/speculative/stream.rs @@ -19,10 +19,18 @@ use log::debug; use sha2::Digest; use std::collections::HashSet; use std::sync::{Arc, Condvar, Mutex}; +use std::time::{Duration, Instant}; use tonic::{Status, Streaming}; pub(crate) const MAX_SPECULATIVE_BATCH_HEADER_CHUNK_BYTES: usize = 4 * 1024 * 1024; +// Upper bound on how long one stream may wait for header memory held by other +// streams. The budget is shared service-wide, so an unbounded wait lets two +// streams that each hold partial reservations deadlock each other (and starve +// every later stream). Timing out converts that into a retryable +// RESOURCE_EXHAUSTED error that releases the failing stream's reservations. +const SPECULATIVE_HEADER_MEMORY_RESERVE_TIMEOUT: Duration = Duration::from_secs(60); + /// Tracks the peak resident header payload bytes for one speculative batch /// stream. 
Reservations are attached to decoded units and released when those /// units are dropped after execution; this intentionally bounds in-memory @@ -35,6 +43,7 @@ pub(crate) struct SpeculativeHeaderMemoryBudget { #[derive(Debug)] struct SpeculativeHeaderMemoryBudgetInner { max_bytes: usize, + reserve_timeout: Duration, state: Mutex, available: Condvar, } @@ -46,9 +55,14 @@ struct SpeculativeHeaderMemoryBudgetState { impl SpeculativeHeaderMemoryBudget { pub(crate) fn new(max_bytes: usize) -> Self { + Self::new_with_reserve_timeout(max_bytes, SPECULATIVE_HEADER_MEMORY_RESERVE_TIMEOUT) + } + + fn new_with_reserve_timeout(max_bytes: usize, reserve_timeout: Duration) -> Self { Self { inner: Arc::new(SpeculativeHeaderMemoryBudgetInner { max_bytes, + reserve_timeout, state: Mutex::new(SpeculativeHeaderMemoryBudgetState::default()), available: Condvar::new(), }), @@ -87,9 +101,17 @@ impl SpeculativeHeaderMemoryBudget { ))); } + let deadline = Instant::now() + self.inner.reserve_timeout; let mut state = self.inner.state.lock().unwrap(); while state.used_bytes + bytes > self.inner.max_bytes { - state = self.inner.available.wait(state).unwrap(); + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + return Err(Status::resource_exhausted(format!( + "timed out waiting for speculative header memory budget: requested_bytes={} used_bytes={} max_bytes={}", + bytes, state.used_bytes, self.inner.max_bytes + ))); + } + (state, _) = self.inner.available.wait_timeout(state, remaining).unwrap(); } state.used_bytes += bytes; Ok(SpeculativeHeaderMemoryReservation { @@ -693,6 +715,32 @@ mod tests { assert_eq!(budget.used_bytes(), 0); } + #[test] + fn header_memory_reservation_wait_times_out_instead_of_deadlocking() { + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime"); + let budget = super::SpeculativeHeaderMemoryBudget::new_with_reserve_timeout( + 10, + std::time::Duration::from_millis(50), + ); + let held = runtime + 
.block_on(budget.reserve_for_chunk(&header_chunk_msg("unit-0000", vec![0u8; 8]))) + .expect("first reservation"); + + let err = runtime + .block_on(budget.reserve_for_chunk(&header_chunk_msg("unit-0001", vec![0u8; 8]))) + .expect_err("reservation exceeding the budget must time out"); + assert_resource_exhausted_contains( + err, + "timed out waiting for speculative header memory budget", + ); + + // Releasing the held reservation makes the budget usable again. + drop(held); + runtime + .block_on(budget.reserve_for_chunk(&header_chunk_msg("unit-0002", vec![0u8; 8]))) + .expect("reservation after release"); + } + #[test] fn decode_speculative_batch_rejects_too_many_units() { let request = DecodedSpeculativeBatchRequest { From a3b76bc3217cb417a84132a83f26f640ecd1e6ec Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 14:00:39 +0900 Subject: [PATCH 44/48] enclave-api: report missing stored state_id distinctly from mismatch --- modules/enclave-api/src/enclave.rs | 13 ++++++++++++- modules/service/src/speculative/service.rs | 10 +++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 60e6f0a5..f4c42eee 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -244,7 +244,18 @@ pub trait HostStoreTxManager: CommitStoreAccessor { // observed prev_state_id against the stored state_id closes the chain. let state_id_key = store_key::state_id_bytes(client_id, prev_height); let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; - if stored_state_id.as_deref() != Some(prev_state_id) { + // Clients created before state_id tracking have no stored entry at + // prev_height; one serial update_client backfills it. Report that + // case distinctly from a true mismatch so the error is actionable. 
+ let Some(stored_state_id) = stored_state_id else { + return Err(Error::invalid_argument(format!( + "stored speculative base state_id missing: client_id={} height={}-{}; run a serial update_client once to record the state_id before speculative updates", + client_id, + prev_height.revision_number(), + prev_height.revision_height() + ))); + }; + if stored_state_id.as_slice() != prev_state_id { return Err(Error::invalid_argument(format!( "stored speculative base state_id mismatch: client_id={} height={}-{}", client_id, diff --git a/modules/service/src/speculative/service.rs b/modules/service/src/speculative/service.rs index a7985643..a40279a5 100644 --- a/modules/service/src/speculative/service.rs +++ b/modules/service/src/speculative/service.rs @@ -932,12 +932,20 @@ mod tests { assert_eq!(err.kind, SpeculativeBatchFailureKind::BaseStateMismatch); assert_eq!(err.unit_id.as_deref(), Some("unit-0000")); + // A missing stateIds entry (e.g. a client created before state_id + // tracking) must be reported distinctly from a mismatch, with the + // serial-update remedy, so operators can tell the cases apart. 
assert!( err.detail - .contains("stored speculative base state_id mismatch"), + .contains("stored speculative base state_id missing"), "unexpected error detail: {}", err.detail ); + assert!( + err.detail.contains("run a serial update_client once"), + "missing remedy hint in error detail: {}", + err.detail + ); } #[test] From 5b7f330dafb51201ec4c8177cd6a09b7cc75e2b1 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Fri, 12 Jun 2026 14:00:40 +0900 Subject: [PATCH 45/48] service: document speculative base binding scope for non-first units --- modules/service/src/speculative/validation.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/service/src/speculative/validation.rs b/modules/service/src/speculative/validation.rs index dff8119c..ff870541 100644 --- a/modules/service/src/speculative/validation.rs +++ b/modules/service/src/speculative/validation.rs @@ -102,6 +102,15 @@ pub(crate) fn validate_linear_transitions( // state transition. The first unit has no predecessor, but every following unit // must report the previous unit's post state as its own base state before the // batch can be stitched into one canonical write set. +// +// Binding scope: only the first unit's base bytes are pinned byte-for-byte +// against the canonical store (`verify_expected_base_state_in_tx`). Later +// units are bound to their predecessor solely through the canonicalized +// state_id chain, so base fields erased by ELC canonicalization (for example +// `latest_height`) are not byte-compared. A divergent intermediate base from +// the authenticated relayer cannot affect the on-chain proof chain; at worst +// it corrupts this client's stitched host-store cache, which a subsequent +// serial update_client rewrites. 
fn validate_observed_transition_follows( unit_id: &str, previous: Option<&ObservedStateTransition>, From c6fa4cee10868d0c44883351ce1ec5eea93124f8 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Mon, 15 Jun 2026 15:16:40 +0900 Subject: [PATCH 46/48] enclave-api: drop inappropriate bincode checks from speculative base verify The state_id hash check already covers canonical equivalence; the bincode byte-equality checks on client_state / consensus_state are over-strict on encoding-only differences in the supplied Anys. --- modules/enclave-api/src/enclave.rs | 75 ++++++++++++++---------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index f4c42eee..55db1cf1 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -189,43 +189,46 @@ pub trait HostStoreTxManager: CommitStoreAccessor { tx_id: store::TxId, client_id: &str, prev_height: &Height, - client_state: &Any, - consensus_state: &Any, + _client_state: &Any, + _consensus_state: &Any, prev_state_id: Option<&[u8]>, ) -> Result<()> where S: TxAccessor, { - let client_state_key = store_key::client_state_bytes(client_id); - let client_state_value = - bincode::serde::encode_to_vec(client_state, bincode::config::standard()) - .map_err(Error::bincode_encode)?; - let canonical_client_state = - self.use_mut_store(|store| store.tx_get(tx_id, &client_state_key))?; - if canonical_client_state.as_deref() != Some(client_state_value.as_slice()) { - return Err(Error::invalid_argument(format!( - "stored speculative base client_state mismatch: client_id={} height={}-{}", - client_id, - prev_height.revision_number(), - prev_height.revision_height() - ))); - } - - let consensus_state_key = store_key::consensus_state_bytes(client_id, prev_height); - let consensus_state_value = - bincode::serde::encode_to_vec(consensus_state, bincode::config::standard()) - .map_err(Error::bincode_encode)?; - let 
canonical_consensus_state = - self.use_mut_store(|store| store.tx_get(tx_id, &consensus_state_key))?; - if canonical_consensus_state.as_deref() != Some(consensus_state_value.as_slice()) { - return Err(Error::invalid_argument(format!( - "stored speculative base consensus_state mismatch: client_id={} height={}-{}", - client_id, - prev_height.revision_number(), - prev_height.revision_height() - ))); - } - + // Verify the speculative batch's first-unit base anchors at the + // stored canonical state_id. The supplied `_client_state` / + // `_consensus_state` Anys are intentionally not byte-compared here: + // + // - The supplied `prev_state_id` (recorded by the in-enclave light + // client as the first unit's `observed_transition.prev_state_id`) + // is computed by `gen_state_id(canonicalize(client_state), + // canonicalize(consensus_state))`. The same canonicalization + // wrote `stored_state_id` at this height during the previous + // committed update. If the two state_ids agree, the supplied + // base canonicalizes to the same logical state as the stored + // canonical — which is the property a speculative batch requires. + // + // - Byte-comparing the raw Any bytes is over-strict: encoding-only + // differences (e.g. light-client serde round-trip reshuffling + // nested JSON keys, dropping unknown fields, default-Some vs + // omitted-None for optional fields across the relayer-side + // local rebuild vs the enclave-side incremental advance) reject + // a base whose canonicalized form is identical to the stored + // canonical. The state_id check is the canonical-equivalent + // CAS without the encoding noise. + // + // - Value-level divergence (e.g. an L1Config that genuinely + // differs at the same height) flows through canonicalize() into + // state_id and is therefore still caught by the hash check. 
+ // + // - The supplied bytes are still seeded into the speculative + // transaction via `compute_seed_write_set` so the in-enclave + // light client observes exactly the supplied state when it + // processes the first unit's header; the state_id check + // verifies that the resulting prev_state_id agrees with the + // prior committed canonical, which keeps the chain of in-enclave + // updates tight. let prev_state_id = prev_state_id.ok_or_else(|| { Error::invalid_argument(format!( "speculative update_client must provide prev_state_id: client_id={} height={}-{}", @@ -234,14 +237,6 @@ pub trait HostStoreTxManager: CommitStoreAccessor { prev_height.revision_height() )) })?; - // Do not recompute the state ID from the supplied raw Anys here: light - // clients derive state IDs from a canonicalized client state (e.g. - // latest_height/frozen reset), and that canonicalization is - // ELC-specific and only available inside the enclave. The supplied - // base bytes are already pinned to the canonical store by the two - // checks above, and the stored state_id below was written by the - // in-enclave light client for exactly those bytes, so comparing the - // observed prev_state_id against the stored state_id closes the chain. let state_id_key = store_key::state_id_bytes(client_id, prev_height); let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; // Clients created before state_id tracking have no stored entry at From c437e304ee8d97ce94f3e8df00a40579bd4020d5 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Tue, 16 Jun 2026 15:03:47 +0900 Subject: [PATCH 47/48] enclave-api: tighten verify_expected_base_state_in_tx comments Drop over-specific examples from the comment and the operator hint from the missing-entry error; keep the canonical-equivalence rationale. 
--- modules/enclave-api/src/enclave.rs | 49 ++++++++---------------------- 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/modules/enclave-api/src/enclave.rs b/modules/enclave-api/src/enclave.rs index 55db1cf1..02577f98 100644 --- a/modules/enclave-api/src/enclave.rs +++ b/modules/enclave-api/src/enclave.rs @@ -196,39 +196,17 @@ pub trait HostStoreTxManager: CommitStoreAccessor { where S: TxAccessor, { - // Verify the speculative batch's first-unit base anchors at the - // stored canonical state_id. The supplied `_client_state` / - // `_consensus_state` Anys are intentionally not byte-compared here: - // - // - The supplied `prev_state_id` (recorded by the in-enclave light - // client as the first unit's `observed_transition.prev_state_id`) - // is computed by `gen_state_id(canonicalize(client_state), - // canonicalize(consensus_state))`. The same canonicalization - // wrote `stored_state_id` at this height during the previous - // committed update. If the two state_ids agree, the supplied - // base canonicalizes to the same logical state as the stored - // canonical — which is the property a speculative batch requires. - // - // - Byte-comparing the raw Any bytes is over-strict: encoding-only - // differences (e.g. light-client serde round-trip reshuffling - // nested JSON keys, dropping unknown fields, default-Some vs - // omitted-None for optional fields across the relayer-side - // local rebuild vs the enclave-side incremental advance) reject - // a base whose canonicalized form is identical to the stored - // canonical. The state_id check is the canonical-equivalent - // CAS without the encoding noise. - // - // - Value-level divergence (e.g. an L1Config that genuinely - // differs at the same height) flows through canonicalize() into - // state_id and is therefore still caught by the hash check. 
- // - // - The supplied bytes are still seeded into the speculative - // transaction via `compute_seed_write_set` so the in-enclave - // light client observes exactly the supplied state when it - // processes the first unit's header; the state_id check - // verifies that the resulting prev_state_id agrees with the - // prior committed canonical, which keeps the chain of in-enclave - // updates tight. + // The supplied Anys are intentionally not byte-compared. The + // observed `prev_state_id` from the in-enclave light client is + // `gen_state_id(canonicalize(client_state), canonicalize(consensus_state))`, + // and `stored_state_id` was written by the same canonicalization + // at commit time. Comparing state_ids therefore checks canonical + // equivalence and absorbs encoding-only differences in the raw + // Any bytes; value-level divergence at the same height still + // flows through canonicalize() into state_id and is rejected. + // The supplied bytes are still seeded into the speculative + // transaction via `compute_seed_write_set` so the in-enclave + // light client observes exactly the supplied state. let prev_state_id = prev_state_id.ok_or_else(|| { Error::invalid_argument(format!( "speculative update_client must provide prev_state_id: client_id={} height={}-{}", @@ -239,12 +217,9 @@ pub trait HostStoreTxManager: CommitStoreAccessor { })?; let state_id_key = store_key::state_id_bytes(client_id, prev_height); let stored_state_id = self.use_mut_store(|store| store.tx_get(tx_id, &state_id_key))?; - // Clients created before state_id tracking have no stored entry at - // prev_height; one serial update_client backfills it. Report that - // case distinctly from a true mismatch so the error is actionable. 
let Some(stored_state_id) = stored_state_id else { return Err(Error::invalid_argument(format!( - "stored speculative base state_id missing: client_id={} height={}-{}; run a serial update_client once to record the state_id before speculative updates", + "stored speculative base state_id missing: client_id={} height={}-{}", client_id, prev_height.revision_number(), prev_height.revision_height() From c441ed7ca3ce03ec43df19d726fe2d2358c86471 Mon Sep 17 00:00:00 2001 From: Kiyoshi Nakao Date: Wed, 17 Jun 2026 14:59:29 +0900 Subject: [PATCH 48/48] service: raise speculative batch stream idle timeout to 300s The previous 60s budget fires during normal per-unit build pauses on slow upstream provers; raise the cap so transient producer pacing does not close the stream mid-batch. --- modules/service/src/elc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/service/src/elc.rs b/modules/service/src/elc.rs index 232f2301..c9db9223 100644 --- a/modules/service/src/elc.rs +++ b/modules/service/src/elc.rs @@ -22,7 +22,7 @@ use store::transaction::{CommitStore, TxAccessor}; use tokio::time::timeout; use tonic::{Request, Response, Status, Streaming}; -const SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT: Duration = Duration::from_secs(60); +const SPECULATIVE_BATCH_STREAM_IDLE_TIMEOUT: Duration = Duration::from_secs(300); #[tonic::async_trait] impl Msg for ElcService