From 9a3be620952b6ee824161b96f4c6686f4297436b Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Tue, 24 Mar 2026 21:08:30 +0000 Subject: [PATCH 01/32] Implement PickFirst load balancer with optimized subchannel management and stickiness --- grpc/src/client/load_balancing/pick_first.rs | 598 +++++++++++++++++-- 1 file changed, 544 insertions(+), 54 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index f09cc0222..101dac77c 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -22,8 +22,11 @@ * */ +use std::fmt::Debug; use std::sync::Arc; -use std::time::Duration; + +use rand::seq::SliceRandom; +use tonic::metadata::MetadataMap; use crate::client::ConnectivityState; use crate::client::load_balancing::ChannelController; @@ -32,18 +35,30 @@ use crate::client::load_balancing::LbPolicy; use crate::client::load_balancing::LbPolicyBuilder; use crate::client::load_balancing::LbPolicyOptions; use crate::client::load_balancing::LbState; -use crate::client::load_balancing::OneSubchannelPicker; -use crate::client::load_balancing::Subchannel; -use crate::client::load_balancing::SubchannelState; +use crate::client::load_balancing::ParsedJsonLbConfig; +use crate::client::load_balancing::Pick; +use crate::client::load_balancing::PickResult; +use crate::client::load_balancing::Picker; +use crate::client::load_balancing::QueuingPicker; use crate::client::load_balancing::WorkScheduler; +use crate::client::load_balancing::subchannel::Subchannel; +use crate::client::load_balancing::subchannel::SubchannelState; use crate::client::name_resolution::Address; +use crate::client::name_resolution::Endpoint; use crate::client::name_resolution::ResolverUpdate; +use crate::core::RequestHeaders; use crate::rt::GrpcRuntime; pub(crate) static POLICY_NAME: &str = "pick_first"; -#[derive(Debug, Default)] -pub(crate) struct Builder {} +#[derive(Debug, serde::Deserialize, Clone)] +pub(crate) struct PickFirstConfig { + #[serde(rename = "shuffleAddressList")] + pub shuffle_address_list: bool, +} + +#[derive(Debug)] +struct Builder {} impl LbPolicyBuilder for Builder { type LbPolicy = PickFirstPolicy; @@ -51,15 +66,23 @@ impl LbPolicyBuilder for Builder { fn build(&self, options: LbPolicyOptions) -> Self::LbPolicy { PickFirstPolicy { work_scheduler: options.work_scheduler, - subchannel: None, - next_addresses: Vec::default(), runtime: options.runtime, + subchannels: Vec::default(), + selected: None, + current_index: 0, + connectivity_state: ConnectivityState::Connecting, + config: None, } } fn name(&self) -> &'static str { POLICY_NAME } + + fn parse_config(&self, config: &ParsedJsonLbConfig) -> Result, String> { + let config: PickFirstConfig = config.convert_to().map_err(|e| e.to_string())?; + Ok(Some(config)) + } } pub(crate) fn reg() { @@ -69,13 +92,117 @@ pub(crate) fn reg() { #[derive(Debug)] pub(crate) struct PickFirstPolicy { work_scheduler: Arc, - subchannel: Option>, - next_addresses: Vec
, runtime: GrpcRuntime, + subchannels: Vec>, + selected: Option>, + current_index: usize, + connectivity_state: ConnectivityState, + config: Option, +} + +impl PickFirstPolicy { + fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { + self.current_index = 0; + self.selected = None; + if let Some(sc) = self.subchannels.get(0) { + self.connectivity_state = ConnectivityState::Connecting; + sc.connect(); + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Connecting, + picker: Arc::new(QueuingPicker {}), + }); + } else { + // This should have been handled in resolver_update, but just in case. + self.connectivity_state = ConnectivityState::TransientFailure; + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::TransientFailure, + picker: Arc::new(FailingPicker { + error: "no addresses available".to_string(), + }), + }); + } + } + + fn rebuild_subchannels( + &mut self, + new_addresses: Vec
, + channel_controller: &mut dyn ChannelController, + ) { + // Map existing subchannels by address. + let mut existing_map: std::collections::HashMap> = self + .subchannels + .drain(..) + .map(|sc| (sc.address(), sc)) + .collect(); + + // Build the new list, pulling from the map where possible to preserve backoff state. + self.subchannels = new_addresses + .into_iter() + .map(|addr| { + existing_map + .remove(&addr) + .unwrap_or_else(|| channel_controller.new_subchannel(&addr).0) + }) + .collect(); + } + + fn update_config(&mut self, config: Option<&PickFirstConfig>) -> Result<(), String> { + if let Some(lb_config) = config { + self.config = Some(lb_config.clone()); + } + Ok(()) + } + + // Converts the update endpoints to an address list, erroring if empty. + // Pick first doesn't care about the Endpoint attributes; these are dropped. + // If the configuration is set, this list will be shuffled. + fn compile_address( + &mut self, + endpoints: Vec, + channel_controller: &mut dyn ChannelController, + ) -> Result, String> { + let mut addresses = endpoints + .into_iter() + .flat_map(|e| e.addresses) + .collect::>(); + + if addresses.is_empty() { + self.set_transient_failure(channel_controller, "empty address list".to_string())?; + } + + if self + .config + .as_ref() + .map(|c| c.shuffle_address_list) + .unwrap_or(false) + { + let mut rng = rand::rng(); + addresses.shuffle(&mut rng); + } + + Ok(addresses) + } + + // Sets state to TRANSIENT_FAILURE and updates picker with error. + fn set_transient_failure( + &mut self, + channel_controller: &mut dyn ChannelController, + error: String, + ) -> Result<(), String> { + self.connectivity_state = ConnectivityState::TransientFailure; + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::TransientFailure, + picker: Arc::new(FailingPicker { + error: error.clone(), + }), + }); + channel_controller.request_resolution(); + Err(error) + } } impl LbPolicy for PickFirstPolicy { - type LbConfig = (); + type LbConfig = PickFirstConfig; fn resolver_update( &mut self, @@ -83,29 +210,23 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { - let mut addresses = update - .endpoints - .unwrap() - .into_iter() - .next() - .ok_or("no endpoints")? - .addresses; - - let address = addresses.pop().ok_or("no addresses")?; - - let (sc, _state) = channel_controller.new_subchannel(&address); - sc.connect(); - self.subchannel = Some(sc); - - self.next_addresses = addresses; - let work_scheduler = self.work_scheduler.clone(); - let runtime = self.runtime.clone(); - // TODO: Implement Drop that cancels this task. - self.runtime.spawn(Box::pin(async move { - runtime.sleep(Duration::from_millis(200)).await; - work_scheduler.schedule_work(); - })); - // TODO: return a picker that queues RPCs. + self.update_config(config)?; + let new_addresses = self.compile_address( + update.endpoints.map_err(|e| e.to_string())?, + channel_controller, + )?; + + // Stickiness: Check if currently selected subchannel is in the new list. + if let Some(ref selected) = self.selected { + if new_addresses.contains(&selected.address()) { + self.rebuild_subchannels(new_addresses, channel_controller); + return Ok(()); + } + } + + self.rebuild_subchannels(new_addresses, channel_controller); + self.start_connection_pass(channel_controller); + Ok(()) } @@ -115,31 +236,400 @@ impl LbPolicy for PickFirstPolicy { state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { - match state.connectivity_state { - // Assume the update is for our subchannel. - ConnectivityState::Ready => { - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Ready, - picker: Arc::new(OneSubchannelPicker { - sc: self.subchannel.clone().unwrap(), - }), - }); + // 1. If we have a selected subchannel, only care about updates from it. + if let Some(ref selected) = self.selected { + if selected.address() == subchannel.address() { + if state.connectivity_state != ConnectivityState::Ready { + // Lost connection: Go IDLE as per Vanilla design. + self.selected = None; + self.connectivity_state = ConnectivityState::Idle; + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Idle, + picker: Arc::new(IdlePicker { + work_scheduler: self.work_scheduler.clone(), + }), + }); + } + return; + } + } + + // 2. Otherwise, check if this is from the subchannel we are currently attempting. + if let Some(attempting) = self.subchannels.get(self.current_index) { + if attempting.address() == subchannel.address() { + match state.connectivity_state { + ConnectivityState::Ready => { + self.selected = Some(subchannel.clone()); + self.connectivity_state = ConnectivityState::Ready; + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Ready, + picker: Arc::new(OneSubchannelPicker { sc: subchannel }), + }); + } + ConnectivityState::TransientFailure => { + // Move to next address + self.current_index += 1; + if self.current_index < self.subchannels.len() { + let next_sc = &self.subchannels[self.current_index]; + next_sc.connect(); + } else { + // Exhausted: TRANSIENT_FAILURE and request re-resolution. + self.connectivity_state = ConnectivityState::TransientFailure; + let error = state + .last_connection_error + .as_ref() + .map(|e| e.to_string()) + .unwrap_or_else(|| "all addresses failed".to_string()); + + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::TransientFailure, + picker: Arc::new(FailingPicker { error }), + }); + channel_controller.request_resolution(); + } + } + _ => {} + } + } + } + } + + fn work(&mut self, channel_controller: &mut dyn ChannelController) { + if self.connectivity_state == ConnectivityState::Idle { + self.exit_idle(channel_controller); + } + } + + fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { + self.start_connection_pass(channel_controller); + } +} + +#[derive(Debug)] +struct OneSubchannelPicker { + sc: Arc, +} + +impl Picker for OneSubchannelPicker { + fn pick(&self, _: &RequestHeaders) -> PickResult { + PickResult::Pick(Pick { + subchannel: self.sc.clone(), + metadata: MetadataMap::new(), + on_complete: None, + }) + } +} + +#[derive(Debug)] +struct IdlePicker { + work_scheduler: Arc, +} + +impl Picker for IdlePicker { + fn pick(&self, _: &RequestHeaders) -> PickResult { + self.work_scheduler.schedule_work(); + PickResult::Queue + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::client::load_balancing::test_utils::{ + TestChannelController, TestEvent, TestWorkScheduler, + }; + use std::time::Duration; + use tokio::sync::mpsc; + + fn setup() -> ( + mpsc::UnboundedReceiver, + PickFirstPolicy, + Box, + ) { + let (tx, rx) = mpsc::unbounded_channel(); + let work_scheduler = Arc::new(TestWorkScheduler { + tx_events: tx.clone(), + }); + let runtime = crate::rt::default_runtime(); + let policy = PickFirstPolicy { + work_scheduler, + runtime, + subchannels: Vec::new(), + selected: None, + current_index: 0, + connectivity_state: ConnectivityState::Idle, + config: None, + }; + let controller = Box::new(TestChannelController { tx_events: tx }); + (rx, policy, controller) + } + + fn create_endpoints(addrs: Vec<&str>) -> Vec { + addrs + .into_iter() + .map(|a| Endpoint { + addresses: vec![Address { + address: crate::byte_str::ByteStr::from(a.to_string()), + ..Default::default() + }], + ..Default::default() + }) + .collect() + } + + #[tokio::test] + async fn test_pick_first_basic_connection() { + let (mut rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + let update = ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }; + + policy + .resolver_update(update, None, controller.as_mut()) + .unwrap(); + + // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) + rx.recv().await; + rx.recv().await; + rx.recv().await; + rx.recv().await; + + // Simulating READY for addr1 + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Ready, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Should update picker to READY with sc1 + match rx.recv().await.unwrap() { + TestEvent::UpdatePicker(state) => { + assert_eq!(state.connectivity_state, ConnectivityState::Ready); + let res = state.picker.pick(&RequestHeaders::default()); + match res { + PickResult::Pick(pick) => { + assert_eq!(pick.subchannel.address().address.to_string(), "addr1") + } + other => panic!("unexpected pick result {:?}", other), + } } - ConnectivityState::TransientFailure => { - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { - error: state.last_connection_error.clone().unwrap(), - }), - }); + other => panic!("unexpected event {:?}", other), + } + } + + #[tokio::test] + async fn test_pick_first_failover() { + let (mut rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) + rx.recv().await; + rx.recv().await; + rx.recv().await; + rx.recv().await; + + // Simulate addr1 failing + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Should connect to addr2 + match rx.recv().await.unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), + other => panic!("unexpected event {:?}", other), + } + + // Simulate addr2 succeeding + let sc2 = policy.subchannels[1].clone(); + policy.subchannel_update( + sc2, + &SubchannelState { + connectivity_state: ConnectivityState::Ready, + last_connection_error: None, + }, + controller.as_mut(), + ); + + match rx.recv().await.unwrap() { + TestEvent::UpdatePicker(state) => { + assert_eq!(state.connectivity_state, ConnectivityState::Ready) + } + other => panic!("unexpected event {:?}", other), + } + } + + #[tokio::test] + async fn test_pick_first_stickiness() { + let (mut rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) + rx.recv().await; + rx.recv().await; + rx.recv().await; + rx.recv().await; + + // Make addr1 READY + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Ready, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect UpdatePicker(Ready) + match rx.recv().await.unwrap() { + TestEvent::UpdatePicker(state) => { + assert_eq!(state.connectivity_state, ConnectivityState::Ready) } - _ => {} + other => panic!("unexpected event {:?}", other), + } + + // New resolver update including addr1 + let endpoints_new = create_endpoints(vec!["addr2", "addr1", "addr3"]); + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints_new), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Should create subchannel for addr3 (addr1 and addr2 are re-used) + match rx.recv().await.unwrap() { + TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr3"), + other => panic!("unexpected event {:?}", other), + } + + // Should NOT have any more events (no Connect, no UpdatePicker) because it is sticky + tokio::select! { + e = rx.recv() => panic!("unexpected event {:?}", e), + _ = tokio::time::sleep(Duration::from_millis(50)) => {} + } + + assert_eq!( + policy.selected.as_ref().unwrap().address().address.to_string(), + "addr1" + ); + } + + #[tokio::test] + async fn test_pick_first_exhaustion() { + let (mut rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1"]); + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Expect NewSubchannel, Connect, UpdatePicker(Connecting) + rx.recv().await; + rx.recv().await; + rx.recv().await; + + // Simulate addr1 failure + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Should update picker to TransientFailure + match rx.recv().await.unwrap() { + TestEvent::UpdatePicker(state) => assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ), + other => panic!("unexpected event {:?}", other), + } + + // Should request re-resolution + match rx.recv().await.unwrap() { + TestEvent::RequestResolution => {} + other => panic!("unexpected event {:?}", other), } } - fn work(&mut self, channel_controller: &mut dyn ChannelController) {} + #[tokio::test] + async fn test_pick_first_shuffling() { + let (mut _rx, mut policy, mut controller) = setup(); + + // Enable shuffling in config + let config = PickFirstConfig { + shuffle_address_list: true, + }; + + // Provide 10 addresses + let addrs: Vec = (0..100).map(|i| format!("addr{}", i)).collect(); + let endpoints = create_endpoints(addrs.iter().map(|s| s.as_str()).collect()); + + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + Some(&config), + controller.as_mut(), + ) + .unwrap(); + + let resulting_addrs: Vec = policy + .subchannels + .iter() + .map(|sc| sc.address().address.to_string()) + .collect(); - fn exit_idle(&mut self, _channel_controller: &mut dyn ChannelController) { - todo!("implement exit_idle") + // While non-deterministic, the probability of 100 items remaining in exact order is 1/(100!). + // Would need to add a RNG to the options to make this deterministic. + assert_ne!(resulting_addrs, addrs, "Addresses were not shuffled"); } } From aa390424106d05696248da6440d7dab072daa0d0 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 6 Apr 2026 18:31:18 +0000 Subject: [PATCH 02/32] WIP: implementation of PickFirstLB --- grpc/src/client/load_balancing/pick_first.rs | 226 ++++++++++++++----- 1 file changed, 175 insertions(+), 51 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 101dac77c..a50979395 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -47,31 +47,37 @@ use crate::client::name_resolution::Address; use crate::client::name_resolution::Endpoint; use crate::client::name_resolution::ResolverUpdate; use crate::core::RequestHeaders; -use crate::rt::GrpcRuntime; pub(crate) static POLICY_NAME: &str = "pick_first"; +type ShufflerFn = dyn Fn(&mut [Address]) + Send + Sync + 'static; + #[derive(Debug, serde::Deserialize, Clone)] -pub(crate) struct PickFirstConfig { +pub struct PickFirstConfig { #[serde(rename = "shuffleAddressList")] pub shuffle_address_list: bool, } #[derive(Debug)] -struct Builder {} +pub(crate) struct PickFirstBuilder {} -impl LbPolicyBuilder for Builder { +impl LbPolicyBuilder for PickFirstBuilder { type LbPolicy = PickFirstPolicy; fn build(&self, options: LbPolicyOptions) -> Self::LbPolicy { PickFirstPolicy { work_scheduler: options.work_scheduler, - runtime: options.runtime, subchannels: Vec::default(), selected: None, current_index: 0, connectivity_state: ConnectivityState::Connecting, config: None, + last_resolver_error: None, + last_connection_error: None, + shuffler: Arc::new(|addrs| { + let mut rng = rand::rng(); + addrs.shuffle(&mut rng); + }), } } @@ -86,18 +92,37 @@ impl LbPolicyBuilder for Builder { } pub(crate) fn reg() { - super::GLOBAL_LB_REGISTRY.add_builder(Builder {}) + super::GLOBAL_LB_REGISTRY.add_builder(PickFirstBuilder {}) } -#[derive(Debug)] pub(crate) struct PickFirstPolicy { work_scheduler: Arc, - runtime: GrpcRuntime, subchannels: Vec>, selected: Option>, current_index: usize, connectivity_state: ConnectivityState, config: Option, + + // Detailed error tracking inspired by PR #2340 + last_resolver_error: Option, + last_connection_error: Option, + + // Injectable shuffler for deterministic testing + shuffler: Arc, +} + +impl Debug for PickFirstPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PickFirstPolicy") + .field("subchannels", &self.subchannels) + .field("selected", &self.selected) + .field("current_index", &self.current_index) + .field("connectivity_state", &self.connectivity_state) + .field("config", &self.config) + .field("last_resolver_error", &self.last_resolver_error) + .field("last_connection_error", &self.last_connection_error) + .finish() + } } impl PickFirstPolicy { @@ -112,14 +137,12 @@ impl PickFirstPolicy { picker: Arc::new(QueuingPicker {}), }); } else { - // This should have been handled in resolver_update, but just in case. - self.connectivity_state = ConnectivityState::TransientFailure; - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { - error: "no addresses available".to_string(), - }), - }); + let error = self + .last_resolver_error + .clone() + .unwrap_or_else(|| "no addresses available".to_string()); + self.set_transient_failure(channel_controller, error) + .unwrap_or(()); } } @@ -153,21 +176,32 @@ impl PickFirstPolicy { Ok(()) } - // Converts the update endpoints to an address list, erroring if empty. - // Pick first doesn't care about the Endpoint attributes; these are dropped. - // If the configuration is set, this list will be shuffled. + // Converts the update endpoints to an address list. + // Includes de-duplication logic inspired by PR #2340 fn compile_address( &mut self, endpoints: Vec, channel_controller: &mut dyn ChannelController, ) -> Result, String> { - let mut addresses = endpoints - .into_iter() - .flat_map(|e| e.addresses) - .collect::>(); + let mut addresses = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for endpoint in endpoints { + for address in endpoint.addresses { + if seen.insert(address.clone()) { + addresses.push(address); + } + } + } if addresses.is_empty() { - self.set_transient_failure(channel_controller, "empty address list".to_string())?; + let error = self + .last_resolver_error + .clone() + .unwrap_or_else(|| "empty address list".to_string()); + return self + .set_transient_failure(channel_controller, error) + .map(|_| vec![]); } if self @@ -176,8 +210,7 @@ impl PickFirstPolicy { .map(|c| c.shuffle_address_list) .unwrap_or(false) { - let mut rng = rand::rng(); - addresses.shuffle(&mut rng); + (self.shuffler)(&mut addresses); } Ok(addresses) @@ -211,22 +244,33 @@ impl LbPolicy for PickFirstPolicy { channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { self.update_config(config)?; - let new_addresses = self.compile_address( - update.endpoints.map_err(|e| e.to_string())?, - channel_controller, - )?; - // Stickiness: Check if currently selected subchannel is in the new list. - if let Some(ref selected) = self.selected { - if new_addresses.contains(&selected.address()) { + match update.endpoints { + Ok(endpoints) => { + let new_addresses = self.compile_address(endpoints, channel_controller)?; + + // Stickiness: Check if currently selected subchannel is in the new list. + if let Some(ref selected) = self.selected { + if new_addresses.contains(&selected.address()) { + self.rebuild_subchannels(new_addresses, channel_controller); + return Ok(()); + } + } + self.rebuild_subchannels(new_addresses, channel_controller); - return Ok(()); + self.start_connection_pass(channel_controller); + } + Err(e) => { + let error = e.to_string(); + self.last_resolver_error = Some(error.clone()); + if self.subchannels.is_empty() + || self.connectivity_state == ConnectivityState::TransientFailure + { + self.set_transient_failure(channel_controller, error)?; + } } } - self.rebuild_subchannels(new_addresses, channel_controller); - self.start_connection_pass(channel_controller); - Ok(()) } @@ -281,6 +325,7 @@ impl LbPolicy for PickFirstPolicy { .map(|e| e.to_string()) .unwrap_or_else(|| "all addresses failed".to_string()); + self.last_connection_error = Some(error.clone()); channel_controller.update_picker(LbState { connectivity_state: ConnectivityState::TransientFailure, picker: Arc::new(FailingPicker { error }), @@ -351,15 +396,16 @@ mod test { tx_events: tx.clone(), }); let runtime = crate::rt::default_runtime(); - let policy = PickFirstPolicy { + let mut policy = PickFirstBuilder {}.build(LbPolicyOptions { work_scheduler, runtime, - subchannels: Vec::new(), - selected: None, - current_index: 0, - connectivity_state: ConnectivityState::Idle, - config: None, - }; + }); + + // Manual override for deterministic shuffling in tests + policy.shuffler = Arc::new(|addrs| { + addrs.reverse(); // Deterministic "shuffle" + }); + let controller = Box::new(TestChannelController { tx_events: tx }); (rx, policy, controller) } @@ -599,7 +645,7 @@ mod test { } #[tokio::test] - async fn test_pick_first_shuffling() { + async fn test_pick_first_shuffling_deterministic() { let (mut _rx, mut policy, mut controller) = setup(); // Enable shuffling in config @@ -607,9 +653,9 @@ mod test { shuffle_address_list: true, }; - // Provide 10 addresses - let addrs: Vec = (0..100).map(|i| format!("addr{}", i)).collect(); - let endpoints = create_endpoints(addrs.iter().map(|s| s.as_str()).collect()); + // Provide addresses in order + let addrs = vec!["addr1", "addr2", "addr3"]; + let endpoints = create_endpoints(addrs.clone()); policy .resolver_update( @@ -628,8 +674,86 @@ mod test { .map(|sc| sc.address().address.to_string()) .collect(); - // While non-deterministic, the probability of 100 items remaining in exact order is 1/(100!). - // Would need to add a RNG to the options to make this deterministic. - assert_ne!(resulting_addrs, addrs, "Addresses were not shuffled"); + // Our mock shuffler reverses the list + let mut expected = addrs.clone(); + expected.reverse(); + assert_eq!(resulting_addrs, expected, "Deterministic shuffling failed"); + } + + #[tokio::test] + async fn test_pick_first_duplicate_de_duplication() { + let (mut rx, mut policy, mut controller) = setup(); + + // Create endpoints with duplicates + let endpoints = vec![ + Endpoint { + addresses: vec![ + Address { + address: "addr1".to_string().into(), + ..Default::default() + }, + Address { + address: "addr1".to_string().into(), + ..Default::default() + }, + ], + ..Default::default() + }, + Endpoint { + addresses: vec![ + Address { + address: "addr2".to_string().into(), + ..Default::default() + }, + Address { + address: "addr1".to_string().into(), + ..Default::default() + }, + ], + ..Default::default() + }, + ]; + + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Should only create subchannels for addr1 and addr2 (2 unique addresses) + rx.recv().await; // NewSubchannel(addr1) + rx.recv().await; // NewSubchannel(addr2) + + // Verify no 3rd subchannel was created + tokio::select! { + e = rx.recv() => match e.unwrap() { + TestEvent::NewSubchannel(_) => panic!("Duplicate subchannel created"), + _ => {} // Connect and UpdatePicker are expected + }, + _ = tokio::time::sleep(Duration::from_millis(50)) => {} + } + + assert_eq!(policy.subchannels.len(), 2, "De-duplication failed"); + } + + #[tokio::test] + async fn test_pick_first_config_parsing() { + let builder = PickFirstBuilder {}; + + // Test valid config + let json = r#"{"shuffleAddressList": true}"#; + let parsed = ParsedJsonLbConfig::new(json).unwrap(); + let config = builder.parse_config(&parsed).unwrap().unwrap(); + assert!(config.shuffle_address_list); + + // Test invalid JSON type + let json_invalid = r#"{"shuffleAddressList": "not-a-bool"}"#; + let parsed_invalid = ParsedJsonLbConfig::new(json_invalid).unwrap(); + assert!(builder.parse_config(&parsed_invalid).is_err()); } } From db800a79e28e154bceb709cf47ab2f96f153d200 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 6 Apr 2026 20:14:49 +0000 Subject: [PATCH 03/32] Implement PickFirst LB with optimized subchannel management, stickiness, and updated sync testing framework --- grpc/src/client/load_balancing/pick_first.rs | 139 +++++++++---------- 1 file changed, 62 insertions(+), 77 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index a50979395..43b4f4485 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -47,6 +47,7 @@ use crate::client::name_resolution::Address; use crate::client::name_resolution::Endpoint; use crate::client::name_resolution::ResolverUpdate; use crate::core::RequestHeaders; +use crate::rt::GrpcRuntime; pub(crate) static POLICY_NAME: &str = "pick_first"; @@ -67,6 +68,7 @@ impl LbPolicyBuilder for PickFirstBuilder { fn build(&self, options: LbPolicyOptions) -> Self::LbPolicy { PickFirstPolicy { work_scheduler: options.work_scheduler, + runtime: options.runtime, subchannels: Vec::default(), selected: None, current_index: 0, @@ -97,6 +99,7 @@ pub(crate) fn reg() { pub(crate) struct PickFirstPolicy { work_scheduler: Arc, + runtime: GrpcRuntime, subchannels: Vec>, selected: Option>, current_index: usize, @@ -368,6 +371,7 @@ impl Picker for OneSubchannelPicker { #[derive(Debug)] struct IdlePicker { work_scheduler: Arc, + // TODO: work_scheduler was added to match rounding_robin's pattern on master. } impl Picker for IdlePicker { @@ -383,15 +387,15 @@ mod test { use crate::client::load_balancing::test_utils::{ TestChannelController, TestEvent, TestWorkScheduler, }; + use std::sync::mpsc; use std::time::Duration; - use tokio::sync::mpsc; fn setup() -> ( - mpsc::UnboundedReceiver, + mpsc::Receiver, PickFirstPolicy, Box, ) { - let (tx, rx) = mpsc::unbounded_channel(); + let (tx, rx) = mpsc::channel(); let work_scheduler = Arc::new(TestWorkScheduler { tx_events: tx.clone(), }); @@ -401,9 +405,9 @@ mod test { runtime, }); - // Manual override for deterministic shuffling in tests + // Deterministic shuffling for tests: policy.shuffler = Arc::new(|addrs| { - addrs.reverse(); // Deterministic "shuffle" + addrs.reverse(); }); let controller = Box::new(TestChannelController { tx_events: tx }); @@ -423,9 +427,9 @@ mod test { .collect() } - #[tokio::test] - async fn test_pick_first_basic_connection() { - let (mut rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_basic_connection() { + let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); let update = ResolverUpdate { endpoints: Ok(endpoints), @@ -437,10 +441,10 @@ mod test { .unwrap(); // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().await; - rx.recv().await; - rx.recv().await; - rx.recv().await; + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); // Simulating READY for addr1 let sc1 = policy.subchannels[0].clone(); @@ -454,7 +458,7 @@ mod test { ); // Should update picker to READY with sc1 - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!(state.connectivity_state, ConnectivityState::Ready); let res = state.picker.pick(&RequestHeaders::default()); @@ -469,9 +473,9 @@ mod test { } } - #[tokio::test] - async fn test_pick_first_failover() { - let (mut rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_failover() { + let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); policy .resolver_update( @@ -485,10 +489,10 @@ mod test { .unwrap(); // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().await; - rx.recv().await; - rx.recv().await; - rx.recv().await; + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); // Simulate addr1 failing let sc1 = policy.subchannels[0].clone(); @@ -502,7 +506,7 @@ mod test { ); // Should connect to addr2 - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } @@ -518,7 +522,7 @@ mod test { controller.as_mut(), ); - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!(state.connectivity_state, ConnectivityState::Ready) } @@ -526,9 +530,9 @@ mod test { } } - #[tokio::test] - async fn test_pick_first_stickiness() { - let (mut rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_stickiness() { + let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); policy .resolver_update( @@ -542,10 +546,10 @@ mod test { .unwrap(); // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().await; - rx.recv().await; - rx.recv().await; - rx.recv().await; + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); // Make addr1 READY let sc1 = policy.subchannels[0].clone(); @@ -559,7 +563,7 @@ mod test { ); // Expect UpdatePicker(Ready) - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!(state.connectivity_state, ConnectivityState::Ready) } @@ -580,16 +584,14 @@ mod test { .unwrap(); // Should create subchannel for addr3 (addr1 and addr2 are re-used) - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr3"), other => panic!("unexpected event {:?}", other), } // Should NOT have any more events (no Connect, no UpdatePicker) because it is sticky - tokio::select! { - e = rx.recv() => panic!("unexpected event {:?}", e), - _ = tokio::time::sleep(Duration::from_millis(50)) => {} - } + std::thread::sleep(Duration::from_millis(50)); + assert!(rx.try_recv().is_err(), "unexpected event"); assert_eq!( policy.selected.as_ref().unwrap().address().address.to_string(), @@ -597,9 +599,9 @@ mod test { ); } - #[tokio::test] - async fn test_pick_first_exhaustion() { - let (mut rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_exhaustion() { + let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1"]); policy .resolver_update( @@ -613,9 +615,9 @@ mod test { .unwrap(); // Expect NewSubchannel, Connect, UpdatePicker(Connecting) - rx.recv().await; - rx.recv().await; - rx.recv().await; + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); // Simulate addr1 failure let sc1 = policy.subchannels[0].clone(); @@ -629,7 +631,7 @@ mod test { ); // Should update picker to TransientFailure - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure @@ -638,15 +640,15 @@ mod test { } // Should request re-resolution - match rx.recv().await.unwrap() { + match rx.recv().unwrap() { TestEvent::RequestResolution => {} other => panic!("unexpected event {:?}", other), } } - #[tokio::test] - async fn test_pick_first_shuffling_deterministic() { - let (mut _rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_shuffling_deterministic() { + let (_rx, mut policy, mut controller) = setup(); // Enable shuffling in config let config = PickFirstConfig { @@ -680,20 +682,20 @@ mod test { assert_eq!(resulting_addrs, expected, "Deterministic shuffling failed"); } - #[tokio::test] - async fn test_pick_first_duplicate_de_duplication() { - let (mut rx, mut policy, mut controller) = setup(); + #[test] + fn test_pick_first_duplicate_de_duplication() { + let (rx, mut policy, mut controller) = setup(); // Create endpoints with duplicates let endpoints = vec![ Endpoint { addresses: vec![ Address { - address: "addr1".to_string().into(), + address: crate::byte_str::ByteStr::from("addr1".to_string()), ..Default::default() }, Address { - address: "addr1".to_string().into(), + address: crate::byte_str::ByteStr::from("addr1".to_string()), ..Default::default() }, ], @@ -702,11 +704,11 @@ mod test { Endpoint { addresses: vec![ Address { - address: "addr2".to_string().into(), + address: crate::byte_str::ByteStr::from("addr2".to_string()), ..Default::default() }, Address { - address: "addr1".to_string().into(), + address: crate::byte_str::ByteStr::from("addr1".to_string()), ..Default::default() }, ], @@ -726,34 +728,17 @@ mod test { .unwrap(); // Should only create subchannels for addr1 and addr2 (2 unique addresses) - rx.recv().await; // NewSubchannel(addr1) - rx.recv().await; // NewSubchannel(addr2) + rx.recv().unwrap(); // NewSubchannel(addr1) + rx.recv().unwrap(); // NewSubchannel(addr2) // Verify no 3rd subchannel was created - tokio::select! { - e = rx.recv() => match e.unwrap() { - TestEvent::NewSubchannel(_) => panic!("Duplicate subchannel created"), - _ => {} // Connect and UpdatePicker are expected - }, - _ = tokio::time::sleep(Duration::from_millis(50)) => {} + std::thread::sleep(Duration::from_millis(50)); + while let Ok(event) = rx.try_recv() { + if let TestEvent::NewSubchannel(_) = event { + panic!("Duplicate subchannel created"); + } } assert_eq!(policy.subchannels.len(), 2, "De-duplication failed"); } - - #[tokio::test] - async fn test_pick_first_config_parsing() { - let builder = PickFirstBuilder {}; - - // Test valid config - let json = r#"{"shuffleAddressList": true}"#; - let parsed = ParsedJsonLbConfig::new(json).unwrap(); - let config = builder.parse_config(&parsed).unwrap().unwrap(); - assert!(config.shuffle_address_list); - - // Test invalid JSON type - let json_invalid = r#"{"shuffleAddressList": "not-a-bool"}"#; - let parsed_invalid = ParsedJsonLbConfig::new(json_invalid).unwrap(); - assert!(builder.parse_config(&parsed_invalid).is_err()); - } } From 19b690dd2d7647d63d4c52d79bcdf1f85efc6bec Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 6 Apr 2026 22:23:15 +0000 Subject: [PATCH 04/32] Implement PickFirst LB with optimized subchannel management and gRFC A61 endpoint handling This commit implements the PickFirst load balancer policy for Tonic gRPC, focusing on: - Efficient subchannel management with backoff preservation. - "Stickiness" support: continuing to use an existing Ready subchannel if it remains in resolver updates. - Compliance with gRFC A61: endpoints are now shuffled before being flattened into an address list, ensuring multiple addresses for a single endpoint (e.g., IPv4/IPv6) stay together. - Clean state reset: subchannels and selected state are now cleared when receiving an empty address list. - Alignment with the updated synchronous testing framework in master. Includes comprehensive test coverage for basic connection, failover, stickiness, exhaustion, deterministic endpoint shuffling, de-duplication, and empty updates. --- grpc/src/client/load_balancing/pick_first.rs | 197 +++++++++++++------ 1 file changed, 142 insertions(+), 55 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 43b4f4485..2e5da9183 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -22,6 +22,8 @@ * */ +use std::collections::HashMap; +use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; @@ -51,7 +53,7 @@ use crate::rt::GrpcRuntime; pub(crate) static POLICY_NAME: &str = "pick_first"; -type ShufflerFn = dyn Fn(&mut [Address]) + Send + Sync + 'static; +type ShufflerFn = dyn Fn(&mut [Endpoint]) + Send + Sync + 'static; #[derive(Debug, serde::Deserialize, Clone)] pub struct PickFirstConfig { @@ -73,12 +75,11 @@ impl LbPolicyBuilder for PickFirstBuilder { selected: None, current_index: 0, connectivity_state: ConnectivityState::Connecting, - config: None, last_resolver_error: None, last_connection_error: None, - shuffler: Arc::new(|addrs| { + shuffler: Arc::new(|endpoints| { let mut rng = rand::rng(); - addrs.shuffle(&mut rng); + endpoints.shuffle(&mut rng); }), } } @@ -104,7 +105,6 @@ pub(crate) struct PickFirstPolicy { selected: Option>, current_index: usize, connectivity_state: ConnectivityState, - config: Option, // Detailed error tracking inspired by PR #2340 last_resolver_error: Option, @@ -121,7 +121,6 @@ impl Debug for PickFirstPolicy { .field("selected", &self.selected) .field("current_index", &self.current_index) .field("connectivity_state", &self.connectivity_state) - .field("config", &self.config) .field("last_resolver_error", &self.last_resolver_error) .field("last_connection_error", &self.last_connection_error) .finish() @@ -132,21 +131,14 @@ impl PickFirstPolicy { fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { self.current_index = 0; self.selected = None; - if let Some(sc) = self.subchannels.get(0) { - self.connectivity_state = ConnectivityState::Connecting; - sc.connect(); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Connecting, - picker: Arc::new(QueuingPicker {}), - }); - } else { - let error = self - .last_resolver_error - .clone() - .unwrap_or_else(|| "no addresses available".to_string()); - self.set_transient_failure(channel_controller, error) - .unwrap_or(()); - } + let sc = self.subchannels.get(self.current_index).unwrap(); // We are guaranteed to have at least one subchannel if we are in this method, and should panic if this is not true. + + self.connectivity_state = ConnectivityState::Connecting; + sc.connect(); + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Connecting, + picker: Arc::new(QueuingPicker {}), + }); } fn rebuild_subchannels( @@ -155,7 +147,7 @@ impl PickFirstPolicy { channel_controller: &mut dyn ChannelController, ) { // Map existing subchannels by address. - let mut existing_map: std::collections::HashMap> = self + let mut existing_map: HashMap> = self .subchannels .drain(..) .map(|sc| (sc.address(), sc)) @@ -172,22 +164,26 @@ impl PickFirstPolicy { .collect(); } - fn update_config(&mut self, config: Option<&PickFirstConfig>) -> Result<(), String> { - if let Some(lb_config) = config { - self.config = Some(lb_config.clone()); - } - Ok(()) - } - // Converts the update endpoints to an address list. - // Includes de-duplication logic inspired by PR #2340 + // Shuffles endpoints (if enabled) before flattening and de-duplication. fn compile_address( &mut self, endpoints: Vec, + config: Option<&PickFirstConfig>, channel_controller: &mut dyn ChannelController, ) -> Result, String> { + let mut endpoints = endpoints; + + if config + .as_ref() + .map(|c| c.shuffle_address_list) + .unwrap_or(false) + { + (self.shuffler)(&mut endpoints); + } + let mut addresses = Vec::new(); - let mut seen = std::collections::HashSet::new(); + let mut seen = HashSet::new(); for endpoint in endpoints { for address in endpoint.addresses { @@ -197,7 +193,10 @@ impl PickFirstPolicy { } } + // Empty address list is an error, but also clears previous subchannels. if addresses.is_empty() { + self.subchannels.clear(); + self.selected = None; let error = self .last_resolver_error .clone() @@ -207,15 +206,6 @@ impl PickFirstPolicy { .map(|_| vec![]); } - if self - .config - .as_ref() - .map(|c| c.shuffle_address_list) - .unwrap_or(false) - { - (self.shuffler)(&mut addresses); - } - Ok(addresses) } @@ -246,11 +236,9 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { - self.update_config(config)?; - match update.endpoints { Ok(endpoints) => { - let new_addresses = self.compile_address(endpoints, channel_controller)?; + let new_addresses = self.compile_address(endpoints, config, channel_controller)?; // Stickiness: Check if currently selected subchannel is in the new list. if let Some(ref selected) = self.selected { @@ -371,7 +359,6 @@ impl Picker for OneSubchannelPicker { #[derive(Debug)] struct IdlePicker { work_scheduler: Arc, - // TODO: work_scheduler was added to match rounding_robin's pattern on master. } impl Picker for IdlePicker { @@ -405,9 +392,9 @@ mod test { runtime, }); - // Deterministic shuffling for tests: - policy.shuffler = Arc::new(|addrs| { - addrs.reverse(); + // Deterministic shuffling for tests: reverse the endpoints + policy.shuffler = Arc::new(|endpoints| { + endpoints.reverse(); }); let controller = Box::new(TestChannelController { tx_events: tx }); @@ -594,7 +581,13 @@ mod test { assert!(rx.try_recv().is_err(), "unexpected event"); assert_eq!( - policy.selected.as_ref().unwrap().address().address.to_string(), + policy + .selected + .as_ref() + .unwrap() + .address() + .address + .to_string(), "addr1" ); } @@ -655,9 +648,35 @@ mod test { shuffle_address_list: true, }; - // Provide addresses in order - let addrs = vec!["addr1", "addr2", "addr3"]; - let endpoints = create_endpoints(addrs.clone()); + // Provide two endpoints, each with two addresses + let endpoints = vec![ + Endpoint { + addresses: vec![ + Address { + address: crate::byte_str::ByteStr::from("addr1a".to_string()), + ..Default::default() + }, + Address { + address: crate::byte_str::ByteStr::from("addr1b".to_string()), + ..Default::default() + }, + ], + ..Default::default() + }, + Endpoint { + addresses: vec![ + Address { + address: crate::byte_str::ByteStr::from("addr2a".to_string()), + ..Default::default() + }, + Address { + address: crate::byte_str::ByteStr::from("addr2b".to_string()), + ..Default::default() + }, + ], + ..Default::default() + }, + ]; policy .resolver_update( @@ -676,10 +695,14 @@ mod test { .map(|sc| sc.address().address.to_string()) .collect(); - // Our mock shuffler reverses the list - let mut expected = addrs.clone(); - expected.reverse(); - assert_eq!(resulting_addrs, expected, "Deterministic shuffling failed"); + // Our mock shuffler reverses the list of endpoints. + // Original: [EP1, EP2] -> Reversed: [EP2, EP1] + // Flattened: [addr2a, addr2b, addr1a, addr1b] + let expected = vec!["addr2a", "addr2b", "addr1a", "addr1b"]; + assert_eq!( + resulting_addrs, expected, + "Deterministic shuffling of endpoints failed" + ); } #[test] @@ -741,4 +764,68 @@ mod test { assert_eq!(policy.subchannels.len(), 2, "De-duplication failed"); } + + #[test] + fn test_pick_first_empty_update_clears_state() { + let (rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + + // Initial update with addresses + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + assert_eq!(policy.subchannels.len(), 2); + + // Make addr1 READY so it becomes selected + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1, + &SubchannelState { + connectivity_state: ConnectivityState::Ready, + last_connection_error: None, + }, + controller.as_mut(), + ); + assert!(policy.selected.is_some()); + + // Drain events (NewSubchannel x2, Connect, UpdatePicker x2) + while rx.try_recv().is_ok() {} + + // Send empty update + let res = policy.resolver_update( + ResolverUpdate { + endpoints: Ok(vec![]), + ..Default::default() + }, + None, + controller.as_mut(), + ); + + assert!(res.is_err()); + assert_eq!(policy.subchannels.len(), 0); + assert!(policy.selected.is_none()); + + // Should have updated picker to TransientFailure and requested resolution + match rx.recv().unwrap() { + TestEvent::UpdatePicker(state) => { + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + } + other => panic!("unexpected event {:?}", other), + } + match rx.recv().unwrap() { + TestEvent::RequestResolution => {} + other => panic!("unexpected event {:?}", other), + } + } } From 3115847d6d03583e0e58b5ea839a80e05e032367 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 20 Apr 2026 22:05:04 +0000 Subject: [PATCH 05/32] grpc: implement PickFirst Happy Eyeballs address interleaving and proactive failover This change enhances the PickFirst load balancing policy to better support gRFC A61 (Happy Eyeballs) and improve connection establishment latency. Key changes: - Implement IPv6/IPv4 address interleaving in `compile_address` to ensure subsequent connection attempts alternate between protocol families. - Introduce a `subchannel_states` cache in `PickFirstPolicy` to track the connectivity status of managed subchannels. - Refactor connection logic to use a `frontier_index` and proactively skip subchannels known to be in `TransientFailure` (e.g., during backoff). - Update `advance_frontier` to safely maintain the index within the bounds of the address list, ensuring the policy remains reactive to recovery. - Add deterministic unit tests for shuffling and interleaving logic. --- grpc/src/client/load_balancing/pick_first.rs | 256 +++++++++++++------ 1 file changed, 178 insertions(+), 78 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 2e5da9183..ea508f3f2 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -71,16 +71,14 @@ impl LbPolicyBuilder for PickFirstBuilder { PickFirstPolicy { work_scheduler: options.work_scheduler, runtime: options.runtime, + connectivity_state: ConnectivityState::Connecting, subchannels: Vec::default(), + subchannel_states: HashMap::default(), selected: None, - current_index: 0, - connectivity_state: ConnectivityState::Connecting, + frontier_index: 0, last_resolver_error: None, last_connection_error: None, - shuffler: Arc::new(|endpoints| { - let mut rng = rand::rng(); - endpoints.shuffle(&mut rng); - }), + shuffler: build_shuffler(), } } @@ -101,10 +99,13 @@ pub(crate) fn reg() { pub(crate) struct PickFirstPolicy { work_scheduler: Arc, runtime: GrpcRuntime, + connectivity_state: ConnectivityState, + + // Subchannel information subchannels: Vec>, + subchannel_states: HashMap, // Cached states for all subchannels by address selected: Option>, - current_index: usize, - connectivity_state: ConnectivityState, + frontier_index: usize, // Detailed error tracking inspired by PR #2340 last_resolver_error: Option, @@ -119,7 +120,7 @@ impl Debug for PickFirstPolicy { f.debug_struct("PickFirstPolicy") .field("subchannels", &self.subchannels) .field("selected", &self.selected) - .field("current_index", &self.current_index) + .field("frontier_index", &self.frontier_index) .field("connectivity_state", &self.connectivity_state) .field("last_resolver_error", &self.last_resolver_error) .field("last_connection_error", &self.last_connection_error) @@ -128,40 +129,92 @@ impl Debug for PickFirstPolicy { } impl PickFirstPolicy { - fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { - self.current_index = 0; - self.selected = None; - let sc = self.subchannels.get(self.current_index).unwrap(); // We are guaranteed to have at least one subchannel if we are in this method, and should panic if this is not true. - - self.connectivity_state = ConnectivityState::Connecting; - sc.connect(); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Connecting, - picker: Arc::new(QueuingPicker {}), - }); - } - fn rebuild_subchannels( &mut self, new_addresses: Vec
, channel_controller: &mut dyn ChannelController, ) { // Map existing subchannels by address. - let mut existing_map: HashMap> = self + let mut existing_subchannels: HashMap> = self .subchannels .drain(..) .map(|sc| (sc.address(), sc)) .collect(); - // Build the new list, pulling from the map where possible to preserve backoff state. + let mut new_states = HashMap::new(); + self.subchannels = new_addresses .into_iter() .map(|addr| { - existing_map - .remove(&addr) - .unwrap_or_else(|| channel_controller.new_subchannel(&addr).0) + let subchannel = if let Some(sc) = existing_subchannels.remove(&addr) { + sc // Reuse existing subchannel + } else { + // New subchannel for new address + let (sc, state) = channel_controller.new_subchannel(&addr); + self.subchannel_states.insert(addr.clone(), state); + sc + }; + + let state = self.subchannel_states.get(&addr).unwrap().clone(); + new_states.insert(addr.clone(), state); + subchannel }) .collect(); + + self.subchannel_states = new_states; // Prune old addresses + } + + fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { + // Starting a connection pass clears the selected subchannel. + self.selected = None; + + // If there is a viable subchannel at the frontier, connect to it and update picker to CONNECTING. + if let Some(sc) = self.advance_frontier(true) { + let sc = sc.clone(); // Clone to avoid borrow issues + self.connectivity_state = ConnectivityState::Connecting; + sc.connect(); + + // TODO: Implement connection delay timer (Happy Eyeballs) + + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Connecting, + picker: Arc::new(QueuingPicker {}), + }); + } else { + // Otherwise all addresses are in transient failure: update picker and request re-resolution. + let error = self + .last_connection_error + .clone() + .unwrap_or_else(|| "all addresses in transient failure".to_string()); + + // This transition triggers a FailingPicker and requests re-resolution. + _ = self.set_transient_failure(channel_controller, error); + } + } + + /// Advances the frontier to the next non-TransientFailure subchannel and returns it. + /// If `reset` is true, starts the scan from index 0. + // The frontier is the latest index in which connectivity has been attempted. + fn advance_frontier(&mut self, reset: bool) -> Option<&Arc> { + if reset { + self.frontier_index = 0; + } + + while self.frontier_index < self.subchannels.len() { + let sc = &self.subchannels[self.frontier_index]; + let addr = sc.address(); + let state = self + .subchannel_states + .get(&addr) + .map(|s| s.connectivity_state); + + if state == Some(ConnectivityState::TransientFailure) { + self.frontier_index += 1; + } else { + return Some(sc); + } + } + None } // Converts the update endpoints to an address list. @@ -174,27 +227,51 @@ impl PickFirstPolicy { ) -> Result, String> { let mut endpoints = endpoints; - if config - .as_ref() - .map(|c| c.shuffle_address_list) - .unwrap_or(false) - { + // Shuffle endpoints if enabled. + if config.map_or(false, |c| c.shuffle_address_list) { (self.shuffler)(&mut endpoints); } - let mut addresses = Vec::new(); + // Flatten and de-duplicate unique addresses in order. let mut seen = HashSet::new(); + let unique_addresses: Vec
= endpoints + .into_iter() + .flat_map(|ep| ep.addresses) + .filter(|addr| seen.insert(addr.clone())) + .collect(); - for endpoint in endpoints { - for address in endpoint.addresses { - if seen.insert(address.clone()) { - addresses.push(address); + // Partition by family (Basic IPv6 detection via colon). + let (ipv6, ipv4): (Vec
, Vec
) = unique_addresses + .into_iter() + .partition(|addr| addr.address.contains(':')); + + // Interleave the two lists. + let mut interleaved = Vec::with_capacity(ipv6.len() + ipv4.len()); + let mut v6_iter = ipv6.into_iter(); + let mut v4_iter = ipv4.into_iter(); + + loop { + match (v6_iter.next(), v4_iter.next()) { + (Some(v6), Some(v4)) => { + interleaved.push(v6); + interleaved.push(v4); + } + (Some(v6), None) => { + interleaved.push(v6); + interleaved.extend(v6_iter); + break; + } + (None, Some(v4)) => { + interleaved.push(v4); + interleaved.extend(v4_iter); + break; } + (None, None) => break, } } - // Empty address list is an error, but also clears previous subchannels. - if addresses.is_empty() { + // Handle empty case. + if interleaved.is_empty() { self.subchannels.clear(); self.selected = None; let error = self @@ -206,10 +283,10 @@ impl PickFirstPolicy { .map(|_| vec![]); } - Ok(addresses) + Ok(interleaved) } - // Sets state to TRANSIENT_FAILURE and updates picker with error. + // Sets state to TRANSIENT_FAILURE and updates picker with error. Triggers a re-resolution request. fn set_transient_failure( &mut self, channel_controller: &mut dyn ChannelController, @@ -225,6 +302,14 @@ impl PickFirstPolicy { channel_controller.request_resolution(); Err(error) } + + // Returns true if the currently selected subchannel's address is still present in the new address list. + fn sticky(&self, new_addresses: &[Address]) -> bool { + self.selected + .as_ref() + .map(|sc| new_addresses.contains(&sc.address())) + .unwrap_or(false) + } } impl LbPolicy for PickFirstPolicy { @@ -239,17 +324,11 @@ impl LbPolicy for PickFirstPolicy { match update.endpoints { Ok(endpoints) => { let new_addresses = self.compile_address(endpoints, config, channel_controller)?; - - // Stickiness: Check if currently selected subchannel is in the new list. - if let Some(ref selected) = self.selected { - if new_addresses.contains(&selected.address()) { - self.rebuild_subchannels(new_addresses, channel_controller); - return Ok(()); - } - } - + let sticky = self.sticky(&new_addresses); self.rebuild_subchannels(new_addresses, channel_controller); - self.start_connection_pass(channel_controller); + if !sticky { + self.start_connection_pass(channel_controller); + } } Err(e) => { let error = e.to_string(); @@ -271,11 +350,15 @@ impl LbPolicy for PickFirstPolicy { state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { - // 1. If we have a selected subchannel, only care about updates from it. + // Update the cache for all updates. + self.subchannel_states + .insert(subchannel.address(), state.clone()); + + // If we have a selected subchannel, only care about updates from it. if let Some(ref selected) = self.selected { if selected.address() == subchannel.address() { if state.connectivity_state != ConnectivityState::Ready { - // Lost connection: Go IDLE as per Vanilla design. + // Lost connection: Go IDLE. self.selected = None; self.connectivity_state = ConnectivityState::Idle; channel_controller.update_picker(LbState { @@ -289,8 +372,8 @@ impl LbPolicy for PickFirstPolicy { } } - // 2. Otherwise, check if this is from the subchannel we are currently attempting. - if let Some(attempting) = self.subchannels.get(self.current_index) { + // Otherwise, check if this is from the subchannel we are currently attempting. + if let Some(attempting) = self.subchannels.get(self.frontier_index) { if attempting.address() == subchannel.address() { match state.connectivity_state { ConnectivityState::Ready => { @@ -302,10 +385,9 @@ impl LbPolicy for PickFirstPolicy { }); } ConnectivityState::TransientFailure => { - // Move to next address - self.current_index += 1; - if self.current_index < self.subchannels.len() { - let next_sc = &self.subchannels[self.current_index]; + // Move to next address via advance_frontier + if let Some(next_sc) = self.advance_frontier(false) { + let next_sc = next_sc.clone(); // Clone to avoid borrow issues next_sc.connect(); } else { // Exhausted: TRANSIENT_FAILURE and request re-resolution. @@ -368,6 +450,13 @@ impl Picker for IdlePicker { } } +fn build_shuffler() -> Arc { + Arc::new(|endpoints| { + let mut rng = rand::rng(); + endpoints.shuffle(&mut rng); + }) +} + #[cfg(test)] mod test { use super::*; @@ -640,7 +729,7 @@ mod test { } #[test] - fn test_pick_first_shuffling_deterministic() { + fn test_pick_first_shuffling_and_interleaving_deterministic() { let (_rx, mut policy, mut controller) = setup(); // Enable shuffling in config @@ -648,32 +737,36 @@ mod test { shuffle_address_list: true, }; - // Provide two endpoints, each with two addresses + // Provide three endpoints: + // EP1: [V6_1, V4_1] + // EP2: [V6_2] + // EP3: [V4_2] let endpoints = vec![ Endpoint { addresses: vec![ Address { - address: crate::byte_str::ByteStr::from("addr1a".to_string()), + address: crate::byte_str::ByteStr::from("::1".to_string()), ..Default::default() }, Address { - address: crate::byte_str::ByteStr::from("addr1b".to_string()), + address: crate::byte_str::ByteStr::from("127.0.0.1".to_string()), ..Default::default() }, ], ..Default::default() }, Endpoint { - addresses: vec![ - Address { - address: crate::byte_str::ByteStr::from("addr2a".to_string()), - ..Default::default() - }, - Address { - address: crate::byte_str::ByteStr::from("addr2b".to_string()), - ..Default::default() - }, - ], + addresses: vec![Address { + address: crate::byte_str::ByteStr::from("::2".to_string()), + ..Default::default() + }], + ..Default::default() + }, + Endpoint { + addresses: vec![Address { + address: crate::byte_str::ByteStr::from("127.0.0.2".to_string()), + ..Default::default() + }], ..Default::default() }, ]; @@ -695,13 +788,20 @@ mod test { .map(|sc| sc.address().address.to_string()) .collect(); - // Our mock shuffler reverses the list of endpoints. - // Original: [EP1, EP2] -> Reversed: [EP2, EP1] - // Flattened: [addr2a, addr2b, addr1a, addr1b] - let expected = vec!["addr2a", "addr2b", "addr1a", "addr1b"]; + // Mock shuffler reverses endpoints: [EP3, EP2, EP1] + // EP3: [127.0.0.2] (V4) + // EP2: [::2] (V6) + // EP1: [::1] (V6), [127.0.0.1] (V4) + // + // Categorized: + // IPv6: [::2, ::1] + // IPv4: [127.0.0.2, 127.0.0.1] + // + // Interleaved: [::2, 127.0.0.2, ::1, 127.0.0.1] + let expected = vec!["::2", "127.0.0.2", "::1", "127.0.0.1"]; assert_eq!( resulting_addrs, expected, - "Deterministic shuffling of endpoints failed" + "Interleaving or shuffling failed" ); } From 98b1b41db6a896a2f645eb79c38c3bd83eaba43f Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 27 Apr 2026 17:34:52 +0000 Subject: [PATCH 06/32] grpc: implement Racing Selection and Timer Cancellation for Happy Eyeballs --- grpc/Cargo.toml | 1 + grpc/src/client/load_balancing/pick_first.rs | 183 ++++++++++++++----- 2 files changed, 136 insertions(+), 48 deletions(-) diff --git a/grpc/Cargo.toml b/grpc/Cargo.toml index 9188537a5..252a1b2d7 100644 --- a/grpc/Cargo.toml +++ b/grpc/Cargo.toml @@ -81,6 +81,7 @@ trait-variant = "0.1.2" url = "2.5.0" [dev-dependencies] +tokio = { version = "1.37.0", features = ["test-util"] } async-stream = "0.3.6" criterion = "0.5" hickory-server = "0.25.2" diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index ea508f3f2..ea933f7c1 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -28,6 +28,7 @@ use std::fmt::Debug; use std::sync::Arc; use rand::seq::SliceRandom; +use std::sync::atomic::{AtomicBool, Ordering}; use tonic::metadata::MetadataMap; use crate::client::ConnectivityState; @@ -79,6 +80,8 @@ impl LbPolicyBuilder for PickFirstBuilder { last_resolver_error: None, last_connection_error: None, shuffler: build_shuffler(), + timer_expired: Arc::new(AtomicBool::new(false)), + timer_handle: None, } } @@ -113,6 +116,10 @@ pub(crate) struct PickFirstPolicy { // Injectable shuffler for deterministic testing shuffler: Arc, + + // Timer state for Happy Eyeballs. Tracks when the last connection attempt was started. + timer_expired: Arc, + timer_handle: Option>, } impl Debug for PickFirstPolicy { @@ -171,10 +178,7 @@ impl PickFirstPolicy { // If there is a viable subchannel at the frontier, connect to it and update picker to CONNECTING. if let Some(sc) = self.advance_frontier(true) { let sc = sc.clone(); // Clone to avoid borrow issues - self.connectivity_state = ConnectivityState::Connecting; - sc.connect(); - - // TODO: Implement connection delay timer (Happy Eyeballs) + self.trigger_subchannel_connection(sc, channel_controller); channel_controller.update_picker(LbState { connectivity_state: ConnectivityState::Connecting, @@ -217,6 +221,34 @@ impl PickFirstPolicy { None } + /// Triggers a connection on the subchannel, and starts the 250ms timer. + /// If no connection succeeds before the timer expires, the frontier will advance to the next subchannel. + fn trigger_subchannel_connection( + &mut self, + sc: Arc, + channel_controller: &mut dyn ChannelController, + ) { + let sc_clone = sc.clone(); + self.connectivity_state = ConnectivityState::Connecting; + sc_clone.connect(); + + // Cancel any existing timer + if let Some(handle) = self.timer_handle.take() { + handle.abort(); + } + + // Start 250ms timer + let timer_expired = self.timer_expired.clone(); + let work_scheduler = self.work_scheduler.clone(); + + let handle = tokio::spawn(async move { + tokio::time::sleep(std::time::Duration::from_millis(250)).await; + timer_expired.store(true, Ordering::SeqCst); + work_scheduler.schedule_work(); + }); + self.timer_handle = Some(handle); + } + // Converts the update endpoints to an address list. // Shuffles endpoints (if enabled) before flattening and de-duplication. fn compile_address( @@ -372,41 +404,56 @@ impl LbPolicy for PickFirstPolicy { } } - // Otherwise, check if this is from the subchannel we are currently attempting. + // Racing for Success: If any subchannel in the current pass reports READY, select it. + if state.connectivity_state == ConnectivityState::Ready { + // Check if this subchannel is part of the current pass (index <= frontier_index) + let is_in_pass = self + .subchannels + .iter() + .take(self.frontier_index + 1) + .any(|sc| sc.address() == subchannel.address()); + + if is_in_pass { + self.selected = Some(subchannel.clone()); + self.connectivity_state = ConnectivityState::Ready; + + // Cancel timer + if let Some(handle) = self.timer_handle.take() { + handle.abort(); + } + + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Ready, + picker: Arc::new(OneSubchannelPicker { sc: subchannel }), + }); + return; + } + } + + // Failover on Failure: Only if the subchannel at the exact frontier_index fails. if let Some(attempting) = self.subchannels.get(self.frontier_index) { if attempting.address() == subchannel.address() { - match state.connectivity_state { - ConnectivityState::Ready => { - self.selected = Some(subchannel.clone()); - self.connectivity_state = ConnectivityState::Ready; + if state.connectivity_state == ConnectivityState::TransientFailure { + // Move to next address via advance_frontier + if let Some(next_sc) = self.advance_frontier(false) { + let next_sc = next_sc.clone(); // Clone to avoid borrow issues + self.trigger_subchannel_connection(next_sc, channel_controller); + } else { + // Exhausted: TRANSIENT_FAILURE and request re-resolution. + self.connectivity_state = ConnectivityState::TransientFailure; + let error = state + .last_connection_error + .as_ref() + .map(|e| e.to_string()) + .unwrap_or_else(|| "all addresses failed".to_string()); + + self.last_connection_error = Some(error.clone()); channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Ready, - picker: Arc::new(OneSubchannelPicker { sc: subchannel }), + connectivity_state: ConnectivityState::TransientFailure, + picker: Arc::new(FailingPicker { error }), }); + channel_controller.request_resolution(); } - ConnectivityState::TransientFailure => { - // Move to next address via advance_frontier - if let Some(next_sc) = self.advance_frontier(false) { - let next_sc = next_sc.clone(); // Clone to avoid borrow issues - next_sc.connect(); - } else { - // Exhausted: TRANSIENT_FAILURE and request re-resolution. - self.connectivity_state = ConnectivityState::TransientFailure; - let error = state - .last_connection_error - .as_ref() - .map(|e| e.to_string()) - .unwrap_or_else(|| "all addresses failed".to_string()); - - self.last_connection_error = Some(error.clone()); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { error }), - }); - channel_controller.request_resolution(); - } - } - _ => {} } } } @@ -415,6 +462,17 @@ impl LbPolicy for PickFirstPolicy { fn work(&mut self, channel_controller: &mut dyn ChannelController) { if self.connectivity_state == ConnectivityState::Idle { self.exit_idle(channel_controller); + } else if self.connectivity_state == ConnectivityState::Connecting { + // Check if timer expired + if self.timer_expired.load(Ordering::SeqCst) { + self.timer_expired.store(false, Ordering::SeqCst); // Reset + + // Advance frontier and trigger next connection + if let Some(next_sc) = self.advance_frontier(false) { + let next_sc = next_sc.clone(); + self.trigger_subchannel_connection(next_sc, channel_controller); + } + } } } @@ -503,8 +561,8 @@ mod test { .collect() } - #[test] - fn test_pick_first_basic_connection() { + #[tokio::test] + async fn test_pick_first_basic_connection() { let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); let update = ResolverUpdate { @@ -549,8 +607,8 @@ mod test { } } - #[test] - fn test_pick_first_failover() { + #[tokio::test] + async fn test_pick_first_failover() { let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); policy @@ -606,8 +664,8 @@ mod test { } } - #[test] - fn test_pick_first_stickiness() { + #[tokio::test] + async fn test_pick_first_stickiness() { let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); policy @@ -681,8 +739,8 @@ mod test { ); } - #[test] - fn test_pick_first_exhaustion() { + #[tokio::test] + async fn test_pick_first_exhaustion() { let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1"]); policy @@ -728,8 +786,8 @@ mod test { } } - #[test] - fn test_pick_first_shuffling_and_interleaving_deterministic() { + #[tokio::test] + async fn test_pick_first_shuffling_and_interleaving_deterministic() { let (_rx, mut policy, mut controller) = setup(); // Enable shuffling in config @@ -805,8 +863,8 @@ mod test { ); } - #[test] - fn test_pick_first_duplicate_de_duplication() { + #[tokio::test] + async fn test_pick_first_duplicate_de_duplication() { let (rx, mut policy, mut controller) = setup(); // Create endpoints with duplicates @@ -865,8 +923,8 @@ mod test { assert_eq!(policy.subchannels.len(), 2, "De-duplication failed"); } - #[test] - fn test_pick_first_empty_update_clears_state() { + #[tokio::test] + async fn test_pick_first_empty_update_clears_state() { let (rx, mut policy, mut controller) = setup(); let endpoints = create_endpoints(vec!["addr1", "addr2"]); @@ -928,4 +986,33 @@ mod test { other => panic!("unexpected event {:?}", other), } } + + #[tokio::test(start_paused = true)] + async fn test_pick_first_timer_advancement() { + let (rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + let update = ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }; + + policy + .resolver_update(update, None, controller.as_mut()) + .unwrap(); + + // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + + // Advance time by 250ms + tokio::time::advance(Duration::from_millis(250)).await; + + // Expect Connect event for addr2 due to timer expiration + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), + other => panic!("unexpected event {:?}", other), + } + } } From c8a7762ac09aaca0135e38fe0cfbaf8408597efc Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 27 Apr 2026 19:28:09 +0000 Subject: [PATCH 07/32] grpc: implement Continuous Retries with Backoff (Steady State) and fix tests This change completes the Happy Eyeballs implementation in the `pick_first` load balancer by: - Implementing the "Steady State" mode for continuous retries after the initial connection pass fails for all addresses. - Adding two new unit tests to verify failover and steady-state behavior under multi-backend scenarios. - Fixing the timer advancement unit test to avoid deadlocks in async test contexts. --- grpc/src/client/load_balancing/pick_first.rs | 248 +++++++++++++++++-- 1 file changed, 226 insertions(+), 22 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index ea933f7c1..478665bfe 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -82,6 +82,7 @@ impl LbPolicyBuilder for PickFirstBuilder { shuffler: build_shuffler(), timer_expired: Arc::new(AtomicBool::new(false)), timer_handle: None, + steady_state: None, } } @@ -104,22 +105,25 @@ pub(crate) struct PickFirstPolicy { runtime: GrpcRuntime, connectivity_state: ConnectivityState, - // Subchannel information + // Subchannel information. subchannels: Vec>, - subchannel_states: HashMap, // Cached states for all subchannels by address + subchannel_states: HashMap, // Cached states for all subchannels by address. selected: Option>, frontier_index: usize, - // Detailed error tracking inspired by PR #2340 + // Detailed error tracking. last_resolver_error: Option, last_connection_error: Option, - // Injectable shuffler for deterministic testing + // Injectable shuffler for deterministic testing. shuffler: Arc, - // Timer state for Happy Eyeballs. Tracks when the last connection attempt was started. + // Timer state tracks when the last connect attempt was started. timer_expired: Arc, timer_handle: Option>, + + // Steady state tracking for continuous retries after pass exhaustion. + steady_state: Option, } impl Debug for PickFirstPolicy { @@ -154,9 +158,9 @@ impl PickFirstPolicy { .into_iter() .map(|addr| { let subchannel = if let Some(sc) = existing_subchannels.remove(&addr) { - sc // Reuse existing subchannel + sc // Reuse existing subchannel. } else { - // New subchannel for new address + // New subchannel for new address. let (sc, state) = channel_controller.new_subchannel(&addr); self.subchannel_states.insert(addr.clone(), state); sc @@ -168,16 +172,17 @@ impl PickFirstPolicy { }) .collect(); - self.subchannel_states = new_states; // Prune old addresses + self.subchannel_states = new_states; // Prune old addresses. } + /// Starts a connection pass through the address list. + // This clears the selected subchannel. fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { - // Starting a connection pass clears the selected subchannel. self.selected = None; // If there is a viable subchannel at the frontier, connect to it and update picker to CONNECTING. if let Some(sc) = self.advance_frontier(true) { - let sc = sc.clone(); // Clone to avoid borrow issues + let sc = sc.clone(); // Clone to avoid borrow issues. self.trigger_subchannel_connection(sc, channel_controller); channel_controller.update_picker(LbState { @@ -202,6 +207,8 @@ impl PickFirstPolicy { fn advance_frontier(&mut self, reset: bool) -> Option<&Arc> { if reset { self.frontier_index = 0; + } else { + self.frontier_index += 1; } while self.frontier_index < self.subchannels.len() { @@ -277,7 +284,7 @@ impl PickFirstPolicy { .into_iter() .partition(|addr| addr.address.contains(':')); - // Interleave the two lists. + // Interleave the two lists so ipv6 and ipv4 addresses are alternated. let mut interleaved = Vec::with_capacity(ipv6.len() + ipv4.len()); let mut v6_iter = ipv6.into_iter(); let mut v4_iter = ipv4.into_iter(); @@ -302,7 +309,7 @@ impl PickFirstPolicy { } } - // Handle empty case. + // If we have no addresses, clear subchannels and set TRANSIENT_FAILURE. if interleaved.is_empty() { self.subchannels.clear(); self.selected = None; @@ -353,6 +360,9 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { + // Reset steady state on new update + self.steady_state = None; + match update.endpoints { Ok(endpoints) => { let new_addresses = self.compile_address(endpoints, config, channel_controller)?; @@ -404,7 +414,21 @@ impl LbPolicy for PickFirstPolicy { } } - // Racing for Success: If any subchannel in the current pass reports READY, select it. + // Steady State Retries: Automatically connect when a subchannel goes IDLE. + if self.steady_state.is_some() && state.connectivity_state == ConnectivityState::Idle { + subchannel.connect(); + } + + // Steady State Failures: Count failures across all subchannels. + if let Some(ref mut ss) = self.steady_state { + if state.connectivity_state == ConnectivityState::TransientFailure { + if ss.record_failure(self.subchannels.len()) { + channel_controller.request_resolution(); + } + } + } + + // If any subchannel in the current pass reports READY, select it. if state.connectivity_state == ConnectivityState::Ready { // Check if this subchannel is part of the current pass (index <= frontier_index) let is_in_pass = self @@ -440,7 +464,6 @@ impl LbPolicy for PickFirstPolicy { self.trigger_subchannel_connection(next_sc, channel_controller); } else { // Exhausted: TRANSIENT_FAILURE and request re-resolution. - self.connectivity_state = ConnectivityState::TransientFailure; let error = state .last_connection_error .as_ref() @@ -448,11 +471,10 @@ impl LbPolicy for PickFirstPolicy { .unwrap_or_else(|| "all addresses failed".to_string()); self.last_connection_error = Some(error.clone()); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { error }), - }); - channel_controller.request_resolution(); + _ = self.set_transient_failure(channel_controller, error); + + // Transition to steady state mode + self.steady_state = Some(SteadyState::new()); } } } @@ -467,7 +489,7 @@ impl LbPolicy for PickFirstPolicy { if self.timer_expired.load(Ordering::SeqCst) { self.timer_expired.store(false, Ordering::SeqCst); // Reset - // Advance frontier and trigger next connection + // Advance frontier and trigger next connection. if let Some(next_sc) = self.advance_frontier(false) { let next_sc = next_sc.clone(); self.trigger_subchannel_connection(next_sc, channel_controller); @@ -515,6 +537,28 @@ fn build_shuffler() -> Arc { }) } +#[derive(Debug)] +struct SteadyState { + /// The number of failures connecting, used to roughly approximate if a re-resolution needs to happen. + failure_count: usize, +} + +impl SteadyState { + fn new() -> Self { + Self { failure_count: 0 } + } + + /// Increments the failure count and returns true if re-resolution is required. + fn record_failure(&mut self, subchannels_len: usize) -> bool { + self.failure_count += 1; + if self.failure_count >= subchannels_len { + self.failure_count = 0; + return true; + } + false + } +} + #[cfg(test)] mod test { use super::*; @@ -1006,13 +1050,173 @@ mod test { rx.recv().unwrap(); rx.recv().unwrap(); - // Advance time by 250ms - tokio::time::advance(Duration::from_millis(250)).await; + // Simulate timer expiration by setting the flag directly! + policy + .timer_expired + .store(true, std::sync::atomic::Ordering::SeqCst); + + // Manually call work() to process the timer expiration! + policy.work(controller.as_mut()); // Expect Connect event for addr2 due to timer expiration + // Loop to check for event without blocking the thread + let mut found = None; + for _ in 0..10 { + match rx.try_recv() { + Ok(event) => { + found = Some(event); + break; + } + Err(std::sync::mpsc::TryRecvError::Empty) => { + // Yield to runtime to allow timer task to run! + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + } + Err(e) => panic!("error recv: {:?}", e), + } + } + + match found { + Some(TestEvent::Connect(addr)) => assert_eq!(addr.address.to_string(), "addr2"), + other => panic!("unexpected result {:?}", other), + } + } + + #[tokio::test] + async fn test_pick_first_steady_state_retries() { + let (rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1"]); + let update = ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }; + + policy + .resolver_update(update, None, controller.as_mut()) + .unwrap(); + + // Expect NewSubchannel, Connect(addr1), UpdatePicker(Connecting) + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + + // Simulate addr1 failure + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect UpdatePicker(TransientFailure) and RequestResolution + rx.recv().unwrap(); + rx.recv().unwrap(); + + // Now we are in steady state! + assert!(policy.steady_state.is_some()); + + // Simulate addr1 transitioning to IDLE (backoff over) + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Idle, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Should automatically call connect() again! + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), + other => panic!("unexpected event {:?}", other), + } + } + + #[tokio::test] + async fn test_pick_first_steady_state_multi_backend() { + let (rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + let update = ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }; + + policy + .resolver_update(update, None, controller.as_mut()) + .unwrap(); + + // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + rx.recv().unwrap(); + + // Simulate addr1 failure + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Should failover to addr2: expect Connect(addr2) match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } + + // Now while addr2 is connecting, simulate addr1 going IDLE (backoff over) + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Idle, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // We should NOT reconnect to addr1 during the first pass! + // Wait a bit to ensure no event is sent + std::thread::sleep(std::time::Duration::from_millis(50)); + assert!(rx.try_recv().is_err(), "unexpected event"); + + // Now fail addr2 to complete first pass + let sc2 = policy.subchannels[1].clone(); + policy.subchannel_update( + sc2.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect UpdatePicker(TransientFailure) and RequestResolution + rx.recv().unwrap(); + rx.recv().unwrap(); + + // Now we are in steady state! + assert!(policy.steady_state.is_some()); + + // Simulate addr1 going IDLE again + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Idle, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Now it SHOULD automatically call connect() again! + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), + other => panic!("unexpected event {:?}", other), + } } } From 798270f6e14bd6f4dd503dc613535afd6bef6416 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 29 Apr 2026 15:17:17 +0000 Subject: [PATCH 08/32] WIP: PickFirstLB implementation details --- grpc/src/client/load_balancing/pick_first.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 478665bfe..5db17d824 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -440,6 +440,9 @@ impl LbPolicy for PickFirstPolicy { if is_in_pass { self.selected = Some(subchannel.clone()); self.connectivity_state = ConnectivityState::Ready; + + // Keep only the successful subchannel in the list + self.subchannels = vec![subchannel.clone()]; // Cancel timer if let Some(handle) = self.timer_handle.take() { @@ -761,7 +764,11 @@ mod test { ) .unwrap(); - // Should create subchannel for addr3 (addr1 and addr2 are re-used) + // Should create subchannel for addr2 (was cleared by cleanup) and addr3 (new) + match rx.recv().unwrap() { + TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr2"), + other => panic!("unexpected event {:?}", other), + } match rx.recv().unwrap() { TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr3"), other => panic!("unexpected event {:?}", other), From 566cb6d63b746b7a630ea277606bfe1993a92ae7 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 29 Apr 2026 15:23:57 +0000 Subject: [PATCH 09/32] Formatting fix --- grpc/src/client/load_balancing/pick_first.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 5db17d824..f62ed86dd 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -440,7 +440,7 @@ impl LbPolicy for PickFirstPolicy { if is_in_pass { self.selected = Some(subchannel.clone()); self.connectivity_state = ConnectivityState::Ready; - + // Keep only the successful subchannel in the list self.subchannels = vec![subchannel.clone()]; From 055e7139145a988b5c6a8c9d074aa6811aa248b9 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Thu, 30 Apr 2026 18:20:32 +0000 Subject: [PATCH 10/32] Replace tokio::spawn with Runtime::spawn in pick_first.rs --- grpc/Cargo.toml | 1 - grpc/src/client/load_balancing/pick_first.rs | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/grpc/Cargo.toml b/grpc/Cargo.toml index 252a1b2d7..9188537a5 100644 --- a/grpc/Cargo.toml +++ b/grpc/Cargo.toml @@ -81,7 +81,6 @@ trait-variant = "0.1.2" url = "2.5.0" [dev-dependencies] -tokio = { version = "1.37.0", features = ["test-util"] } async-stream = "0.3.6" criterion = "0.5" hickory-server = "0.25.2" diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index f62ed86dd..75b325e74 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -50,6 +50,7 @@ use crate::client::name_resolution::Address; use crate::client::name_resolution::Endpoint; use crate::client::name_resolution::ResolverUpdate; use crate::core::RequestHeaders; +use crate::rt::BoxedTaskHandle; use crate::rt::GrpcRuntime; pub(crate) static POLICY_NAME: &str = "pick_first"; @@ -120,7 +121,7 @@ pub(crate) struct PickFirstPolicy { // Timer state tracks when the last connect attempt was started. timer_expired: Arc, - timer_handle: Option>, + timer_handle: Option, // Steady state tracking for continuous retries after pass exhaustion. steady_state: Option, @@ -248,13 +249,14 @@ impl PickFirstPolicy { let timer_expired = self.timer_expired.clone(); let work_scheduler = self.work_scheduler.clone(); - let handle = tokio::spawn(async move { - tokio::time::sleep(std::time::Duration::from_millis(250)).await; + let sleep_fut = self.runtime.sleep(std::time::Duration::from_millis(250)); + let handle = self.runtime.spawn(Box::pin(async move { + sleep_fut.await; timer_expired.store(true, Ordering::SeqCst); work_scheduler.schedule_work(); - }); + })); self.timer_handle = Some(handle); - } + }O // Converts the update endpoints to an address list. // Shuffles endpoints (if enabled) before flattening and de-duplication. From 9cbb1a439b32206725c78cc57380589d5595ccfa Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Mon, 4 May 2026 23:19:15 +0000 Subject: [PATCH 11/32] Partial feedback integration --- grpc/src/client/load_balancing/pick_first.rs | 85 +++++++++++--------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 75b325e74..d28248152 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -153,27 +153,21 @@ impl PickFirstPolicy { .map(|sc| (sc.address(), sc)) .collect(); - let mut new_states = HashMap::new(); - - self.subchannels = new_addresses + let (new_subchannels, new_states): (Vec<_>, HashMap<_, _>) = new_addresses .into_iter() .map(|addr| { - let subchannel = if let Some(sc) = existing_subchannels.remove(&addr) { - sc // Reuse existing subchannel. + if let Some(sc) = existing_subchannels.remove(&addr) { + let state = self.subchannel_states.get(&addr).unwrap().clone(); + (sc, (addr, state)) } else { - // New subchannel for new address. let (sc, state) = channel_controller.new_subchannel(&addr); - self.subchannel_states.insert(addr.clone(), state); - sc - }; - - let state = self.subchannel_states.get(&addr).unwrap().clone(); - new_states.insert(addr.clone(), state); - subchannel + (sc, (addr, state)) + } }) - .collect(); + .unzip(); - self.subchannel_states = new_states; // Prune old addresses. + self.subchannels = new_subchannels; // Prune old addresses. + self.subchannel_states = new_states; // Update subchannel states cache. } /// Starts a connection pass through the address list. @@ -218,19 +212,21 @@ impl PickFirstPolicy { let state = self .subchannel_states .get(&addr) - .map(|s| s.connectivity_state); + .map(|s| s.connectivity_state) + .expect("Expected non-None subchannel state"); - if state == Some(ConnectivityState::TransientFailure) { - self.frontier_index += 1; - } else { - return Some(sc); + match state { + // Push the frontier if sc is in TransientFailure, otherwise return the sc. + ConnectivityState::TransientFailure => self.frontier_index += 1, + _ => return Some(sc), } } None } /// Triggers a connection on the subchannel, and starts the 250ms timer. - /// If no connection succeeds before the timer expires, the frontier will advance to the next subchannel. + /// If no connection succeeds before the timer expires, the frontier will advance to + /// the next subchannel. fn trigger_subchannel_connection( &mut self, sc: Arc, @@ -256,7 +252,7 @@ impl PickFirstPolicy { work_scheduler.schedule_work(); })); self.timer_handle = Some(handle); - }O + } // Converts the update endpoints to an address list. // Shuffles endpoints (if enabled) before flattening and de-duplication. @@ -281,33 +277,42 @@ impl PickFirstPolicy { .filter(|addr| seen.insert(addr.clone())) .collect(); + // Partition out all 'unknown' non-TCP addresses. + // This is to remain consistent with similar behavior in C++ and Java. + let (tcp_addresses, unknown): (Vec
, Vec
) = + unique_addresses.into_iter().partition(|addr| { + addr.network_type == crate::client::name_resolution::TCP_IP_NETWORK_TYPE + }); + // Partition by family (Basic IPv6 detection via colon). - let (ipv6, ipv4): (Vec
, Vec
) = unique_addresses + let (ipv6, ipv4): (Vec
, Vec
) = tcp_addresses .into_iter() .partition(|addr| addr.address.contains(':')); // Interleave the two lists so ipv6 and ipv4 addresses are alternated. - let mut interleaved = Vec::with_capacity(ipv6.len() + ipv4.len()); + let mut interleaved = Vec::with_capacity(ipv6.len() + ipv4.len() + unknown.len()); let mut v6_iter = ipv6.into_iter(); let mut v4_iter = ipv4.into_iter(); + let mut unknown_iter = unknown.into_iter(); loop { - match (v6_iter.next(), v4_iter.next()) { - (Some(v6), Some(v4)) => { - interleaved.push(v6); - interleaved.push(v4); - } - (Some(v6), None) => { - interleaved.push(v6); - interleaved.extend(v6_iter); - break; - } - (None, Some(v4)) => { - interleaved.push(v4); - interleaved.extend(v4_iter); - break; - } - (None, None) => break, + let mut more = false; + + if let Some(v6) = v6_iter.next() { + interleaved.push(v6); + more = true; + } + if let Some(v4) = v4_iter.next() { + interleaved.push(v4); + more = true; + } + if let Some(unknown) = unknown_iter.next() { + interleaved.push(unknown); + more = true; + } + + if !more { + break; } } From 81804736e066fd91aff809a6555272fc7e94e511 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 17:45:49 +0000 Subject: [PATCH 12/32] lb/pick_first: restrict visibility of builder and config --- grpc/src/client/load_balancing/pick_first.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index d28248152..ad5f9d3c1 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -58,13 +58,13 @@ pub(crate) static POLICY_NAME: &str = "pick_first"; type ShufflerFn = dyn Fn(&mut [Endpoint]) + Send + Sync + 'static; #[derive(Debug, serde::Deserialize, Clone)] -pub struct PickFirstConfig { +pub(crate) struct PickFirstConfig { #[serde(rename = "shuffleAddressList")] pub shuffle_address_list: bool, } #[derive(Debug)] -pub(crate) struct PickFirstBuilder {} +struct PickFirstBuilder {} impl LbPolicyBuilder for PickFirstBuilder { type LbPolicy = PickFirstPolicy; From 2954173f0ea82ad746465d5e81b692ca11cfe33e Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 21:00:36 +0000 Subject: [PATCH 13/32] lb/pick_first: refactor subchannel updates for mode separation and immediate READY activation --- grpc/Cargo.toml | 2 + grpc/src/client/load_balancing/pick_first.rs | 226 +++++++++++-------- 2 files changed, 140 insertions(+), 88 deletions(-) diff --git a/grpc/Cargo.toml b/grpc/Cargo.toml index 9188537a5..ce364dbd5 100644 --- a/grpc/Cargo.toml +++ b/grpc/Cargo.toml @@ -93,6 +93,8 @@ tonic = { version = "0.14.0", path = "../tonic", default-features = false, featu "tls-ring", ] } tonic-prost = { version = "0.14.0", path = "../tonic-prost" } +tokio = { version = "1.37.0", features = ["test-util"] } + [[bench]] name = "metadata" diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index ad5f9d3c1..e6989435b 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -31,6 +31,7 @@ use rand::seq::SliceRandom; use std::sync::atomic::{AtomicBool, Ordering}; use tonic::metadata::MetadataMap; + use crate::client::ConnectivityState; use crate::client::load_balancing::ChannelController; use crate::client::load_balancing::FailingPicker; @@ -44,6 +45,7 @@ use crate::client::load_balancing::PickResult; use crate::client::load_balancing::Picker; use crate::client::load_balancing::QueuingPicker; use crate::client::load_balancing::WorkScheduler; +use crate::client::load_balancing::subchannel; use crate::client::load_balancing::subchannel::Subchannel; use crate::client::load_balancing::subchannel::SubchannelState; use crate::client::name_resolution::Address; @@ -170,6 +172,39 @@ impl PickFirstPolicy { self.subchannel_states = new_states; // Update subchannel states cache. } + /// Call when the selected subchannel is dropped or loses connection. + // This causes the LB to go IDLE. + fn subchannel_drop(&mut self, channel_controller: &mut dyn ChannelController) { + self.selected = None; + self.connectivity_state = ConnectivityState::Idle; + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Idle, + picker: Arc::new(IdlePicker { + work_scheduler: self.work_scheduler.clone(), + }), + }); + } + + fn subchannel_activate( + &mut self, + subchannel: Arc, + channel_controller: &mut dyn ChannelController, + ) { + self.selected = Some(subchannel.clone()); + self.connectivity_state = ConnectivityState::Ready; + self.subchannels = vec![subchannel.clone()]; // Keep only the winner. + self.steady_state = None; // Reset mode to First Pass. + + if let Some(handle) = self.timer_handle.take() { + handle.abort(); + } + + channel_controller.update_picker(LbState { + connectivity_state: ConnectivityState::Ready, + picker: Arc::new(OneSubchannelPicker { sc: subchannel }), + }); + } + /// Starts a connection pass through the address list. // This clears the selected subchannel. fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { @@ -196,6 +231,41 @@ impl PickFirstPolicy { } } + /// Book-keeping for tracking progress on the first pass through the address list. + /// Assumes the subchannel is in a non-READY state. + /// If the failure is from the subchannel at the frontier, advances the frontier and triggers a connection on the next subchannel. + fn update_first_pass( + &mut self, + subchannel: Arc, + state: &SubchannelState, + channel_controller: &mut dyn ChannelController, + ) { + // Failover triggers only if the failure comes from the subchannel currently at the frontier. + if let Some(attempting) = self.subchannels.get(self.frontier_index) { + if attempting.address() == subchannel.address() + && state.connectivity_state == ConnectivityState::TransientFailure + { + // Advance frontier to the next available address. + if let Some(next_sc) = self.advance_frontier(false) { + let next_sc = next_sc.clone(); + self.trigger_subchannel_connection(next_sc, channel_controller); + } else { + // Pass exhausted: enter policy-level TRANSIENT_FAILURE and switch to steady state. + let error = state + .last_connection_error + .as_ref() + .map(|e| e.to_string()) + .unwrap_or_else(|| "all addresses failed".to_string()); + + self.last_connection_error = Some(error.clone()); + _ = self.set_transient_failure(channel_controller, error); + + self.steady_state = Some(SteadyState::new(self.subchannels.len())); + } + } + } + } + /// Advances the frontier to the next non-TransientFailure subchannel and returns it. /// If `reset` is true, starts the scan from index 0. // The frontier is the latest index in which connectivity has been attempted. @@ -224,6 +294,13 @@ impl PickFirstPolicy { None } + /// Returns true if the subchannel's address is present in the most recently received address list. + fn subchannel_is_current(&self, subchannel: &Arc) -> bool { + self.subchannels + .iter() + .any(|sc| sc.address() == subchannel.address()) + } + /// Triggers a connection on the subchannel, and starts the 250ms timer. /// If no connection succeeds before the timer expires, the frontier will advance to /// the next subchannel. @@ -403,89 +480,35 @@ impl LbPolicy for PickFirstPolicy { self.subchannel_states .insert(subchannel.address(), state.clone()); - // If we have a selected subchannel, only care about updates from it. - if let Some(ref selected) = self.selected { - if selected.address() == subchannel.address() { - if state.connectivity_state != ConnectivityState::Ready { - // Lost connection: Go IDLE. - self.selected = None; - self.connectivity_state = ConnectivityState::Idle; - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Idle, - picker: Arc::new(IdlePicker { - work_scheduler: self.work_scheduler.clone(), - }), - }); - } - return; + // If the subchannel being updated is the selected one, it affects handling. + let is_selected = self + .selected + .as_ref() + .map_or(false, |s| s.address() == subchannel.address()); + + match ( + is_selected, // Does the load balancer have an active subchannel already? + state.connectivity_state, // What is the updating subchannel's state? + ) { + (true, ConnectivityState::Ready) => { + // The selected subchannel is still ready; do nothing with this update. } - } - - // Steady State Retries: Automatically connect when a subchannel goes IDLE. - if self.steady_state.is_some() && state.connectivity_state == ConnectivityState::Idle { - subchannel.connect(); - } - - // Steady State Failures: Count failures across all subchannels. - if let Some(ref mut ss) = self.steady_state { - if state.connectivity_state == ConnectivityState::TransientFailure { - if ss.record_failure(self.subchannels.len()) { - channel_controller.request_resolution(); - } + (true, _) => { + // The selected subchannel has failed (is no longer READY); drop the connection. + self.subchannel_drop(channel_controller); } - } - - // If any subchannel in the current pass reports READY, select it. - if state.connectivity_state == ConnectivityState::Ready { - // Check if this subchannel is part of the current pass (index <= frontier_index) - let is_in_pass = self - .subchannels - .iter() - .take(self.frontier_index + 1) - .any(|sc| sc.address() == subchannel.address()); - - if is_in_pass { - self.selected = Some(subchannel.clone()); - self.connectivity_state = ConnectivityState::Ready; - - // Keep only the successful subchannel in the list - self.subchannels = vec![subchannel.clone()]; - - // Cancel timer - if let Some(handle) = self.timer_handle.take() { - handle.abort(); + (false, ConnectivityState::Ready) => { + // The updating subchannel is READY. If it is still current, activate it. + if self.subchannel_is_current(&subchannel) { + self.subchannel_activate(subchannel, channel_controller); } - - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Ready, - picker: Arc::new(OneSubchannelPicker { sc: subchannel }), - }); - return; } - } - - // Failover on Failure: Only if the subchannel at the exact frontier_index fails. - if let Some(attempting) = self.subchannels.get(self.frontier_index) { - if attempting.address() == subchannel.address() { - if state.connectivity_state == ConnectivityState::TransientFailure { - // Move to next address via advance_frontier - if let Some(next_sc) = self.advance_frontier(false) { - let next_sc = next_sc.clone(); // Clone to avoid borrow issues - self.trigger_subchannel_connection(next_sc, channel_controller); - } else { - // Exhausted: TRANSIENT_FAILURE and request re-resolution. - let error = state - .last_connection_error - .as_ref() - .map(|e| e.to_string()) - .unwrap_or_else(|| "all addresses failed".to_string()); - - self.last_connection_error = Some(error.clone()); - _ = self.set_transient_failure(channel_controller, error); - - // Transition to steady state mode - self.steady_state = Some(SteadyState::new()); - } + (false, _) => { + // The updating subchannel won't be selected, so track progress based on whether we are in steady state or a first pass. + if let Some(steady) = self.steady_state.as_mut() { + steady.subchannel_nonready(channel_controller, subchannel, state); + } else { + self.update_first_pass(subchannel, state, channel_controller); } } } @@ -547,25 +570,47 @@ fn build_shuffler() -> Arc { }) } +/// Tracks a the 'steady state' pass of subchannels when looking for a ready connection. +/// If the number of reported subchannel failures reaches the failure threshold, this will ask the Name Resolver to re-resolve. #[derive(Debug)] struct SteadyState { + /// The number of failures before triggering a re-resolution of addresses. + /// This is a rough heuristic to approximate if all subchannels have failed since we entered steady state, and can be tuned as needed. + failure_threshold: usize, /// The number of failures connecting, used to roughly approximate if a re-resolution needs to happen. failure_count: usize, } impl SteadyState { - fn new() -> Self { - Self { failure_count: 0 } + fn new(threshold: usize) -> Self { + Self { + failure_threshold: threshold, + failure_count: 0, + } } - /// Increments the failure count and returns true if re-resolution is required. - fn record_failure(&mut self, subchannels_len: usize) -> bool { - self.failure_count += 1; - if self.failure_count >= subchannels_len { - self.failure_count = 0; - return true; + /// Handles non-ready subchannel updates when the LB is in 'steady state' connection mode. + fn subchannel_nonready( + &mut self, + channel_controller: &mut dyn ChannelController, + subchannel: Arc, + state: &SubchannelState, + ) { + match state.connectivity_state { + ConnectivityState::Idle => { + // Subchannel backoff expired: trigger reconnection attempt. + subchannel.connect(); + } + ConnectivityState::TransientFailure => { + // Track failures. If all known subchannels have failed, request new addresses. + self.failure_count += 1; + if self.failure_count >= self.failure_threshold { + self.failure_count = 0; + channel_controller.request_resolution(); + } + } + _ => {} } - false } } @@ -862,10 +907,12 @@ mod test { addresses: vec![ Address { address: crate::byte_str::ByteStr::from("::1".to_string()), + network_type: crate::client::name_resolution::TCP_IP_NETWORK_TYPE, ..Default::default() }, Address { address: crate::byte_str::ByteStr::from("127.0.0.1".to_string()), + network_type: crate::client::name_resolution::TCP_IP_NETWORK_TYPE, ..Default::default() }, ], @@ -874,6 +921,7 @@ mod test { Endpoint { addresses: vec![Address { address: crate::byte_str::ByteStr::from("::2".to_string()), + network_type: crate::client::name_resolution::TCP_IP_NETWORK_TYPE, ..Default::default() }], ..Default::default() @@ -881,12 +929,14 @@ mod test { Endpoint { addresses: vec![Address { address: crate::byte_str::ByteStr::from("127.0.0.2".to_string()), + network_type: crate::client::name_resolution::TCP_IP_NETWORK_TYPE, ..Default::default() }], ..Default::default() }, ]; + policy .resolver_update( ResolverUpdate { From 168e7a8e57e45383ffbcb3699a4a6e07aac6fc92 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 21:10:04 +0000 Subject: [PATCH 14/32] lb/pick_first: early return on untracked subchannel updates --- grpc/src/client/load_balancing/pick_first.rs | 28 +++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index e6989435b..dc7a1422e 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -31,7 +31,6 @@ use rand::seq::SliceRandom; use std::sync::atomic::{AtomicBool, Ordering}; use tonic::metadata::MetadataMap; - use crate::client::ConnectivityState; use crate::client::load_balancing::ChannelController; use crate::client::load_balancing::FailingPicker; @@ -470,12 +469,32 @@ impl LbPolicy for PickFirstPolicy { Ok(()) } + /// Invoked asynchronously by the outer channel infrastructure whenever any subchannel + /// managed by this policy experiences a connectivity state transition. + /// + /// # Parameters + /// * `subchannel`: The specific backend connection instance (`Arc`) that triggered the event. + /// It identifies *which* transport lane is reporting telemetry. + /// * `state`: The new connectivity status snapshot (`SubchannelState`) being reported. + /// It details *what* happened (e.g., transitioned to `READY`, `IDLE`, or encountered a `TransientFailure`). + /// * `channel_controller`: The internal control plane interface used to update the channel's RPC picker + /// or signal the Name Resolver to fetch new addresses. + /// + /// # Behavioral Flow + /// This function drives the core load-balancing state machine. It caches the new state and executes a + /// routing matrix to determine whether to drop a failing active connection, finalize a successful + /// backend selection, pace connection attempts (First Pass), or monitor background retry health (Steady State). fn subchannel_update( &mut self, subchannel: Arc, state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { + if !self.subchannel_is_current(&subchannel) { + // This update is from an outdated subchannel that is no longer in the address list. Ignore it. + return; + } + // Update the cache for all updates. self.subchannel_states .insert(subchannel.address(), state.clone()); @@ -498,10 +517,8 @@ impl LbPolicy for PickFirstPolicy { self.subchannel_drop(channel_controller); } (false, ConnectivityState::Ready) => { - // The updating subchannel is READY. If it is still current, activate it. - if self.subchannel_is_current(&subchannel) { - self.subchannel_activate(subchannel, channel_controller); - } + // The updating subchannel is READY; activate it. + self.subchannel_activate(subchannel, channel_controller); } (false, _) => { // The updating subchannel won't be selected, so track progress based on whether we are in steady state or a first pass. @@ -936,7 +953,6 @@ mod test { }, ]; - policy .resolver_update( ResolverUpdate { From 5467799ba7699cf64332cfc1888111102f7eb77d Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 21:21:27 +0000 Subject: [PATCH 15/32] lb/pick_first: encapsulate timer cancellation into a helper function --- grpc/src/client/load_balancing/pick_first.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index dc7a1422e..650ced371 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -256,6 +256,9 @@ impl PickFirstPolicy { .map(|e| e.to_string()) .unwrap_or_else(|| "all addresses failed".to_string()); + // Cancel the pacing timer since this connection pass is over. + self.abort_pacing_timer(); + self.last_connection_error = Some(error.clone()); _ = self.set_transient_failure(channel_controller, error); @@ -432,6 +435,13 @@ impl PickFirstPolicy { .map(|sc| new_addresses.contains(&sc.address())) .unwrap_or(false) } + + // Cancels the connection pacing timer if it is active. + fn abort_pacing_timer(&mut self) { + if let Some(handle) = self.timer_handle.take() { + handle.abort(); + } + } } impl LbPolicy for PickFirstPolicy { @@ -443,6 +453,8 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { + self.abort_pacing_timer(); + // Reset steady state on new update self.steady_state = None; @@ -553,6 +565,12 @@ impl LbPolicy for PickFirstPolicy { } } +impl Drop for PickFirstPolicy { + fn drop(&mut self) { + self.abort_pacing_timer(); + } +} + #[derive(Debug)] struct OneSubchannelPicker { sc: Arc, From 0a77a0f26a9f2566d538c2375897f522adc93696 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 22:09:21 +0000 Subject: [PATCH 16/32] lb/pick_first: scan for READY subchannels during resolver updates to preserve stickiness --- grpc/src/client/load_balancing/pick_first.rs | 65 +++++++++++++++----- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 650ced371..972773eaf 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -146,7 +146,7 @@ impl PickFirstPolicy { &mut self, new_addresses: Vec
, channel_controller: &mut dyn ChannelController, - ) { + ) -> Option> { // Map existing subchannels by address. let mut existing_subchannels: HashMap> = self .subchannels @@ -154,21 +154,39 @@ impl PickFirstPolicy { .map(|sc| (sc.address(), sc)) .collect(); - let (new_subchannels, new_states): (Vec<_>, HashMap<_, _>) = new_addresses - .into_iter() - .map(|addr| { - if let Some(sc) = existing_subchannels.remove(&addr) { - let state = self.subchannel_states.get(&addr).unwrap().clone(); - (sc, (addr, state)) - } else { - let (sc, state) = channel_controller.new_subchannel(&addr); - (sc, (addr, state)) + let mut new_subchannels = Vec::with_capacity(new_addresses.len()); + let mut new_states = HashMap::with_capacity(new_addresses.len()); + let mut ready_subchannel = None; + + for addr in new_addresses { + let (sc, state) = if let Some(sc) = existing_subchannels.remove(&addr) { + let state = self.subchannel_states.get(&addr).unwrap().clone(); + (sc, state) + } else { + // Get a new subchannel handle from the controller if we don't have an existing one. + channel_controller.new_subchannel(&addr) + }; + + // Track the best candidate for immediate activation: + // 1. Absolute Priority: The currently selected subchannel if it is still READY. + // 2. Fallback: The first generic READY subchannel encountered. + if state.connectivity_state == ConnectivityState::Ready { + if self.subchannel_is_selected(&sc) { + // Sticky channel wins immediately and overrides any fallback candidates. + ready_subchannel = Some(sc.clone()); + } else if ready_subchannel.is_none() { + // Capture fallback candidate, but does not overwrite if a sticky channel was already found. + ready_subchannel = Some(sc.clone()); } - }) - .unzip(); + } + + new_subchannels.push(sc); + new_states.insert(addr, state); + } - self.subchannels = new_subchannels; // Prune old addresses. + self.subchannels = new_subchannels; // Prunes old addresses, adds new ones. self.subchannel_states = new_states; // Update subchannel states cache. + ready_subchannel } /// Call when the selected subchannel is dropped or loses connection. @@ -189,6 +207,10 @@ impl PickFirstPolicy { subchannel: Arc, channel_controller: &mut dyn ChannelController, ) { + if self.subchannel_is_selected(&subchannel) { + // Already selected; skip activation. + return; + } self.selected = Some(subchannel.clone()); self.connectivity_state = ConnectivityState::Ready; self.subchannels = vec![subchannel.clone()]; // Keep only the winner. @@ -296,7 +318,16 @@ impl PickFirstPolicy { None } + /// Returns true if the given subchannel matches the currently selected active subchannel. + fn subchannel_is_selected(&self, subchannel: &Arc) -> bool { + self.selected + .as_ref() + .map_or(false, |sel| sel.address() == subchannel.address()) + } + /// Returns true if the subchannel's address is present in the most recently received address list. + // This compares against the current list of subchannels the LB is attempting to connect to. To + // see if the LB already connected to the channel, use 'subchannel_is_selected'. fn subchannel_is_current(&self, subchannel: &Arc) -> bool { self.subchannels .iter() @@ -461,9 +492,11 @@ impl LbPolicy for PickFirstPolicy { match update.endpoints { Ok(endpoints) => { let new_addresses = self.compile_address(endpoints, config, channel_controller)?; - let sticky = self.sticky(&new_addresses); - self.rebuild_subchannels(new_addresses, channel_controller); - if !sticky { + if let Some(ready_subchannel) = + self.rebuild_subchannels(new_addresses, channel_controller) + { + self.subchannel_activate(ready_subchannel, channel_controller); + } else { self.start_connection_pass(channel_controller); } } From c17dbaf1a452c6bca25b6a3b01b2c8e9137d45f2 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 22:29:36 +0000 Subject: [PATCH 17/32] lb/pick_first: reconnect to IDLE subchannels upon connection pass exhaustion --- grpc/src/client/load_balancing/pick_first.rs | 87 ++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 972773eaf..3ab79b2df 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -285,7 +285,18 @@ impl PickFirstPolicy { _ = self.set_transient_failure(channel_controller, error); self.steady_state = Some(SteadyState::new(self.subchannels.len())); + + // Trigger connection attempts on any subchannels that transitioned to IDLE + // during the first pass, ensuring they don't get stuck. + for sc in &self.subchannels { + let is_idle = self.subchannel_states.get(&sc.address()) + .map_or(false, |s| s.connectivity_state == ConnectivityState::Idle); + if is_idle { + sc.connect(); + } + } } + } } } @@ -1350,4 +1361,80 @@ mod test { other => panic!("unexpected event {:?}", other), } } + + #[tokio::test] + async fn test_pick_first_steady_state_stuck_idle_prevention() { + let (rx, mut policy, mut controller) = setup(); + let endpoints = create_endpoints(vec!["addr1", "addr2"]); + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) + rx.recv().unwrap(); // addr1 + rx.recv().unwrap(); // addr2 + rx.recv().unwrap(); // Connect(addr1) + rx.recv().unwrap(); // UpdatePicker(Connecting) + + // 1. Fail addr1 to advance frontier to addr2 + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect Connect(addr2) + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), + other => panic!("unexpected event {:?}", other), + } + + // 2. Simulate addr1 backing off and transitioning to IDLE early (while addr2 is still connecting) + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Idle, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect NO events yet because first pass is still active and ignoring IDLE + std::thread::sleep(Duration::from_millis(50)); + assert!(rx.try_recv().is_err(), "unexpected event during first pass"); + + // 3. Fail addr2 to exhaust the first pass + let sc2 = policy.subchannels[1].clone(); + policy.subchannel_update( + sc2, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // Expect UpdatePicker(TransientFailure) and RequestResolution from exhaustion + rx.recv().unwrap(); // UpdatePicker + rx.recv().unwrap(); // RequestResolution + + // CRUCIAL VERIFICATION: Expect an IMMEDIATE Connect(addr1) event triggered + // by the exhaustion loop sweeping up the early IDLE subchannel! + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), + other => panic!("unexpected event post-exhaustion {:?}", other), + } + } } + From 9449f226939193902603bc1d6cb8c2531c567e30 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 22:36:08 +0000 Subject: [PATCH 18/32] lb/pick_first: harden pass exhaustion error tracking with strict expectations --- grpc/src/client/load_balancing/pick_first.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 3ab79b2df..0f54aab21 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -274,9 +274,8 @@ impl PickFirstPolicy { // Pass exhausted: enter policy-level TRANSIENT_FAILURE and switch to steady state. let error = state .last_connection_error - .as_ref() - .map(|e| e.to_string()) - .unwrap_or_else(|| "all addresses failed".to_string()); + .clone() + .expect("gRPC Contract Violation: last_connection_error must be populated when in TransientFailure"); // Cancel the pacing timer since this connection pass is over. self.abort_pacing_timer(); @@ -812,7 +811,7 @@ mod test { sc1, &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -947,7 +946,7 @@ mod test { sc1, &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -1247,7 +1246,7 @@ mod test { sc1.clone(), &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -1301,7 +1300,7 @@ mod test { sc1.clone(), &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -1333,7 +1332,7 @@ mod test { sc2.clone(), &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -1389,7 +1388,7 @@ mod test { sc1.clone(), &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); @@ -1420,7 +1419,7 @@ mod test { sc2, &SubchannelState { connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: None, + last_connection_error: Some("connection refused".to_string()), }, controller.as_mut(), ); From 12ab35304a683d72fc34a1f81e8d00a84691a745 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 23:17:37 +0000 Subject: [PATCH 19/32] name_resolution: include structural attributes in Address PartialEq --- grpc/src/client/name_resolution/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/grpc/src/client/name_resolution/mod.rs b/grpc/src/client/name_resolution/mod.rs index 9354d0514..61ebcedee 100644 --- a/grpc/src/client/name_resolution/mod.rs +++ b/grpc/src/client/name_resolution/mod.rs @@ -317,10 +317,13 @@ impl Eq for Address {} impl PartialEq for Address { fn eq(&self, other: &Self) -> bool { - self.network_type == other.network_type && self.address == other.address + self.network_type == other.network_type + && self.address == other.address + && self.attributes == other.attributes } } + impl Hash for Address { fn hash(&self, state: &mut H) { self.network_type.hash(state); From 1018fdaf3086be79553a6f2502fdd67348a16590 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 23:30:36 +0000 Subject: [PATCH 20/32] name_resolution: include structural attributes in Address PartialEq --- grpc/src/client/name_resolution/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/grpc/src/client/name_resolution/mod.rs b/grpc/src/client/name_resolution/mod.rs index 61ebcedee..99d9dae44 100644 --- a/grpc/src/client/name_resolution/mod.rs +++ b/grpc/src/client/name_resolution/mod.rs @@ -323,7 +323,6 @@ impl PartialEq for Address { } } - impl Hash for Address { fn hash(&self, state: &mut H) { self.network_type.hash(state); From acd6b2e29eb1a647fb00b3d974be1bb22f168f0f Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Wed, 6 May 2026 23:55:59 +0000 Subject: [PATCH 21/32] name_resolution: derive structural equality for Address and add map collision unit test --- grpc/src/client/name_resolution/mod.rs | 64 ++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/grpc/src/client/name_resolution/mod.rs b/grpc/src/client/name_resolution/mod.rs index 99d9dae44..3f56cbca8 100644 --- a/grpc/src/client/name_resolution/mod.rs +++ b/grpc/src/client/name_resolution/mod.rs @@ -298,7 +298,7 @@ impl Hash for Endpoint { /// An Address is an identifier that indicates how to connect to a server. #[non_exhaustive] -#[derive(Debug, Clone, Default, Ord, PartialOrd)] +#[derive(Debug, Clone, Default, PartialEq, Eq, Ord, PartialOrd)] pub(crate) struct Address { /// The network type is used to identify what kind of transport to create /// when connecting to this address. Typically TCP_IP_ADDRESS_TYPE. @@ -313,15 +313,6 @@ pub(crate) struct Address { pub attributes: Attributes, } -impl Eq for Address {} - -impl PartialEq for Address { - fn eq(&self, other: &Self) -> bool { - self.network_type == other.network_type - && self.address == other.address - && self.attributes == other.attributes - } -} impl Hash for Address { fn hash(&self, state: &mut H) { @@ -452,4 +443,57 @@ mod test { assert_eq!(&target.to_string(), tc.want_str); } } + + // This test ensures that the Address struct correctly maintains its asymmetric PartialEq and Hash contracts. + // Specifically, two addresses with the same physical coordinates but different metadata attributes must + // hash to the same HashMap bucket (intentional collision) but fail strict equality, forcing collection layers + // (like load balancers and connection pools) to safely treat them as distinct endpoints without corrupting the map. + #[test] + fn test_address_hashmap_asymmetric_collision() { + use std::collections::HashMap; + use std::hash::{Hash, Hasher}; + use std::collections::hash_map::DefaultHasher; + use crate::client::name_resolution::Address; + use crate::attributes::Attributes; + use crate::byte_str::ByteStr; + + let addr_base = "127.0.0.1:8080"; + + // Address A: No metadata attributes + let addr_a = Address { + network_type: "tcp", + address: ByteStr::from(addr_base.to_string()), + attributes: Attributes::new(), + }; + + // Address B: Identical text/type coordinates, but WITH metadata attributes + let attrs = Attributes::new().add("metadata_payload".to_string()); + let addr_b = Address { + network_type: "tcp", + address: ByteStr::from(addr_base.to_string()), + attributes: attrs, + }; + + // Invariant Verification: Stricter equality must say they differ + assert_ne!(addr_a, addr_b, "Address equality must be distinct when attributes mutate!"); + + // Invariant Verification: Hashing must ignore attributes (intentional collision) + let mut hasher_a = DefaultHasher::new(); + let mut hasher_b = DefaultHasher::new(); + addr_a.hash(&mut hasher_a); + addr_b.hash(&mut hasher_b); + assert_eq!(hasher_a.finish(), hasher_b.finish(), "Address hashes MUST remain identical to ensure same HashMap memory bucket routing!"); + + // Map Functional Verification + let mut map = HashMap::new(); + map.insert(addr_a.clone(), "subchannel_a"); + + // Querying or removing using B (mutated attributes) must FAIL due to asymmetric == check + assert!(map.remove(&addr_b).is_none(), "Flaw: HashMap incorrectly matched key despite mismatched attributes!"); + + // Querying or removing using A (exact match) must SUCCEED + assert_eq!(map.remove(&addr_a), Some("subchannel_a")); + assert!(map.is_empty()); + } } + From cc467f555bf8f1f0ded42f6274b0d3cfbd9c5810 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Thu, 7 May 2026 16:27:18 +0000 Subject: [PATCH 22/32] Update test to remove coverage of derived traits, add coverage for manually written hash impl --- grpc/src/client/name_resolution/mod.rs | 82 ++++++++++++++++++-------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/grpc/src/client/name_resolution/mod.rs b/grpc/src/client/name_resolution/mod.rs index 3f56cbca8..9cecab56c 100644 --- a/grpc/src/client/name_resolution/mod.rs +++ b/grpc/src/client/name_resolution/mod.rs @@ -313,7 +313,6 @@ pub(crate) struct Address { pub attributes: Attributes, } - impl Hash for Address { fn hash(&self, state: &mut H) { self.network_type.hash(state); @@ -381,6 +380,12 @@ impl NopResolver { #[cfg(test)] mod test { use super::Target; + use crate::attributes::Attributes; + use crate::byte_str::ByteStr; + use crate::client::name_resolution::Address; + use std::collections::HashMap; + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; #[test] pub fn parse_target() { @@ -444,29 +449,25 @@ mod test { } } - // This test ensures that the Address struct correctly maintains its asymmetric PartialEq and Hash contracts. - // Specifically, two addresses with the same physical coordinates but different metadata attributes must - // hash to the same HashMap bucket (intentional collision) but fail strict equality, forcing collection layers - // (like load balancers and connection pools) to safely treat them as distinct endpoints without corrupting the map. + // This test ensures that the Address struct correctly maintains its + // asymmetric PartialEq and Hash contracts. + // Specifically, two addresses with the same physical coordinates but + // different metadata attributes must hash to the same HashMap bucket + // (intentional collision) but fail strict equality, forcing collection + // layers (e.g., load balancers and connection pools) to safely treat them + // as distinct endpoints without corrupting the map. #[test] fn test_address_hashmap_asymmetric_collision() { - use std::collections::HashMap; - use std::hash::{Hash, Hasher}; - use std::collections::hash_map::DefaultHasher; - use crate::client::name_resolution::Address; - use crate::attributes::Attributes; - use crate::byte_str::ByteStr; - let addr_base = "127.0.0.1:8080"; - // Address A: No metadata attributes + // Address A: without metadata attributes let addr_a = Address { network_type: "tcp", address: ByteStr::from(addr_base.to_string()), attributes: Attributes::new(), }; - // Address B: Identical text/type coordinates, but WITH metadata attributes + // Address B: with metadata attributes let attrs = Attributes::new().add("metadata_payload".to_string()); let addr_b = Address { network_type: "tcp", @@ -474,26 +475,59 @@ mod test { attributes: attrs, }; - // Invariant Verification: Stricter equality must say they differ - assert_ne!(addr_a, addr_b, "Address equality must be distinct when attributes mutate!"); - - // Invariant Verification: Hashing must ignore attributes (intentional collision) + // Hashing must ignore attributes (intentional collision) let mut hasher_a = DefaultHasher::new(); let mut hasher_b = DefaultHasher::new(); addr_a.hash(&mut hasher_a); addr_b.hash(&mut hasher_b); - assert_eq!(hasher_a.finish(), hasher_b.finish(), "Address hashes MUST remain identical to ensure same HashMap memory bucket routing!"); + assert_eq!( + hasher_a.finish(), + hasher_b.finish(), + "Identical Address hashes must route to the same HashMap memory bucket!" + ); + + let hash_a = hasher_a.finish(); + + // Verify that changing network_type changes the hash + let addr_diff_net = Address { + network_type: "uds", + address: ByteStr::from(addr_base.to_string()), + attributes: Attributes::new(), + }; + let mut hasher_diff_net = DefaultHasher::new(); + addr_diff_net.hash(&mut hasher_diff_net); + assert_ne!( + hash_a, + hasher_diff_net.finish(), + "Changing network_type must change the hash!" + ); + + // Verify that changing address changes the hash + let addr_diff_addr = Address { + network_type: "tcp", + address: ByteStr::from("127.0.0.1:8081".to_string()), + attributes: Attributes::new(), + }; + let mut hasher_diff_addr = DefaultHasher::new(); + addr_diff_addr.hash(&mut hasher_diff_addr); + assert_ne!( + hash_a, + hasher_diff_addr.finish(), + "Changing address must change the hash!" + ); // Map Functional Verification let mut map = HashMap::new(); map.insert(addr_a.clone(), "subchannel_a"); - // Querying or removing using B (mutated attributes) must FAIL due to asymmetric == check - assert!(map.remove(&addr_b).is_none(), "Flaw: HashMap incorrectly matched key despite mismatched attributes!"); - - // Querying or removing using A (exact match) must SUCCEED + // Removing using B (different attributes) should fail. + assert!( + map.remove(&addr_b).is_none(), + "HashMap incorrectly matched key despite mismatched attributes!" + ); + + // Removing using A (same attributes) should succeed. assert_eq!(map.remove(&addr_a), Some("subchannel_a")); assert!(map.is_empty()); } } - From c869cb667a9345de6c169b4cd1867155e97640bd Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Thu, 7 May 2026 19:42:08 +0000 Subject: [PATCH 23/32] lb/pick_first: refactor test boilerplate, harden comments, and add conformance tests --- grpc/src/client/load_balancing/pick_first.rs | 547 +++++++++---------- 1 file changed, 263 insertions(+), 284 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 0f54aab21..304934b8d 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -27,9 +27,9 @@ use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; +use crate::metadata::MetadataMap; use rand::seq::SliceRandom; use std::sync::atomic::{AtomicBool, Ordering}; -use tonic::metadata::MetadataMap; use crate::client::ConnectivityState; use crate::client::load_balancing::ChannelController; @@ -44,7 +44,6 @@ use crate::client::load_balancing::PickResult; use crate::client::load_balancing::Picker; use crate::client::load_balancing::QueuingPicker; use crate::client::load_balancing::WorkScheduler; -use crate::client::load_balancing::subchannel; use crate::client::load_balancing::subchannel::Subchannel; use crate::client::load_balancing::subchannel::SubchannelState; use crate::client::name_resolution::Address; @@ -288,14 +287,15 @@ impl PickFirstPolicy { // Trigger connection attempts on any subchannels that transitioned to IDLE // during the first pass, ensuring they don't get stuck. for sc in &self.subchannels { - let is_idle = self.subchannel_states.get(&sc.address()) + let is_idle = self + .subchannel_states + .get(&sc.address()) .map_or(false, |s| s.connectivity_state == ConnectivityState::Idle); if is_idle { sc.connect(); } } } - } } } @@ -701,6 +701,30 @@ mod test { use std::sync::mpsc; use std::time::Duration; + // Helper to create endpoints from a list of address strings. + // If attrs are provided, they will be added to each endpoint; otherwise, + // default attributes will be used. + fn create_endpoints( + addrs: Vec<&str>, + attrs: Option, + ) -> Vec { + addrs + .into_iter() + .map(|a| Endpoint { + addresses: vec![Address { + address: crate::byte_str::ByteStr::from(a.to_string()), + network_type: crate::client::name_resolution::TCP_IP_NETWORK_TYPE, + attributes: attrs.clone().unwrap_or_default(), + ..Default::default() + }], + ..Default::default() + }) + .collect() + } + + // Sets up a PickFirstPolicy with a TestWorkScheduler and + // TestChannelController. Returns the event receiver, policy, and + // controller, which can be used for testing. fn setup() -> ( mpsc::Receiver, PickFirstPolicy, @@ -725,39 +749,50 @@ mod test { (rx, policy, controller) } - fn create_endpoints(addrs: Vec<&str>) -> Vec { - addrs - .into_iter() - .map(|a| Endpoint { - addresses: vec![Address { - address: crate::byte_str::ByteStr::from(a.to_string()), - ..Default::default() - }], - ..Default::default() - }) - .collect() - } - - #[tokio::test] - async fn test_pick_first_basic_connection() { + // Helper to simulate a basic connection against a list of + // addresses. Returns the event receiver for inspection. Does not imply + // that the connection succeeded or failed. + fn simulate_connection( + addrs: Vec<&str>, + attrs: Option, + ) -> ( + mpsc::Receiver, + PickFirstPolicy, + Box, + ) { let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - let update = ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }; - + let addrs_len = addrs.len(); + let endpoints = create_endpoints(addrs, attrs); policy - .resolver_update(update, None, controller.as_mut()) + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints), + ..Default::default() + }, + None, + controller.as_mut(), + ) .unwrap(); - // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); + // Expect NewSubchannel/addr, Connect, UpdatePicker(Connecting). + for _ in 0..(addrs_len + 2) { + rx.recv().unwrap(); + } + + (rx, policy, controller) + } - // Simulating READY for addr1 + fn simulate_successful_connection( + addrs: Vec<&str>, + attrs: Option, + ) -> ( + mpsc::Receiver, + PickFirstPolicy, + Box, + ) { + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); + + // Simulating READY for addr1. let sc1 = policy.subchannels[0].clone(); policy.subchannel_update( sc1.clone(), @@ -767,8 +802,40 @@ mod test { }, controller.as_mut(), ); + (rx, policy, controller) + } + + fn simulate_failed_connection( + addrs: Vec<&str>, + attrs: Option, + ) -> ( + mpsc::Receiver, + PickFirstPolicy, + Box, + ) { + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); + + // Simulating TransientFailure for addr1. + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: Some("connection refused".to_string()), + }, + controller.as_mut(), + ); + (rx, policy, controller) + } - // Should update picker to READY with sc1 + // The LB can successfully connect to the first address, and updates the + // picker to READY with the correct subchannel. + #[tokio::test] + async fn test_pick_first_basic_connection() { + let addrs = vec!["addr1", "addr2"]; + let (rx, _, _) = simulate_successful_connection(addrs, None); + + // Should update picker to READY with sc1. match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!(state.connectivity_state, ConnectivityState::Ready); @@ -784,45 +851,19 @@ mod test { } } + // If the first address fails, the LB should failover to the second address. #[tokio::test] async fn test_pick_first_failover() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - policy - .resolver_update( - ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }, - None, - controller.as_mut(), - ) - .unwrap(); - - // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - - // Simulate addr1 failing - let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1, - &SubchannelState { - connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: Some("connection refused".to_string()), - }, - controller.as_mut(), - ); + let (rx, mut policy, mut controller) = + simulate_failed_connection(vec!["addr1", "addr2"], None); - // Should connect to addr2 + // Should connect to addr2. match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } - // Simulate addr2 succeeding + // Simulate addr2 succeeding. let sc2 = policy.subchannels[1].clone(); policy.subchannel_update( sc2, @@ -841,39 +882,15 @@ mod test { } } + // Ensures that if a subchannel is already selected, and is still present in + // the new resolver update, the LB will keep using it and not switch to a + // different subchannel. #[tokio::test] async fn test_pick_first_stickiness() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - policy - .resolver_update( - ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }, - None, - controller.as_mut(), - ) - .unwrap(); - - // Expect NewSubchannel x2, Connect, UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); + let (rx, mut policy, mut controller) = + simulate_successful_connection(vec!["addr1", "addr2"], None); - // Make addr1 READY - let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1.clone(), - &SubchannelState { - connectivity_state: ConnectivityState::Ready, - last_connection_error: None, - }, - controller.as_mut(), - ); - - // Expect UpdatePicker(Ready) + // Expect `UpdatePicker(Ready)`. match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!(state.connectivity_state, ConnectivityState::Ready) @@ -881,8 +898,8 @@ mod test { other => panic!("unexpected event {:?}", other), } - // New resolver update including addr1 - let endpoints_new = create_endpoints(vec!["addr2", "addr1", "addr3"]); + // New resolver update including addr1. + let endpoints_new = create_endpoints(vec!["addr2", "addr1", "addr3"], None); policy .resolver_update( ResolverUpdate { @@ -894,17 +911,19 @@ mod test { ) .unwrap(); - // Should create subchannel for addr2 (was cleared by cleanup) and addr3 (new) + // Should create new subchannel for addr2 (was cleared by cleanup). match rx.recv().unwrap() { TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } + // Should create new subchannel for addr3 (was not in previous list). match rx.recv().unwrap() { TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr3"), other => panic!("unexpected event {:?}", other), } - // Should NOT have any more events (no Connect, no UpdatePicker) because it is sticky + // Should NOT have any more events (no Connect, no UpdatePicker), + // because it stuck to the original selected subchannel. std::thread::sleep(Duration::from_millis(50)); assert!(rx.try_recv().is_err(), "unexpected event"); @@ -920,38 +939,13 @@ mod test { ); } + // If all addresses fail during a connection pass, the LB should update to + // TransientFailure and request re-resolution. #[tokio::test] async fn test_pick_first_exhaustion() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1"]); - policy - .resolver_update( - ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }, - None, - controller.as_mut(), - ) - .unwrap(); - - // Expect NewSubchannel, Connect, UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - - // Simulate addr1 failure - let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1, - &SubchannelState { - connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: Some("connection refused".to_string()), - }, - controller.as_mut(), - ); + let (rx, policy, controller) = simulate_failed_connection(vec!["addr1"], None); - // Should update picker to TransientFailure + // Should update picker to TransientFailure. match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => assert_eq!( state.connectivity_state, @@ -960,18 +954,20 @@ mod test { other => panic!("unexpected event {:?}", other), } - // Should request re-resolution + // Should request re-resolution. match rx.recv().unwrap() { TestEvent::RequestResolution => {} other => panic!("unexpected event {:?}", other), } } + // Shuffling and interleaving of addresses is deterministic and correct + // based on the provided shuffler and config. #[tokio::test] async fn test_pick_first_shuffling_and_interleaving_deterministic() { let (_rx, mut policy, mut controller) = setup(); - // Enable shuffling in config + // Enable shuffling in config. let config = PickFirstConfig { shuffle_address_list: true, }; @@ -1048,11 +1044,13 @@ mod test { ); } + // De-duplicate addresses that appear multiple times within the same + // endpoint, and across different endpoints. One subchannel each. #[tokio::test] async fn test_pick_first_duplicate_de_duplication() { let (rx, mut policy, mut controller) = setup(); - // Create endpoints with duplicates + // Create endpoints with duplicates. let endpoints = vec![ Endpoint { addresses: vec![ @@ -1093,11 +1091,11 @@ mod test { ) .unwrap(); - // Should only create subchannels for addr1 and addr2 (2 unique addresses) + // Should only create subchannels for addr1 and addr2 (2 unique addrs). rx.recv().unwrap(); // NewSubchannel(addr1) rx.recv().unwrap(); // NewSubchannel(addr2) - // Verify no 3rd subchannel was created + // Verify no 3rd subchannel was created. std::thread::sleep(Duration::from_millis(50)); while let Ok(event) = rx.try_recv() { if let TestEvent::NewSubchannel(_) = event { @@ -1108,41 +1106,17 @@ mod test { assert_eq!(policy.subchannels.len(), 2, "De-duplication failed"); } + // If the resolver update contains no addresses, the LB should clear + // subchannels, update to TransientFailure, and request re-resolution. #[tokio::test] async fn test_pick_first_empty_update_clears_state() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - - // Initial update with addresses - policy - .resolver_update( - ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }, - None, - controller.as_mut(), - ) - .unwrap(); - - assert_eq!(policy.subchannels.len(), 2); - - // Make addr1 READY so it becomes selected - let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1, - &SubchannelState { - connectivity_state: ConnectivityState::Ready, - last_connection_error: None, - }, - controller.as_mut(), - ); + let (rx, mut policy, mut controller) = + simulate_successful_connection(vec!["addr1", "addr2"], None); + assert_eq!(policy.subchannels.len(), 1); assert!(policy.selected.is_some()); - - // Drain events (NewSubchannel x2, Connect, UpdatePicker x2) while rx.try_recv().is_ok() {} - // Send empty update + // Send empty update. let res = policy.resolver_update( ResolverUpdate { endpoints: Ok(vec![]), @@ -1156,7 +1130,7 @@ mod test { assert_eq!(policy.subchannels.len(), 0); assert!(policy.selected.is_none()); - // Should have updated picker to TransientFailure and requested resolution + // Check picker is in TransientFailure. match rx.recv().unwrap() { TestEvent::UpdatePicker(state) => { assert_eq!( @@ -1166,41 +1140,29 @@ mod test { } other => panic!("unexpected event {:?}", other), } + // Check that re-resolution was requested. match rx.recv().unwrap() { TestEvent::RequestResolution => {} other => panic!("unexpected event {:?}", other), } } + // If the timer expires during a connection pass, the LB should advance to + // the next subchannel and trigger a connection attempt. #[tokio::test(start_paused = true)] async fn test_pick_first_timer_advancement() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - let update = ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }; + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); - policy - .resolver_update(update, None, controller.as_mut()) - .unwrap(); - - // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - - // Simulate timer expiration by setting the flag directly! + // Simulate timer expiration by setting the flag directly. policy .timer_expired .store(true, std::sync::atomic::Ordering::SeqCst); - // Manually call work() to process the timer expiration! + // Manually call work() to process the timer expiration. policy.work(controller.as_mut()); - // Expect Connect event for addr2 due to timer expiration - // Loop to check for event without blocking the thread + // Expect Connect event for addr2 due to timer expiration. + // Loop to check for event without blocking the thread. let mut found = None; for _ in 0..10 { match rx.try_recv() { @@ -1209,7 +1171,7 @@ mod test { break; } Err(std::sync::mpsc::TryRecvError::Empty) => { - // Yield to runtime to allow timer task to run! + // Yield to runtime to allow timer task to run. tokio::time::sleep(std::time::Duration::from_millis(10)).await; } Err(e) => panic!("error recv: {:?}", e), @@ -1222,43 +1184,21 @@ mod test { } } + // If all addresses fail during a connection pass, the LB should enter + // steady state and monitor for backoff expiry to retry connections. #[tokio::test] async fn test_pick_first_steady_state_retries() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1"]); - let update = ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }; - - policy - .resolver_update(update, None, controller.as_mut()) - .unwrap(); - - // Expect NewSubchannel, Connect(addr1), UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - - // Simulate addr1 failure + let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1.clone(), - &SubchannelState { - connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: Some("connection refused".to_string()), - }, - controller.as_mut(), - ); - // Expect UpdatePicker(TransientFailure) and RequestResolution + // Expect UpdatePicker(TransientFailure) and RequestResolution. rx.recv().unwrap(); rx.recv().unwrap(); - // Now we are in steady state! + // Ensure steady state was entered. assert!(policy.steady_state.is_some()); - // Simulate addr1 transitioning to IDLE (backoff over) + // Simulate addr1 transitioning to IDLE (backoff over). policy.subchannel_update( sc1.clone(), &SubchannelState { @@ -1268,50 +1208,30 @@ mod test { controller.as_mut(), ); - // Should automatically call connect() again! + // Should automatically call connect() again. match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), other => panic!("unexpected event {:?}", other), } } + // If the LB is in steady state, and a new address becomes ready, it should + // switch to it immediately. If the current active address goes idle, it + // should trigger a retry, but should not switch back to it until it reports + // ready. #[tokio::test] async fn test_pick_first_steady_state_multi_backend() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - let update = ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }; - - policy - .resolver_update(update, None, controller.as_mut()) - .unwrap(); - - // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - rx.recv().unwrap(); - - // Simulate addr1 failure + let (rx, mut policy, mut controller) = + simulate_failed_connection(vec!["addr1", "addr2"], None); let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1.clone(), - &SubchannelState { - connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: Some("connection refused".to_string()), - }, - controller.as_mut(), - ); - // Should failover to addr2: expect Connect(addr2) + // Should failover to addr2: expect Connect(addr2). match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } - // Now while addr2 is connecting, simulate addr1 going IDLE (backoff over) + // While addr2 is connecting, simulate addr1 going IDLE (backoff over). policy.subchannel_update( sc1.clone(), &SubchannelState { @@ -1321,12 +1241,12 @@ mod test { controller.as_mut(), ); - // We should NOT reconnect to addr1 during the first pass! - // Wait a bit to ensure no event is sent + // We should NOT reconnect to addr1 during the first pass. + // Wait a bit to ensure no event is sent. std::thread::sleep(std::time::Duration::from_millis(50)); assert!(rx.try_recv().is_err(), "unexpected event"); - // Now fail addr2 to complete first pass + // Now fail addr2 to complete first pass. let sc2 = policy.subchannels[1].clone(); policy.subchannel_update( sc2.clone(), @@ -1337,14 +1257,14 @@ mod test { controller.as_mut(), ); - // Expect UpdatePicker(TransientFailure) and RequestResolution + // Expect UpdatePicker(TransientFailure) and RequestResolution. rx.recv().unwrap(); rx.recv().unwrap(); - // Now we are in steady state! + // Confirm LB is in steady state. assert!(policy.steady_state.is_some()); - // Simulate addr1 going IDLE again + // Simulate addr1 going IDLE again. policy.subchannel_update( sc1.clone(), &SubchannelState { @@ -1354,52 +1274,32 @@ mod test { controller.as_mut(), ); - // Now it SHOULD automatically call connect() again! + // Now it should automatically call connect() again. match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), other => panic!("unexpected event {:?}", other), } } + // If the LB is in steady state, and all addresses fail, it should trigger a + // re-resolution. If one of the addresses goes idle during this time, it + // should trigger an immediate connection attempt, rather than waiting for + // the timer. This prevents the load balancer from getting stuck in idle if + // all addresses fail at the same time. #[tokio::test] async fn test_pick_first_steady_state_stuck_idle_prevention() { - let (rx, mut policy, mut controller) = setup(); - let endpoints = create_endpoints(vec!["addr1", "addr2"]); - policy - .resolver_update( - ResolverUpdate { - endpoints: Ok(endpoints), - ..Default::default() - }, - None, - controller.as_mut(), - ) - .unwrap(); - - // Expect NewSubchannel x2, Connect(addr1), UpdatePicker(Connecting) - rx.recv().unwrap(); // addr1 - rx.recv().unwrap(); // addr2 - rx.recv().unwrap(); // Connect(addr1) - rx.recv().unwrap(); // UpdatePicker(Connecting) - - // 1. Fail addr1 to advance frontier to addr2 + let (rx, mut policy, mut controller) = + simulate_failed_connection(vec!["addr1", "addr2"], None); let sc1 = policy.subchannels[0].clone(); - policy.subchannel_update( - sc1.clone(), - &SubchannelState { - connectivity_state: ConnectivityState::TransientFailure, - last_connection_error: Some("connection refused".to_string()), - }, - controller.as_mut(), - ); - // Expect Connect(addr2) + // Expect Connect(addr2). match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), other => panic!("unexpected event {:?}", other), } - // 2. Simulate addr1 backing off and transitioning to IDLE early (while addr2 is still connecting) + // Simulate addr1 backing off and transitioning to IDLE early + // (while addr2 is still connecting). policy.subchannel_update( sc1.clone(), &SubchannelState { @@ -1409,11 +1309,11 @@ mod test { controller.as_mut(), ); - // Expect NO events yet because first pass is still active and ignoring IDLE + // Expect NO events yet because first pass is still active. std::thread::sleep(Duration::from_millis(50)); assert!(rx.try_recv().is_err(), "unexpected event during first pass"); - // 3. Fail addr2 to exhaust the first pass + // Fail addr2 to exhaust the first pass. let sc2 = policy.subchannels[1].clone(); policy.subchannel_update( sc2, @@ -1424,16 +1324,95 @@ mod test { controller.as_mut(), ); - // Expect UpdatePicker(TransientFailure) and RequestResolution from exhaustion + // Expect UpdatePicker(TransientFailure) and RequestResolution from + // exhaustion. rx.recv().unwrap(); // UpdatePicker rx.recv().unwrap(); // RequestResolution - // CRUCIAL VERIFICATION: Expect an IMMEDIATE Connect(addr1) event triggered - // by the exhaustion loop sweeping up the early IDLE subchannel! + // Expect an immediate Connect(addr1) event triggered by the exhaustion + // loop sweeping up the early IDLE subchannel. match rx.recv().unwrap() { TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), other => panic!("unexpected event post-exhaustion {:?}", other), } } -} + // This test is meant to validate that if a new address with different + // attributes is sent as part of a resolver update, the policy treats it as + // a different address and creates a new subchannel for it, rather than + // ignoring it as a duplicate. + #[tokio::test] + async fn test_pick_first_address_update_with_attributes() { + let addr = "addr1"; + let (rx, mut policy, mut controller) = simulate_connection(vec![addr], None); + + // Push same address but with attributes. + let attrs = crate::attributes::Attributes::new().add("metadata_value".to_string()); + let endpoints_updated = create_endpoints(vec![addr], Some(attrs)); + + policy + .resolver_update( + ResolverUpdate { + endpoints: Ok(endpoints_updated), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // This should be a different subchannel due to different attributes. + // Therefore, expect a new TestEvent::NewSubchannel event to be emitted. + let mut found_new_subchannel = false; + while let Ok(event) = rx.try_recv() { + if let TestEvent::NewSubchannel(_) = event { + found_new_subchannel = true; + break; + } + } + + assert!( + found_new_subchannel, + "Policy failed to recreate subchannel when address attributes mutated." + ); + } + + // If a resolver error is received while the LB is in the process of + // connecting to addresses, it should not abort the connection attempt or + // clear the existing addresses, as long as there are still valid addresses + // in the LB. This ensures that transient resolver errors do not cause + // unnecessary disruption to active connection attempts. + #[tokio::test] + async fn test_pick_first_resolver_error_during_connecting() { + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1"], None); + + // Simulate resolver error arriving. + let resolver_error = "dns resolution failed".to_string(); + policy + .resolver_update( + ResolverUpdate { + endpoints: Err(resolver_error.clone()), + ..Default::default() + }, + None, + controller.as_mut(), + ) + .unwrap(); + + // Resolver errors received during active connection attempts should NOT + // abort the attempt or force TransientFailure immediately if the load + // balancer still has valid addresses. + // Expect NO events to be emitted (no UpdatePicker/RequestResolution). + std::thread::sleep(std::time::Duration::from_millis(50)); + assert!( + rx.try_recv().is_err(), + "Unexpected event after resolver error" + ); + + // Verify internal state did not clear endpoints. + assert!( + !policy.subchannels.is_empty(), + "Subchannels erroneously cleared by resolver error." + ); + } +} From c90c5ea05e4f4e115e45bd0e8f8b7e020aacaf97 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Tue, 12 May 2026 22:30:45 +0000 Subject: [PATCH 24/32] PR review fixes --- grpc/src/client/load_balancing/pick_first.rs | 238 ++++++++++--------- 1 file changed, 121 insertions(+), 117 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 304934b8d..5055fc30d 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -162,19 +162,22 @@ impl PickFirstPolicy { let state = self.subchannel_states.get(&addr).unwrap().clone(); (sc, state) } else { - // Get a new subchannel handle from the controller if we don't have an existing one. + // Get a new subchannel handle from the controller if we don't + // have an existing one. channel_controller.new_subchannel(&addr) }; // Track the best candidate for immediate activation: - // 1. Absolute Priority: The currently selected subchannel if it is still READY. + // 1. Priority: The currently selected subchannel if still READY. // 2. Fallback: The first generic READY subchannel encountered. if state.connectivity_state == ConnectivityState::Ready { if self.subchannel_is_selected(&sc) { - // Sticky channel wins immediately and overrides any fallback candidates. + // Sticky channel wins immediately and overrides any + // fallback candidates. ready_subchannel = Some(sc.clone()); } else if ready_subchannel.is_none() { - // Capture fallback candidate, but does not overwrite if a sticky channel was already found. + // Capture fallback candidate, but does not overwrite if a + //sticky channel was already found. ready_subchannel = Some(sc.clone()); } } @@ -183,8 +186,8 @@ impl PickFirstPolicy { new_states.insert(addr, state); } - self.subchannels = new_subchannels; // Prunes old addresses, adds new ones. - self.subchannel_states = new_states; // Update subchannel states cache. + self.subchannels = new_subchannels; + self.subchannel_states = new_states; ready_subchannel } @@ -230,9 +233,10 @@ impl PickFirstPolicy { fn start_connection_pass(&mut self, channel_controller: &mut dyn ChannelController) { self.selected = None; - // If there is a viable subchannel at the frontier, connect to it and update picker to CONNECTING. + // If there is a viable subchannel at the frontier, connect to it and + // update picker to CONNECTING. if let Some(sc) = self.advance_frontier(true) { - let sc = sc.clone(); // Clone to avoid borrow issues. + let sc = sc.clone(); // Avoid borrow issues. self.trigger_subchannel_connection(sc, channel_controller); channel_controller.update_picker(LbState { @@ -240,68 +244,64 @@ impl PickFirstPolicy { picker: Arc::new(QueuingPicker {}), }); } else { - // Otherwise all addresses are in transient failure: update picker and request re-resolution. - let error = self - .last_connection_error - .clone() - .unwrap_or_else(|| "all addresses in transient failure".to_string()); - - // This transition triggers a FailingPicker and requests re-resolution. - _ = self.set_transient_failure(channel_controller, error); + // Otherwise all addresses are in transient failure: update picker. + _ = self.set_transient_failure(channel_controller, None); } } - /// Book-keeping for tracking progress on the first pass through the address list. - /// Assumes the subchannel is in a non-READY state. - /// If the failure is from the subchannel at the frontier, advances the frontier and triggers a connection on the next subchannel. + // Book-keeping for tracking progress on the first pass through the address + // list. Assumes the subchannel is in a non-READY state. + // If the failure is from the subchannel at the frontier, advances the + // frontier and triggers a connection on the next subchannel. fn update_first_pass( &mut self, subchannel: Arc, state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { - // Failover triggers only if the failure comes from the subchannel currently at the frontier. - if let Some(attempting) = self.subchannels.get(self.frontier_index) { - if attempting.address() == subchannel.address() - && state.connectivity_state == ConnectivityState::TransientFailure - { - // Advance frontier to the next available address. - if let Some(next_sc) = self.advance_frontier(false) { - let next_sc = next_sc.clone(); - self.trigger_subchannel_connection(next_sc, channel_controller); - } else { - // Pass exhausted: enter policy-level TRANSIENT_FAILURE and switch to steady state. - let error = state - .last_connection_error - .clone() - .expect("gRPC Contract Violation: last_connection_error must be populated when in TransientFailure"); - - // Cancel the pacing timer since this connection pass is over. - self.abort_pacing_timer(); - - self.last_connection_error = Some(error.clone()); - _ = self.set_transient_failure(channel_controller, error); - - self.steady_state = Some(SteadyState::new(self.subchannels.len())); - - // Trigger connection attempts on any subchannels that transitioned to IDLE - // during the first pass, ensuring they don't get stuck. - for sc in &self.subchannels { - let is_idle = self - .subchannel_states - .get(&sc.address()) - .map_or(false, |s| s.connectivity_state == ConnectivityState::Idle); - if is_idle { - sc.connect(); - } + // Advance frontier if this failure is from the active frontier subchannel. + if let Some(attempting) = self.subchannels.get(self.frontier_index) + && attempting.address() == subchannel.address() + && state.connectivity_state == ConnectivityState::TransientFailure + { + if let Some(next_sc) = self.advance_frontier(false) { + let next_sc = next_sc.clone(); + self.trigger_subchannel_connection(next_sc, channel_controller); + } + } + + // Check if First Pass is fully exhausted (frontier exhausted AND zero connecting). + if self.frontier_index >= self.subchannels.len() { + let any_connecting = self.subchannels.iter().any(|sc| { + self.subchannel_states + .get(&sc.address()) + .is_some_and(|s| s.connectivity_state == ConnectivityState::Connecting) + }); + + if !any_connecting && self.steady_state.is_none() { + self.abort_happy_eyeballs_timer(); + let error = self.last_connection_error.clone(); + _ = self.set_transient_failure(channel_controller, error); + self.steady_state = Some(SteadyState::new(self.subchannels.len())); + + // Trigger connection attempts on any subchannels that + // transitioned to IDLE during the first pass, ensuring they + // don't get stuck. + for sc in &self.subchannels { + let is_idle = self + .subchannel_states + .get(&sc.address()) + .is_some_and(|s| s.connectivity_state == ConnectivityState::Idle); + if is_idle { + sc.connect(); } } } } } - /// Advances the frontier to the next non-TransientFailure subchannel and returns it. - /// If `reset` is true, starts the scan from index 0. + /// Advances the frontier to the next non-TransientFailure subchannel and + /// returns it. If `reset` is true, starts the scan from index 0. // The frontier is the latest index in which connectivity has been attempted. fn advance_frontier(&mut self, reset: bool) -> Option<&Arc> { if reset { @@ -320,24 +320,28 @@ impl PickFirstPolicy { .expect("Expected non-None subchannel state"); match state { - // Push the frontier if sc is in TransientFailure, otherwise return the sc. + // Push the frontier if sc is in TransientFailure ConnectivityState::TransientFailure => self.frontier_index += 1, + // Otherwise return the subchannel. _ => return Some(sc), } } None } - /// Returns true if the given subchannel matches the currently selected active subchannel. + /// Returns true if the given subchannel matches the currently selected + /// active subchannel. fn subchannel_is_selected(&self, subchannel: &Arc) -> bool { self.selected .as_ref() - .map_or(false, |sel| sel.address() == subchannel.address()) + .is_some_and(|sel| sel.address() == subchannel.address()) } - /// Returns true if the subchannel's address is present in the most recently received address list. - // This compares against the current list of subchannels the LB is attempting to connect to. To - // see if the LB already connected to the channel, use 'subchannel_is_selected'. + /// Returns true if the subchannel's address is present in the most recently + /// received address list. + // This compares against the current list of subchannels the LB is + // attempting to connect to. To see if the LB already connected to the + // channel, use 'subchannel_is_selected'. fn subchannel_is_current(&self, subchannel: &Arc) -> bool { self.subchannels .iter() @@ -345,8 +349,8 @@ impl PickFirstPolicy { } /// Triggers a connection on the subchannel, and starts the 250ms timer. - /// If no connection succeeds before the timer expires, the frontier will advance to - /// the next subchannel. + /// If no connection succeeds before the timer expires, the frontier will + /// advance to the next subchannel. fn trigger_subchannel_connection( &mut self, sc: Arc, @@ -440,36 +444,38 @@ impl PickFirstPolicy { if interleaved.is_empty() { self.subchannels.clear(); self.selected = None; - let error = self - .last_resolver_error - .clone() - .unwrap_or_else(|| "empty address list".to_string()); - return self - .set_transient_failure(channel_controller, error) - .map(|_| vec![]); + self.set_transient_failure(channel_controller, Some("empty address list".to_string()))?; } Ok(interleaved) } - // Sets state to TRANSIENT_FAILURE and updates picker with error. Triggers a re-resolution request. + // Sets LB state to TRANSIENT_FAILURE and updates picker with error. + // Triggers a re-resolution request. fn set_transient_failure( &mut self, channel_controller: &mut dyn ChannelController, - error: String, + error: Option, ) -> Result<(), String> { + // Replace the last connection error if we have a new one. + if let Some(e) = error { + self.last_connection_error = Some(e); + } self.connectivity_state = ConnectivityState::TransientFailure; + let err = self + .last_connection_error + .clone() + .expect("no last connection error set"); channel_controller.update_picker(LbState { connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { - error: error.clone(), - }), + picker: Arc::new(FailingPicker { error: err.clone() }), }); channel_controller.request_resolution(); - Err(error) + Err(err.clone()) } - // Returns true if the currently selected subchannel's address is still present in the new address list. + // Returns true if the currently selected subchannel's address is still + // present in the new address list. fn sticky(&self, new_addresses: &[Address]) -> bool { self.selected .as_ref() @@ -477,8 +483,8 @@ impl PickFirstPolicy { .unwrap_or(false) } - // Cancels the connection pacing timer if it is active. - fn abort_pacing_timer(&mut self) { + // Cancels the connection 'Happy Eyeballs' timer if it is active. + fn abort_happy_eyeballs_timer(&mut self) { if let Some(handle) = self.timer_handle.take() { handle.abort(); } @@ -494,7 +500,7 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { - self.abort_pacing_timer(); + self.abort_happy_eyeballs_timer(); // Reset steady state on new update self.steady_state = None; @@ -512,11 +518,10 @@ impl LbPolicy for PickFirstPolicy { } Err(e) => { let error = e.to_string(); - self.last_resolver_error = Some(error.clone()); if self.subchannels.is_empty() || self.connectivity_state == ConnectivityState::TransientFailure { - self.set_transient_failure(channel_controller, error)?; + self.set_transient_failure(channel_controller, Some(error))?; } } } @@ -524,29 +529,19 @@ impl LbPolicy for PickFirstPolicy { Ok(()) } - /// Invoked asynchronously by the outer channel infrastructure whenever any subchannel - /// managed by this policy experiences a connectivity state transition. - /// - /// # Parameters - /// * `subchannel`: The specific backend connection instance (`Arc`) that triggered the event. - /// It identifies *which* transport lane is reporting telemetry. - /// * `state`: The new connectivity status snapshot (`SubchannelState`) being reported. - /// It details *what* happened (e.g., transitioned to `READY`, `IDLE`, or encountered a `TransientFailure`). - /// * `channel_controller`: The internal control plane interface used to update the channel's RPC picker - /// or signal the Name Resolver to fetch new addresses. - /// - /// # Behavioral Flow - /// This function drives the core load-balancing state machine. It caches the new state and executes a - /// routing matrix to determine whether to drop a failing active connection, finalize a successful - /// backend selection, pace connection attempts (First Pass), or monitor background retry health (Steady State). fn subchannel_update( &mut self, subchannel: Arc, state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { - if !self.subchannel_is_current(&subchannel) { - // This update is from an outdated subchannel that is no longer in the address list. Ignore it. + if !self + .subchannels + .iter() + .any(|sc| sc.address() == subchannel.address()) + { + // This update is from an outdated subchannel that is no longer in + // the address list. Ignore it. return; } @@ -554,21 +549,18 @@ impl LbPolicy for PickFirstPolicy { self.subchannel_states .insert(subchannel.address(), state.clone()); - // If the subchannel being updated is the selected one, it affects handling. - let is_selected = self - .selected - .as_ref() - .map_or(false, |s| s.address() == subchannel.address()); - match ( - is_selected, // Does the load balancer have an active subchannel already? - state.connectivity_state, // What is the updating subchannel's state? + // Does the load balancer have an active subchannel already? + self.subchannel_is_selected(&subchannel), + // What is the updating subchannel's state? + state.connectivity_state, ) { (true, ConnectivityState::Ready) => { - // The selected subchannel is still ready; do nothing with this update. + // The selected subchannel is still ready; do nothing w/update. } (true, _) => { - // The selected subchannel has failed (is no longer READY); drop the connection. + // The selected subchannel has failed (is no longer READY); + // drop the connection. self.subchannel_drop(channel_controller); } (false, ConnectivityState::Ready) => { @@ -576,7 +568,14 @@ impl LbPolicy for PickFirstPolicy { self.subchannel_activate(subchannel, channel_controller); } (false, _) => { - // The updating subchannel won't be selected, so track progress based on whether we are in steady state or a first pass. + // Always capture freshest unselected error for accurate telemetry. + if state.connectivity_state == ConnectivityState::TransientFailure { + if let Some(err) = &state.last_connection_error { + self.last_connection_error = Some(err.clone()); + } + } + + // Track progress based on whether we are connection pass. if let Some(steady) = self.steady_state.as_mut() { steady.subchannel_nonready(channel_controller, subchannel, state); } else { @@ -610,7 +609,7 @@ impl LbPolicy for PickFirstPolicy { impl Drop for PickFirstPolicy { fn drop(&mut self) { - self.abort_pacing_timer(); + self.abort_happy_eyeballs_timer(); } } @@ -648,14 +647,17 @@ fn build_shuffler() -> Arc { }) } -/// Tracks a the 'steady state' pass of subchannels when looking for a ready connection. -/// If the number of reported subchannel failures reaches the failure threshold, this will ask the Name Resolver to re-resolve. +/// Tracks a the 'steady state' pass of subchannels when looking for a ready +/// connection. If the number of reported subchannel failures reaches the +/// failure threshold, this will ask the Name Resolver to re-resolve. #[derive(Debug)] struct SteadyState { /// The number of failures before triggering a re-resolution of addresses. - /// This is a rough heuristic to approximate if all subchannels have failed since we entered steady state, and can be tuned as needed. + /// This is a rough heuristic to approximate if all subchannels have failed + /// since we entered steady state, and can be tuned as needed. failure_threshold: usize, - /// The number of failures connecting, used to roughly approximate if a re-resolution needs to happen. + /// The number of failures connecting, used to roughly approximate if a + /// re-resolution needs to happen. failure_count: usize, } @@ -667,7 +669,8 @@ impl SteadyState { } } - /// Handles non-ready subchannel updates when the LB is in 'steady state' connection mode. + /// Handles non-ready subchannel updates when the LB is in 'steady state' + /// connection mode. fn subchannel_nonready( &mut self, channel_controller: &mut dyn ChannelController, @@ -680,7 +683,8 @@ impl SteadyState { subchannel.connect(); } ConnectivityState::TransientFailure => { - // Track failures. If all known subchannels have failed, request new addresses. + // Track failures. If all known subchannels have failed, + // request new addresses. self.failure_count += 1; if self.failure_count >= self.failure_threshold { self.failure_count = 0; From 3e55d3d8042cd12b8ea19b4019db45a7e7bf9c10 Mon Sep 17 00:00:00 2001 From: Nathaniel Ford Date: Tue, 12 May 2026 22:33:05 +0000 Subject: [PATCH 25/32] Add Happy Eyeballs regression tests --- grpc/src/client/load_balancing/pick_first.rs | 93 +++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 5055fc30d..1ac074d74 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -358,6 +358,13 @@ impl PickFirstPolicy { ) { let sc_clone = sc.clone(); self.connectivity_state = ConnectivityState::Connecting; + self.subchannel_states.insert( + sc.address(), + SubchannelState { + connectivity_state: ConnectivityState::Connecting, + last_connection_error: None, + }, + ); sc_clone.connect(); // Cancel any existing timer @@ -568,7 +575,7 @@ impl LbPolicy for PickFirstPolicy { self.subchannel_activate(subchannel, channel_controller); } (false, _) => { - // Always capture freshest unselected error for accurate telemetry. + // Always capture freshest unselected error. if state.connectivity_state == ConnectivityState::TransientFailure { if let Some(err) = &state.last_connection_error { self.last_connection_error = Some(err.clone()); @@ -1419,4 +1426,88 @@ mod test { "Subchannels erroneously cleared by resolver error." ); } + + // Out-of-Order Failure Detection + // Ensures the policy waits for all parallel connection attempts to drop + // before failing the channel. + #[tokio::test] + async fn test_pick_first_happy_eyeballs_out_of_order_failure() { + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); + + // 1. Simulate Happy Eyeballs timer firing to launch parallel connection + // to addr2. + policy.timer_expired.store(true, Ordering::SeqCst); + policy.work(controller.as_mut()); + + match rx.recv().unwrap() { + TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), + other => panic!("unexpected event {:?}", other), + } + + // 2. Simulate addr2 failing first while addr1 is still in flight. + let sc2 = policy.subchannels[1].clone(); + policy.subchannel_update( + sc2, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: Some("addr2 failed".to_string()), + }, + controller.as_mut(), + ); + + // Verify policy does NOT enter TransientFailure yet. + std::thread::sleep(Duration::from_millis(50)); + assert!(rx.try_recv().is_err(), "unexpected premature event"); + + // 3. Simulate addr1 failing. Pass is now fully exhausted. + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: Some("addr1 failed".to_string()), + }, + controller.as_mut(), + ); + + match rx.recv().unwrap() { + TestEvent::UpdatePicker(state) => { + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + } + other => panic!("unexpected event {:?}", other), + } + } + + // Freshest Error Caching (Steady State) + // Ensures background failures during Steady State continuously overwrite + // stale connection errors. + #[tokio::test] + async fn test_pick_first_steady_state_freshest_error() { + let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); + + // Consume exhaustion events. + rx.recv().unwrap(); + rx.recv().unwrap(); + assert!(policy.steady_state.is_some()); + + // Simulate background failure during Steady State with net-new error telemetry. + let sc1 = policy.subchannels[0].clone(); + policy.subchannel_update( + sc1, + &SubchannelState { + connectivity_state: ConnectivityState::TransientFailure, + last_connection_error: Some("steady state network drop".to_string()), + }, + controller.as_mut(), + ); + + // Verify policy caches the freshest unselected error. + assert_eq!( + policy.last_connection_error.as_deref(), + Some("steady state network drop") + ); + } } From 3c905533439283ac1a7dfb8c210874c4297dfe7b Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 20 May 2026 09:35:29 -0700 Subject: [PATCH 26/32] partial progress --- grpc/src/client/load_balancing/pick_first.rs | 268 +++++++++++-------- 1 file changed, 152 insertions(+), 116 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 1ac074d74..c47ec66a1 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -26,10 +26,10 @@ use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; -use crate::metadata::MetadataMap; use rand::seq::SliceRandom; -use std::sync::atomic::{AtomicBool, Ordering}; use crate::client::ConnectivityState; use crate::client::load_balancing::ChannelController; @@ -50,6 +50,7 @@ use crate::client::name_resolution::Address; use crate::client::name_resolution::Endpoint; use crate::client::name_resolution::ResolverUpdate; use crate::core::RequestHeaders; +use crate::metadata::MetadataMap; use crate::rt::BoxedTaskHandle; use crate::rt::GrpcRuntime; @@ -73,16 +74,14 @@ impl LbPolicyBuilder for PickFirstBuilder { PickFirstPolicy { work_scheduler: options.work_scheduler, runtime: options.runtime, - connectivity_state: ConnectivityState::Connecting, + connectivity_state: ConnectivityState::Idle, subchannels: Vec::default(), subchannel_states: HashMap::default(), selected: None, frontier_index: 0, - last_resolver_error: None, last_connection_error: None, shuffler: build_shuffler(), - timer_expired: Arc::new(AtomicBool::new(false)), - timer_handle: None, + timer: None, steady_state: None, } } @@ -113,17 +112,20 @@ pub(crate) struct PickFirstPolicy { frontier_index: usize, // Detailed error tracking. - last_resolver_error: Option, last_connection_error: Option, // Injectable shuffler for deterministic testing. shuffler: Arc, // Timer state tracks when the last connect attempt was started. - timer_expired: Arc, - timer_handle: Option, + timer: Option, // Steady state tracking for continuous retries after pass exhaustion. + // TODO: should steady_state be a "mode" selector enum, e.g.: + // - FirstPass - holds the timer above + // - SteadyState - existing info + // - Ready - holds selected subchannel + // - Error - for zero addresses; holds no data steady_state: Option, } @@ -134,7 +136,6 @@ impl Debug for PickFirstPolicy { .field("selected", &self.selected) .field("frontier_index", &self.frontier_index) .field("connectivity_state", &self.connectivity_state) - .field("last_resolver_error", &self.last_resolver_error) .field("last_connection_error", &self.last_connection_error) .finish() } @@ -191,16 +192,34 @@ impl PickFirstPolicy { ready_subchannel } - /// Call when the selected subchannel is dropped or loses connection. + /// Call when the selected subchannel loses connection. // This causes the LB to go IDLE. fn subchannel_drop(&mut self, channel_controller: &mut dyn ChannelController) { self.selected = None; - self.connectivity_state = ConnectivityState::Idle; + self.update_picker( + ConnectivityState::Connecting, + Arc::new(IdlePicker::new(self.work_scheduler.clone())), + channel_controller, + ); + } + + fn update_picker( + &mut self, + connectivity_state: ConnectivityState, + picker: Arc, + channel_controller: &mut dyn ChannelController, + ) { + if self.connectivity_state == connectivity_state + && connectivity_state == ConnectivityState::Connecting + { + // Prevent redundant connecting updates. + // TODO: prevent redundant IDLE updates? + return; + } + self.connectivity_state = connectivity_state; channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Idle, - picker: Arc::new(IdlePicker { - work_scheduler: self.work_scheduler.clone(), - }), + connectivity_state, + picker, }); } @@ -214,18 +233,15 @@ impl PickFirstPolicy { return; } self.selected = Some(subchannel.clone()); - self.connectivity_state = ConnectivityState::Ready; self.subchannels = vec![subchannel.clone()]; // Keep only the winner. self.steady_state = None; // Reset mode to First Pass. + self.timer = None; // Stop the happy eyeballs timer. - if let Some(handle) = self.timer_handle.take() { - handle.abort(); - } - - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Ready, - picker: Arc::new(OneSubchannelPicker { sc: subchannel }), - }); + self.update_picker( + ConnectivityState::Ready, + Arc::new(OneSubchannelPicker { sc: subchannel }), + channel_controller, + ); } /// Starts a connection pass through the address list. @@ -236,15 +252,19 @@ impl PickFirstPolicy { // If there is a viable subchannel at the frontier, connect to it and // update picker to CONNECTING. if let Some(sc) = self.advance_frontier(true) { - let sc = sc.clone(); // Avoid borrow issues. self.trigger_subchannel_connection(sc, channel_controller); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::Connecting, - picker: Arc::new(QueuingPicker {}), - }); + // TODO: avoid this update if we are in TF (i.e. sticky TF)? + self.update_picker( + ConnectivityState::Connecting, + Arc::new(QueuingPicker {}), + channel_controller, + ); } else { // Otherwise all addresses are in transient failure: update picker. + // TODO: set the last connection error? Is it correct to do so, + // e.g. if the address it came from was removed from the address + // list and a name resolver update triggered this connection pass? _ = self.set_transient_failure(channel_controller, None); } } @@ -263,11 +283,9 @@ impl PickFirstPolicy { if let Some(attempting) = self.subchannels.get(self.frontier_index) && attempting.address() == subchannel.address() && state.connectivity_state == ConnectivityState::TransientFailure + && let Some(next_sc) = self.advance_frontier(false) { - if let Some(next_sc) = self.advance_frontier(false) { - let next_sc = next_sc.clone(); - self.trigger_subchannel_connection(next_sc, channel_controller); - } + self.trigger_subchannel_connection(next_sc, channel_controller); } // Check if First Pass is fully exhausted (frontier exhausted AND zero connecting). @@ -278,8 +296,10 @@ impl PickFirstPolicy { .is_some_and(|s| s.connectivity_state == ConnectivityState::Connecting) }); - if !any_connecting && self.steady_state.is_none() { - self.abort_happy_eyeballs_timer(); + if !any_connecting { + // Nothing currently connecting; first pass complete. Enter + // steady state. + self.timer = None; let error = self.last_connection_error.clone(); _ = self.set_transient_failure(channel_controller, error); self.steady_state = Some(SteadyState::new(self.subchannels.len())); @@ -303,7 +323,8 @@ impl PickFirstPolicy { /// Advances the frontier to the next non-TransientFailure subchannel and /// returns it. If `reset` is true, starts the scan from index 0. // The frontier is the latest index in which connectivity has been attempted. - fn advance_frontier(&mut self, reset: bool) -> Option<&Arc> { + // Returns a cloned Subchannel to avoid borrowing from self. + fn advance_frontier(&mut self, reset: bool) -> Option> { if reset { self.frontier_index = 0; } else { @@ -323,7 +344,7 @@ impl PickFirstPolicy { // Push the frontier if sc is in TransientFailure ConnectivityState::TransientFailure => self.frontier_index += 1, // Otherwise return the subchannel. - _ => return Some(sc), + _ => return Some(sc.clone()), } } None @@ -356,8 +377,6 @@ impl PickFirstPolicy { sc: Arc, channel_controller: &mut dyn ChannelController, ) { - let sc_clone = sc.clone(); - self.connectivity_state = ConnectivityState::Connecting; self.subchannel_states.insert( sc.address(), SubchannelState { @@ -365,38 +384,25 @@ impl PickFirstPolicy { last_connection_error: None, }, ); - sc_clone.connect(); + sc.connect(); - // Cancel any existing timer - if let Some(handle) = self.timer_handle.take() { - handle.abort(); - } - - // Start 250ms timer - let timer_expired = self.timer_expired.clone(); - let work_scheduler = self.work_scheduler.clone(); - - let sleep_fut = self.runtime.sleep(std::time::Duration::from_millis(250)); - let handle = self.runtime.spawn(Box::pin(async move { - sleep_fut.await; - timer_expired.store(true, Ordering::SeqCst); - work_scheduler.schedule_work(); - })); - self.timer_handle = Some(handle); + // Start happy eyeballs timer; replacing any pre-existing timer. + self.timer = Some(Timer::start( + self.runtime.clone(), + self.work_scheduler.clone(), + )); } // Converts the update endpoints to an address list. // Shuffles endpoints (if enabled) before flattening and de-duplication. fn compile_address( &mut self, - endpoints: Vec, + mut endpoints: Vec, config: Option<&PickFirstConfig>, channel_controller: &mut dyn ChannelController, - ) -> Result, String> { - let mut endpoints = endpoints; - + ) -> Vec
{ // Shuffle endpoints if enabled. - if config.map_or(false, |c| c.shuffle_address_list) { + if config.is_some_and(|c| c.shuffle_address_list) { (self.shuffler)(&mut endpoints); } @@ -446,19 +452,12 @@ impl PickFirstPolicy { break; } } - - // If we have no addresses, clear subchannels and set TRANSIENT_FAILURE. - if interleaved.is_empty() { - self.subchannels.clear(); - self.selected = None; - self.set_transient_failure(channel_controller, Some("empty address list".to_string()))?; - } - - Ok(interleaved) + interleaved } // Sets LB state to TRANSIENT_FAILURE and updates picker with error. // Triggers a re-resolution request. + // TODO: make error mandatory. fn set_transient_failure( &mut self, channel_controller: &mut dyn ChannelController, @@ -468,34 +467,18 @@ impl PickFirstPolicy { if let Some(e) = error { self.last_connection_error = Some(e); } - self.connectivity_state = ConnectivityState::TransientFailure; let err = self .last_connection_error .clone() .expect("no last connection error set"); - channel_controller.update_picker(LbState { - connectivity_state: ConnectivityState::TransientFailure, - picker: Arc::new(FailingPicker { error: err.clone() }), - }); + self.update_picker( + ConnectivityState::TransientFailure, + Arc::new(FailingPicker { error: err.clone() }), + channel_controller, + ); channel_controller.request_resolution(); Err(err.clone()) } - - // Returns true if the currently selected subchannel's address is still - // present in the new address list. - fn sticky(&self, new_addresses: &[Address]) -> bool { - self.selected - .as_ref() - .map(|sc| new_addresses.contains(&sc.address())) - .unwrap_or(false) - } - - // Cancels the connection 'Happy Eyeballs' timer if it is active. - fn abort_happy_eyeballs_timer(&mut self) { - if let Some(handle) = self.timer_handle.take() { - handle.abort(); - } - } } impl LbPolicy for PickFirstPolicy { @@ -507,14 +490,24 @@ impl LbPolicy for PickFirstPolicy { config: Option<&Self::LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), String> { - self.abort_happy_eyeballs_timer(); + self.timer = None; // Reset steady state on new update self.steady_state = None; match update.endpoints { Ok(endpoints) => { - let new_addresses = self.compile_address(endpoints, config, channel_controller)?; + let new_addresses = self.compile_address(endpoints, config, channel_controller); + // If we have no addresses, clear subchannels and set TRANSIENT_FAILURE. + if new_addresses.is_empty() { + self.subchannels.clear(); + self.selected = None; + self.set_transient_failure( + channel_controller, + Some("empty address list".to_string()), + )?; + } + if let Some(ready_subchannel) = self.rebuild_subchannels(new_addresses, channel_controller) { @@ -576,13 +569,13 @@ impl LbPolicy for PickFirstPolicy { } (false, _) => { // Always capture freshest unselected error. - if state.connectivity_state == ConnectivityState::TransientFailure { - if let Some(err) = &state.last_connection_error { - self.last_connection_error = Some(err.clone()); - } + if state.connectivity_state == ConnectivityState::TransientFailure + && let Some(err) = &state.last_connection_error + { + self.last_connection_error = Some(err.clone()); } - // Track progress based on whether we are connection pass. + // Track progress based on whether we are in a connection pass. if let Some(steady) = self.steady_state.as_mut() { steady.subchannel_nonready(channel_controller, subchannel, state); } else { @@ -594,17 +587,13 @@ impl LbPolicy for PickFirstPolicy { fn work(&mut self, channel_controller: &mut dyn ChannelController) { if self.connectivity_state == ConnectivityState::Idle { + // TODO: is it safe to assume any call to work() while idle means we + // should connect? self.exit_idle(channel_controller); - } else if self.connectivity_state == ConnectivityState::Connecting { - // Check if timer expired - if self.timer_expired.load(Ordering::SeqCst) { - self.timer_expired.store(false, Ordering::SeqCst); // Reset - - // Advance frontier and trigger next connection. - if let Some(next_sc) = self.advance_frontier(false) { - let next_sc = next_sc.clone(); - self.trigger_subchannel_connection(next_sc, channel_controller); - } + } else if self.timer.as_ref().is_some_and(|t| t.expired()) { + // Advance frontier and trigger next connection. + if let Some(next_sc) = self.advance_frontier(false) { + self.trigger_subchannel_connection(next_sc, channel_controller); } } } @@ -614,9 +603,35 @@ impl LbPolicy for PickFirstPolicy { } } -impl Drop for PickFirstPolicy { +/// Implements the happy eyeballs timer task. `expired` becomes set when it +/// fires. When dropped, the timer is cancelled. +struct Timer { + expired: Arc, + handle: BoxedTaskHandle, +} + +impl Timer { + /// Starts a new timer, returning it. + fn start(runtime: GrpcRuntime, work_scheduler: Arc) -> Self { + let expired = Arc::new(AtomicBool::new(false)); + let expired_clone = expired.clone(); + let handle = runtime.clone().spawn(Box::pin(async move { + runtime.sleep(std::time::Duration::from_millis(250)).await; + expired_clone.store(true, Ordering::SeqCst); + work_scheduler.schedule_work(); + })); + Self { expired, handle } + } + + /// Returns whether the timer has expired yet. + fn expired(&self) -> bool { + self.expired.load(Ordering::SeqCst) + } +} + +impl Drop for Timer { fn drop(&mut self) { - self.abort_happy_eyeballs_timer(); + self.handle.abort(); } } @@ -637,12 +652,24 @@ impl Picker for OneSubchannelPicker { #[derive(Debug)] struct IdlePicker { + triggered_work: AtomicBool, work_scheduler: Arc, } +impl IdlePicker { + fn new(work_scheduler: Arc) -> Self { + Self { + triggered_work: AtomicBool::new(false), + work_scheduler, + } + } +} + impl Picker for IdlePicker { fn pick(&self, _: &RequestHeaders) -> PickResult { - self.work_scheduler.schedule_work(); + if !self.triggered_work.swap(true, Ordering::SeqCst) { + self.work_scheduler.schedule_work(); + } PickResult::Queue } } @@ -705,12 +732,13 @@ impl SteadyState { #[cfg(test)] mod test { + use std::sync::mpsc; + use std::time::Duration; + use super::*; use crate::client::load_balancing::test_utils::{ TestChannelController, TestEvent, TestWorkScheduler, }; - use std::sync::mpsc; - use std::time::Duration; // Helper to create endpoints from a list of address strings. // If attrs are provided, they will be added to each endpoint; otherwise, @@ -1166,7 +1194,10 @@ mod test { // Simulate timer expiration by setting the flag directly. policy - .timer_expired + .timer + .as_ref() + .unwrap() + .expired .store(true, std::sync::atomic::Ordering::SeqCst); // Manually call work() to process the timer expiration. @@ -1436,7 +1467,12 @@ mod test { // 1. Simulate Happy Eyeballs timer firing to launch parallel connection // to addr2. - policy.timer_expired.store(true, Ordering::SeqCst); + policy + .timer + .as_ref() + .unwrap() + .expired + .store(true, Ordering::SeqCst); policy.work(controller.as_mut()); match rx.recv().unwrap() { From 9740828058e11f8d4bb9de5f92bc08878ee83006 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 20 May 2026 13:28:52 -0700 Subject: [PATCH 27/32] ordering::relaxed is fine for this --- grpc/src/client/load_balancing/pick_first.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index c47ec66a1..d5e5b0f2f 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -667,7 +667,7 @@ impl IdlePicker { impl Picker for IdlePicker { fn pick(&self, _: &RequestHeaders) -> PickResult { - if !self.triggered_work.swap(true, Ordering::SeqCst) { + if !self.triggered_work.swap(true, Ordering::Relaxed) { self.work_scheduler.schedule_work(); } PickResult::Queue From 5036188fc38e8816a0fd079894f83ae35bc6816c Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 21 May 2026 09:04:33 -0700 Subject: [PATCH 28/32] test fixes --- grpc/src/client/load_balancing/pick_first.rs | 431 ++++++++++++------- 1 file changed, 269 insertions(+), 162 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index d5e5b0f2f..238d9cb3a 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -197,7 +197,7 @@ impl PickFirstPolicy { fn subchannel_drop(&mut self, channel_controller: &mut dyn ChannelController) { self.selected = None; self.update_picker( - ConnectivityState::Connecting, + ConnectivityState::Idle, Arc::new(IdlePicker::new(self.work_scheduler.clone())), channel_controller, ); @@ -740,6 +740,8 @@ mod test { TestChannelController, TestEvent, TestWorkScheduler, }; + const DEFAULT_TEST_DURATION: Duration = Duration::from_secs(10); + // Helper to create endpoints from a list of address strings. // If attrs are provided, they will be added to each endpoint; otherwise, // default attributes will be used. @@ -788,10 +790,68 @@ mod test { (rx, policy, controller) } + async fn recv_async(rx: &mpsc::Receiver) -> TestEvent { + let start = std::time::Instant::now(); + loop { + match rx.try_recv() { + Ok(event) => return event, + Err(mpsc::TryRecvError::Empty) => { + if start.elapsed() > DEFAULT_TEST_DURATION { + panic!("timed out waiting for event after {DEFAULT_TEST_DURATION:?}"); + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + Err(mpsc::TryRecvError::Disconnected) => { + panic!("channel disconnected"); + } + } + } + } + + async fn expect_new_subchannel(rx: &mpsc::Receiver) -> Arc { + let event = recv_async(rx).await; + match event { + TestEvent::NewSubchannel(sc) => sc, + other => panic!("expected NewSubchannel, got {:?}", other), + } + } + + async fn expect_connect(rx: &mpsc::Receiver) -> Address { + let event = recv_async(rx).await; + match event { + TestEvent::Connect(addr) => addr, + other => panic!("expected Connect, got {:?}", other), + } + } + + async fn expect_picker_update(rx: &mpsc::Receiver) -> LbState { + let event = recv_async(rx).await; + match event { + TestEvent::UpdatePicker(state) => state, + other => panic!("expected UpdatePicker, got {:?}", other), + } + } + + async fn expect_request_resolution(rx: &mpsc::Receiver) { + let event = recv_async(rx).await; + match event { + TestEvent::RequestResolution => {} + other => panic!("expected RequestResolution, got {:?}", other), + } + } + + async fn expect_schedule_work(rx: &mpsc::Receiver) { + let event = recv_async(rx).await; + match event { + TestEvent::ScheduleWork => {} + other => panic!("expected ScheduleWork, got {:?}", other), + } + } + // Helper to simulate a basic connection against a list of // addresses. Returns the event receiver for inspection. Does not imply // that the connection succeeded or failed. - fn simulate_connection( + async fn simulate_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -813,15 +873,19 @@ mod test { ) .unwrap(); - // Expect NewSubchannel/addr, Connect, UpdatePicker(Connecting). - for _ in 0..(addrs_len + 2) { - rx.recv().unwrap(); + for _ in 0..addrs_len { + expect_new_subchannel(&rx).await; } + expect_connect(&rx).await; + + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Connecting); + (rx, policy, controller) } - fn simulate_successful_connection( + async fn simulate_successful_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -829,7 +893,7 @@ mod test { PickFirstPolicy, Box, ) { - let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs).await; // Simulating READY for addr1. let sc1 = policy.subchannels[0].clone(); @@ -844,7 +908,7 @@ mod test { (rx, policy, controller) } - fn simulate_failed_connection( + async fn simulate_failed_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -852,7 +916,7 @@ mod test { PickFirstPolicy, Box, ) { - let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs).await; // Simulating TransientFailure for addr1. let sc1 = policy.subchannels[0].clone(); @@ -872,21 +936,17 @@ mod test { #[tokio::test] async fn test_pick_first_basic_connection() { let addrs = vec!["addr1", "addr2"]; - let (rx, _, _) = simulate_successful_connection(addrs, None); + let (rx, _, _) = simulate_successful_connection(addrs, None).await; // Should update picker to READY with sc1. - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => { - assert_eq!(state.connectivity_state, ConnectivityState::Ready); - let res = state.picker.pick(&RequestHeaders::default()); - match res { - PickResult::Pick(pick) => { - assert_eq!(pick.subchannel.address().address.to_string(), "addr1") - } - other => panic!("unexpected pick result {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); + let res = state.picker.pick(&RequestHeaders::default()); + match res { + PickResult::Pick(pick) => { + assert_eq!(pick.subchannel.address().address.to_string(), "addr1") } - other => panic!("unexpected event {:?}", other), + other => panic!("unexpected pick result {:?}", other), } } @@ -894,13 +954,11 @@ mod test { #[tokio::test] async fn test_pick_first_failover() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None); + simulate_failed_connection(vec!["addr1", "addr2"], None).await; // Should connect to addr2. - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), - other => panic!("unexpected event {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr2"); // Simulate addr2 succeeding. let sc2 = policy.subchannels[1].clone(); @@ -913,29 +971,21 @@ mod test { controller.as_mut(), ); - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => { - assert_eq!(state.connectivity_state, ConnectivityState::Ready) - } - other => panic!("unexpected event {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); } // Ensures that if a subchannel is already selected, and is still present in - // the new resolver update, the LB will keep using it and not switch to a + // the new resolver update, the LB will keep using it and not switch to a // different subchannel. #[tokio::test] async fn test_pick_first_stickiness() { let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1", "addr2"], None); + simulate_successful_connection(vec!["addr1", "addr2"], None).await; // Expect `UpdatePicker(Ready)`. - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => { - assert_eq!(state.connectivity_state, ConnectivityState::Ready) - } - other => panic!("unexpected event {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); // New resolver update including addr1. let endpoints_new = create_endpoints(vec!["addr2", "addr1", "addr3"], None); @@ -951,19 +1001,15 @@ mod test { .unwrap(); // Should create new subchannel for addr2 (was cleared by cleanup). - match rx.recv().unwrap() { - TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr2"), - other => panic!("unexpected event {:?}", other), - } + let sc2 = expect_new_subchannel(&rx).await; + assert_eq!(sc2.address().address.to_string(), "addr2"); // Should create new subchannel for addr3 (was not in previous list). - match rx.recv().unwrap() { - TestEvent::NewSubchannel(sc) => assert_eq!(sc.address().address.to_string(), "addr3"), - other => panic!("unexpected event {:?}", other), - } + let sc3 = expect_new_subchannel(&rx).await; + assert_eq!(sc3.address().address.to_string(), "addr3"); // Should NOT have any more events (no Connect, no UpdatePicker), // because it stuck to the original selected subchannel. - std::thread::sleep(Duration::from_millis(50)); + tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event"); assert_eq!( @@ -982,29 +1028,24 @@ mod test { // TransientFailure and request re-resolution. #[tokio::test] async fn test_pick_first_exhaustion() { - let (rx, policy, controller) = simulate_failed_connection(vec!["addr1"], None); + let (rx, policy, controller) = simulate_failed_connection(vec!["addr1"], None).await; // Should update picker to TransientFailure. - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => assert_eq!( - state.connectivity_state, - ConnectivityState::TransientFailure - ), - other => panic!("unexpected event {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); // Should request re-resolution. - match rx.recv().unwrap() { - TestEvent::RequestResolution => {} - other => panic!("unexpected event {:?}", other), - } + expect_request_resolution(&rx).await; } // Shuffling and interleaving of addresses is deterministic and correct // based on the provided shuffler and config. #[tokio::test] async fn test_pick_first_shuffling_and_interleaving_deterministic() { - let (_rx, mut policy, mut controller) = setup(); + let (rx, mut policy, mut controller) = setup(); // Enable shuffling in config. let config = PickFirstConfig { @@ -1060,11 +1101,12 @@ mod test { ) .unwrap(); - let resulting_addrs: Vec = policy - .subchannels - .iter() - .map(|sc| sc.address().address.to_string()) - .collect(); + const NUM_ADDRS: usize = 4; + let mut resulting_addrs = Vec::with_capacity(NUM_ADDRS); + for _ in 0..NUM_ADDRS { + let sc = expect_new_subchannel(&rx).await; + resulting_addrs.push(sc.address().address.to_string()); + } // Mock shuffler reverses endpoints: [EP3, EP2, EP1] // EP3: [127.0.0.2] (V4) @@ -1131,11 +1173,13 @@ mod test { .unwrap(); // Should only create subchannels for addr1 and addr2 (2 unique addrs). - rx.recv().unwrap(); // NewSubchannel(addr1) - rx.recv().unwrap(); // NewSubchannel(addr2) + let sc1 = expect_new_subchannel(&rx).await; + assert_eq!(sc1.address().address.to_string(), "addr1"); + let sc2 = expect_new_subchannel(&rx).await; + assert_eq!(sc2.address().address.to_string(), "addr2"); // Verify no 3rd subchannel was created. - std::thread::sleep(Duration::from_millis(50)); + tokio::time::sleep(Duration::from_millis(50)).await; while let Ok(event) = rx.try_recv() { if let TestEvent::NewSubchannel(_) = event { panic!("Duplicate subchannel created"); @@ -1150,9 +1194,12 @@ mod test { #[tokio::test] async fn test_pick_first_empty_update_clears_state() { let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1", "addr2"], None); - assert_eq!(policy.subchannels.len(), 1); - assert!(policy.selected.is_some()); + simulate_successful_connection(vec!["addr1", "addr2"], None).await; + + // Verify that the policy produced a picker that was READY. + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); + while rx.try_recv().is_ok() {} // Send empty update. @@ -1166,31 +1213,24 @@ mod test { ); assert!(res.is_err()); - assert_eq!(policy.subchannels.len(), 0); - assert!(policy.selected.is_none()); // Check picker is in TransientFailure. - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => { - assert_eq!( - state.connectivity_state, - ConnectivityState::TransientFailure - ); - } - other => panic!("unexpected event {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + // Check that re-resolution was requested. - match rx.recv().unwrap() { - TestEvent::RequestResolution => {} - other => panic!("unexpected event {:?}", other), - } + expect_request_resolution(&rx).await; } // If the timer expires during a connection pass, the LB should advance to // the next subchannel and trigger a connection attempt. - #[tokio::test(start_paused = true)] + #[tokio::test] async fn test_pick_first_timer_advancement() { - let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); + let (rx, mut policy, mut controller) = + simulate_connection(vec!["addr1", "addr2"], None).await; // Simulate timer expiration by setting the flag directly. policy @@ -1204,38 +1244,25 @@ mod test { policy.work(controller.as_mut()); // Expect Connect event for addr2 due to timer expiration. - // Loop to check for event without blocking the thread. - let mut found = None; - for _ in 0..10 { - match rx.try_recv() { - Ok(event) => { - found = Some(event); - break; - } - Err(std::sync::mpsc::TryRecvError::Empty) => { - // Yield to runtime to allow timer task to run. - tokio::time::sleep(std::time::Duration::from_millis(10)).await; - } - Err(e) => panic!("error recv: {:?}", e), - } - } - - match found { - Some(TestEvent::Connect(addr)) => assert_eq!(addr.address.to_string(), "addr2"), - other => panic!("unexpected result {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr2"); } // If all addresses fail during a connection pass, the LB should enter // steady state and monitor for backoff expiry to retry connections. #[tokio::test] async fn test_pick_first_steady_state_retries() { - let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = + simulate_failed_connection(vec!["addr1"], None).await; let sc1 = policy.subchannels[0].clone(); // Expect UpdatePicker(TransientFailure) and RequestResolution. - rx.recv().unwrap(); - rx.recv().unwrap(); + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + expect_request_resolution(&rx).await; // Ensure steady state was entered. assert!(policy.steady_state.is_some()); @@ -1251,10 +1278,8 @@ mod test { ); // Should automatically call connect() again. - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), - other => panic!("unexpected event {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr1"); } // If the LB is in steady state, and a new address becomes ready, it should @@ -1264,14 +1289,12 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_multi_backend() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None); + simulate_failed_connection(vec!["addr1", "addr2"], None).await; let sc1 = policy.subchannels[0].clone(); // Should failover to addr2: expect Connect(addr2). - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), - other => panic!("unexpected event {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr2"); // While addr2 is connecting, simulate addr1 going IDLE (backoff over). policy.subchannel_update( @@ -1285,7 +1308,7 @@ mod test { // We should NOT reconnect to addr1 during the first pass. // Wait a bit to ensure no event is sent. - std::thread::sleep(std::time::Duration::from_millis(50)); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event"); // Now fail addr2 to complete first pass. @@ -1299,9 +1322,15 @@ mod test { controller.as_mut(), ); - // Expect UpdatePicker(TransientFailure) and RequestResolution. - rx.recv().unwrap(); - rx.recv().unwrap(); + // Expect UpdatePicker(TransientFailure), RequestResolution, and Connect(addr1) from first pass exhaustion. + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + expect_request_resolution(&rx).await; + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr1"); // Confirm LB is in steady state. assert!(policy.steady_state.is_some()); @@ -1317,9 +1346,28 @@ mod test { ); // Now it should automatically call connect() again. - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), - other => panic!("unexpected event {:?}", other), + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr1"); + + // Simulate addr1 successfully connecting and becoming READY. + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Ready, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // The policy should switch to it immediately (enter READY state). + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); + let res = state.picker.pick(&RequestHeaders::default()); + match res { + PickResult::Pick(pick) => { + assert_eq!(pick.subchannel.address().address.to_string(), "addr1"); + } + other => panic!("unexpected pick result {:?}", other), } } @@ -1331,14 +1379,12 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_stuck_idle_prevention() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None); + simulate_failed_connection(vec!["addr1", "addr2"], None).await; let sc1 = policy.subchannels[0].clone(); // Expect Connect(addr2). - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), - other => panic!("unexpected event {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr2"); // Simulate addr1 backing off and transitioning to IDLE early // (while addr2 is still connecting). @@ -1352,7 +1398,7 @@ mod test { ); // Expect NO events yet because first pass is still active. - std::thread::sleep(Duration::from_millis(50)); + tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event during first pass"); // Fail addr2 to exhaust the first pass. @@ -1368,15 +1414,17 @@ mod test { // Expect UpdatePicker(TransientFailure) and RequestResolution from // exhaustion. - rx.recv().unwrap(); // UpdatePicker - rx.recv().unwrap(); // RequestResolution + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + expect_request_resolution(&rx).await; // Expect an immediate Connect(addr1) event triggered by the exhaustion // loop sweeping up the early IDLE subchannel. - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr1"), - other => panic!("unexpected event post-exhaustion {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr1"); } // This test is meant to validate that if a new address with different @@ -1386,7 +1434,7 @@ mod test { #[tokio::test] async fn test_pick_first_address_update_with_attributes() { let addr = "addr1"; - let (rx, mut policy, mut controller) = simulate_connection(vec![addr], None); + let (rx, mut policy, mut controller) = simulate_connection(vec![addr], None).await; // Push same address but with attributes. let attrs = crate::attributes::Attributes::new().add("metadata_value".to_string()); @@ -1426,7 +1474,7 @@ mod test { // unnecessary disruption to active connection attempts. #[tokio::test] async fn test_pick_first_resolver_error_during_connecting() { - let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1"], None).await; // Simulate resolver error arriving. let resolver_error = "dns resolution failed".to_string(); @@ -1445,7 +1493,7 @@ mod test { // abort the attempt or force TransientFailure immediately if the load // balancer still has valid addresses. // Expect NO events to be emitted (no UpdatePicker/RequestResolution). - std::thread::sleep(std::time::Duration::from_millis(50)); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; assert!( rx.try_recv().is_err(), "Unexpected event after resolver error" @@ -1463,7 +1511,8 @@ mod test { // before failing the channel. #[tokio::test] async fn test_pick_first_happy_eyeballs_out_of_order_failure() { - let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); + let (rx, mut policy, mut controller) = + simulate_connection(vec!["addr1", "addr2"], None).await; // 1. Simulate Happy Eyeballs timer firing to launch parallel connection // to addr2. @@ -1475,10 +1524,8 @@ mod test { .store(true, Ordering::SeqCst); policy.work(controller.as_mut()); - match rx.recv().unwrap() { - TestEvent::Connect(addr) => assert_eq!(addr.address.to_string(), "addr2"), - other => panic!("unexpected event {:?}", other), - } + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr2"); // 2. Simulate addr2 failing first while addr1 is still in flight. let sc2 = policy.subchannels[1].clone(); @@ -1492,7 +1539,7 @@ mod test { ); // Verify policy does NOT enter TransientFailure yet. - std::thread::sleep(Duration::from_millis(50)); + tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected premature event"); // 3. Simulate addr1 failing. Pass is now fully exhausted. @@ -1506,15 +1553,11 @@ mod test { controller.as_mut(), ); - match rx.recv().unwrap() { - TestEvent::UpdatePicker(state) => { - assert_eq!( - state.connectivity_state, - ConnectivityState::TransientFailure - ); - } - other => panic!("unexpected event {:?}", other), - } + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); } // Freshest Error Caching (Steady State) @@ -1522,11 +1565,16 @@ mod test { // stale connection errors. #[tokio::test] async fn test_pick_first_steady_state_freshest_error() { - let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = + simulate_failed_connection(vec!["addr1"], None).await; // Consume exhaustion events. - rx.recv().unwrap(); - rx.recv().unwrap(); + let state = expect_picker_update(&rx).await; + assert_eq!( + state.connectivity_state, + ConnectivityState::TransientFailure + ); + expect_request_resolution(&rx).await; assert!(policy.steady_state.is_some()); // Simulate background failure during Steady State with net-new error telemetry. @@ -1546,4 +1594,63 @@ mod test { Some("steady state network drop") ); } + + // Tests that when a selected subchannel disconnects (transitions to Idle), + // the policy reports an Idle state and uses an IdlePicker. + // When an RPC occurs, the IdlePicker schedules work, and the policy + // reconnects when the work scheduler runs. + #[tokio::test] + async fn test_pick_first_disconnect_to_idle_and_reconnect() { + let (rx, mut policy, mut controller) = + simulate_successful_connection(vec!["addr1"], None).await; + + // 1. Consume the initial Ready picker update. + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Ready); + let res = state.picker.pick(&RequestHeaders::default()); + let sc1 = match res { + PickResult::Pick(pick) => { + assert_eq!(pick.subchannel.address().address.to_string(), "addr1"); + pick.subchannel + } + other => panic!("unexpected pick result {:?}", other), + }; + + // 2. Simulate the subchannel disconnecting (transitioning to Idle). + policy.subchannel_update( + sc1.clone(), + &SubchannelState { + connectivity_state: ConnectivityState::Idle, + last_connection_error: None, + }, + controller.as_mut(), + ); + + // 3. Verify the policy updates the picker to Idle state. + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Idle); + let idle_picker = state.picker; + + // At this point, there should be no more events, as we are waiting for an RPC. + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(rx.try_recv().is_err(), "unexpected event"); + + // 4. Simulate an RPC (pick) happening. + let pick_result = idle_picker.pick(&RequestHeaders::default()); + assert!(matches!(pick_result, PickResult::Queue)); + + // 5. The picker should schedule work. + expect_schedule_work(&rx).await; + + // 6. Call work to execute the scheduled connection attempt. + policy.work(controller.as_mut()); + + // 7. Verify that the policy initiates a reconnection to addr1. + let addr = expect_connect(&rx).await; + assert_eq!(addr.address.to_string(), "addr1"); + + // And the picker goes to Connecting. + let state = expect_picker_update(&rx).await; + assert_eq!(state.connectivity_state, ConnectivityState::Connecting); + } } From 24b8f3b2b0d3827ad8bd79d09e57e32c38910977 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 21 May 2026 12:24:30 -0700 Subject: [PATCH 29/32] always use try_recv and no polling --- grpc/src/client/load_balancing/pick_first.rs | 186 +++++++++---------- 1 file changed, 84 insertions(+), 102 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 238d9cb3a..a0710c1ae 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -790,68 +790,50 @@ mod test { (rx, policy, controller) } - async fn recv_async(rx: &mpsc::Receiver) -> TestEvent { - let start = std::time::Instant::now(); - loop { - match rx.try_recv() { - Ok(event) => return event, - Err(mpsc::TryRecvError::Empty) => { - if start.elapsed() > DEFAULT_TEST_DURATION { - panic!("timed out waiting for event after {DEFAULT_TEST_DURATION:?}"); - } - tokio::time::sleep(Duration::from_millis(10)).await; - } - Err(mpsc::TryRecvError::Disconnected) => { - panic!("channel disconnected"); - } - } - } - } - - async fn expect_new_subchannel(rx: &mpsc::Receiver) -> Arc { - let event = recv_async(rx).await; - match event { - TestEvent::NewSubchannel(sc) => sc, - other => panic!("expected NewSubchannel, got {:?}", other), + fn expect_new_subchannel(rx: &mpsc::Receiver) -> Arc { + match rx.try_recv() { + Ok(TestEvent::NewSubchannel(sc)) => sc, + Ok(other) => panic!("expected NewSubchannel, got {:?}", other), + Err(e) => panic!("expected NewSubchannel, got error: {:?}", e), } } - async fn expect_connect(rx: &mpsc::Receiver) -> Address { - let event = recv_async(rx).await; - match event { - TestEvent::Connect(addr) => addr, - other => panic!("expected Connect, got {:?}", other), + fn expect_connect(rx: &mpsc::Receiver) -> Address { + match rx.try_recv() { + Ok(TestEvent::Connect(addr)) => addr, + Ok(other) => panic!("expected Connect, got {:?}", other), + Err(e) => panic!("expected Connect, got error: {:?}", e), } } - async fn expect_picker_update(rx: &mpsc::Receiver) -> LbState { - let event = recv_async(rx).await; - match event { - TestEvent::UpdatePicker(state) => state, - other => panic!("expected UpdatePicker, got {:?}", other), + fn expect_picker_update(rx: &mpsc::Receiver) -> LbState { + match rx.try_recv() { + Ok(TestEvent::UpdatePicker(state)) => state, + Ok(other) => panic!("expected UpdatePicker, got {:?}", other), + Err(e) => panic!("expected UpdatePicker, got error: {:?}", e), } } - async fn expect_request_resolution(rx: &mpsc::Receiver) { - let event = recv_async(rx).await; - match event { - TestEvent::RequestResolution => {} - other => panic!("expected RequestResolution, got {:?}", other), + fn expect_request_resolution(rx: &mpsc::Receiver) { + match rx.try_recv() { + Ok(TestEvent::RequestResolution) => {} + Ok(other) => panic!("expected RequestResolution, got {:?}", other), + Err(e) => panic!("expected RequestResolution, got error: {:?}", e), } } - async fn expect_schedule_work(rx: &mpsc::Receiver) { - let event = recv_async(rx).await; - match event { - TestEvent::ScheduleWork => {} - other => panic!("expected ScheduleWork, got {:?}", other), + fn expect_schedule_work(rx: &mpsc::Receiver) { + match rx.try_recv() { + Ok(TestEvent::ScheduleWork) => {} + Ok(other) => panic!("expected ScheduleWork, got {:?}", other), + Err(e) => panic!("expected ScheduleWork, got error: {:?}", e), } } // Helper to simulate a basic connection against a list of // addresses. Returns the event receiver for inspection. Does not imply // that the connection succeeded or failed. - async fn simulate_connection( + fn simulate_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -874,18 +856,18 @@ mod test { .unwrap(); for _ in 0..addrs_len { - expect_new_subchannel(&rx).await; + expect_new_subchannel(&rx); } - expect_connect(&rx).await; + expect_connect(&rx); - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Connecting); (rx, policy, controller) } - async fn simulate_successful_connection( + fn simulate_successful_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -893,7 +875,7 @@ mod test { PickFirstPolicy, Box, ) { - let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs).await; + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); // Simulating READY for addr1. let sc1 = policy.subchannels[0].clone(); @@ -908,7 +890,7 @@ mod test { (rx, policy, controller) } - async fn simulate_failed_connection( + fn simulate_failed_connection( addrs: Vec<&str>, attrs: Option, ) -> ( @@ -916,7 +898,7 @@ mod test { PickFirstPolicy, Box, ) { - let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs).await; + let (rx, mut policy, mut controller) = simulate_connection(addrs, attrs); // Simulating TransientFailure for addr1. let sc1 = policy.subchannels[0].clone(); @@ -936,10 +918,10 @@ mod test { #[tokio::test] async fn test_pick_first_basic_connection() { let addrs = vec!["addr1", "addr2"]; - let (rx, _, _) = simulate_successful_connection(addrs, None).await; + let (rx, _, _) = simulate_successful_connection(addrs, None); // Should update picker to READY with sc1. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); let res = state.picker.pick(&RequestHeaders::default()); match res { @@ -954,10 +936,10 @@ mod test { #[tokio::test] async fn test_pick_first_failover() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None).await; + simulate_failed_connection(vec!["addr1", "addr2"], None); // Should connect to addr2. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr2"); // Simulate addr2 succeeding. @@ -971,7 +953,7 @@ mod test { controller.as_mut(), ); - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); } @@ -981,10 +963,10 @@ mod test { #[tokio::test] async fn test_pick_first_stickiness() { let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1", "addr2"], None).await; + simulate_successful_connection(vec!["addr1", "addr2"], None); // Expect `UpdatePicker(Ready)`. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); // New resolver update including addr1. @@ -1001,10 +983,10 @@ mod test { .unwrap(); // Should create new subchannel for addr2 (was cleared by cleanup). - let sc2 = expect_new_subchannel(&rx).await; + let sc2 = expect_new_subchannel(&rx); assert_eq!(sc2.address().address.to_string(), "addr2"); // Should create new subchannel for addr3 (was not in previous list). - let sc3 = expect_new_subchannel(&rx).await; + let sc3 = expect_new_subchannel(&rx); assert_eq!(sc3.address().address.to_string(), "addr3"); // Should NOT have any more events (no Connect, no UpdatePicker), @@ -1028,17 +1010,17 @@ mod test { // TransientFailure and request re-resolution. #[tokio::test] async fn test_pick_first_exhaustion() { - let (rx, policy, controller) = simulate_failed_connection(vec!["addr1"], None).await; + let (rx, policy, controller) = simulate_failed_connection(vec!["addr1"], None); // Should update picker to TransientFailure. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); // Should request re-resolution. - expect_request_resolution(&rx).await; + expect_request_resolution(&rx); } // Shuffling and interleaving of addresses is deterministic and correct @@ -1104,7 +1086,7 @@ mod test { const NUM_ADDRS: usize = 4; let mut resulting_addrs = Vec::with_capacity(NUM_ADDRS); for _ in 0..NUM_ADDRS { - let sc = expect_new_subchannel(&rx).await; + let sc = expect_new_subchannel(&rx); resulting_addrs.push(sc.address().address.to_string()); } @@ -1173,9 +1155,9 @@ mod test { .unwrap(); // Should only create subchannels for addr1 and addr2 (2 unique addrs). - let sc1 = expect_new_subchannel(&rx).await; + let sc1 = expect_new_subchannel(&rx); assert_eq!(sc1.address().address.to_string(), "addr1"); - let sc2 = expect_new_subchannel(&rx).await; + let sc2 = expect_new_subchannel(&rx); assert_eq!(sc2.address().address.to_string(), "addr2"); // Verify no 3rd subchannel was created. @@ -1194,10 +1176,10 @@ mod test { #[tokio::test] async fn test_pick_first_empty_update_clears_state() { let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1", "addr2"], None).await; + simulate_successful_connection(vec!["addr1", "addr2"], None); // Verify that the policy produced a picker that was READY. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); while rx.try_recv().is_ok() {} @@ -1215,14 +1197,14 @@ mod test { assert!(res.is_err()); // Check picker is in TransientFailure. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); // Check that re-resolution was requested. - expect_request_resolution(&rx).await; + expect_request_resolution(&rx); } // If the timer expires during a connection pass, the LB should advance to @@ -1230,7 +1212,7 @@ mod test { #[tokio::test] async fn test_pick_first_timer_advancement() { let (rx, mut policy, mut controller) = - simulate_connection(vec!["addr1", "addr2"], None).await; + simulate_connection(vec!["addr1", "addr2"], None); // Simulate timer expiration by setting the flag directly. policy @@ -1244,7 +1226,7 @@ mod test { policy.work(controller.as_mut()); // Expect Connect event for addr2 due to timer expiration. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr2"); } @@ -1253,16 +1235,16 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_retries() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1"], None).await; + simulate_failed_connection(vec!["addr1"], None); let sc1 = policy.subchannels[0].clone(); // Expect UpdatePicker(TransientFailure) and RequestResolution. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); - expect_request_resolution(&rx).await; + expect_request_resolution(&rx); // Ensure steady state was entered. assert!(policy.steady_state.is_some()); @@ -1278,7 +1260,7 @@ mod test { ); // Should automatically call connect() again. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr1"); } @@ -1289,11 +1271,11 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_multi_backend() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None).await; + simulate_failed_connection(vec!["addr1", "addr2"], None); let sc1 = policy.subchannels[0].clone(); // Should failover to addr2: expect Connect(addr2). - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr2"); // While addr2 is connecting, simulate addr1 going IDLE (backoff over). @@ -1323,13 +1305,13 @@ mod test { ); // Expect UpdatePicker(TransientFailure), RequestResolution, and Connect(addr1) from first pass exhaustion. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); - expect_request_resolution(&rx).await; - let addr = expect_connect(&rx).await; + expect_request_resolution(&rx); + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr1"); // Confirm LB is in steady state. @@ -1346,7 +1328,7 @@ mod test { ); // Now it should automatically call connect() again. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr1"); // Simulate addr1 successfully connecting and becoming READY. @@ -1360,7 +1342,7 @@ mod test { ); // The policy should switch to it immediately (enter READY state). - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); let res = state.picker.pick(&RequestHeaders::default()); match res { @@ -1379,11 +1361,11 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_stuck_idle_prevention() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1", "addr2"], None).await; + simulate_failed_connection(vec!["addr1", "addr2"], None); let sc1 = policy.subchannels[0].clone(); // Expect Connect(addr2). - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr2"); // Simulate addr1 backing off and transitioning to IDLE early @@ -1414,16 +1396,16 @@ mod test { // Expect UpdatePicker(TransientFailure) and RequestResolution from // exhaustion. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); - expect_request_resolution(&rx).await; + expect_request_resolution(&rx); // Expect an immediate Connect(addr1) event triggered by the exhaustion // loop sweeping up the early IDLE subchannel. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr1"); } @@ -1434,7 +1416,7 @@ mod test { #[tokio::test] async fn test_pick_first_address_update_with_attributes() { let addr = "addr1"; - let (rx, mut policy, mut controller) = simulate_connection(vec![addr], None).await; + let (rx, mut policy, mut controller) = simulate_connection(vec![addr], None); // Push same address but with attributes. let attrs = crate::attributes::Attributes::new().add("metadata_value".to_string()); @@ -1474,7 +1456,7 @@ mod test { // unnecessary disruption to active connection attempts. #[tokio::test] async fn test_pick_first_resolver_error_during_connecting() { - let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1"], None).await; + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1"], None); // Simulate resolver error arriving. let resolver_error = "dns resolution failed".to_string(); @@ -1512,7 +1494,7 @@ mod test { #[tokio::test] async fn test_pick_first_happy_eyeballs_out_of_order_failure() { let (rx, mut policy, mut controller) = - simulate_connection(vec!["addr1", "addr2"], None).await; + simulate_connection(vec!["addr1", "addr2"], None); // 1. Simulate Happy Eyeballs timer firing to launch parallel connection // to addr2. @@ -1524,7 +1506,7 @@ mod test { .store(true, Ordering::SeqCst); policy.work(controller.as_mut()); - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr2"); // 2. Simulate addr2 failing first while addr1 is still in flight. @@ -1553,7 +1535,7 @@ mod test { controller.as_mut(), ); - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure @@ -1566,15 +1548,15 @@ mod test { #[tokio::test] async fn test_pick_first_steady_state_freshest_error() { let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1"], None).await; + simulate_failed_connection(vec!["addr1"], None); // Consume exhaustion events. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!( state.connectivity_state, ConnectivityState::TransientFailure ); - expect_request_resolution(&rx).await; + expect_request_resolution(&rx); assert!(policy.steady_state.is_some()); // Simulate background failure during Steady State with net-new error telemetry. @@ -1602,10 +1584,10 @@ mod test { #[tokio::test] async fn test_pick_first_disconnect_to_idle_and_reconnect() { let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1"], None).await; + simulate_successful_connection(vec!["addr1"], None); // 1. Consume the initial Ready picker update. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Ready); let res = state.picker.pick(&RequestHeaders::default()); let sc1 = match res { @@ -1627,7 +1609,7 @@ mod test { ); // 3. Verify the policy updates the picker to Idle state. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Idle); let idle_picker = state.picker; @@ -1640,17 +1622,17 @@ mod test { assert!(matches!(pick_result, PickResult::Queue)); // 5. The picker should schedule work. - expect_schedule_work(&rx).await; + expect_schedule_work(&rx); // 6. Call work to execute the scheduled connection attempt. policy.work(controller.as_mut()); // 7. Verify that the policy initiates a reconnection to addr1. - let addr = expect_connect(&rx).await; + let addr = expect_connect(&rx); assert_eq!(addr.address.to_string(), "addr1"); // And the picker goes to Connecting. - let state = expect_picker_update(&rx).await; + let state = expect_picker_update(&rx); assert_eq!(state.connectivity_state, ConnectivityState::Connecting); } } From a207faf8a8331ed0d5ad632a6c32c74751320aa7 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 21 May 2026 12:30:57 -0700 Subject: [PATCH 30/32] remove unused tokio functionality --- grpc/Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/grpc/Cargo.toml b/grpc/Cargo.toml index 5d0b8a98d..da82e25dc 100644 --- a/grpc/Cargo.toml +++ b/grpc/Cargo.toml @@ -96,8 +96,6 @@ tonic = { version = "0.14.6", path = "../tonic", default-features = false, featu "tls-ring", ] } tonic-prost = { version = "0.14.6", path = "../tonic-prost" } -tokio = { version = "1.37.0", features = ["test-util"] } - [[bench]] name = "metadata" From 1cdebe62766ac42d8568049bbe52739b0f72cf98 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 21 May 2026 12:54:02 -0700 Subject: [PATCH 31/32] fmt --- grpc/src/client/load_balancing/pick_first.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index a0710c1ae..73a8e734c 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -1211,8 +1211,7 @@ mod test { // the next subchannel and trigger a connection attempt. #[tokio::test] async fn test_pick_first_timer_advancement() { - let (rx, mut policy, mut controller) = - simulate_connection(vec!["addr1", "addr2"], None); + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); // Simulate timer expiration by setting the flag directly. policy @@ -1234,8 +1233,7 @@ mod test { // steady state and monitor for backoff expiry to retry connections. #[tokio::test] async fn test_pick_first_steady_state_retries() { - let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); let sc1 = policy.subchannels[0].clone(); // Expect UpdatePicker(TransientFailure) and RequestResolution. @@ -1493,8 +1491,7 @@ mod test { // before failing the channel. #[tokio::test] async fn test_pick_first_happy_eyeballs_out_of_order_failure() { - let (rx, mut policy, mut controller) = - simulate_connection(vec!["addr1", "addr2"], None); + let (rx, mut policy, mut controller) = simulate_connection(vec!["addr1", "addr2"], None); // 1. Simulate Happy Eyeballs timer firing to launch parallel connection // to addr2. @@ -1547,8 +1544,7 @@ mod test { // stale connection errors. #[tokio::test] async fn test_pick_first_steady_state_freshest_error() { - let (rx, mut policy, mut controller) = - simulate_failed_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = simulate_failed_connection(vec!["addr1"], None); // Consume exhaustion events. let state = expect_picker_update(&rx); @@ -1583,8 +1579,7 @@ mod test { // reconnects when the work scheduler runs. #[tokio::test] async fn test_pick_first_disconnect_to_idle_and_reconnect() { - let (rx, mut policy, mut controller) = - simulate_successful_connection(vec!["addr1"], None); + let (rx, mut policy, mut controller) = simulate_successful_connection(vec!["addr1"], None); // 1. Consume the initial Ready picker update. let state = expect_picker_update(&rx); From c0afaaebe8c95775a66915763af5f01dcfd1e097 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 21 May 2026 13:36:59 -0700 Subject: [PATCH 32/32] remove sleeps; they should do nothing --- grpc/src/client/load_balancing/pick_first.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 73a8e734c..e21fa0c38 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -991,7 +991,6 @@ mod test { // Should NOT have any more events (no Connect, no UpdatePicker), // because it stuck to the original selected subchannel. - tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event"); assert_eq!( @@ -1161,7 +1160,6 @@ mod test { assert_eq!(sc2.address().address.to_string(), "addr2"); // Verify no 3rd subchannel was created. - tokio::time::sleep(Duration::from_millis(50)).await; while let Ok(event) = rx.try_recv() { if let TestEvent::NewSubchannel(_) = event { panic!("Duplicate subchannel created"); @@ -1288,7 +1286,6 @@ mod test { // We should NOT reconnect to addr1 during the first pass. // Wait a bit to ensure no event is sent. - tokio::time::sleep(std::time::Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event"); // Now fail addr2 to complete first pass. @@ -1378,7 +1375,6 @@ mod test { ); // Expect NO events yet because first pass is still active. - tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event during first pass"); // Fail addr2 to exhaust the first pass. @@ -1473,7 +1469,6 @@ mod test { // abort the attempt or force TransientFailure immediately if the load // balancer still has valid addresses. // Expect NO events to be emitted (no UpdatePicker/RequestResolution). - tokio::time::sleep(std::time::Duration::from_millis(50)).await; assert!( rx.try_recv().is_err(), "Unexpected event after resolver error" @@ -1518,7 +1513,6 @@ mod test { ); // Verify policy does NOT enter TransientFailure yet. - tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected premature event"); // 3. Simulate addr1 failing. Pass is now fully exhausted. @@ -1609,7 +1603,6 @@ mod test { let idle_picker = state.picker; // At this point, there should be no more events, as we are waiting for an RPC. - tokio::time::sleep(Duration::from_millis(50)).await; assert!(rx.try_recv().is_err(), "unexpected event"); // 4. Simulate an RPC (pick) happening.