From 05c375b85ebd0ef970b8685258d18c071ab36b92 Mon Sep 17 00:00:00 2001 From: Jason Lynch Date: Fri, 24 Apr 2026 13:19:36 -0400 Subject: [PATCH] fix: embedded etcd should only connect to self Fixes a bug where the etcd clients in hosts with embedded etcd were configured to connect to all cluster members that existed when the client was initialized, instead of only to the host's own etcd endpoint. Connecting only to the local endpoint was the original intent and functionality, but I changed it while implementing support for remote Etcd. I think that change was just an accidental inclusion from a different implementation of the remote Etcd feature. PLAT-581 --- clustertest/add_remove_host_test.go | 23 +++++++++++++++++++++++ clustertest/cluster_test.go | 6 +++--- server/internal/etcd/embedded.go | 3 ++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/clustertest/add_remove_host_test.go b/clustertest/add_remove_host_test.go index 07055010..55b7fbe4 100644 --- a/clustertest/add_remove_host_test.go +++ b/clustertest/add_remove_host_test.go @@ -3,6 +3,7 @@ package clustertest import ( + "fmt" "testing" "time" @@ -198,3 +199,25 @@ func TestForcedHostRemovalWithDatabase(t *testing.T) { tLog(t, "test completed successfully") } + +func TestRollingAddRemove(t *testing.T) { + // Regression test for PLAT-581 + t.Parallel() + + // Initialize a four-host cluster, then remove host-1, and add another host. + // The order of adds is important because this bug originates in the + // endpoint list for host-2+, so we add each host individually. 
+ cluster := NewCluster(t, ClusterConfig{ + Hosts: []HostConfig{ + {ID: "host-1"}, + }, + }) + cluster.Init(t) + for i := 2; i <= 4; i++ { + cluster.Add(t, HostConfig{ID: fmt.Sprintf("host-%d", i)}) + cluster.Init(t) + } + cluster.Remove(t, "host-1") + cluster.Add(t, HostConfig{ID: "host-5"}) + cluster.Init(t) +} diff --git a/clustertest/cluster_test.go b/clustertest/cluster_test.go index b2fd5dd6..1ff7fb78 100644 --- a/clustertest/cluster_test.go +++ b/clustertest/cluster_test.go @@ -101,6 +101,9 @@ func (c *Cluster) Add(t testing.TB, hostCfg HostConfig) { func (c *Cluster) Remove(t testing.TB, hostID string) { t.Helper() + delete(c.hosts, hostID) + c.client = hostsClient(t, c.hosts) + resp, err := c.client.RemoveHost(t.Context(), &controlplane.RemoveHostPayload{ HostID: controlplane.Identifier(hostID), }) @@ -114,9 +117,6 @@ func (c *Cluster) Remove(t testing.TB, hostID string) { TaskID: resp.Task.TaskID, }) require.NoError(t, err) - - delete(c.hosts, hostID) - c.client = hostsClient(t, c.hosts) } // RefreshClient recreates the client with updated host configurations. diff --git a/server/internal/etcd/embedded.go b/server/internal/etcd/embedded.go index 8b46626a..529a1e39 100644 --- a/server/internal/etcd/embedded.go +++ b/server/internal/etcd/embedded.go @@ -409,7 +409,8 @@ func (e *EmbeddedEtcd) GetClient() (*clientv3.Client, error) { } cfg := e.cfg.Config() - clientCfg, err := clientConfig(cfg, e.logger, e.etcd.Server.Cluster().ClientURLs()...) + // We only want to connect to our own Etcd endpoint. + clientCfg, err := clientConfig(cfg, e.logger, e.ClientEndpoints()...) if err != nil { return nil, fmt.Errorf("failed to get client config: %w", err) }