diff --git a/manager/state/raft/transport/peer.go b/manager/state/raft/transport/peer.go
index 1c79eae54..7b5efe156 100644
--- a/manager/state/raft/transport/peer.go
+++ b/manager/state/raft/transport/peer.go
@@ -172,6 +172,10 @@ func splitSnapshotData(_ context.Context, m *raftpb.Message) []api.StreamRaftMes
 	}
 
 	raftMsg := *m
+	// Clone Snapshot so that re-slicing Snapshot.Data below
+	// does not mutate m.Snapshot.Data through the shared pointer.
+	snap := *m.Snapshot
+	raftMsg.Snapshot = &snap
 
 	// sub-slice for this snapshot chunk.
 	raftMsg.Snapshot.Data = m.Snapshot.Data[snapDataIndex : snapDataIndex+chunkSize]
diff --git a/manager/state/raft/transport/transport_test.go b/manager/state/raft/transport/transport_test.go
index 14e4550e5..46e376b7b 100644
--- a/manager/state/raft/transport/transport_test.go
+++ b/manager/state/raft/transport/transport_test.go
@@ -105,6 +105,51 @@ func testSend(ctx context.Context, c *mockCluster, from uint64, to []uint64, msg
 	}
 }
 
+// TestSplitSnapshotDataDoesNotMutateInput is a regression test for #3231.
+// Before the fix, splitSnapshotData did a shallow copy of the raft message
+// and re-sliced the shared Snapshot.Data on each iteration, shrinking the
+// original slice's capacity and eventually panicking with
+// "slice bounds out of range".
+func TestSplitSnapshotDataDoesNotMutateInput(t *testing.T) {
+	ctx := context.Background()
+
+	// Build a MsgSnap whose Snapshot.Data clearly exceeds GRPCMaxMsgSize so
+	// that the split loop runs multiple iterations (where the bug manifests).
+	const dataSize = 3 * GRPCMaxMsgSize
+	data := make([]byte, dataSize)
+	for i := range data {
+		data[i] = byte(i) // conversion truncates mod 256; pattern repeats every 256 bytes
+	}
+	m := raftpb.Message{
+		Type: raftpb.MsgSnap,
+		From: 1,
+		To:   2,
+		Snapshot: &raftpb.Snapshot{
+			Data: data,
+			Metadata: raftpb.SnapshotMetadata{
+				Index: uint64(len(data)),
+			},
+		},
+	}
+	origData := m.Snapshot.Data
+	origLen, origCap := len(origData), cap(origData)
+
+	msgs := splitSnapshotData(ctx, &m)
+	require.Greater(t, len(msgs), 1, "data larger than GRPCMaxMsgSize must split into multiple chunks")
+
+	// Chunks must reassemble to the original data.
+	var assembled []byte
+	for _, msg := range msgs {
+		assembled = append(assembled, msg.Message.Snapshot.Data...)
+	}
+	assert.Equal(t, data, assembled)
+
+	// The input message's Snapshot.Data must be untouched (regression guard).
+	assert.Equal(t, origLen, len(m.Snapshot.Data))
+	assert.Equal(t, origCap, cap(m.Snapshot.Data))
+	assert.Equal(t, data, m.Snapshot.Data)
+}
+
 func TestSend(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	c := newCluster()