From f302c90b1cc37aacb2ece707b8cf5a1dad5095c1 Mon Sep 17 00:00:00 2001 From: Xuming Xu Date: Fri, 24 Apr 2026 11:47:25 -0700 Subject: [PATCH] wal: fix flaky TestConcurrentWritersWithManyRecords newFailoverWriter internally calls switchToNewDir, which async-creates writer 0 and sends a signal to the logWriterCreated channel. The test never drained this initial signal, so each <-logWriterCreated in the loop drained the wrong writer's signal (off-by-one). This caused writer creation goroutines to run after nextWriterIndex had advanced past them, triggering the closeWriter path (writerIndex+1 != nextWriterIndex) and skipping snapshotAndSwitchWriter entirely. When combined with a writer whose flusher could make progress (advancing the queue tail via doneSyncCallback), a later writer's snapshot would start from a non-zero tail, producing the observed interval.first > 0 failures. Fix by draining the initial writer creation signal before the record pushing loop. Also add stopper.stop() before file verification to ensure all async LogWriter closes complete, and add diagnostic logging for each file's interval. Fixes #5995. Informs #4754. Co-Authored-By: roachdev-claude --- wal/failover_writer_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wal/failover_writer_test.go b/wal/failover_writer_test.go index 335d9274dba..2b8022fff9c 100644 --- a/wal/failover_writer_test.go +++ b/wal/failover_writer_test.go @@ -661,6 +661,7 @@ func TestConcurrentWritersWithManyRecords(t *testing.T) { writeWALSyncOffsets: func() bool { return false }, }, dirs[dirIndex]) require.NoError(t, err) + <-logWriterCreated // Wait for initial writer to be created. wg := &sync.WaitGroup{} switchInterval := len(records) / 4 for i := 0; i < len(records); i++ { @@ -693,6 +694,7 @@ func TestConcurrentWritersWithManyRecords(t *testing.T) { } require.Equal(t, 0, len(queueSemChan)) }() + stopper.stop() type indexInterval struct { first, last int } @@ -726,6 +728,7 @@ func TestConcurrentWritersWithManyRecords(t *testing.T) { } } require.Equal(t, 0, interval.first) + t.Logf("file %d: interval [%d, %d)", i, interval.first, interval.last) if i == numLogWriters-1 { require.Equal(t, len(records), interval.last) }