Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion manager/state/raft/storage/walwrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,11 @@ func ReadRepairWAL(
return nil, WALData{}, errors.Wrap(err, "failed to decrypt WAL")
}
// we can only repair ErrUnexpectedEOF and we never repair twice.
if repaired || err != io.ErrUnexpectedEOF {
if repaired || !errors.Is(err, io.ErrUnexpectedEOF) {
// TODO(thaJeztah): should ReadRepairWAL be updated to handle cases where
// some (last) of the files cannot be recovered? ("best effort" recovery?)
// Or should an informative error be produced to help the user (which could
// mean: remove the last file?). See TestReadRepairWAL for more details.
return nil, WALData{}, errors.Wrap(err, "irreparable WAL error")
}
if !wal.Repair(nil, walDir) {
Expand Down
24 changes: 18 additions & 6 deletions manager/state/raft/storage/walwrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,24 @@ func TestReadRepairWAL(t *testing.T) {
require.NoError(t, err)
require.Len(t, files, 1)

fName := filepath.Join(tempdir, files[0].Name())
fileContents, err := os.ReadFile(fName)
require.NoError(t, err)
info, err := files[0].Info()
require.NoError(t, err)
require.NoError(t, os.WriteFile(fName, fileContents[:200], info.Mode()))
Comment on lines -234 to -239
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced this with os.Truncate() below, which was shorter, and more descriptive what we're doing 😅

// Corrupt the file by truncating it to replicate a broken state (partial
// write), but for this test, make sure that there's enough bytes left to
// read a record (24 bytes). The record size (recBytes) is calculated here;
//
// - https://github.com/etcd-io/etcd/blob/621cd7b9e5aa2ccf634b555e4ebe0037b8975066/server/wal/decoder.go#L83
// - https://github.com/etcd-io/etcd/blob/621cd7b9e5aa2ccf634b555e4ebe0037b8975066/server/wal/decoder.go#L122-L131
//
// Using a shorter length will make the test fail:
//
// wal: max entry size limit exceeded, recBytes: 24, fileSize(200) - offset(184) - padBytes(0) = entryLimit(16)
//
// So the file should be >= 208 bytes.
//
// For further details, see:
//
// - https://github.com/etcd-io/etcd/commit/621cd7b9e5aa2ccf634b555e4ebe0037b8975066 / https://github.com/etcd-io/etcd/pull/14127 (backport of https://github.com/etcd-io/etcd/pull/14122)
// - https://github.com/etcd-io/etcd/issues/14114
require.NoError(t, os.Truncate(filepath.Join(tempdir, files[0].Name()), 300))
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Effectively the fix is s/200/300/


ogWAL, err := OriginalWAL.Open(tempdir, snapshot)
require.NoError(t, err)
Expand Down