From d3915c37dd0ebd73c6d94d5012b3dff14d066978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Sat, 11 Apr 2026 23:47:05 +0200 Subject: [PATCH 1/3] fix(uffd): retry failed page faults via UFFDIO_WAKE instead of killing sandbox Previously, a single failed source.Slice in faultPage immediately fired SignalExit, irreversibly killing the entire sandbox. This was the root cause of production UFFD failures when GCS reads timed out. Add UFFDIO_WAKE support and use it to retry transient storage failures: 1. On source.Slice failure, increment a per-address retry counter 2. If the counter is at most maxFaultRetries (3): call UFFDIO_WAKE to wake the faulting guest thread without resolving the fault - the thread re-executes the faulting instruction, so the fault is delivered again as a fresh message, freeing the current goroutine 3. If all retries are exhausted, or UFFDIO_WAKE itself fails: call SignalExit as before (sandbox teardown) 4. On success: clear retry state for that address This is the proper kernel-level retry mechanism - UFFDIO_WAKE wakes the faulting thread without installing the page, so the access faults again and the fault is naturally re-delivered. No goroutine slots are held during the retry window. 
--- .../pkg/sandbox/uffd/userfaultfd/fd.go | 15 ++++++++ .../sandbox/uffd/userfaultfd/userfaultfd.go | 37 ++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd.go index 75a464c95c..fe3c655c9d 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd.go @@ -44,6 +44,7 @@ const ( UFFDIO_API = C.UFFDIO_API UFFDIO_REGISTER = C.UFFDIO_REGISTER UFFDIO_UNREGISTER = C.UFFDIO_UNREGISTER + UFFDIO_WAKE = C.UFFDIO_WAKE UFFDIO_COPY = C.UFFDIO_COPY UFFD_PAGEFAULT_FLAG_WRITE = C.UFFD_PAGEFAULT_FLAG_WRITE @@ -130,6 +131,20 @@ func (f Fd) copy(addr, pagesize uintptr, data []byte, mode CULong) error { return nil } +// wake wakes threads waiting on page faults in the given address range +// without resolving the fault. The woken threads will re-execute the +// faulting instruction, triggering a new page fault that will be +// delivered as a fresh message on the uffd fd. 
+func (f Fd) wake(addr, pagesize uintptr) error { + r := newUffdioRange(CULong(addr)&^CULong(pagesize-1), CULong(pagesize)) + + if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(f), UFFDIO_WAKE, uintptr(unsafe.Pointer(&r))); errno != 0 { + return errno + } + + return nil +} + func (f Fd) close() error { return syscall.Close(int(f)) } diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 51bcc4f17d..e15b8d2837 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "sync" + "sync/atomic" "syscall" "unsafe" @@ -23,7 +24,13 @@ import ( var tracer = otel.Tracer("github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/userfaultfd") -const maxRequestsInProgress = 4096 +const ( + maxRequestsInProgress = 4096 + // maxFaultRetries is the number of times a page fault can be retried via + // UFFDIO_WAKE before giving up. Each retry releases the goroutine and + // lets the kernel re-deliver the fault as a fresh message. + maxFaultRetries = 3 +) var ErrUnexpectedEventType = errors.New("unexpected event type") @@ -48,6 +55,10 @@ type Userfaultfd struct { wg errgroup.Group + // faultRetries tracks how many times each page address has been retried + // via UFFDIO_WAKE. Key is page-aligned address, value is *atomic.Int32. 
+ faultRetries sync.Map + logger logger.Logger } @@ -333,6 +344,28 @@ func (u *Userfaultfd) faultPage( b, dataErr := source.Slice(ctx, offset, int64(pagesize)) if dataErr != nil { + retryVal, _ := u.faultRetries.LoadOrStore(addr, &atomic.Int32{}) + retries := retryVal.(*atomic.Int32) + attempt := int(retries.Add(1)) + + if attempt <= maxFaultRetries { + u.logger.Warn(ctx, "UFFD serve data fetch failed, waking for retry", + zap.Int("attempt", attempt), + zap.Int("max_retries", maxFaultRetries), + zap.Int64("offset", offset), + zap.Uintptr("addr", addr), + zap.Error(dataErr), + ) + + if wakeErr := u.fd.wake(addr, pagesize); wakeErr != nil { + u.logger.Error(ctx, "UFFD wake failed", zap.Uintptr("addr", addr), zap.Error(wakeErr)) + } else { + return nil + } + } + + u.faultRetries.Delete(addr) + var signalErr error if onFailure != nil { signalErr = onFailure() @@ -346,6 +379,8 @@ func (u *Userfaultfd) faultPage( return fmt.Errorf("failed to read from source: %w", joinedErr) } + u.faultRetries.Delete(addr) + var copyMode CULong // Performing copy() on UFFD clears the WP bit unless we explicitly tell From 685535496c063e53c59184c507a0be9886f58438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 13 Apr 2026 10:58:35 +0200 Subject: [PATCH 2/3] style(uffd): simplify if-else to early return per linter --- .../pkg/sandbox/uffd/userfaultfd/userfaultfd.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index e15b8d2837..562de5bb64 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -357,11 +357,12 @@ func (u *Userfaultfd) faultPage( zap.Error(dataErr), ) - if wakeErr := u.fd.wake(addr, pagesize); wakeErr != nil { - u.logger.Error(ctx, "UFFD wake failed", zap.Uintptr("addr", addr), zap.Error(wakeErr)) 
- } else { + wakeErr := u.fd.wake(addr, pagesize) + if wakeErr == nil { return nil } + + u.logger.Error(ctx, "UFFD wake failed", zap.Uintptr("addr", addr), zap.Error(wakeErr)) } u.faultRetries.Delete(addr) From f1255f2146b169a73b12e1e400035a232cda036a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 13 Apr 2026 11:45:20 +0200 Subject: [PATCH 3/3] fix(uffd): include wakeErr in error chain on retry exhaustion --- .../pkg/sandbox/uffd/userfaultfd/userfaultfd.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 562de5bb64..84f4626a7f 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -348,6 +348,8 @@ func (u *Userfaultfd) faultPage( retries := retryVal.(*atomic.Int32) attempt := int(retries.Add(1)) + var wakeErr error + if attempt <= maxFaultRetries { u.logger.Warn(ctx, "UFFD serve data fetch failed, waking for retry", zap.Int("attempt", attempt), @@ -357,7 +359,7 @@ func (u *Userfaultfd) faultPage( zap.Error(dataErr), ) - wakeErr := u.fd.wake(addr, pagesize) + wakeErr = u.fd.wake(addr, pagesize) if wakeErr == nil { return nil } @@ -372,7 +374,7 @@ func (u *Userfaultfd) faultPage( signalErr = onFailure() } - joinedErr := errors.Join(dataErr, signalErr) + joinedErr := errors.Join(dataErr, wakeErr, signalErr) span.RecordError(joinedErr) u.logger.Error(ctx, "UFFD serve data fetch error", zap.Error(joinedErr))