Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 76 additions & 70 deletions runsc/boot/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,86 +370,92 @@ func (r *restorer) restore(l *Loader) error {

r.stateFile.Close()

// Transfer ownership of the `timer` to a new goroutine.
// This is because `timer.Log` blocks until all timed tasks are finished,
// but some restore tasks may still run in the background, and we don't
// want to block this function until they finish.
postRestoreThread := r.timer.Fork("postRestore")
go func() {
defer postRestoreThread.End()
postRestoreThread.Reached("scheduled")
if err := control.PostRestore(l.k, postRestoreThread); err != nil {
log.Warningf("Killing the sandbox after post restore work failed: %v", err)
l.k.Kill(linux.WaitStatusTerminationSignal(linux.SIGKILL))
return
}
postRestoreThread.Reached("post restore done")
go r.postRestore(l.k, postRestoreThread, r.timer)
r.timer = nil

// Now that post restore work succeeded, increment the checkpoint gen
// manually. The count was saved while the previous kernel was being saved
// and checkpoint success was unknown at that time. Now we know the had
// checkpoint succeeded. Allow the application to proceed while pages may
// keep loading in the background.
l.k.IncCheckpointGenOnRestore()
return nil
}

// Wait for page loading to complete if happening in the background.
if r.asyncMFLoader != nil {
if err := r.asyncMFLoader.Wait(); err != nil {
log.Warningf("Killing the sandbox after MemoryFile page loading failed: %v", err)
l.k.Kill(linux.WaitStatusTerminationSignal(linux.SIGKILL))
return
}
}
func (r *restorer) postRestore(k *kernel.Kernel, postRestoreThread *timing.Timeline, timer *timing.Timer) {
defer timer.Log()
defer postRestoreThread.End()

// Calculate the CPU time saved for restore.
t, err := state.CPUTime()
var s Savings
if err != nil {
log.Warningf("Failed to get CPU time usage for restore, err: %v", err)
} else {
log.Infof("Restore CPU usage: %s", t.String())
savedTimeStr, ok := r.metadata[state.GvisorCPUUsageKey]
if !ok {
log.Warningf("Failed to retrieve CPU time usage from the metadata")
} else {
savedTime, err := time2.ParseDuration(savedTimeStr)
if err != nil {
log.Warningf("CPU time usage in metadata %v is invalid, err: %v", savedTimeStr, err)
} else {
s.CPUTimeSaved = savedTime - t
log.Infof("CPU time saved with restore: %v ms", s.CPUTimeSaved.Milliseconds())
}
}
}
postRestoreThread.Reached("scheduled")
if err := control.PostRestore(k, postRestoreThread); err != nil {
log.Warningf("Killing the sandbox after post restore work failed: %v", err)
k.Kill(linux.WaitStatusTerminationSignal(linux.SIGKILL))
return
}
postRestoreThread.Reached("post restore done")

// Calculate the walltime saved for restore.
startTime := starttime.Get()
curTime := time2.Now()
wt := curTime.Sub(startTime)
log.Infof("Restore wall time: %s", wt.String())
savedWtStr, ok := r.metadata[state.GvisorWallTimeKey]
if !ok {
log.Warningf("Failed to retrieve walltime from the metadata")
} else {
savedWt, err := time2.ParseDuration(savedWtStr)
if err != nil {
log.Warningf("Walltime in metadata %v is invalid, err: %v", savedWtStr, err)
} else {
s.WallTimeSaved = savedWt - wt
log.Infof("Walltime saved with restore: %v ms", s.WallTimeSaved.Milliseconds())
}
// Now that post restore work succeeded, increment the checkpoint gen
// manually. The count was saved while the previous kernel was being saved
// and checkpoint success was unknown at that time. Now we know the had
// checkpoint succeeded. Allow the application to proceed while pages may
// keep loading in the background.
k.IncCheckpointGenOnRestore()

// Wait for page loading to complete if happening in the background.
if r.asyncMFLoader != nil {
if err := r.asyncMFLoader.Wait(); err != nil {
log.Warningf("Killing the sandbox after MemoryFile page loading failed: %v", err)
k.Kill(linux.WaitStatusTerminationSignal(linux.SIGKILL))
return
}
}

r.cm.onRestoreDone(s)
postRestoreThread.Reached("kernel notified")
var s Savings
if err := r.calculateCPUSavings(&s); err != nil {
log.Warningf("Failed to calculate CPU savings: %v", err)
}
if err := r.calculateWallTimeSavings(&s); err != nil {
log.Warningf("Failed to calculate walltime savings: %v", err)
}

log.Infof("Restore successful")
}()
r.cm.onRestoreDone(s)
postRestoreThread.Reached("kernel notified")
log.Infof("Restore successful")
}

// Transfer ownership of the `timer` to a new goroutine.
// This is because `timer.Log` blocks until all timed tasks are finished,
// but some restore tasks may still run in the background, and we don't
// want to block this function until they finish.
timer := r.timer
r.timer = nil
go timer.Log()
// Calculate the CPU time saved for restore.
func (r *restorer) calculateCPUSavings(s *Savings) error {
t, err := state.CPUTime()
if err != nil {
return fmt.Errorf("failed to get CPU time usage for restore, err: %w", err)
}
savedTimeStr, ok := r.metadata[state.GvisorCPUUsageKey]
if !ok {
return fmt.Errorf("failed to retrieve CPU time usage from the metadata")
}
savedTime, err := time2.ParseDuration(savedTimeStr)
if err != nil {
return fmt.Errorf("cpu time usage in metadata %v is invalid, err: %w", savedTimeStr, err)
}

s.CPUTimeSaved = savedTime - t
log.Infof("CPU time saved with restore: %v ms, restore CPU time: %v ms", s.CPUTimeSaved.Milliseconds(), t.Milliseconds())
return nil
}

// Calculate the walltime saved for restore.
func (r *restorer) calculateWallTimeSavings(s *Savings) error {
savedWtStr, ok := r.metadata[state.GvisorWallTimeKey]
if !ok {
return fmt.Errorf("failed to retrieve walltime from the metadata")
}
savedWt, err := time2.ParseDuration(savedWtStr)
if err != nil {
return fmt.Errorf("walltime in metadata %v is invalid, err: %w", savedWtStr, err)
}

wt := time2.Since(starttime.Get())
s.WallTimeSaved = savedWt - wt
log.Infof("Walltime saved with restore: %v ms, restore walltime: %v ms", s.WallTimeSaved.Milliseconds(), wt.Milliseconds())
return nil
}

Expand Down
Loading