Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 26 additions & 27 deletions packages/orchestrator/pkg/sandbox/fc/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ import (
"io"
"os"
"os/exec"
"strings"
"sync/atomic"
"syscall"
"time"

"github.com/shirou/gopsutil/v4/process"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -612,17 +612,16 @@ func (p *Process) Pid() (int, error) {
return p.cmd.Process.Pid, nil
}

// getProcessState returns the state of the process.
// It's used to check if the process is in the D state, because gopsutil doesn't show that.
func getProcessState(ctx context.Context, pid int) (string, error) {
output, err := exec.CommandContext(ctx, "ps", "-o", "stat=", "-p", fmt.Sprint(pid)).Output()
// getProcessStatus returns the process status using gopsutil.
// Return values: R (running), S (sleep), T (stop), I (idle),
// Z (zombie), W (wait), L (lock), D (disk sleep / uninterruptible).
func getProcessStatus(pid int) ([]string, error) {
proc, err := process.NewProcess(int32(pid))
if err != nil {
return "", fmt.Errorf("error getting state of pid=%d: %w", pid, err)
return nil, fmt.Errorf("process %d not found: %w", pid, err)
}

state := strings.TrimSpace(string(output))

return state, nil
return proc.Status()
}

func (p *Process) Stop(ctx context.Context) error {
Expand All @@ -648,14 +647,7 @@ func (p *Process) Stop(ctx context.Context) error {
// this function should never fail b/c a previous context was canceled.
ctx = context.WithoutCancel(ctx)

state, err := getProcessState(ctx, p.cmd.Process.Pid)
if err != nil {
logger.L().Warn(ctx, "failed to get fc process state", zap.Error(err), logger.WithSandboxID(p.files.SandboxID))
} else if state == "D" {
logger.L().Info(ctx, "fc process is in the D state before we call SIGTERM", logger.WithSandboxID(p.files.SandboxID))
}

err = p.cmd.Process.Signal(syscall.SIGTERM)
err := p.cmd.Process.Signal(syscall.SIGTERM)
if err != nil {
if errors.Is(err, os.ErrProcessDone) {
logger.L().Info(ctx, "fc process already exited", logger.WithSandboxID(p.files.SandboxID))
Expand All @@ -670,18 +662,25 @@ func (p *Process) Stop(ctx context.Context) error {
select {
// Wait 10 sec for the FC process to exit, if it doesn't, send SIGKILL.
case <-time.After(10 * time.Second):
err := p.cmd.Process.Kill()
if err != nil {
logger.L().Warn(ctx, "failed to send SIGKILL to fc process", zap.Error(err), logger.WithSandboxID(p.files.SandboxID))
} else {
logger.L().Info(ctx, "sent SIGKILL to fc process because it was not responding to SIGTERM for 10 seconds", logger.WithSandboxID(p.files.SandboxID))
// Check process status right before Kill — the pre-SIGTERM status
// captured above is 10s stale and no longer useful here.
status, stateErr := getProcessStatus(p.cmd.Process.Pid)
if errors.Is(stateErr, process.ErrorProcessNotRunning) {
// Process already exited, no need to send SIGKILL.
return
} else if stateErr != nil {
logger.L().Warn(ctx, "failed to get fc process status before SIGKILL", zap.Error(stateErr), logger.WithSandboxID(p.files.SandboxID))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we return here? The kill will fail as well, because the process is not running anymore.

}

state, err := getProcessState(ctx, p.cmd.Process.Pid)
if err != nil {
logger.L().Warn(ctx, "failed to get fc process state after sending SIGKILL", zap.Error(err), logger.WithSandboxID(p.files.SandboxID))
} else if state == "D" {
logger.L().Info(ctx, "fc process is in the D state after we call SIGKILL", logger.WithSandboxID(p.files.SandboxID))
err := p.cmd.Process.Kill()
Comment thread
jakubno marked this conversation as resolved.
if err == nil {
logger.L().Info(ctx, "sent SIGKILL to fc process because it was not responding to SIGTERM for 10 seconds",
zap.Strings("status", status),
logger.WithSandboxID(p.files.SandboxID),
)
}
if err != nil && !errors.Is(err, os.ErrProcessDone) {
logger.L().Warn(ctx, "failed to send SIGKILL to fc process", zap.Error(err), logger.WithSandboxID(p.files.SandboxID))
}

// If the FC process exited, we can return.
Expand Down
Loading