Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion packages/api/internal/orchestrator/delete_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"time"

"github.com/google/uuid"
"go.uber.org/zap"
Expand Down Expand Up @@ -80,11 +81,17 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
if alreadyDone {
logger.L().Info(ctx, "Sandbox was already in the process of being removed", logger.WithSandboxID(sandboxID), zap.String("state", string(sbx.State)))

if time.Since(sbx.EndTime) > sandbox.StaleCutoff && opts.Action.Effect == sandbox.TransitionExpires {
o.sandboxStore.Remove(context.WithoutCancel(ctx), teamID, sandboxID)
go o.analyticsRemove(context.WithoutCancel(ctx), sbx, opts.Action)
}

return nil
Comment thread
jakubno marked this conversation as resolved.
}

defer func() { go o.analyticsRemove(context.WithoutCancel(ctx), sbx, opts.Action) }()
defer o.sandboxStore.Remove(ctx, teamID, sandboxID)
// Once we start the removal process, we want to make sure it gets removed from the store
defer o.sandboxStore.Remove(context.WithoutCancel(ctx), teamID, sandboxID)
Comment on lines +84 to +94
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it worth creating an anonymous cleanup function calling sandbox store, remove, and analytics remove without ctx?

err = o.removeSandboxFromNode(ctx, sbx, opts.Action)
if err != nil {
logger.L().Error(ctx, "Error pausing sandbox", zap.Error(err), logger.WithSandboxID(sbx.SandboxID))
Expand Down
4 changes: 4 additions & 0 deletions packages/api/internal/sandbox/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import (
"time"
)

// StaleCutoff is how long a team entry must be expired before it can be pruned
// This prevents races where TTL is extended, but the sandbox would still be removed
const StaleCutoff = 10 * time.Minute

// TransitionEffect describes what happens to the sandbox when a state action completes.
type TransitionEffect int

Expand Down
3 changes: 2 additions & 1 deletion packages/api/internal/sandbox/storage/redis/items.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ func (s *Storage) ExpiredItems(ctx context.Context) ([]sandbox.Sandbox, error) {

// Only evict running sandboxes
if sbx.State != sandbox.StateRunning {
if time.Since(sbx.EndTime) <= staleCutoff {
// If the sandbox is in transitioning state for more than stale cutoff, it's likely failed removal. Let it be cleaned up by the regular expiration process.
if time.Since(sbx.EndTime) <= sandbox.StaleCutoff {
// Let the current removal finish

continue
Expand Down
7 changes: 1 addition & 6 deletions packages/api/internal/sandbox/storage/redis/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,6 @@ func (s *Storage) Update(ctx context.Context, teamID uuid.UUID, sandboxID string
return updatedSbx, nil
}

// staleCutoff is how long a team entry must be idle (no Add calls) before it
// can be pruned from the global teams ZSET when its sandbox count is zero.
// This prevents races where a Remove sees SCARD==0 right before an Add.
const staleCutoff = time.Hour

func (s *Storage) TeamsWithSandboxCount(ctx context.Context) (map[uuid.UUID]int64, error) {
members, err := s.redisClient.ZRangeWithScores(ctx, globalTeamsSet, 0, -1).Result()
if err != nil {
Expand Down Expand Up @@ -269,7 +264,7 @@ func (s *Storage) TeamsWithSandboxCount(ctx context.Context) (map[uuid.UUID]int6
}

now := time.Now().Unix()
cutoff := now - int64(staleCutoff.Seconds())
cutoff := now - int64(sandbox.StaleCutoff.Seconds())
Comment thread
jakubno marked this conversation as resolved.
Comment thread
jakubno marked this conversation as resolved.

teams := make(map[uuid.UUID]int64, len(entries))
var stale []any
Expand Down
Loading