diff --git a/packages/api/internal/handlers/sandbox_kill.go b/packages/api/internal/handlers/sandbox_kill.go index f8bd596476..39cc979e2f 100644 --- a/packages/api/internal/handlers/sandbox_kill.go +++ b/packages/api/internal/handlers/sandbox_kill.go @@ -25,7 +25,9 @@ func (a *APIStore) deleteSnapshot(ctx context.Context, sandboxID string, teamID return err } - aliasKeys, dbErr := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ + // Snapshot builds are not tracked in active_template_builds, so there are + // no in-progress builds to cancel on the orchestrator here. + deleteRows, dbErr := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ TeamID: teamID, TemplateID: snapshot.TemplateID, }) @@ -33,6 +35,13 @@ func (a *APIStore) deleteSnapshot(ctx context.Context, sandboxID string, teamID return fmt.Errorf("error deleting template from db: %w", dbErr) } + var aliasKeys []string + for _, row := range deleteRows { + if row.AliasKey != "" { + aliasKeys = append(aliasKeys, row.AliasKey) + } + } + a.templateCache.InvalidateAllTags(context.WithoutCancel(ctx), snapshot.TemplateID) a.templateCache.InvalidateAliasesByTemplateID(context.WithoutCancel(ctx), snapshot.TemplateID, aliasKeys) a.snapshotCache.Invalidate(context.WithoutCancel(ctx), sandboxID) diff --git a/packages/api/internal/handlers/template_delete.go b/packages/api/internal/handlers/template_delete.go index 3680c8db64..d02955760f 100644 --- a/packages/api/internal/handlers/template_delete.go +++ b/packages/api/internal/handlers/template_delete.go @@ -4,13 +4,16 @@ import ( "context" "fmt" "net/http" + "time" "github.com/gin-gonic/gin" "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" "github.com/e2b-dev/infra/packages/api/internal/api" "github.com/e2b-dev/infra/packages/api/internal/sandbox" "github.com/e2b-dev/infra/packages/db/queries" + "github.com/e2b-dev/infra/packages/shared/pkg/clusters" "github.com/e2b-dev/infra/packages/shared/pkg/id" 
"github.com/e2b-dev/infra/packages/shared/pkg/logger" "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" @@ -81,11 +84,11 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a } // Delete the template from DB (cascades to env_build_assignments, env_aliases, snapshot_templates). - // Returns alias cache keys captured before cascade deletion for cache invalidation. + // Returns alias cache keys and active builds captured before the cascade delete. // Build artifacts are intentionally NOT deleted from storage here because builds are layered diffs // that may be referenced by other builds' header mappings. // [ENG-3477] a future GC mechanism will handle orphaned storage. - aliasKeys, err := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ + deleteRows, err := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ TemplateID: templateID, TeamID: team.ID, }) @@ -96,9 +99,26 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a return } + // Split results into alias keys (for cache invalidation) and active builds (for cancellation). + var aliasKeys []string + var activeBuilds []queries.DeleteTemplateRow + + for _, row := range deleteRows { + if row.AliasKey != "" { + aliasKeys = append(aliasKeys, row.AliasKey) + } + + if row.BuildID != nil { + activeBuilds = append(activeBuilds, row) + } + } + a.templateCache.InvalidateAllTags(context.WithoutCancel(ctx), templateID) a.templateCache.InvalidateAliasesByTemplateID(context.WithoutCancel(ctx), templateID, aliasKeys) + // Cancel any active builds that were running for this template. 
+	a.cancelActiveBuilds(context.WithoutCancel(ctx), templateID, activeBuilds)
+
 	telemetry.ReportEvent(ctx, "deleted template from db")
 
 	properties := a.posthog.GetPackageToPosthogProperties(&c.Request.Header)
@@ -109,3 +129,51 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a
 
 	c.Status(http.StatusNoContent)
 }
+
+// cancelActiveBuilds makes a best-effort attempt to stop builds that were
+// still in progress for a template whose DB row has just been deleted.
+//
+// Rows without a build ID or an assigned cluster node are skipped, since there
+// is nothing to address on the orchestrator. DeleteBuild failures are logged
+// and not returned, and the whole pass shares a single 30-second timeout.
+func (a *APIStore) cancelActiveBuilds(ctx context.Context, templateID string, builds []queries.DeleteTemplateRow) {
+	if len(builds) == 0 {
+		return
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	ctx, span := tracer.Start(ctx, "cancel active-builds")
+	defer span.End()
+
+	// Count only builds for which DeleteBuild returned no error, so the
+	// summary log does not overstate what happened.
+	cancelled := 0
+
+	for _, b := range builds {
+		// Guard both pointers before dereferencing them below.
+		if b.BuildID == nil || b.ClusterNodeID == nil {
+			continue
+		}
+
+		clusterID := clusters.WithClusterFallback(b.ClusterID)
+
+		deleteErr := a.templateManager.DeleteBuild(ctx, *b.BuildID, templateID, clusterID, *b.ClusterNodeID)
+		if deleteErr != nil {
+			logger.L().Error(ctx, "Failed to cancel build on node during template deletion",
+				zap.String("buildID", b.BuildID.String()),
+				logger.WithTemplateID(templateID),
+				zap.Error(deleteErr))
+
+			continue
+		}
+
+		cancelled++
+	}
+
+	logger.L().Info(ctx, "Cancelled active builds after template deletion",
+		logger.WithTemplateID(templateID),
+		zap.Int("count", cancelled),
+		zap.Int("total", len(builds)))
+}
diff --git a/packages/db/pkg/tests/builds/active_template_builds_test.go b/packages/db/pkg/tests/builds/active_template_builds_test.go
index c5ad0dd284..1cd8bc6651 100644
--- a/packages/db/pkg/tests/builds/active_template_builds_test.go
+++ b/packages/db/pkg/tests/builds/active_template_builds_test.go
@@ -109,7 +109,11 @@ func TestDeleteActiveTemplateBuild_RemovesActiveBuild(t *testing.T) {
 	})
 	require.NoError(t, err)
 
-	err = db.SqlcClient.DeleteActiveTemplateBuild(ctx, buildID)
+	err = db.SqlcClient.TestsRawSQL(ctx,
+		`DELETE FROM public.active_template_builds
+		WHERE build_id = $1`,
+		buildID,
+	)
 	require.NoError(t, err)
 
 	count, err := 
db.SqlcClient.GetInProgressTemplateBuildsByTeam(ctx, queries.GetInProgressTemplateBuildsByTeamParams{ diff --git a/packages/db/queries/active_template_builds.sql.go b/packages/db/queries/active_template_builds.sql.go index 6042bba633..fa4c3491f7 100644 --- a/packages/db/queries/active_template_builds.sql.go +++ b/packages/db/queries/active_template_builds.sql.go @@ -41,13 +41,3 @@ func (q *Queries) CreateActiveTemplateBuild(ctx context.Context, arg CreateActiv ) return err } - -const deleteActiveTemplateBuild = `-- name: DeleteActiveTemplateBuild :exec -DELETE FROM public.active_template_builds -WHERE build_id = $1 -` - -func (q *Queries) DeleteActiveTemplateBuild(ctx context.Context, buildID uuid.UUID) error { - _, err := q.db.Exec(ctx, deleteActiveTemplateBuild, buildID) - return err -} diff --git a/packages/db/queries/builds/active_template_builds.sql b/packages/db/queries/builds/active_template_builds.sql index 5e08dce66e..53af99234e 100644 --- a/packages/db/queries/builds/active_template_builds.sql +++ b/packages/db/queries/builds/active_template_builds.sql @@ -10,7 +10,3 @@ INSERT INTO public.active_template_builds ( @template_id, @tags::text[] ); - --- name: DeleteActiveTemplateBuild :exec -DELETE FROM public.active_template_builds -WHERE build_id = @build_id; diff --git a/packages/db/queries/delete_template.sql.go b/packages/db/queries/delete_template.sql.go index e7f52dfdb6..9179a35074 100644 --- a/packages/db/queries/delete_template.sql.go +++ b/packages/db/queries/delete_template.sql.go @@ -17,16 +17,25 @@ WITH alias_keys AS ( WHEN namespace IS NOT NULL THEN namespace || '/' || alias ELSE alias END::text AS alias_key - FROM public.env_aliases - WHERE env_id = $1 + FROM public.env_aliases ea + WHERE ea.env_id = $1 +), active_builds AS ( + SELECT atb.build_id, e.cluster_id, b.cluster_node_id + FROM public.active_template_builds atb + JOIN public.env_builds b ON b.id = atb.build_id + JOIN public.envs e ON e.id = atb.template_id + WHERE atb.template_id = $1 
), deleted AS ( - DELETE FROM "public"."envs" - WHERE id = $1 - AND team_id = $2 - RETURNING id + DELETE FROM "public"."envs" envs_del + WHERE envs_del.id = $1 + AND envs_del.team_id = $2 + RETURNING envs_del.id ) -SELECT alias_key FROM alias_keys -WHERE EXISTS (SELECT 1 FROM deleted) +SELECT alias_key, NULL::uuid AS build_id, NULL::uuid AS cluster_id, NULL::text AS cluster_node_id +FROM alias_keys WHERE EXISTS (SELECT 1 FROM deleted) +UNION ALL +SELECT ''::text AS alias_key, build_id, cluster_id, cluster_node_id +FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted) ` type DeleteTemplateParams struct { @@ -34,21 +43,34 @@ type DeleteTemplateParams struct { TeamID uuid.UUID } -// Deletes a template and returns its alias cache keys for cache invalidation. -// Alias keys are captured via CTE before the cascade delete removes them. -func (q *Queries) DeleteTemplate(ctx context.Context, arg DeleteTemplateParams) ([]string, error) { +type DeleteTemplateRow struct { + AliasKey string + BuildID *uuid.UUID + ClusterID *uuid.UUID + ClusterNodeID *string +} + +// Deletes a template and returns alias cache keys and active builds. +// Both are captured via CTEs before the cascade delete removes them. +// Active builds are returned so the caller can stop them on the orchestrator. 
+func (q *Queries) DeleteTemplate(ctx context.Context, arg DeleteTemplateParams) ([]DeleteTemplateRow, error) { rows, err := q.db.Query(ctx, deleteTemplate, arg.TemplateID, arg.TeamID) if err != nil { return nil, err } defer rows.Close() - var items []string + var items []DeleteTemplateRow for rows.Next() { - var alias_key string - if err := rows.Scan(&alias_key); err != nil { + var i DeleteTemplateRow + if err := rows.Scan( + &i.AliasKey, + &i.BuildID, + &i.ClusterID, + &i.ClusterNodeID, + ); err != nil { return nil, err } - items = append(items, alias_key) + items = append(items, i) } if err := rows.Err(); err != nil { return nil, err diff --git a/packages/db/queries/templates/delete_template.sql b/packages/db/queries/templates/delete_template.sql index abd7a3a873..3d369a9d50 100644 --- a/packages/db/queries/templates/delete_template.sql +++ b/packages/db/queries/templates/delete_template.sql @@ -1,18 +1,28 @@ -- name: DeleteTemplate :many --- Deletes a template and returns its alias cache keys for cache invalidation. --- Alias keys are captured via CTE before the cascade delete removes them. +-- Deletes a template and returns alias cache keys and active builds. +-- Both are captured via CTEs before the cascade delete removes them. +-- Active builds are returned so the caller can stop them on the orchestrator. 
WITH alias_keys AS ( SELECT CASE WHEN namespace IS NOT NULL THEN namespace || '/' || alias ELSE alias END::text AS alias_key - FROM public.env_aliases - WHERE env_id = @template_id + FROM public.env_aliases ea + WHERE ea.env_id = @template_id +), active_builds AS ( + SELECT atb.build_id, e.cluster_id, b.cluster_node_id + FROM public.active_template_builds atb + JOIN public.env_builds b ON b.id = atb.build_id + JOIN public.envs e ON e.id = atb.template_id + WHERE atb.template_id = @template_id ), deleted AS ( - DELETE FROM "public"."envs" - WHERE id = @template_id - AND team_id = @team_id - RETURNING id + DELETE FROM "public"."envs" envs_del + WHERE envs_del.id = @template_id + AND envs_del.team_id = @team_id + RETURNING envs_del.id ) -SELECT alias_key FROM alias_keys -WHERE EXISTS (SELECT 1 FROM deleted); \ No newline at end of file +SELECT alias_key, NULL::uuid AS build_id, NULL::uuid AS cluster_id, NULL::text AS cluster_node_id +FROM alias_keys WHERE EXISTS (SELECT 1 FROM deleted) +UNION ALL +SELECT ''::text AS alias_key, build_id, cluster_id, cluster_node_id +FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted);