From ed46aeb6d02ec5b51e9d5699821113dac3b3a5b5 Mon Sep 17 00:00:00 2001 From: Jakub Novak Date: Mon, 13 Apr 2026 16:34:11 +0000 Subject: [PATCH 1/5] fix(api): cancel builds on template delete --- .../api/internal/handlers/sandbox_kill.go | 9 +++- .../api/internal/handlers/template_delete.go | 47 ++++++++++++++++- .../builds/active_template_builds_test.go | 6 ++- .../db/queries/active_template_builds.sql.go | 10 ---- .../queries/builds/active_template_builds.sql | 4 -- packages/db/queries/delete_template.sql.go | 52 +++++++++++++------ .../db/queries/templates/delete_template.sql | 30 +++++++---- 7 files changed, 115 insertions(+), 43 deletions(-) diff --git a/packages/api/internal/handlers/sandbox_kill.go b/packages/api/internal/handlers/sandbox_kill.go index f8bd596476..a7dda20c73 100644 --- a/packages/api/internal/handlers/sandbox_kill.go +++ b/packages/api/internal/handlers/sandbox_kill.go @@ -25,7 +25,7 @@ func (a *APIStore) deleteSnapshot(ctx context.Context, sandboxID string, teamID return err } - aliasKeys, dbErr := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ + deleteRows, dbErr := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ TeamID: teamID, TemplateID: snapshot.TemplateID, }) @@ -33,6 +33,13 @@ func (a *APIStore) deleteSnapshot(ctx context.Context, sandboxID string, teamID return fmt.Errorf("error deleting template from db: %w", dbErr) } + var aliasKeys []string + for _, row := range deleteRows { + if row.AliasKey != "" { + aliasKeys = append(aliasKeys, row.AliasKey) + } + } + a.templateCache.InvalidateAllTags(context.WithoutCancel(ctx), snapshot.TemplateID) a.templateCache.InvalidateAliasesByTemplateID(context.WithoutCancel(ctx), snapshot.TemplateID, aliasKeys) a.snapshotCache.Invalidate(context.WithoutCancel(ctx), sandboxID) diff --git a/packages/api/internal/handlers/template_delete.go b/packages/api/internal/handlers/template_delete.go index 3680c8db64..a87b42be8d 100644 --- a/packages/api/internal/handlers/template_delete.go +++ b/packages/api/internal/handlers/template_delete.go @@ -7,10 +7,12 @@ import ( "github.com/gin-gonic/gin" "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" "github.com/e2b-dev/infra/packages/api/internal/api" "github.com/e2b-dev/infra/packages/api/internal/sandbox" "github.com/e2b-dev/infra/packages/db/queries" + "github.com/e2b-dev/infra/packages/shared/pkg/clusters" "github.com/e2b-dev/infra/packages/shared/pkg/id" "github.com/e2b-dev/infra/packages/shared/pkg/logger" "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" @@ -81,11 +83,11 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a } // Delete the template from DB (cascades to env_build_assignments, env_aliases, snapshot_templates). - // Returns alias cache keys captured before cascade deletion for cache invalidation. + // Returns alias cache keys and active builds captured before the cascade delete. // Build artifacts are intentionally NOT deleted from storage here because builds are layered diffs // that may be referenced by other builds' header mappings. // [ENG-3477] a future GC mechanism will handle orphaned storage. - aliasKeys, err := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ + deleteRows, err := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ TemplateID: templateID, TeamID: team.ID, }) @@ -96,9 +98,26 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a return } + // Split results into alias keys (for cache invalidation) and active builds (for cancellation). + var aliasKeys []string + var activeBuilds []queries.DeleteTemplateRow + + for _, row := range deleteRows { + if row.AliasKey != "" { + aliasKeys = append(aliasKeys, row.AliasKey) + } + + if row.BuildID != nil { + activeBuilds = append(activeBuilds, row) + } + } + a.templateCache.InvalidateAllTags(context.WithoutCancel(ctx), templateID) a.templateCache.InvalidateAliasesByTemplateID(context.WithoutCancel(ctx), templateID, aliasKeys) + // Cancel any active builds that were running for this template. + a.cancelActiveBuilds(ctx, templateID, activeBuilds) + telemetry.ReportEvent(ctx, "deleted template from db") properties := a.posthog.GetPackageToPosthogProperties(&c.Request.Header) @@ -109,3 +128,27 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a c.Status(http.StatusNoContent) } + +// cancelActiveBuilds stops in-progress builds on the orchestrator and marks them as failed in the DB. +func (a *APIStore) cancelActiveBuilds(ctx context.Context, templateID string, builds []queries.DeleteTemplateRow) { + if len(builds) == 0 { + return + } + + for _, b := range builds { + clusterID := clusters.WithClusterFallback(b.ClusterID) + + // Stop the build on the orchestrator node if it's running. + if b.ClusterNodeID != nil { + deleteErr := a.templateManager.DeleteBuild(ctx, *b.BuildID, templateID, clusterID, *b.ClusterNodeID) + if deleteErr != nil { + logger.L().Error(ctx, "Failed to cancel build on node during template deletion", + zap.String("buildID", b.BuildID.String()), + logger.WithTemplateID(templateID), + zap.Error(deleteErr)) + } + } + } + + logger.L().Info(ctx, "Cancelled active builds after template deletion", zap.Int("count", len(builds))) +} diff --git a/packages/db/pkg/tests/builds/active_template_builds_test.go b/packages/db/pkg/tests/builds/active_template_builds_test.go index c5ad0dd284..1cd8bc6651 100644 --- a/packages/db/pkg/tests/builds/active_template_builds_test.go +++ b/packages/db/pkg/tests/builds/active_template_builds_test.go @@ -109,7 +109,11 @@ func TestDeleteActiveTemplateBuild_RemovesActiveBuild(t *testing.T) { }) require.NoError(t, err) - err = db.SqlcClient.DeleteActiveTemplateBuild(ctx, buildID) + err = db.SqlcClient.TestsRawSQL(ctx, + `DELETE FROM public.active_template_builds + WHERE build_id = $1`, + buildID, + ) require.NoError(t, err) count, err := db.SqlcClient.GetInProgressTemplateBuildsByTeam(ctx, queries.GetInProgressTemplateBuildsByTeamParams{ diff --git a/packages/db/queries/active_template_builds.sql.go b/packages/db/queries/active_template_builds.sql.go index 6042bba633..fa4c3491f7 100644 --- a/packages/db/queries/active_template_builds.sql.go +++ b/packages/db/queries/active_template_builds.sql.go @@ -41,13 +41,3 @@ func (q *Queries) CreateActiveTemplateBuild(ctx context.Context, arg CreateActiv ) return err } - -const deleteActiveTemplateBuild = `-- name: DeleteActiveTemplateBuild :exec -DELETE FROM public.active_template_builds -WHERE build_id = $1 -` - -func (q *Queries) DeleteActiveTemplateBuild(ctx context.Context, buildID uuid.UUID) error { - _, err := q.db.Exec(ctx, deleteActiveTemplateBuild, buildID) - return err -} diff --git a/packages/db/queries/builds/active_template_builds.sql b/packages/db/queries/builds/active_template_builds.sql index 5e08dce66e..53af99234e 100644 --- a/packages/db/queries/builds/active_template_builds.sql +++ b/packages/db/queries/builds/active_template_builds.sql @@ -10,7 +10,3 @@ INSERT INTO public.active_template_builds ( @template_id, @tags::text[] ); - --- name: DeleteActiveTemplateBuild :exec -DELETE FROM public.active_template_builds -WHERE build_id = @build_id; diff --git a/packages/db/queries/delete_template.sql.go b/packages/db/queries/delete_template.sql.go index e7f52dfdb6..e3b01f4fc0 100644 --- a/packages/db/queries/delete_template.sql.go +++ b/packages/db/queries/delete_template.sql.go @@ -17,16 +17,26 @@ WITH alias_keys AS ( WHEN namespace IS NOT NULL THEN namespace || '/' || alias ELSE alias END::text AS alias_key - FROM public.env_aliases - WHERE env_id = $1 + FROM public.env_aliases ea + WHERE ea.env_id = $1 +), active_builds AS ( + SELECT atb.build_id, e.cluster_id, b.cluster_node_id + FROM public.active_template_builds atb + JOIN public.env_builds b ON b.id = atb.build_id + JOIN public.envs e ON e.id = atb.template_id + WHERE atb.template_id = $1 + AND atb.created_at > NOW() - INTERVAL '1 day' ), deleted AS ( - DELETE FROM "public"."envs" - WHERE id = $1 - AND team_id = $2 - RETURNING id + DELETE FROM "public"."envs" envs_del + WHERE envs_del.id = $1 + AND envs_del.team_id = $2 + RETURNING envs_del.id ) -SELECT alias_key FROM alias_keys -WHERE EXISTS (SELECT 1 FROM deleted) +SELECT alias_key, NULL::uuid AS build_id, NULL::uuid AS cluster_id, NULL::text AS cluster_node_id +FROM alias_keys WHERE EXISTS (SELECT 1 FROM deleted) +UNION ALL +SELECT ''::text AS alias_key, build_id, cluster_id, cluster_node_id +FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted) ` type DeleteTemplateParams struct { @@ -34,21 +44,33 @@ type DeleteTemplateParams struct { TeamID uuid.UUID } -// Deletes a template and returns its alias cache keys for cache invalidation. -// Alias keys are captured via CTE before the cascade delete removes them. -func (q *Queries) DeleteTemplate(ctx context.Context, arg DeleteTemplateParams) ([]string, error) { +type DeleteTemplateRow struct { + AliasKey string + BuildID *uuid.UUID + ClusterID *uuid.UUID + ClusterNodeID *string +} + +// Deletes a template and returns alias cache keys and active builds. +// Both are captured via CTEs before the cascade delete removes them. +func (q *Queries) DeleteTemplate(ctx context.Context, arg DeleteTemplateParams) ([]DeleteTemplateRow, error) { rows, err := q.db.Query(ctx, deleteTemplate, arg.TemplateID, arg.TeamID) if err != nil { return nil, err } defer rows.Close() - var items []string + var items []DeleteTemplateRow for rows.Next() { - var alias_key string - if err := rows.Scan(&alias_key); err != nil { + var i DeleteTemplateRow + if err := rows.Scan( + &i.AliasKey, + &i.BuildID, + &i.ClusterID, + &i.ClusterNodeID, + ); err != nil { return nil, err } - items = append(items, alias_key) + items = append(items, i) } if err := rows.Err(); err != nil { return nil, err diff --git a/packages/db/queries/templates/delete_template.sql b/packages/db/queries/templates/delete_template.sql index abd7a3a873..49604b6d5c 100644 --- a/packages/db/queries/templates/delete_template.sql +++ b/packages/db/queries/templates/delete_template.sql @@ -1,18 +1,28 @@ -- name: DeleteTemplate :many --- Deletes a template and returns its alias cache keys for cache invalidation. --- Alias keys are captured via CTE before the cascade delete removes them. +-- Deletes a template and returns alias cache keys and active builds. +-- Both are captured via CTEs before the cascade delete removes them. WITH alias_keys AS ( SELECT CASE WHEN namespace IS NOT NULL THEN namespace || '/' || alias ELSE alias END::text AS alias_key - FROM public.env_aliases - WHERE env_id = @template_id + FROM public.env_aliases ea + WHERE ea.env_id = @template_id +), active_builds AS ( + SELECT atb.build_id, e.cluster_id, b.cluster_node_id + FROM public.active_template_builds atb + JOIN public.env_builds b ON b.id = atb.build_id + JOIN public.envs e ON e.id = atb.template_id + WHERE atb.template_id = @template_id + AND atb.created_at > NOW() - INTERVAL '1 day' ), deleted AS ( - DELETE FROM "public"."envs" - WHERE id = @template_id - AND team_id = @team_id - RETURNING id + DELETE FROM "public"."envs" envs_del + WHERE envs_del.id = @template_id + AND envs_del.team_id = @team_id + RETURNING envs_del.id ) -SELECT alias_key FROM alias_keys -WHERE EXISTS (SELECT 1 FROM deleted); \ No newline at end of file +SELECT alias_key, NULL::uuid AS build_id, NULL::uuid AS cluster_id, NULL::text AS cluster_node_id +FROM alias_keys WHERE EXISTS (SELECT 1 FROM deleted) +UNION ALL +SELECT ''::text AS alias_key, build_id, cluster_id, cluster_node_id +FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted); \ No newline at end of file From 0f5901b15243839877e829c8141d89981c497038 Mon Sep 17 00:00:00 2001 From: Jakub Novak Date: Mon, 13 Apr 2026 16:48:39 +0000 Subject: [PATCH 2/5] chore: address pr comments --- packages/api/internal/handlers/sandbox_kill.go | 2 ++ packages/api/internal/handlers/template_delete.go | 2 +- packages/db/queries/delete_template.sql.go | 1 + packages/db/queries/templates/delete_template.sql | 3 ++- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/api/internal/handlers/sandbox_kill.go b/packages/api/internal/handlers/sandbox_kill.go index a7dda20c73..39cc979e2f 100644 --- a/packages/api/internal/handlers/sandbox_kill.go +++ b/packages/api/internal/handlers/sandbox_kill.go @@ -25,6 +25,8 @@ func (a *APIStore) deleteSnapshot(ctx context.Context, sandboxID string, teamID return err } + // Snapshot builds are not tracked in active_template_builds, so there are + // no in-progress builds to cancel on the orchestrator here. deleteRows, dbErr := a.sqlcDB.DeleteTemplate(ctx, queries.DeleteTemplateParams{ TeamID: teamID, TemplateID: snapshot.TemplateID, diff --git a/packages/api/internal/handlers/template_delete.go b/packages/api/internal/handlers/template_delete.go index a87b42be8d..d9ed53fbd3 100644 --- a/packages/api/internal/handlers/template_delete.go +++ b/packages/api/internal/handlers/template_delete.go @@ -129,7 +129,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a c.Status(http.StatusNoContent) } -// cancelActiveBuilds stops in-progress builds on the orchestrator and marks them as failed in the DB. +// cancelActiveBuilds stops in-progress builds on the orchestrator. func (a *APIStore) cancelActiveBuilds(ctx context.Context, templateID string, builds []queries.DeleteTemplateRow) { if len(builds) == 0 { return diff --git a/packages/db/queries/delete_template.sql.go b/packages/db/queries/delete_template.sql.go index e3b01f4fc0..11f6633c6e 100644 --- a/packages/db/queries/delete_template.sql.go +++ b/packages/db/queries/delete_template.sql.go @@ -53,6 +53,7 @@ type DeleteTemplateRow struct { // Deletes a template and returns alias cache keys and active builds. // Both are captured via CTEs before the cascade delete removes them. +// Active builds are returned so the caller can stop them on the orchestrator. func (q *Queries) DeleteTemplate(ctx context.Context, arg DeleteTemplateParams) ([]DeleteTemplateRow, error) { rows, err := q.db.Query(ctx, deleteTemplate, arg.TemplateID, arg.TeamID) if err != nil { diff --git a/packages/db/queries/templates/delete_template.sql b/packages/db/queries/templates/delete_template.sql index 49604b6d5c..4ea8d171ce 100644 --- a/packages/db/queries/templates/delete_template.sql +++ b/packages/db/queries/templates/delete_template.sql @@ -1,6 +1,7 @@ -- name: DeleteTemplate :many -- Deletes a template and returns alias cache keys and active builds. -- Both are captured via CTEs before the cascade delete removes them. +-- Active builds are returned so the caller can stop them on the orchestrator. WITH alias_keys AS ( SELECT CASE WHEN namespace IS NOT NULL THEN namespace || '/' || alias @@ -25,4 +26,4 @@ SELECT alias_key, NULL::uuid AS build_id, NULL::uuid AS cluster_id, NULL::text A FROM alias_keys WHERE EXISTS (SELECT 1 FROM deleted) UNION ALL SELECT ''::text AS alias_key, build_id, cluster_id, cluster_node_id -FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted); \ No newline at end of file +FROM active_builds WHERE EXISTS (SELECT 1 FROM deleted); From 2ac34a8d62080ad7f4b9fe01c54250db390d6166 Mon Sep 17 00:00:00 2001 From: Jakub Novak Date: Mon, 13 Apr 2026 16:51:46 +0000 Subject: [PATCH 3/5] chore: improve context handling --- packages/api/internal/handlers/template_delete.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/api/internal/handlers/template_delete.go b/packages/api/internal/handlers/template_delete.go index d9ed53fbd3..d02955760f 100644 --- a/packages/api/internal/handlers/template_delete.go +++ b/packages/api/internal/handlers/template_delete.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "time" "github.com/gin-gonic/gin" "go.opentelemetry.io/otel/attribute" @@ -116,7 +117,7 @@ func (a *APIStore) DeleteTemplatesTemplateID(c *gin.Context, aliasOrTemplateID a a.templateCache.InvalidateAliasesByTemplateID(context.WithoutCancel(ctx), templateID, aliasKeys) // Cancel any active builds that were running for this template. - a.cancelActiveBuilds(ctx, templateID, activeBuilds) + a.cancelActiveBuilds(context.WithoutCancel(ctx), templateID, activeBuilds) telemetry.ReportEvent(ctx, "deleted template from db") @@ -135,6 +136,12 @@ func (a *APIStore) cancelActiveBuilds(ctx context.Context, templateID string, bu return } + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + ctx, span := tracer.Start(ctx, "cancel active-builds") + defer span.End() + for _, b := range builds { clusterID := clusters.WithClusterFallback(b.ClusterID) From 4a874535be4b17c38badaff75051835b50dddbab Mon Sep 17 00:00:00 2001 From: Jakub Novak Date: Mon, 13 Apr 2026 17:01:41 +0000 Subject: [PATCH 4/5] chore: remove unnecessary check --- packages/db/queries/templates/delete_template.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/db/queries/templates/delete_template.sql b/packages/db/queries/templates/delete_template.sql index 4ea8d171ce..3d369a9d50 100644 --- a/packages/db/queries/templates/delete_template.sql +++ b/packages/db/queries/templates/delete_template.sql @@ -15,7 +15,6 @@ WITH alias_keys AS ( JOIN public.env_builds b ON b.id = atb.build_id JOIN public.envs e ON e.id = atb.template_id WHERE atb.template_id = @template_id - AND atb.created_at > NOW() - INTERVAL '1 day' ), deleted AS ( DELETE FROM "public"."envs" envs_del WHERE envs_del.id = @template_id From c3fbe3a6f0727c7db7a26d44152e68dd2ab5876a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 13 Apr 2026 17:04:09 +0000 Subject: [PATCH 5/5] chore: auto-commit generated changes --- packages/db/queries/delete_template.sql.go | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/db/queries/delete_template.sql.go b/packages/db/queries/delete_template.sql.go index 11f6633c6e..9179a35074 100644 --- a/packages/db/queries/delete_template.sql.go +++ b/packages/db/queries/delete_template.sql.go @@ -25,7 +25,6 @@ WITH alias_keys AS ( JOIN public.env_builds b ON b.id = atb.build_id JOIN public.envs e ON e.id = atb.template_id WHERE atb.template_id = $1 - AND atb.created_at > NOW() - INTERVAL '1 day' ), deleted AS ( DELETE FROM "public"."envs" envs_del WHERE envs_del.id = $1