Skip to content

Commit 62898a9

Browse files
committed
billing: cap actions minutes by job timeout
1 parent 7c607df commit 62898a9

9 files changed

Lines changed: 93 additions & 15 deletions

File tree

docs/internal/billing.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,9 @@ PAYMENTS SP08 starts hosted-cost metering:
351351
durable.
352352
- Actions minutes are counted from completed or cancelled workflow job
353353
runtime, rounded up to the next whole minute, within the current
354-
monthly usage period.
354+
monthly usage period. Metered runtime is capped at the job's declared
355+
`timeout_minutes` so stale terminal rows cannot bill past the maximum
356+
execution time the runner was allowed to consume.
355357
- `org_usage_counters` stores the current projection,
356358
`org_usage_snapshots` records audit snapshots, and
357359
`org_quota_overrides` lets site admins temporarily override a quota

docs/internal/runbooks/actions.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ active container, and reports terminal `cancelled`.
259259
with matching labels and capacity. Unsupported hosted labels such as
260260
`windows-latest` and `macos-latest` intentionally remain queued until an
261261
operator registers matching runners.
262+
- **Actions usage looks too high:** compare raw job wall-clock runtime with
263+
timeout-capped runtime. Billing and usage metrics cap completed/cancelled job
264+
runtime at `workflow_jobs.timeout_minutes`; a larger raw
265+
`completed_at - started_at` gap usually means a stale running row was
266+
cancelled later than the container actually stopped.
262267
- **Step logs buffer:** verify the Caddy route above and confirm the SSE route
263268
is still mounted outside compression and short timeouts.
264269
- **`actions/checkout@v4` fails:** confirm the job is still running, the repo

docs/internal/runbooks/stripe-billing.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,14 @@ Use this when `BillingQuotaOverage` fires.
382382

383383
1. Open the org billing settings page as a site admin. The Usage panel
384384
shows storage and Actions minutes against the effective limits.
385-
2. If counters look stale, run `org:usage_recalc` for that org.
385+
2. If counters look stale, run `org:usage_recalc` for that org:
386+
387+
```sh
388+
shithubd admin run-job org:usage_recalc '{"org_id":1,"source":"support-recalc"}'
389+
```
390+
391+
Replace the `org_id` value with the affected organization's id and use a `source` label
392+
that names the incident or support case.
386393
3. If the overage is legitimate, ask the org owner to upgrade or reduce
387394
usage. For support incidents, add a temporary quota override in the
388395
site-admin debug panel; it is attributed to the operator.

internal/actions/queries/workflow_insights.sql

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
SELECT
55
COUNT(DISTINCT run.id)::bigint AS run_count,
66
COUNT(job.id)::bigint AS job_count,
7-
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
7+
COALESCE(SUM(LEAST(
8+
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
9+
job.timeout_minutes::double precision * 60.0
10+
)) FILTER (
811
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
912
), 0)::bigint AS completed_job_seconds
1013
FROM workflow_runs run
@@ -18,7 +21,10 @@ SELECT
1821
COALESCE(NULLIF(run.workflow_name, ''), run.workflow_file)::text AS workflow_name,
1922
COUNT(DISTINCT run.id)::bigint AS run_count,
2023
COUNT(job.id)::bigint AS job_count,
21-
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
24+
COALESCE(SUM(LEAST(
25+
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
26+
job.timeout_minutes::double precision * 60.0
27+
)) FILTER (
2228
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
2329
), 0)::bigint AS completed_job_seconds
2430
FROM workflow_runs run
@@ -31,7 +37,10 @@ LIMIT $3;
3137

3238
-- name: GetActionsPerformanceSummaryForRepo :one
3339
SELECT
34-
COALESCE(AVG(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
40+
COALESCE(AVG(LEAST(
41+
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
42+
job.timeout_minutes::double precision * 60.0
43+
)) FILTER (
3544
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
3645
), 0)::double precision AS avg_job_seconds,
3746
COALESCE(AVG(EXTRACT(EPOCH FROM (job.started_at - job.created_at))) FILTER (
@@ -45,7 +54,10 @@ SELECT
4554
AND job.conclusion IS NOT NULL
4655
AND job.conclusion <> 'success'
4756
)::bigint AS failed_job_count,
48-
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
57+
COALESCE(SUM(LEAST(
58+
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
59+
job.timeout_minutes::double precision * 60.0
60+
)) FILTER (
4961
WHERE job.started_at IS NOT NULL
5062
AND job.completed_at IS NOT NULL
5163
AND job.conclusion IS NOT NULL
@@ -62,7 +74,10 @@ SELECT
6274
COALESCE(NULLIF(run.workflow_name, ''), run.workflow_file)::text AS workflow_name,
6375
COUNT(DISTINCT run.id)::bigint AS run_count,
6476
COUNT(job.id)::bigint AS job_count,
65-
COALESCE(AVG(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
77+
COALESCE(AVG(LEAST(
78+
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
79+
job.timeout_minutes::double precision * 60.0
80+
)) FILTER (
6681
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
6782
), 0)::double precision AS avg_job_seconds,
6883
COALESCE(AVG(EXTRACT(EPOCH FROM (job.started_at - job.created_at))) FILTER (

internal/actions/sqlc/workflow_insights.sql.go

Lines changed: 20 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/actions/trigger/enqueue_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ func seedCompletedActionsMinutes(t *testing.T, f enqFx, completedAt time.Time, m
388388
t.Fatalf("claimed seed job id=%d, want %d", claimed.ID, jobs[0].ID)
389389
}
390390
startedAt := completedAt.Add(-time.Duration(minutes) * time.Minute)
391+
if _, err := f.pool.Exec(ctx, `UPDATE workflow_jobs SET timeout_minutes = $2 WHERE id = $1`, jobs[0].ID, minutes); err != nil {
392+
t.Fatalf("set seed timeout_minutes: %v", err)
393+
}
391394
if _, err := q.UpdateWorkflowJobStatus(ctx, f.pool, actionsdb.UpdateWorkflowJobStatusParams{
392395
ID: jobs[0].ID,
393396
Status: actionsdb.WorkflowJobStatusCompleted,

internal/billing/billing_test.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,31 @@ func TestOrgUsageCountersAndQuotaOverrides(t *testing.T) {
544544
`, org.ID, start.Add(12*time.Hour)); err != nil {
545545
t.Fatalf("insert workflow usage: %v", err)
546546
}
547+
if _, err := pool.Exec(ctx, `
548+
WITH repo AS (
549+
SELECT id FROM repos WHERE owner_org_id = $1 AND name = 'metered-repo'
550+
), runner AS (
551+
SELECT id FROM workflow_runners WHERE name = 'metered-runner'
552+
), run AS (
553+
INSERT INTO workflow_runs (
554+
repo_id, run_index, workflow_file, head_sha, event,
555+
status, conclusion, started_at, completed_at
556+
)
557+
SELECT repo.id, 2, '.shithub/workflows/stale.yml', 'abcdef2', 'push',
558+
'cancelled', 'cancelled', $2::timestamptz, $2::timestamptz + interval '3 days'
559+
FROM repo
560+
RETURNING id
561+
)
562+
INSERT INTO workflow_jobs (
563+
run_id, job_index, job_key, runner_id, status, conclusion,
564+
timeout_minutes, started_at, completed_at
565+
)
566+
SELECT run.id, 0, 'stale', runner.id, 'cancelled', 'cancelled',
567+
360, $2::timestamptz, $2::timestamptz + interval '3 days'
568+
FROM run, runner
569+
`, org.ID, start.Add(13*time.Hour)); err != nil {
570+
t.Fatalf("insert stale workflow usage: %v", err)
571+
}
547572
recalc, err := billing.RecalculateOrgUsageCounters(ctx, deps, org.ID, start, end)
548573
if err != nil {
549574
t.Fatalf("RecalculateOrgUsageCounters: %v", err)
@@ -552,7 +577,7 @@ func TestOrgUsageCountersAndQuotaOverrides(t *testing.T) {
552577
recalc.ActionsLogBytes != 1234 ||
553578
recalc.ActionsArtifactBytes != 3456 ||
554579
recalc.ObjectStorageBytes != 4690 ||
555-
recalc.ActionsMinutesUsed != 6 {
580+
recalc.ActionsMinutesUsed != 366 {
556581
t.Fatalf("unexpected recalculated usage: %+v", recalc)
557582
}
558583
}

internal/billing/queries/billing.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,10 @@ actions_minutes AS (
931931
AND j.completed_at IS NOT NULL
932932
AND j.completed_at >= sqlc.arg(actions_period_start)::timestamptz
933933
AND j.completed_at < sqlc.arg(actions_period_end)::timestamptz
934-
THEN CEIL(EXTRACT(EPOCH FROM (j.completed_at - j.started_at)) / 60.0)::bigint
934+
THEN LEAST(
935+
CEIL(GREATEST(EXTRACT(EPOCH FROM (j.completed_at - j.started_at)), 0) / 60.0)::bigint,
936+
j.timeout_minutes::bigint
937+
)
935938
ELSE 0
936939
END
937940
), 0)::bigint AS actions_minutes_used

internal/billing/sqlc/billing.sql.go

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)