Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/internal/billing.md
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,9 @@ PAYMENTS SP08 starts hosted-cost metering:
durable.
- Actions minutes are counted from completed or canceled workflow job
runtime, rounded up to the next whole minute, within the current
monthly usage period.
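monthly usage period. Metered runtime is capped at the job's declared
`timeout_minutes`, so a row that reached a terminal state late (for example, a
stale running job cancelled long after its container stopped) cannot bill past
the maximum execution time the runner was allowed to consume.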
- `org_usage_counters` stores the current projection,
`org_usage_snapshots` records audit snapshots, and
`org_quota_overrides` lets site admins temporarily override a quota
Expand Down
5 changes: 5 additions & 0 deletions docs/internal/runbooks/actions.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ active container, and reports terminal `cancelled`.
with matching labels and capacity. Unsupported hosted labels such as
`windows-latest` and `macos-latest` intentionally remain queued until an
operator registers matching runners.
- **Actions usage looks too high:** compare raw job wall-clock runtime with
timeout-capped runtime. Billing and usage metrics cap completed/cancelled job
runtime at `workflow_jobs.timeout_minutes`; a larger raw
`completed_at - started_at` gap usually means a stale running row was
cancelled later than the container actually stopped.
- **Step logs buffer:** verify the Caddy route above and confirm the SSE route
is still mounted outside compression and short timeouts.
- **`actions/checkout@v4` fails:** confirm the job is still running, the repo
Expand Down
9 changes: 8 additions & 1 deletion docs/internal/runbooks/stripe-billing.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,14 @@ Use this when `BillingQuotaOverage` fires.

1. Open the org billing settings page as a site admin. The Usage panel
shows storage and Actions minutes against the effective limits.
2. If counters look stale, run `org:usage_recalc` for that org.
2. If counters look stale, run `org:usage_recalc` for that org:

```sh
shithubd admin run-job org:usage_recalc '{"org_id":1,"source":"support-recalc"}'
```

Replace the `org_id` value with the affected organization's ID, and set
`source` to a label that names the incident or support case.
3. If the overage is legitimate, ask the org owner to upgrade or reduce
usage. For support incidents, add a temporary quota override in the
site-admin debug panel; it is attributed to the operator.
Expand Down
25 changes: 20 additions & 5 deletions internal/actions/queries/workflow_insights.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
SELECT
COUNT(DISTINCT run.id)::bigint AS run_count,
COUNT(job.id)::bigint AS job_count,
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
COALESCE(SUM(LEAST(
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
job.timeout_minutes::double precision * 60.0
)) FILTER (
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
), 0)::bigint AS completed_job_seconds
FROM workflow_runs run
Expand All @@ -18,7 +21,10 @@ SELECT
COALESCE(NULLIF(run.workflow_name, ''), run.workflow_file)::text AS workflow_name,
COUNT(DISTINCT run.id)::bigint AS run_count,
COUNT(job.id)::bigint AS job_count,
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
COALESCE(SUM(LEAST(
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
job.timeout_minutes::double precision * 60.0
)) FILTER (
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
), 0)::bigint AS completed_job_seconds
FROM workflow_runs run
Expand All @@ -31,7 +37,10 @@ LIMIT $3;

-- name: GetActionsPerformanceSummaryForRepo :one
SELECT
COALESCE(AVG(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
COALESCE(AVG(LEAST(
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
job.timeout_minutes::double precision * 60.0
)) FILTER (
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
), 0)::double precision AS avg_job_seconds,
COALESCE(AVG(EXTRACT(EPOCH FROM (job.started_at - job.created_at))) FILTER (
Expand All @@ -45,7 +54,10 @@ SELECT
AND job.conclusion IS NOT NULL
AND job.conclusion <> 'success'
)::bigint AS failed_job_count,
COALESCE(SUM(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
COALESCE(SUM(LEAST(
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
job.timeout_minutes::double precision * 60.0
)) FILTER (
WHERE job.started_at IS NOT NULL
AND job.completed_at IS NOT NULL
AND job.conclusion IS NOT NULL
Expand All @@ -62,7 +74,10 @@ SELECT
COALESCE(NULLIF(run.workflow_name, ''), run.workflow_file)::text AS workflow_name,
COUNT(DISTINCT run.id)::bigint AS run_count,
COUNT(job.id)::bigint AS job_count,
COALESCE(AVG(EXTRACT(EPOCH FROM (job.completed_at - job.started_at))) FILTER (
COALESCE(AVG(LEAST(
GREATEST(EXTRACT(EPOCH FROM (job.completed_at - job.started_at)), 0),
job.timeout_minutes::double precision * 60.0
)) FILTER (
WHERE job.started_at IS NOT NULL AND job.completed_at IS NOT NULL
), 0)::double precision AS avg_job_seconds,
COALESCE(AVG(EXTRACT(EPOCH FROM (job.started_at - job.created_at))) FILTER (
Expand Down
25 changes: 20 additions & 5 deletions internal/actions/sqlc/workflow_insights.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions internal/actions/trigger/enqueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,9 @@ func seedCompletedActionsMinutes(t *testing.T, f enqFx, completedAt time.Time, m
t.Fatalf("claimed seed job id=%d, want %d", claimed.ID, jobs[0].ID)
}
startedAt := completedAt.Add(-time.Duration(minutes) * time.Minute)
if _, err := f.pool.Exec(ctx, `UPDATE workflow_jobs SET timeout_minutes = $2 WHERE id = $1`, jobs[0].ID, minutes); err != nil {
t.Fatalf("set seed timeout_minutes: %v", err)
}
if _, err := q.UpdateWorkflowJobStatus(ctx, f.pool, actionsdb.UpdateWorkflowJobStatusParams{
ID: jobs[0].ID,
Status: actionsdb.WorkflowJobStatusCompleted,
Expand Down
27 changes: 26 additions & 1 deletion internal/billing/billing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,31 @@ func TestOrgUsageCountersAndQuotaOverrides(t *testing.T) {
`, org.ID, start.Add(12*time.Hour)); err != nil {
t.Fatalf("insert workflow usage: %v", err)
}
if _, err := pool.Exec(ctx, `
WITH repo AS (
SELECT id FROM repos WHERE owner_org_id = $1 AND name = 'metered-repo'
), runner AS (
SELECT id FROM workflow_runners WHERE name = 'metered-runner'
), run AS (
INSERT INTO workflow_runs (
repo_id, run_index, workflow_file, head_sha, event,
status, conclusion, started_at, completed_at
)
SELECT repo.id, 2, '.shithub/workflows/stale.yml', 'abcdef2', 'push',
'cancelled', 'cancelled', $2::timestamptz, $2::timestamptz + interval '3 days'
FROM repo
RETURNING id
)
INSERT INTO workflow_jobs (
run_id, job_index, job_key, runner_id, status, conclusion,
timeout_minutes, started_at, completed_at
)
SELECT run.id, 0, 'stale', runner.id, 'cancelled', 'cancelled',
360, $2::timestamptz, $2::timestamptz + interval '3 days'
FROM run, runner
`, org.ID, start.Add(13*time.Hour)); err != nil {
t.Fatalf("insert stale workflow usage: %v", err)
}
recalc, err := billing.RecalculateOrgUsageCounters(ctx, deps, org.ID, start, end)
if err != nil {
t.Fatalf("RecalculateOrgUsageCounters: %v", err)
Expand All @@ -552,7 +577,7 @@ func TestOrgUsageCountersAndQuotaOverrides(t *testing.T) {
recalc.ActionsLogBytes != 1234 ||
recalc.ActionsArtifactBytes != 3456 ||
recalc.ObjectStorageBytes != 4690 ||
recalc.ActionsMinutesUsed != 6 {
recalc.ActionsMinutesUsed != 366 {
t.Fatalf("unexpected recalculated usage: %+v", recalc)
}
}
Expand Down
5 changes: 4 additions & 1 deletion internal/billing/queries/billing.sql
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,10 @@ actions_minutes AS (
AND j.completed_at IS NOT NULL
AND j.completed_at >= sqlc.arg(actions_period_start)::timestamptz
AND j.completed_at < sqlc.arg(actions_period_end)::timestamptz
THEN CEIL(EXTRACT(EPOCH FROM (j.completed_at - j.started_at)) / 60.0)::bigint
THEN LEAST(
CEIL(GREATEST(EXTRACT(EPOCH FROM (j.completed_at - j.started_at)), 0) / 60.0)::bigint,
j.timeout_minutes::bigint
)
ELSE 0
END
), 0)::bigint AS actions_minutes_used
Expand Down
5 changes: 4 additions & 1 deletion internal/billing/sqlc/billing.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading