diff --git a/cmd/agc/internal/agentpool/crypto.go b/cmd/agc/internal/agentpool/crypto.go
index afeb2823..60a824f6 100644
--- a/cmd/agc/internal/agentpool/crypto.go
+++ b/cmd/agc/internal/agentpool/crypto.go
@@ -14,21 +14,28 @@ import (
type KeyType string
const (
- // KeyTypeEd25519 generates an Ed25519 key (default).
+ // KeyTypeEd25519 generates an Ed25519 key (explicit opt-in). Ed25519 agents
+ // cannot decrypt RSA-OAEP session keys, so selecting it loses a layer of
+ // session-key encryption — see docs/design/05-security.md.
KeyTypeEd25519 KeyType = "ed25519"
- // KeyTypeRSA generates an RSA-3072 key.
+ // KeyTypeRSA generates an RSA-3072 key (secure default).
KeyTypeRSA KeyType = "rsa"
)
// generateKey returns a new private key of the requested type.
-// An empty KeyType defaults to KeyTypeEd25519.
+//
+// An empty KeyType defaults to KeyTypeRSA (RSA-3072), the secure default that
+// preserves RSA-OAEP session-key encryption. Ed25519 must be requested
+// explicitly via KeyTypeEd25519: defaulting empty→Ed25519 would be a
+// secure-by-default regression (any caller that omits the key type would
+// silently lose a layer of encryption).
func generateKey(kt KeyType) (crypto.Signer, error) {
switch kt {
- case KeyTypeRSA:
- return rsa.GenerateKey(rand.Reader, 3072)
- default:
+ case KeyTypeEd25519:
_, priv, err := ed25519.GenerateKey(rand.Reader)
return priv, err
+ default:
+ return rsa.GenerateKey(rand.Reader, 3072)
}
}
diff --git a/cmd/agc/internal/agentpool/crypto_test.go b/cmd/agc/internal/agentpool/crypto_test.go
new file mode 100644
index 00000000..3afd43a7
--- /dev/null
+++ b/cmd/agc/internal/agentpool/crypto_test.go
@@ -0,0 +1,48 @@
+package agentpool
+
+import (
+ "crypto/ed25519"
+ "crypto/rsa"
+ "testing"
+)
+
+// TestGenerateKeyDefaultIsRSA pins the secure-by-default contract: an empty
+// (unspecified) KeyType must yield an RSA-3072 key, never Ed25519. Defaulting
+// empty→Ed25519 would silently drop RSA-OAEP session-key encryption for any
+// caller that omits the key type (Q109 regression). RSA stays the default;
+// Ed25519 is an explicit opt-in only (Q11).
+func TestGenerateKeyDefaultIsRSA(t *testing.T) {
+ tests := []struct {
+ name string
+ keyType KeyType
+ wantRSA bool
+ }{
+ {name: "empty defaults to RSA", keyType: "", wantRSA: true},
+ {name: "explicit rsa", keyType: KeyTypeRSA, wantRSA: true},
+ {name: "unrecognised value defaults to RSA", keyType: KeyType("bogus"), wantRSA: true},
+ {name: "explicit ed25519 opt-in", keyType: KeyTypeEd25519, wantRSA: false},
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ signer, err := generateKey(tc.keyType)
+ if err != nil {
+ t.Fatalf("generateKey(%q): %v", tc.keyType, err)
+ }
+ switch key := signer.(type) {
+ case *rsa.PrivateKey:
+ if !tc.wantRSA {
+ t.Fatalf("generateKey(%q) = RSA, want Ed25519", tc.keyType)
+ }
+ if got := key.N.BitLen(); got != 3072 {
+ t.Errorf("RSA key size = %d bits, want 3072", got)
+ }
+ case ed25519.PrivateKey:
+ if tc.wantRSA {
+ t.Fatalf("generateKey(%q) = Ed25519, want RSA", tc.keyType)
+ }
+ default:
+ t.Fatalf("generateKey(%q) = %T, want *rsa.PrivateKey or ed25519.PrivateKey", tc.keyType, signer)
+ }
+ })
+ }
+}
diff --git a/cmd/agc/internal/agentpool/pool.go b/cmd/agc/internal/agentpool/pool.go
index 4fa3c70b..93024cb4 100644
--- a/cmd/agc/internal/agentpool/pool.go
+++ b/cmd/agc/internal/agentpool/pool.go
@@ -138,7 +138,7 @@ type Pool struct {
// NewPool creates a Pool for the given RunnerGroup.
// runnerLabels is the label set passed to GitHub during runner registration.
-// keyType selects the algorithm for newly-generated agent keys; empty defaults to KeyTypeEd25519.
+// keyType selects the algorithm for newly-generated agent keys; empty defaults to KeyTypeRSA (the secure default).
func NewPool(c client.Client, namespace, groupName, runnerVersion string, runnerLabels []string, registrar Registrar, keyType KeyType) *Pool {
return &Pool{
client: c,
diff --git a/cmd/agc/internal/controller/integration/suite_integration_test.go b/cmd/agc/internal/controller/integration/suite_integration_test.go
index a29c08e1..12df2c2d 100644
--- a/cmd/agc/internal/controller/integration/suite_integration_test.go
+++ b/cmd/agc/internal/controller/integration/suite_integration_test.go
@@ -176,6 +176,11 @@ func startAGCReconcilerOpts(t *testing.T, opts provisionerOptions) (*controller.
Client: mgr.GetClient(),
TokenManager: tm,
Registrar: reg,
+ // Use Ed25519 agent keys: these suites exercise the session/reconcile
+ // lifecycle, not crypto, and Ed25519 generation is near-instant. The
+ // production default is RSA-3072 (Q109), whose per-agent generation cost
+ // is high enough to blow these tests' Eventually timeouts on CI.
+ AgentKeyType: agentpool.KeyTypeEd25519,
BrokerConfig: controller.BrokerConfig{
BrokerURL: brokerStub.URL,
RunnerVersion: "2.335.1",
diff --git a/cmd/agc/internal/controller/runnergroup_controller.go b/cmd/agc/internal/controller/runnergroup_controller.go
index f368c15a..831fca3c 100644
--- a/cmd/agc/internal/controller/runnergroup_controller.go
+++ b/cmd/agc/internal/controller/runnergroup_controller.go
@@ -73,7 +73,7 @@ type RunnerGroupReconciler struct {
Metrics *listener.Metrics
Log *slog.Logger
Provisioner *provisioner.Provisioner
- AgentKeyType agentpool.KeyType // defaults to KeyTypeEd25519 when empty
+ AgentKeyType agentpool.KeyType // defaults to KeyTypeRSA (the secure default) when empty
// Recorder emits Kubernetes Events on the reconciled RunnerGroup so that
// credential, agent-pool, and listener failures surface in `kubectl describe
diff --git a/docs/STATUS.md b/docs/STATUS.md
index c9ceb892..372b0a6f 100644
--- a/docs/STATUS.md
+++ b/docs/STATUS.md
@@ -51,6 +51,7 @@ Specific actionable items in priority order. Pick from the top; skip 🚫 items
| ID | Item | Labels | St | Sz | Notes |
|---|---|---|---|---|---|
+| Q135 | Flake: e2e E2E_AGC_MultipleJobsQueued — 4min timeout waiting for 2nd worker pod | `tests` `bug` | 🔲 | S | job_lifecycle_test.go:179 timed out waiting for the 2nd queued job's worker pod on PR 227 run 27510232006; passed on rerun w/o code change. Timing-sensitive worker-pod spawn under load; same shared-brokerStub/session-recycle class as Q120. |
| Q134 | Flake: e2e E2E_AGC_WorkerPodAdmittedWithNonNumericUserImage — "no session registered" 180s timeout | `tests` `bug` `1.0-gate` | 🔲 | M | Heaviest e2e spec (real worker image) times out waiting for an AGC session (worker_securitycontext_test.go:96). Hit main + 2 PRs ~2026-06-14 17:00; other runs passed — intermittent. Likely AGC session/image-pull timing; blocks PRs when it fires. |
| Q131 | Flake: TestListener_IdleNotShutdownIfLast poll-count timing | `tests` `bug` | 🔲 | S | goroutine_test.go:419 asserted poll count ≥5 but got 3 ("poll past threshold when last listener") on a local `make check`; passed on rerun (-count=3) w/o code change. Timing-sensitive idle-shutdown test; tighten synchronization, not the count. |
| Q113 | Flake: eviction integration tests time out in waitForWorkerPod | `tests` `bug` | 🔲 | S | EvictionTriggersRequeue + EvictionBudgetExhausted (failure_recovery_test.go:142) timed out (20s) on CI run 27383065643, passed on rerun w/o code change. Suspects: sessions[len-1] pick on shared brokerStub; 20s budget on 2-vCPU runner. |
@@ -94,7 +95,6 @@ Specific actionable items in priority order. Pick from the top; skip 🚫 items
| Q106 | Non-atomic eviction-retry counter race | `bug` | 🔲 | S | handleEviction reads-modifies-writes the count with no per-key lock (provisioner.go:451). Two concurrent evictions of the same run_id both pass the budget check and both call rerun-failed-jobs, exceeding the budget. Serialize per runID (atomic CAS). |
| Q107 | Token manager tight retry loop when TTL ≤ refreshLeadTime | `bug` `infra` | 🔲 | S | On success wait=ExpiresAt-5min-now clamped to 0 (token/manager.go:174); a near-expiry token (skew/short TTL) → fetch→wait 0→fetch spin, a retry storm vs the token endpoint. Floor the post-success wait (e.g. 30s) when wait<=0. |
| Q108 | Broker long-poll has no per-request deadline | `infra` | 🔲 | S | AGC leaves BrokerConfig.HTTPClient nil, so GetMessage long-polls with no ResponseHeaderTimeout (broker/client.go:163; agc/main.go:416). A black-holed broker conn blocks a listener until the OS TCP timeout. Set a tuned client or per-poll ctx (~55s). |
-| Q109 | Library agent-key-type default is ed25519, not the secure rsa | `security` | 🔲 | S | generateKey defaults empty keyType→Ed25519 (agentpool/crypto.go:24), the regressed path. Only the CLI default (rsa) holds the secure choice; any new constructor with empty keyType silently regresses. Make the library default RSA. See [Q11](#Q11). |
| Q110 | coverage-baseline folds in untested test-helper packages | `tests` `infra` | 🔲 | S | coverage-baseline.txt floors (broker 48.3, gmc 48.2) fold in untested helpers (brokertest) and miss envtest coverage, so the ratchet misleads while core code is ~80%. Exclude helper pkgs so floors track production code. Sibling of Q77. |
| Q55 | Verify provisioner-test goleak cascade fix held in CI | `tests` `bug` | 🔲 | S | Intermittent ~20-test goleak cascade in `internal/provisioner` fixed by `waitForPodCreated` helper in 59c0714; delete row once CI is clean. If flakes recur, migrate remaining ~18 Eventually-on-Pod sites to the helper. |
| Q129 | [Public GitHub Pages website](plan/website.md) | `docs` `infra` | ▶ | L | [Public site](plan/website.md): MkDocs Material scaffold + landing + vs-ARC page + Pages workflow shipped, builds in CI. Remaining: public launch (gated on Q99 + enabling Pages); link reconcile + strict build folds into [Q52](#Q52). |
diff --git a/docs/plan/release-1.0.md b/docs/plan/release-1.0.md
index 2d49ba38..246aa5f1 100644
--- a/docs/plan/release-1.0.md
+++ b/docs/plan/release-1.0.md
@@ -342,11 +342,13 @@ docs caveat, not omission.
Both admit an unverified artifact into a signed release. Fix
(re-vendor + `git diff --exit-code` CI gate; checksum-pin cosign as the
`KIND_BINARY_SHA256` pattern does) or caveat.
-- **Library `generateKey` empty-keyType default is Ed25519**
+- ~~**Library `generateKey` empty-keyType default is Ed25519**
([Q109](../STATUS.md)): an empty `keyType` yields Ed25519 — the
secure-by-default regression `CLAUDE.md` explicitly forbids (Ed25519
agents can't decrypt RSA-OAEP session keys). Only the CLI default holds
- RSA; the **library** default must be RSA too.
+ RSA; the **library** default must be RSA too.~~ **Resolved (Q109):**
+ `generateKey` now defaults an empty/unrecognised `keyType` to RSA-3072,
+ matching the CLI; Ed25519 requires the explicit `KeyTypeEd25519` opt-in.
---
@@ -415,7 +417,7 @@ Suggested sequence:
| D. Operability | Q35 (logging+probes) | Q34 HA, Q51, Q72, Q35 logger unify |
| E. Docs honesty | capacity reframe, egress + sandbox caveats, ops install flow, Q103 SLSA-L3 claim | — |
| F. Engineering quality | Q77 coverage, Q78 dup-check, Q79 `-race` unit, Q80 gosec, Q81 errcheck, Q84 shellcheck, Q66 install-artifact validation | — |
-| G. Must-resolve (fix **or** caveat → Q99) | Q105 port-53 egress, Q126 + Q127-cosign release integrity, Q109 Ed25519 library default | — |
+| G. Must-resolve (fix **or** caveat → Q99) | Q105 port-53 egress, Q126 + Q127-cosign release integrity, ~~Q109 Ed25519 library default~~ (fixed) | — |
**1.0 = all gating boxes ticked**, and every bucket-G item either fixed or
honestly caveated. Recommended items that slip become ordinary post-1.0