From 3f0c62a782534805a609849019766efb9abd7274 Mon Sep 17 00:00:00 2001 From: Joshua Gilman Date: Wed, 24 Jun 2026 12:04:19 -0700 Subject: [PATCH] feat(api): add per-client IP rate limiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rate limit the API per client IP, on by default, as a Huma middleware installed before authentication so an over-limit request is rejected with an RFC 9457 429 + Retry-After before it reaches the credential store, shielding the auth path and database from anonymous floods. The infrastructure routes (/healthz, /readyz, /metrics) bypass Huma and are never limited. The shipped limiter is in-process (per-key token bucket via golang.org/x/time/rate, with idle-evicted buckets) behind a new ratelimit.Limiter port — the seam for a distributed (for example, Redis-backed) limiter. Keying is a pluggable KeyFunc whose default is the spoof-safe resolved client IP; swap it to limit authenticated principals instead. Config: --rate-limit-enabled (default true), --rate-limit-rps (default 10), --rate-limit-burst (default 20). Retry-After (RFC 9110) is the throttle signal; the IETF RateLimit structured-field headers are still a draft and map only loosely onto a token bucket, so they are left as a documented enhancement. Co-Authored-By: Claude Opus 4.8 (1M context) --- DELETE_ME.md | 1 + README.md | 31 ++++++ go.mod | 1 + go.sum | 2 + internal/adapter/http/ratelimit.go | 24 ++++ internal/adapter/http/router.go | 34 ++++-- internal/app/app.go | 40 ++++++- internal/app/app_test.go | 45 ++++++++ internal/app/serve.go | 14 +++ internal/config/config.go | 43 ++++++++ internal/config/config_test.go | 25 +++++ internal/integration/authz_e2e_test.go | 5 + internal/ratelimit/limiter.go | 40 +++++++ internal/ratelimit/memory.go | 146 +++++++++++++++++++++++++ internal/ratelimit/memory_test.go | 89 +++++++++++++++ internal/ratelimit/middleware.go | 111 +++++++++++++++++++ internal/ratelimit/middleware_test.go | 97 ++++++++++++++++ 17 files changed, 735 insertions(+), 13 deletions(-) create mode 100644 internal/adapter/http/ratelimit.go create mode 100644 internal/ratelimit/limiter.go create mode 100644 internal/ratelimit/memory.go create mode 100644 internal/ratelimit/memory_test.go create mode 100644 internal/ratelimit/middleware.go create mode 100644 internal/ratelimit/middleware_test.go diff --git a/DELETE_ME.md b/DELETE_ME.md index 8aefdaa..06de815 100644 --- a/DELETE_ME.md +++ b/DELETE_ME.md @@ -11,6 +11,7 @@ It is only here to orient the initial project owner. - A runnable hexagonal HTTP API server (chi + Huma) at `github.com/meigma/template-go-api`, with a `todo` example resource served under a `/v1` URL version prefix, RFC 9457 errors, unversioned `/healthz`, `/readyz`, and `/metrics`, runtime API docs at `/docs`, and an `openapi` spec-export command. - A PostgreSQL persistence adapter (pgx + sqlc typed queries + goose migrations) behind the domain's `todo.Repository` port: a `migrate` subcommand, a committed-and-drift-guarded sqlc layer, a real `/readyz` check, and container-backed integration tests. The port is the seam — implement it to back the template with a different datastore. - An authorization tier (Cedar via `cedar-go`) with a deny-by-default Huma middleware, a modular per-resource authz slice pattern, and authentication deferred to the integrator: a placeholder API-key authenticator (`X-API-Key`/Bearer, backed by an `api_keys` table) and dev-only mock keys seeded for the Compose demo. Replace the authenticator with real authn — see step 6. +- Per-client rate limiting (on by default): a Huma middleware that throttles by client IP **before** authentication and returns RFC 9457 `429` with `Retry-After`. The shipped limiter is in-process (token bucket, `golang.org/x/time/rate`) behind a `ratelimit.Limiter` port — the seam for a distributed (for example, Redis-backed) limiter. See the README's [Rate limiting](README.md#rate-limiting) section. - A Cobra/Viper entrypoint under `cmd/template-go-api` and `internal/cli` exposing `serve` (default), `version`, `openapi`, and `migrate`. - Moon tasks for `format`, `lint`, `build`, `test`, and `check`, plus `sqlc` / `sqlc-check` (regenerate and drift-guard the typed query layer), `mockery` / `mockery-check` (regenerate and drift-guard the testify mocks), `migrate` (run database migrations), and `test-integration` (container-backed adapter tests). - `golangci-lint`, `sqlc`, `goose`, and `mockery` wired through Proto and Moon. diff --git a/README.md b/README.md index dbd8c31..2e1c1fb 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,9 @@ default. | `--db-max-conns` | `TEMPLATE_GO_API_DB_MAX_CONNS` | `0` | maximum PostgreSQL pool connections; `0` uses the driver default | | `--authz-enabled` | `TEMPLATE_GO_API_AUTHZ_ENABLED` | `true` | enable the [authorization](#authorization) middleware (deny-by-default); `false` bypasses it entirely | | `--authz-policy-dir` | `TEMPLATE_GO_API_AUTHZ_POLICY_DIR` | _(none)_ | directory of `.cedar` files to load instead of the embedded policies; empty uses the embedded set | +| `--rate-limit-enabled` | `TEMPLATE_GO_API_RATE_LIMIT_ENABLED` | `true` | enable per-client [rate limiting](#rate-limiting); `false` disables throttling entirely | +| `--rate-limit-rps` | `TEMPLATE_GO_API_RATE_LIMIT_RPS` | `10` | sustained per-client request rate (requests/second) | +| `--rate-limit-burst` | `TEMPLATE_GO_API_RATE_LIMIT_BURST` | `20` | per-client burst size (token-bucket depth) | CORS is off until you set origins. Client IP is read from the direct TCP peer unless you opt into a trusted proxy header — never from `X-Forwarded-For` @@ -440,6 +443,34 @@ policy in one slice can reference shared principal roles (`principal in Role::"admin"`) or another slice's entities. Cross-cutting rules and shared principal/role types live in the base package's `base.cedar`. +## Rate limiting + +The API is rate limited per client out of the box (`--rate-limit-enabled`, +default true). The limiter is a Huma middleware installed **before** +authentication, so an over-limit request is rejected with `429 Too Many +Requests` before it reaches the credential store — protecting the auth path and +database from anonymous floods. The infrastructure routes (`/healthz`, +`/readyz`, `/metrics`) bypass Huma and are never limited. + +Requests are keyed by **client IP** (the spoof-safe IP the +[`--trusted-proxy-header`](#configuration) logic resolves), allowing a burst of +`--rate-limit-burst` and a sustained `--rate-limit-rps` per second (a token +bucket). A throttled response is RFC 9457 `application/problem+json` and carries +a `Retry-After` header (whole seconds). + +The shipped limiter is **in-process** (`golang.org/x/time/rate`), with per-key +buckets evicted after an idle period to bound memory. That is the right default +for a single instance; behind multiple replicas a shared backend keeps the limit +global. The `ratelimit.Limiter` port is the seam: implement `Allow` over a store +such as Redis and wire your adapter in `internal/app/app.go` — the middleware and +key function are unchanged. To limit authenticated callers instead of IPs, swap +the key function (`adapterhttp.ClientIPKeyFunc`) for one that reads the principal. + +> The IETF [RateLimit header fields](https://datatracker.ietf.org/doc/draft-ietf-httpapi-ratelimit-headers/) +> (`RateLimit`/`RateLimit-Policy`) are still a draft and map only loosely onto a +> token bucket, so the template advertises the limit with the stable `Retry-After` +> header and leaves those headers as a documented enhancement. + ## Testing Unit tests sit beside the code and use [Testify](https://github.com/stretchr/testify) diff --git a/go.mod b/go.mod index aa59893..9e4e867 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/testcontainers/testcontainers-go v0.43.0 github.com/testcontainers/testcontainers-go/modules/postgres v0.43.0 + golang.org/x/time v0.11.0 ) require ( diff --git a/go.sum b/go.sum index 9d2d36c..6b30b65 100644 --- a/go.sum +++ b/go.sum @@ -222,6 +222,8 @@ golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc= golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y= golang.org/x/text v0.38.0 h1:sXmwo9DwP3OK9EZ7PqAdaooSGozfl/3a6/xJcbzPRhE= golang.org/x/text v0.38.0/go.mod h1:YXZt3QhHUKYT53r2lLKFIVi6Ao1jdzrTR/KQ09qyxF4= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= diff --git a/internal/adapter/http/ratelimit.go b/internal/adapter/http/ratelimit.go new file mode 100644 index 0000000..6b55c46 --- /dev/null +++ b/internal/adapter/http/ratelimit.go @@ -0,0 +1,24 @@ +package http + +import ( + "github.com/danielgtaylor/huma/v2" + "github.com/danielgtaylor/huma/v2/adapters/humachi" + chimiddleware "github.com/go-chi/chi/v5/middleware" +) + +// ClientIPKeyFunc keys the rate limiter by the resolved client IP. It reads the +// IP the ClientIP middleware stored on the request context — spoof-safe, since +// that middleware honors --trusted-proxy-header and otherwise trusts only the +// TCP peer — so the limiter and the access log agree on who the client is. +// +// It has the signature of ratelimit.KeyFunc and is the default key for the +// rate-limit middleware. To limit authenticated callers instead, swap in a key +// function that reads the principal (see internal/authz) from the context; +// keying lives here, in the transport, so the limiter core stays +// router-agnostic. It never errors: an unresolved IP yields the empty key, which +// simply shares one bucket rather than failing the request. +func ClientIPKeyFunc(ctx huma.Context) (string, error) { + r, _ := humachi.Unwrap(ctx) + + return chimiddleware.GetClientIP(r.Context()), nil +} diff --git a/internal/adapter/http/router.go b/internal/adapter/http/router.go index 10f58d8..f27be9a 100644 --- a/internal/adapter/http/router.go +++ b/internal/adapter/http/router.go @@ -39,6 +39,14 @@ type RouterDeps struct { Readiness []ReadinessCheck // Register mounts resource operations onto the Huma API. Register Registrar + // InstallRateLimit installs the rate-limit Huma middleware on the API. Like + // InstallAuthz it MUST run before the resource operations are registered (Huma + // snapshots the middleware stack per operation at registration), and it runs + // BEFORE InstallAuthz so an over-limit request is rejected before + // authentication touches the credential store. Nil (or a disabled middleware) + // leaves the API unthrottled. The infrastructure routes bypass Huma, so they + // are never rate limited. + InstallRateLimit func(huma.API) // InstallAuthz installs the authentication/authorization Huma middleware on // the API. It MUST run before the resource operations are registered: Huma // snapshots the API's middleware stack into each operation at registration @@ -61,12 +69,15 @@ type RouterDeps struct { func NewRouter(deps RouterDeps) http.Handler { mux := chi.NewMux() - // Core middleware, outermost first. Deferred seams (insert here in later - // slices): authn/authz and rate limiting. + // Core chi middleware, outermost first. The rate-limit and authn/authz + // middleware are Huma middleware (installed on the API below), not chi + // middleware, so they run only for API operations and never for the + // infrastructure routes. // - // Client-IP runs first so the request id, access log, and metrics all see - // the resolved IP. CORS sits after the access log (so preflight responses are - // logged and metered) and is installed only when origins are configured. + // Client-IP runs first so the request id, access log, metrics, and the + // rate limiter all see the resolved IP. CORS sits after the access log (so + // preflight responses are logged and metered) and is installed only when + // origins are configured. mux.Use(middleware.ClientIP(deps.TrustedProxyHeader)) mux.Use(chimiddleware.RequestID) mux.Use(middleware.Recoverer(deps.Logger)) @@ -92,10 +103,15 @@ func NewRouter(deps RouterDeps) http.Handler { }) api := NewAPI(mux, deps.Version) - // The authn/authz Huma middleware is installed BEFORE the operations are - // registered: Huma bakes the API's middleware stack into each operation at - // registration time, so middleware added afterward would never run. It is a - // no-op when authorization is disabled. + // The rate-limit and authn/authz Huma middleware are installed BEFORE the + // operations are registered: Huma bakes the API's middleware stack into each + // operation at registration time, so middleware added afterward would never + // run. Rate limiting is installed first so it runs outermost — an over-limit + // request is rejected before authentication runs. Each is a no-op when its + // feature is disabled. + if deps.InstallRateLimit != nil { + deps.InstallRateLimit(api) + } if deps.InstallAuthz != nil { deps.InstallAuthz(api) } diff --git a/internal/app/app.go b/internal/app/app.go index d918648..86c8de3 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -13,6 +13,7 @@ import ( "github.com/danielgtaylor/huma/v2" "github.com/jackc/pgx/v5/pgxpool" + "golang.org/x/time/rate" adapterhttp "github.com/meigma/template-go-api/internal/adapter/http" "github.com/meigma/template-go-api/internal/adapter/postgres" @@ -20,12 +21,17 @@ import ( "github.com/meigma/template-go-api/internal/authz/apikey" "github.com/meigma/template-go-api/internal/config" "github.com/meigma/template-go-api/internal/observability" + "github.com/meigma/template-go-api/internal/ratelimit" "github.com/meigma/template-go-api/internal/todo" todoauthz "github.com/meigma/template-go-api/internal/todo/authz" "github.com/meigma/template-go-api/internal/todo/httpapi" todopostgres "github.com/meigma/template-go-api/internal/todo/postgres" ) +// rateLimiterIdleTTL is how long an idle per-client bucket is kept before the +// in-process limiter evicts it, bounding memory under churning client keys. +const rateLimiterIdleTTL = 10 * time.Minute + // App is a fully wired API server ready to Run. type App struct { server *http.Server @@ -35,6 +41,9 @@ type App struct { // pool is the PostgreSQL connection pool, closed during graceful shutdown. // It is nil when a repository is injected with WithRepository (tests). pool *pgxpool.Pool + // rateLimiter is the in-process rate limiter whose janitor goroutine is + // stopped during graceful shutdown. It is nil when rate limiting is disabled. + rateLimiter *ratelimit.InMemory } // Option configures how New wires the application. @@ -95,6 +104,8 @@ func New( return nil, err } + rateLimiter, installRateLimit := buildRateLimiter(cfg, logger) + // An empty metrics-addr co-locates /metrics on the API listener; otherwise a // dedicated metrics server (below) serves it off the API surface. serveMetricsInline := cfg.MetricsAddr == "" @@ -108,10 +119,11 @@ func New( TrustedProxyHeader: cfg.TrustedProxyHeader, // The postgres store contributes a real connectivity check here; an // injected repository (tests) contributes none, so /readyz is always ready. - Readiness: readiness, - Register: registerResources(service), - InstallAuthz: installAuthz, - FinalizeAuthz: finalizeAuthz, + Readiness: readiness, + Register: registerResources(service), + InstallRateLimit: installRateLimit, + InstallAuthz: installAuthz, + FinalizeAuthz: finalizeAuthz, }) server := &http.Server{ @@ -141,6 +153,7 @@ func New( logger: logger, grace: cfg.ShutdownGrace, pool: pool, + rateLimiter: rateLimiter, }, nil } @@ -240,6 +253,25 @@ func resolveAuthenticator( return apikey.NewAuthenticator(apikey.NewStore(pool)), nil } +// buildRateLimiter constructs the rate limiter and the hook that installs the +// rate-limit middleware on the API. When rate limiting is disabled it returns a +// nil limiter and a nil hook, so NewRouter leaves the API unthrottled. The +// limiter is keyed by client IP (adapterhttp.ClientIPKeyFunc); swap that key +// function for a principal-based one to limit authenticated callers instead. +// The returned limiter runs a janitor goroutine the App stops on shutdown. +func buildRateLimiter(cfg config.Config, logger *slog.Logger) (*ratelimit.InMemory, func(huma.API)) { + if !cfg.RateLimitEnabled { + return nil, nil + } + + limiter := ratelimit.NewInMemory(rate.Limit(cfg.RateLimitRPS), cfg.RateLimitBurst, rateLimiterIdleTTL) + install := func(api huma.API) { + ratelimit.NewMiddleware(api, limiter, adapterhttp.ClientIPKeyFunc, logger, true).Install() + } + + return limiter, install +} + // Handler returns the assembled HTTP handler, primarily for functional tests. func (a *App) Handler() http.Handler { return a.server.Handler diff --git a/internal/app/app_test.go b/internal/app/app_test.go index bae4876..fdafc18 100644 --- a/internal/app/app_test.go +++ b/internal/app/app_test.go @@ -97,3 +97,48 @@ func TestAppWiringDeniesUnauthorized(t *testing.T) { application.Handler().ServeHTTP(createRec, createReq) assert.Equal(t, http.StatusForbidden, createRec.Code) } + +// TestAppWiringRateLimits proves the rate-limit middleware is wired into the +// composed server, runs before authentication, and exempts the infrastructure +// routes. With a burst of one, the second API request from the same client is +// throttled — and it returns 429, not the 403 a denied caller would get, which +// shows the limiter runs before authorization. The /healthz route is never +// limited because the infra routes bypass Huma. +func TestAppWiringRateLimits(t *testing.T) { + t.Parallel() + + vp := viper.New() + vp.Set("rate-limit-rps", 1) + vp.Set("rate-limit-burst", 1) + cfg := config.Load(vp) + logger := observability.NewLogger(io.Discard, slog.LevelError, "json") + application, err := app.New( + context.Background(), cfg, logger, "test", + app.WithRepository(todotest.NewRepository()), + app.WithAuthenticator(stubAuthenticator{roles: []string{"user"}}), + ) + require.NoError(t, err) + handler := application.Handler() + + // Infra routes bypass Huma, so they are never rate limited: repeated /healthz + // hits all succeed despite the burst of one. + for range 3 { + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + assert.Equal(t, http.StatusOK, rec.Code) + } + + post := func() int { + req := httptest.NewRequest(http.MethodPost, "/v1/todos", strings.NewReader(`{"title":"x"}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + return rec.Code + } + + // The single burst token lets the first request through; the next request + // from the same client is throttled before authorization runs. + assert.Equal(t, http.StatusCreated, post()) + assert.Equal(t, http.StatusTooManyRequests, post()) +} diff --git a/internal/app/serve.go b/internal/app/serve.go index 7ad016c..154094d 100644 --- a/internal/app/serve.go +++ b/internal/app/serve.go @@ -31,6 +31,8 @@ func (a *App) Run(ctx context.Context) error { // Close the database pool (when postgres) on every exit path, after the // servers have returned — including when a server fails to start. defer a.closePool(ctx) + // Stop the in-process rate limiter's janitor goroutine on every exit path. + defer a.stopRateLimiter(ctx) servers := a.servers() serveErr := make(chan error, len(servers)) @@ -91,3 +93,15 @@ func (a *App) closePool(ctx context.Context) { a.logger.InfoContext(ctx, "closing database pool") a.pool.Close() } + +// stopRateLimiter stops the in-process rate limiter's janitor goroutine when one +// is configured. It is deferred in Run so it executes on every exit path. It is +// a no-op when rate limiting is disabled (no limiter was built). +func (a *App) stopRateLimiter(ctx context.Context) { + if a.rateLimiter == nil { + return + } + + a.logger.InfoContext(ctx, "stopping rate limiter") + a.rateLimiter.Stop() +} diff --git a/internal/config/config.go b/internal/config/config.go index 17c3b7a..42ced2d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -30,6 +30,18 @@ const ( // box. Operators set it false as an escape hatch to bypass authorization // entirely (incremental adoption or local debugging). defaultAuthzEnabled = true + // defaultRateLimitEnabled is true: the API is rate limited out of the box + // (per client IP, pre-auth), a secure default that also shields the + // credential store from anonymous floods. Operators set it false to disable + // throttling entirely. + defaultRateLimitEnabled = true + // defaultRateLimitRPS is the sustained per-client request rate (requests per + // second). It is deliberately generous so local development and the demo + // stack are not throttled; tune it down for production. + defaultRateLimitRPS = 10.0 + // defaultRateLimitBurst is the per-client token-bucket depth: how many + // requests a client may make in a burst before the sustained rate applies. + defaultRateLimitBurst = 20 ) // Config holds runtime settings for the API server. @@ -78,6 +90,16 @@ type Config struct { // instead of the embedded set. Empty (the default) uses the embedded // policies. AuthzPolicyDir string + // RateLimitEnabled is the rate-limiting master switch. It defaults to true: + // the API is throttled per client IP before authentication runs. When false + // the rate-limit middleware is inert (pass-through). + RateLimitEnabled bool + // RateLimitRPS is the sustained per-client request rate, in requests per + // second, when rate limiting is enabled. + RateLimitRPS float64 + // RateLimitBurst is the per-client token-bucket depth: the number of requests + // a client may make in a burst before the sustained RateLimitRPS applies. + RateLimitBurst int } // RegisterFlags declares the server configuration flags on flags. Binding them @@ -115,6 +137,13 @@ func RegisterFlags(flags *pflag.FlagSet) { "", "directory of .cedar policy files to load instead of the embedded policies; empty uses the embedded set", ) + flags.Bool( + "rate-limit-enabled", + defaultRateLimitEnabled, + "enable per-client (IP) rate limiting; false disables throttling entirely", + ) + flags.Float64("rate-limit-rps", defaultRateLimitRPS, "sustained per-client request rate (requests per second)") + flags.Int("rate-limit-burst", defaultRateLimitBurst, "per-client burst size (token-bucket depth)") } // Load reads the server configuration from vp, applying defaults for unset keys. @@ -138,6 +167,9 @@ func Load(vp *viper.Viper) Config { DBMaxConns: vp.GetInt32("db-max-conns"), AuthzEnabled: vp.GetBool("authz-enabled"), AuthzPolicyDir: vp.GetString("authz-policy-dir"), + RateLimitEnabled: vp.GetBool("rate-limit-enabled"), + RateLimitRPS: vp.GetFloat64("rate-limit-rps"), + RateLimitBurst: vp.GetInt("rate-limit-burst"), } } @@ -161,6 +193,14 @@ func (c Config) Validate() error { if strings.TrimSpace(c.DatabaseURL) == "" { return errors.New("database-url is required") } + if c.RateLimitEnabled { + if c.RateLimitRPS <= 0 { + return errors.New("rate-limit-rps must be positive when rate limiting is enabled") + } + if c.RateLimitBurst <= 0 { + return errors.New("rate-limit-burst must be positive when rate limiting is enabled") + } + } return nil } @@ -182,4 +222,7 @@ func setDefaults(vp *viper.Viper) { vp.SetDefault("db-max-conns", defaultDBMaxConns) vp.SetDefault("authz-enabled", defaultAuthzEnabled) vp.SetDefault("authz-policy-dir", "") + vp.SetDefault("rate-limit-enabled", defaultRateLimitEnabled) + vp.SetDefault("rate-limit-rps", defaultRateLimitRPS) + vp.SetDefault("rate-limit-burst", defaultRateLimitBurst) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index a5a16ca..18e3f60 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -26,6 +26,9 @@ func TestLoadDefaults(t *testing.T) { assert.Zero(t, cfg.DBMaxConns) assert.True(t, cfg.AuthzEnabled, "authz is enabled by default now that the routes are tagged") assert.Empty(t, cfg.AuthzPolicyDir) + assert.True(t, cfg.RateLimitEnabled, "rate limiting is enabled by default") + assert.InDelta(t, defaultRateLimitRPS, cfg.RateLimitRPS, 0.0001) + assert.Equal(t, defaultRateLimitBurst, cfg.RateLimitBurst) } func TestLoadAuthzFromFlags(t *testing.T) { @@ -105,4 +108,26 @@ func TestValidate(t *testing.T) { missingDatabaseURL := base missingDatabaseURL.DatabaseURL = "" require.Error(t, missingDatabaseURL.Validate()) + + // Rate-limit settings are validated only when rate limiting is enabled. + rateLimited := base + rateLimited.RateLimitEnabled = true + rateLimited.RateLimitRPS = 10 + rateLimited.RateLimitBurst = 20 + require.NoError(t, rateLimited.Validate()) + + zeroRPS := rateLimited + zeroRPS.RateLimitRPS = 0 + require.Error(t, zeroRPS.Validate()) + + zeroBurst := rateLimited + zeroBurst.RateLimitBurst = 0 + require.Error(t, zeroBurst.Validate()) + + // With rate limiting disabled, non-positive values are ignored. + disabledIgnoresValues := base + disabledIgnoresValues.RateLimitEnabled = false + disabledIgnoresValues.RateLimitRPS = 0 + disabledIgnoresValues.RateLimitBurst = 0 + require.NoError(t, disabledIgnoresValues.Validate()) } diff --git a/internal/integration/authz_e2e_test.go b/internal/integration/authz_e2e_test.go index c18a17a..9075bf4 100644 --- a/internal/integration/authz_e2e_test.go +++ b/internal/integration/authz_e2e_test.go @@ -44,6 +44,11 @@ func e2eServer(ctx context.Context, t *testing.T, databaseURL string) *httptest. vp := viper.New() vp.Set("database-url", databaseURL) + // Rate limiting is orthogonal here, and its per-IP bucket would be shared + // across this suite's rapid requests from one loopback client; disable it so + // the authz assertions stay deterministic. Rate limiting is covered by its + // own tests in internal/ratelimit. + vp.Set("rate-limit-enabled", false) cfg := config.Load(vp) require.NoError(t, cfg.Validate()) // Guard the premise of the whole suite: authz must be enabled by default now diff --git a/internal/ratelimit/limiter.go b/internal/ratelimit/limiter.go new file mode 100644 index 0000000..72a311a --- /dev/null +++ b/internal/ratelimit/limiter.go @@ -0,0 +1,40 @@ +// Package ratelimit provides per-client request rate limiting for the API: a +// Limiter port, an in-process token-bucket adapter, and a Huma middleware that +// enforces the limit and emits an RFC 9457 429 response when it is exceeded. +// +// The Limiter interface is the extension seam. The shipped in-process adapter +// (NewInMemory) limits per process, which is the right default for a single +// instance; behind multiple replicas a shared backend (for example, Redis) +// keeps the limit global. Implement Limiter with that backend and wire it in +// the composition root — the middleware and key function are unchanged. +package ratelimit + +import ( + "context" + "time" +) + +// Decision is the outcome of a limiter check for a single request. It carries +// what the middleware needs to answer the client: whether the request is +// allowed and, when it is not, how long the client should wait before retrying. +type Decision struct { + // Allowed reports whether the request may proceed. + Allowed bool + // RetryAfter is how long until the client may retry. It is meaningful only + // when Allowed is false; the middleware rounds it up to whole seconds for the + // Retry-After response header. Zero means no hint is available. + RetryAfter time.Duration +} + +// Limiter decides whether a request identified by key may proceed now. key is +// the client identity the middleware extracts — by default the client IP. +// Implementations must be safe for concurrent use. +// +// Allow returns a non-nil error only for an infrastructure failure (for +// example, a distributed backend being unreachable). The middleware fails open +// on such an error — it lets the request through — so that a limiter outage +// cannot take the whole API down with it. A plain allow/deny decision returns a +// nil error. +type Limiter interface { + Allow(ctx context.Context, key string) (Decision, error) +} diff --git a/internal/ratelimit/memory.go b/internal/ratelimit/memory.go new file mode 100644 index 0000000..6075b28 --- /dev/null +++ b/internal/ratelimit/memory.go @@ -0,0 +1,146 @@ +package ratelimit + +import ( + "context" + "sync" + "time" + + "golang.org/x/time/rate" +) + +// minIdleTTL floors the eviction interval so a misconfigured zero or negative +// TTL cannot panic [time.NewTicker] or spin the janitor. +const minIdleTTL = time.Minute + +// InMemory is an in-process Limiter backed by a per-key token bucket +// (golang.org/x/time/rate). Each distinct key gets its own bucket that refills +// at rps tokens per second up to a depth of burst, so a client may burst up to +// burst requests and then sustains rps requests per second. +// +// Buckets are created on first use and evicted after they go idle for idleTTL, +// bounding memory under churning keys (for example, per-IP keys behind NAT or +// during a scan). A background janitor performs the eviction; call Stop when the +// limiter is no longer needed so the goroutine exits. The composition root owns +// that lifecycle and stops the limiter on shutdown. +type InMemory struct { + rps rate.Limit + burst int + ttl time.Duration + + mu sync.Mutex + clients map[string]*clientBucket + stop chan struct{} + stopped bool +} + +// clientBucket is one key's token bucket plus the last time it was seen, which +// the janitor uses to evict idle keys. +type clientBucket struct { + limiter *rate.Limiter + lastSeen time.Time +} + +// NewInMemory builds an in-process limiter that allows up to burst requests +// immediately and rps requests per second sustained, independently per key. +// Buckets unused for idleTTL are evicted. The returned limiter starts a janitor +// goroutine; call Stop to end it. +func NewInMemory(rps rate.Limit, burst int, idleTTL time.Duration) *InMemory { + if idleTTL < minIdleTTL { + idleTTL = minIdleTTL + } + + l := &InMemory{ + rps: rps, + burst: burst, + ttl: idleTTL, + clients: make(map[string]*clientBucket), + stop: make(chan struct{}), + } + go l.janitor() + + return l +} + +// Allow reports whether the request for key may proceed, consuming a token when +// it does. A denied request reports how long until the next token frees up so +// the middleware can set Retry-After. It never returns an error: an in-process +// bucket has no backend that can fail. +func (l *InMemory) Allow(_ context.Context, key string) (Decision, error) { + limiter := l.bucket(key) + + reservation := limiter.Reserve() + if !reservation.OK() { + // burst is non-positive, so a token can never be granted: deny with no + // retry hint rather than reporting an infinite wait. + return Decision{Allowed: false}, nil + } + + if delay := reservation.Delay(); delay > 0 { + // No token is available yet. Cancel so this check does not consume a + // future token, and report the wait. + reservation.Cancel() + + return Decision{Allowed: false, RetryAfter: delay}, nil + } + + return Decision{Allowed: true}, nil +} + +// bucket returns the token bucket for key, creating it on first use, and +// records that the key was just seen so the janitor does not evict it. +func (l *InMemory) bucket(key string) *rate.Limiter { + l.mu.Lock() + defer l.mu.Unlock() + + c, ok := l.clients[key] + if !ok { + c = &clientBucket{limiter: rate.NewLimiter(l.rps, l.burst)} + l.clients[key] = c + } + c.lastSeen = time.Now() + + return c.limiter +} + +// Stop ends the janitor goroutine. It is safe to call more than once; calls +// after the first are no-ops. Allow still works after Stop, but idle buckets are +// no longer evicted. +func (l *InMemory) Stop() { + l.mu.Lock() + defer l.mu.Unlock() + + if l.stopped { + return + } + l.stopped = true + close(l.stop) +} + +// janitor evicts idle buckets on each tick until Stop closes the stop channel. +func (l *InMemory) janitor() { + ticker := time.NewTicker(l.ttl) + defer ticker.Stop() + + for { + select { + case <-l.stop: + return + case <-ticker.C: + l.evictIdle() + } + } +} + +// evictIdle removes every bucket not seen within the idle TTL. +func (l *InMemory) evictIdle() { + cutoff := time.Now().Add(-l.ttl) + + l.mu.Lock() + defer l.mu.Unlock() + + for key, c := range l.clients { + if c.lastSeen.Before(cutoff) { + delete(l.clients, key) + } + } +} diff --git a/internal/ratelimit/memory_test.go b/internal/ratelimit/memory_test.go new file mode 100644 index 0000000..de676ad --- /dev/null +++ b/internal/ratelimit/memory_test.go @@ -0,0 +1,89 @@ +package ratelimit_test + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/time/rate" + + "github.com/meigma/template-go-api/internal/ratelimit" +) + +func TestInMemoryAllowsBurstThenDenies(t *testing.T) { + t.Parallel() + + // Refill of 1 token/sec with a burst of 3: the first three requests pass + // immediately, the fourth (within the same second) is denied with a hint. + limiter := ratelimit.NewInMemory(rate.Limit(1), 3, time.Minute) + t.Cleanup(limiter.Stop) + + ctx := context.Background() + for i := range 3 { + d, err := limiter.Allow(ctx, "client-a") + require.NoError(t, err) + assert.Truef(t, d.Allowed, "burst request %d should be allowed", i+1) + } + + d, err := limiter.Allow(ctx, "client-a") + require.NoError(t, err) + assert.False(t, d.Allowed, "the request past the burst should be denied") + assert.Positive(t, d.RetryAfter, "a denied request reports when to retry") +} + +func TestInMemoryKeysAreIndependent(t *testing.T) { + t.Parallel() + + limiter := ratelimit.NewInMemory(rate.Limit(1), 1, time.Minute) + t.Cleanup(limiter.Stop) + ctx := context.Background() + + first, err := limiter.Allow(ctx, "client-a") + require.NoError(t, err) + require.True(t, first.Allowed) + + // client-a is now exhausted, but client-b has its own independent bucket. + exhausted, err := limiter.Allow(ctx, "client-a") + require.NoError(t, err) + assert.False(t, exhausted.Allowed) + + other, err := limiter.Allow(ctx, "client-b") + require.NoError(t, err) + assert.True(t, other.Allowed, "a different key has an independent bucket") +} + +func TestInMemoryRefillsOverTime(t *testing.T) { + t.Parallel() + + // 50 tokens/sec => a token refills in ~20ms. Burst 1, so the bucket empties + // after one request and then refills shortly after. + limiter := ratelimit.NewInMemory(rate.Limit(50), 1, time.Minute) + t.Cleanup(limiter.Stop) + ctx := context.Background() + + first, err := limiter.Allow(ctx, "c") + require.NoError(t, err) + require.True(t, first.Allowed) + + denied, err := limiter.Allow(ctx, "c") + require.NoError(t, err) + require.False(t, denied.Allowed) + + // After the refill interval the bucket allows again. Reserve/Cancel on the + // denied polls does not consume a future token, so this converges. + require.Eventually(t, func() bool { + d, err := limiter.Allow(ctx, "c") + + return err == nil && d.Allowed + }, time.Second, 5*time.Millisecond, "the bucket should refill and allow again") +} + +func TestInMemoryStopIsIdempotent(t *testing.T) { + t.Parallel() + + limiter := ratelimit.NewInMemory(rate.Limit(1), 1, time.Minute) + limiter.Stop() + assert.NotPanics(t, limiter.Stop, "Stop is safe to call more than once") +} diff --git a/internal/ratelimit/middleware.go b/internal/ratelimit/middleware.go new file mode 100644 index 0000000..ee2ee3a --- /dev/null +++ b/internal/ratelimit/middleware.go @@ -0,0 +1,111 @@ +package ratelimit + +import ( + "log/slog" + "math" + "net/http" + "strconv" + + "github.com/danielgtaylor/huma/v2" +) + +// genericTooManyRequests is the client-facing detail for a throttled request. +// The limited key is logged, not returned, so the response leaks nothing about +// other clients' traffic. +const genericTooManyRequests = "rate limit exceeded; retry later" + +// KeyFunc extracts the client identity a request is limited by — by default the +// resolved client IP. Returning an error makes the middleware fail open (the +// request proceeds), so a key-extraction failure cannot deny all traffic. +type KeyFunc func(ctx huma.Context) (string, error) + +// Middleware enforces a per-client rate limit as Huma middleware. Installed +// before the authentication middleware (see Install), it rejects an over-limit +// request with an RFC 9457 429 before authentication runs, so a flood never +// reaches the credential store. It is inert (pass-through) when disabled — the +// escape hatch matching the authz tier. +type Middleware struct { + api huma.API + limiter Limiter + key KeyFunc + logger *slog.Logger + enabled bool +} + +// NewMiddleware builds the rate-limit middleware over limiter, keyed by key. api +// is the Huma API used to write the RFC 9457 problem response; logger records +// throttling and fail-open events. When enabled is false the middleware is a +// pass-through (Install is a no-op). +func NewMiddleware( + api huma.API, + limiter Limiter, + key KeyFunc, + logger *slog.Logger, + enabled bool, +) *Middleware { + if logger == nil { + logger = slog.Default() + } + + return &Middleware{ + api: api, + limiter: limiter, + key: key, + logger: logger, + enabled: enabled, + } +} + +// Install registers the rate-limit middleware on the API. Like the authz +// middleware it must run before huma.Register (Huma snapshots the API's +// middleware stack into each operation at registration time, so middleware +// added afterward never runs), and it should be installed before the +// authentication middleware so limiting happens pre-auth. It is a no-op when +// disabled. The infrastructure routes (/healthz, /readyz, /metrics) bypass Huma, +// so they are never rate limited. +func (m *Middleware) Install() { + if !m.enabled { + return + } + + m.api.UseMiddleware(m.limit) +} + +// limit is the middleware function: derive the client key, consult the limiter, +// and either continue or reject with 429. It fails open on any key-extraction or +// limiter error so the limiter is never a single point of failure for the API. +func (m *Middleware) limit(ctx huma.Context, next func(huma.Context)) { + key, err := m.key(ctx) + if err != nil { + m.logger.WarnContext(ctx.Context(), "rate-limit key extraction failed; allowing request", + slog.Any("error", err)) + next(ctx) + + return + } + + decision, err := m.limiter.Allow(ctx.Context(), key) + if err != nil { + m.logger.ErrorContext(ctx.Context(), "rate limiter unavailable; allowing request", + slog.Any("error", err)) + next(ctx) + + return + } + + if decision.Allowed { + next(ctx) + + return + } + + if decision.RetryAfter > 0 { + // Retry-After is whole seconds (RFC 9110); round up so the client never + // retries before a token is actually available. + seconds := int(math.Ceil(decision.RetryAfter.Seconds())) + ctx.SetHeader("Retry-After", strconv.Itoa(seconds)) + } + + m.logger.InfoContext(ctx.Context(), "rate limit exceeded", slog.String("key", key)) + _ = huma.WriteErr(m.api, ctx, http.StatusTooManyRequests, genericTooManyRequests) +} diff --git a/internal/ratelimit/middleware_test.go b/internal/ratelimit/middleware_test.go new file mode 100644 index 0000000..1c1dc82 --- /dev/null +++ b/internal/ratelimit/middleware_test.go @@ -0,0 +1,97 @@ +package ratelimit_test + +import ( + "context" + "log/slog" + "net/http" + "testing" + "time" + + "github.com/danielgtaylor/huma/v2" + "github.com/danielgtaylor/huma/v2/humatest" + "github.com/stretchr/testify/assert" + "golang.org/x/time/rate" + + "github.com/meigma/template-go-api/internal/ratelimit" +) + +// testClientHeader names the header the test key function reads to identify the +// client, so a test can simulate distinct clients without distinct IPs. +const testClientHeader = "X-Test-Client" + +// keyByHeader keys requests by the testClientHeader value, falling back to a +// shared key when the header is absent. +func keyByHeader(ctx huma.Context) (string, error) { + if c := ctx.Header(testClientHeader); c != "" { + return c, nil + } + + return "default", nil +} + +// newTestAPI installs the rate-limit middleware over limiter on a humatest API +// and registers a single GET /ping operation behind it. +func newTestAPI(t *testing.T, limiter ratelimit.Limiter, enabled bool) humatest.TestAPI { + t.Helper() + + _, api := humatest.New(t) + logger := slog.New(slog.DiscardHandler) + ratelimit.NewMiddleware(api, limiter, keyByHeader, logger, enabled).Install() + + huma.Register(api, huma.Operation{ + OperationID: "ping", + Method: http.MethodGet, + Path: "/ping", + }, func(_ context.Context, _ *struct{}) (*struct{}, error) { + return &struct{}{}, nil + }) + + return api +} + +func TestMiddlewareAllowsBurstThenReturns429(t *testing.T) { + t.Parallel() + + limiter := ratelimit.NewInMemory(rate.Limit(1), 2, time.Minute) + t.Cleanup(limiter.Stop) + api := newTestAPI(t, limiter, true) + + // The burst of 2 passes. + assert.Equal(t, http.StatusNoContent, api.Get("/ping").Code) + assert.Equal(t, http.StatusNoContent, api.Get("/ping").Code) + + // The third request within the same second is throttled. + resp := api.Get("/ping") + assert.Equal(t, http.StatusTooManyRequests, resp.Code) + assert.NotEmpty(t, resp.Header().Get("Retry-After"), "a 429 carries a Retry-After hint") + assert.Contains(t, resp.Body.String(), "rate limit exceeded", "the body is the RFC 9457 problem detail") +} + +func TestMiddlewareLimitsPerClientKey(t *testing.T) { + t.Parallel() + + limiter := ratelimit.NewInMemory(rate.Limit(1), 1, time.Minute) + t.Cleanup(limiter.Stop) + api := newTestAPI(t, limiter, true) + + // alice exhausts her own bucket. + assert.Equal(t, http.StatusNoContent, api.Get("/ping", testClientHeader+": alice").Code) + assert.Equal(t, http.StatusTooManyRequests, api.Get("/ping", testClientHeader+": alice").Code) + + // bob is unaffected: he has an independent bucket. + assert.Equal(t, http.StatusNoContent, api.Get("/ping", testClientHeader+": bob").Code) +} + +func TestMiddlewareDisabledIsPassthrough(t *testing.T) { + t.Parallel() + + // A burst of 1 would throttle the second request if the middleware ran, but + // disabled it must never install, so every request reaches the handler. + limiter := ratelimit.NewInMemory(rate.Limit(1), 1, time.Minute) + t.Cleanup(limiter.Stop) + api := newTestAPI(t, limiter, false) + + for range 3 { + assert.Equal(t, http.StatusNoContent, api.Get("/ping").Code) + } +}