Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions internal/config/config_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,18 @@ type ServerConfig struct {
// fallback uses the HTTP client's request timeout instead. Increase this for backends that are
// slow to initialize. Only applies to HTTP server types. Default: 30 seconds.
ConnectTimeout int `toml:"connect_timeout" json:"connect_timeout,omitempty"`

// RateLimitThreshold is the number of consecutive rate-limit errors from this backend
// that will trip the circuit breaker (transition CLOSED → OPEN). When OPEN, requests
// are immediately rejected until the cooldown period elapses. Default: 3.
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`

// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
Comment on lines +221 to +227
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new per-server rate-limit fields are added to the file config (TOML/JSON via ServerConfig), but stdin JSON configs are validated against an embedded upstream schema where stdioServerConfig/httpServerConfig have additionalProperties=false. Since fixSchemaBytes/stripExtensionFieldsForValidation currently do not add/strip rate_limit_threshold or rate_limit_cooldown, providing these fields via stdin JSON will fail schema validation (and convertStdinServerConfig has no wiring for them). If the intent is to support JSON stdin config too, update schema fixing + StdinServerConfig/convertStdinServerConfig accordingly; otherwise the doc comments/defaults should clarify the fields are TOML-only.

Suggested change
// are immediately rejected until the cooldown period elapses. Default: 3.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`
// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
// are immediately rejected until the cooldown period elapses. Default: 3.
// Supported in file-based config; stdin JSON config does not currently accept this field.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`
// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
// Supported in file-based config; stdin JSON config does not currently accept this field.

Copilot uses AI. Check for mistakes.
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
RateLimitCooldown int `toml:"rate_limit_cooldown" json:"rate_limit_cooldown,omitempty"`
}

// GuardConfig represents a guard configuration for DIFC enforcement.
Expand Down
36 changes: 36 additions & 0 deletions internal/config/rate_limit_config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package config

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestServerConfig_RateLimitFields(t *testing.T) {
t.Parallel()
toml := `
[servers.github]
command = "docker"
args = ["run", "--rm", "-i", "ghcr.io/github/github-mcp-server:latest"]
rate_limit_threshold = 5
rate_limit_cooldown = 120
`
path := writeTempTOML(t, toml)
cfg, err := LoadFromFile(path)
require.NoError(t, err)
srv := cfg.Servers["github"]
assert.Equal(t, 5, srv.RateLimitThreshold)
assert.Equal(t, 120, srv.RateLimitCooldown)
}

func TestServerConfig_RateLimitFieldsDefaultToZero(t *testing.T) {
t.Parallel()
toml := validDockerServerTOML
path := writeTempTOML(t, toml)
cfg, err := LoadFromFile(path)
require.NoError(t, err)
srv := cfg.Servers["github"]
assert.Equal(t, 0, srv.RateLimitThreshold)
assert.Equal(t, 0, srv.RateLimitCooldown)
}
65 changes: 65 additions & 0 deletions internal/proxy/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"fmt"
"io"
"net/http"
"strconv"
"time"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
Expand Down Expand Up @@ -350,8 +352,11 @@ func (h *proxyHandler) passthrough(w http.ResponseWriter, r *http.Request, path
}

// writeResponse writes an upstream response to the client.
// When the upstream signals rate-limiting (HTTP 429 or X-RateLimit-Remaining == 0),
// it injects a Retry-After header and logs the event at ERROR level.
func (h *proxyHandler) writeResponse(w http.ResponseWriter, resp *http.Response, body []byte) {
	// Headers (including the injected Retry-After) must be fully populated
	// before WriteHeader commits the status line.
	copyResponseHeaders(w, resp)
	injectRetryAfterIfRateLimited(w, resp)
	w.WriteHeader(resp.StatusCode)
	// A write error here means the client disconnected mid-response; the status
	// line is already committed, so there is nothing actionable — deliberately
	// ignored rather than silently dropped.
	_, _ = w.Write(body)
}
Expand Down Expand Up @@ -416,6 +421,66 @@ func copyResponseHeaders(w http.ResponseWriter, resp *http.Response) {
}
}

// injectRetryAfterIfRateLimited inspects the upstream response for rate-limit signals
// (HTTP 429 or X-Ratelimit-Remaining == 0). When detected it:
// 1. Injects a Retry-After header so the client knows when to retry.
// 2. Logs the event at ERROR level so operators can monitor rate-limit incidents.
func injectRetryAfterIfRateLimited(w http.ResponseWriter, resp *http.Response) {
is429 := resp.StatusCode == http.StatusTooManyRequests
// Use Go's canonical header key form (textproto.CanonicalMIMEHeaderKey produces
// "X-Ratelimit-Remaining", matching GitHub's actual response headers).
remaining := resp.Header.Get("X-Ratelimit-Remaining")
resetHeader := resp.Header.Get("X-Ratelimit-Reset")

isRateLimited := is429 || remaining == "0"
if !isRateLimited {
return
}

resetAt := parseRateLimitReset(resetHeader)
retryAfter := computeRetryAfter(resetAt)

w.Header().Set("Retry-After", strconv.Itoa(retryAfter))

logger.LogError("client",
"upstream rate limit hit: status=%d X-Ratelimit-Remaining=%s X-Ratelimit-Reset=%s retry-after=%ds",
resp.StatusCode, remaining, resetHeader, retryAfter)
}

// parseRateLimitReset parses the X-RateLimit-Reset Unix-timestamp header.
// Returns zero time when absent or malformed.
func parseRateLimitReset(value string) time.Time {
	// ParseInt rejects the empty string too, so one error path covers both
	// "header absent" and "header malformed".
	epoch, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		return time.Time{}
	}
	return time.Unix(epoch, 0)
}

// computeRetryAfter returns the number of seconds to wait before retrying.
// When resetAt is in the future the delay is clamped to [1, 3600] seconds.
// When resetAt is zero or in the past a default of 60 seconds is returned.
func computeRetryAfter(resetAt time.Time) int {
const (
defaultDelay = 60
maxDelay = 3600
)
if resetAt.IsZero() {
return defaultDelay
}
secs := int(time.Until(resetAt).Seconds()) + 1 // add 1s buffer
if secs < 1 {
return defaultDelay
}
if secs > maxDelay {
return maxDelay
}
return secs
}

// rewrapSearchResponse re-wraps filtered items into the original search response
// envelope. GitHub search endpoints return {"total_count": N, "items": [...]};
// ToResult() returns a bare array, so we rebuild the wrapper.
Expand Down
139 changes: 139 additions & 0 deletions internal/proxy/rate_limit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package proxy

import (
"net/http"
"net/http/httptest"
"strconv"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// TestInjectRetryAfterIfRateLimited verifies Retry-After injection and logging for
// rate-limited upstream responses.
func TestInjectRetryAfterIfRateLimited(t *testing.T) {
	t.Parallel()

	// newResp builds a minimal upstream response with the given status and headers.
	newResp := func(status int, hdr map[string]string) *http.Response {
		h := make(http.Header, len(hdr))
		for k, v := range hdr {
			h.Set(k, v)
		}
		return &http.Response{StatusCode: status, Header: h}
	}
	// futureReset formats a Unix timestamp d from now, as the header carries it.
	futureReset := func(d time.Duration) string {
		return strconv.FormatInt(time.Now().Add(d).Unix(), 10)
	}

	t.Run("HTTP 429 injects Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusTooManyRequests, map[string]string{
			"X-Ratelimit-Reset": futureReset(30 * time.Second),
		}))
		retryAfter := rec.Header().Get("Retry-After")
		assert.NotEmpty(t, retryAfter, "Retry-After should be set on 429")
		secs, err := strconv.Atoi(retryAfter)
		assert.NoError(t, err)
		assert.Greater(t, secs, 0, "Retry-After should be positive")
	})

	t.Run("X-Ratelimit-Remaining 0 injects Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, map[string]string{
			"X-Ratelimit-Remaining": "0",
			"X-Ratelimit-Reset":     futureReset(60 * time.Second),
		}))
		assert.NotEmpty(t, rec.Header().Get("Retry-After"), "Retry-After should be set when remaining=0")
	})

	t.Run("non-zero remaining does not inject Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, map[string]string{
			"X-Ratelimit-Remaining": "100",
		}))
		assert.Empty(t, rec.Header().Get("Retry-After"))
	})

	t.Run("200 with no rate-limit headers does not inject Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, nil))
		assert.Empty(t, rec.Header().Get("Retry-After"))
	})

	t.Run("429 without reset header uses default delay", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusTooManyRequests, nil))
		assert.Equal(t, "60", rec.Header().Get("Retry-After"), "default delay should be 60 seconds")
	})
}

// TestParseRateLimitReset verifies the Unix-timestamp header parser.
func TestParseRateLimitReset(t *testing.T) {
	t.Parallel()

	// Both the absent and the malformed header must map to the zero time.
	for _, tc := range []struct{ name, value string }{
		{"empty string returns zero", ""},
		{"invalid string returns zero", "not-a-number"},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			assert.True(t, parseRateLimitReset(tc.value).IsZero())
		})
	}

	t.Run("valid unix timestamp parses correctly", func(t *testing.T) {
		t.Parallel()
		want := time.Now().Add(60 * time.Second)
		parsed := parseRateLimitReset(strconv.FormatInt(want.Unix(), 10))
		assert.False(t, parsed.IsZero())
		assert.Equal(t, want.Unix(), parsed.Unix())
	})
}

// TestComputeRetryAfter verifies the retry-after calculation.
func TestComputeRetryAfter(t *testing.T) {
	t.Parallel()

	// Cases with an exact expected delay.
	for _, tc := range []struct {
		name    string
		resetAt time.Time
		want    int
	}{
		{"zero time returns default", time.Time{}, 60},
		{"past time returns default", time.Now().Add(-time.Minute), 60},
		{"very far future is clamped to max", time.Now().Add(24 * time.Hour), 3600},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			assert.Equal(t, tc.want, computeRetryAfter(tc.resetAt))
		})
	}

	t.Run("future time returns seconds until reset", func(t *testing.T) {
		t.Parallel()
		got := computeRetryAfter(time.Now().Add(30 * time.Second))
		// Allow ±2s for timing jitter.
		assert.GreaterOrEqual(t, got, 29)
		assert.LessOrEqual(t, got, 32)
	})
}
Loading
Loading