Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions internal/config/config_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,18 @@ type ServerConfig struct {
// fallback uses the HTTP client's request timeout instead. Increase this for backends that are
// slow to initialize. Only applies to HTTP server types. Default: 30 seconds.
ConnectTimeout int `toml:"connect_timeout" json:"connect_timeout,omitempty"`

// RateLimitThreshold is the number of consecutive rate-limit errors from this backend
// that will trip the circuit breaker (transition CLOSED → OPEN). When OPEN, requests
// are immediately rejected until the cooldown period elapses. Default: 3.
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`

// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
Comment on lines +221 to +227
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new per-server rate-limit fields are added to the file config (TOML/JSON via ServerConfig), but stdin JSON configs are validated against an embedded upstream schema where stdioServerConfig/httpServerConfig have additionalProperties=false. Since fixSchemaBytes/stripExtensionFieldsForValidation currently do not add/strip rate_limit_threshold or rate_limit_cooldown, providing these fields via stdin JSON will fail schema validation (and convertStdinServerConfig has no wiring for them). If the intent is to support JSON stdin config too, update schema fixing + StdinServerConfig/convertStdinServerConfig accordingly; otherwise the doc comments/defaults should clarify the fields are TOML-only.

Suggested change
// are immediately rejected until the cooldown period elapses. Default: 3.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`
// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
// are immediately rejected until the cooldown period elapses. Default: 3.
// Supported in file-based config; stdin JSON config does not currently accept this field.
RateLimitThreshold int `toml:"rate_limit_threshold" json:"rate_limit_threshold,omitempty"`
// RateLimitCooldown is the number of seconds the circuit breaker stays OPEN before
// allowing a single probe request (transition OPEN → HALF-OPEN). If the probe
// succeeds the circuit closes; if rate-limited again it re-opens. Default: 60.
// Supported in file-based config; stdin JSON config does not currently accept this field.

Copilot uses AI. Check for mistakes.
// Supported in file-based config (TOML/JSON); stdin JSON config does not currently accept this field.
RateLimitCooldown int `toml:"rate_limit_cooldown" json:"rate_limit_cooldown,omitempty"`
}

// GuardConfig represents a guard configuration for DIFC enforcement.
Expand Down
36 changes: 36 additions & 0 deletions internal/config/rate_limit_config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package config

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestServerConfig_RateLimitFields(t *testing.T) {
t.Parallel()
toml := `
[servers.github]
command = "docker"
args = ["run", "--rm", "-i", "ghcr.io/github/github-mcp-server:latest"]
rate_limit_threshold = 5
rate_limit_cooldown = 120
`
path := writeTempTOML(t, toml)
cfg, err := LoadFromFile(path)
require.NoError(t, err)
srv := cfg.Servers["github"]
assert.Equal(t, 5, srv.RateLimitThreshold)
assert.Equal(t, 120, srv.RateLimitCooldown)
}

func TestServerConfig_RateLimitFieldsDefaultToZero(t *testing.T) {
t.Parallel()
toml := validDockerServerTOML
path := writeTempTOML(t, toml)
cfg, err := LoadFromFile(path)
require.NoError(t, err)
srv := cfg.Servers["github"]
assert.Equal(t, 0, srv.RateLimitThreshold)
assert.Equal(t, 0, srv.RateLimitCooldown)
}
65 changes: 65 additions & 0 deletions internal/proxy/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"fmt"
"io"
"net/http"
"strconv"
"time"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
Expand Down Expand Up @@ -350,8 +352,11 @@ func (h *proxyHandler) passthrough(w http.ResponseWriter, r *http.Request, path
}

// writeResponse writes an upstream response to the client.
// When the upstream signals rate-limiting (HTTP 429 or X-RateLimit-Remaining == 0),
// it injects a Retry-After header and logs the event at ERROR level.
func (h *proxyHandler) writeResponse(w http.ResponseWriter, resp *http.Response, body []byte) {
	// Headers (including the injected Retry-After) must be fully populated
	// before WriteHeader commits the status line.
	copyResponseHeaders(w, resp)
	injectRetryAfterIfRateLimited(w, resp)
	w.WriteHeader(resp.StatusCode)
	// A write error here means the client disconnected mid-response; the status
	// line is already committed, so there is nothing actionable — deliberately
	// ignored rather than silently dropped.
	_, _ = w.Write(body)
}
Expand Down Expand Up @@ -416,6 +421,66 @@ func copyResponseHeaders(w http.ResponseWriter, resp *http.Response) {
}
}

// injectRetryAfterIfRateLimited inspects the upstream response for rate-limit signals
// (HTTP 429 or X-Ratelimit-Remaining == 0). When detected it:
// 1. Injects a Retry-After header so the client knows when to retry.
// 2. Logs the event at ERROR level so operators can monitor rate-limit incidents.
func injectRetryAfterIfRateLimited(w http.ResponseWriter, resp *http.Response) {
is429 := resp.StatusCode == http.StatusTooManyRequests
// Use Go's canonical header key form (textproto.CanonicalMIMEHeaderKey produces
// "X-Ratelimit-Remaining", matching GitHub's actual response headers).
remaining := resp.Header.Get("X-Ratelimit-Remaining")
resetHeader := resp.Header.Get("X-Ratelimit-Reset")

isRateLimited := is429 || remaining == "0"
if !isRateLimited {
return
}

resetAt := parseRateLimitReset(resetHeader)
retryAfter := computeRetryAfter(resetAt)

w.Header().Set("Retry-After", strconv.Itoa(retryAfter))

logger.LogError("client",
"upstream rate limit hit: status=%d X-Ratelimit-Remaining=%s X-Ratelimit-Reset=%s retry-after=%ds",
resp.StatusCode, remaining, resetHeader, retryAfter)
}

// parseRateLimitReset parses the X-RateLimit-Reset Unix-timestamp header.
// Returns zero time when absent or malformed.
func parseRateLimitReset(value string) time.Time {
	// ParseInt rejects the empty string too, so one error path covers both
	// "header absent" and "header malformed".
	epoch, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		return time.Time{}
	}
	return time.Unix(epoch, 0)
}

// computeRetryAfter returns the number of seconds to wait before retrying.
// When resetAt is in the future the delay is clamped to [1, 3600] seconds.
// When resetAt is zero or in the past a default of 60 seconds is returned.
func computeRetryAfter(resetAt time.Time) int {
const (
defaultDelay = 60
maxDelay = 3600
)
if resetAt.IsZero() {
return defaultDelay
}
secs := int(time.Until(resetAt).Seconds()) + 1 // add 1s buffer
if secs < 1 {
return defaultDelay
}
if secs > maxDelay {
return maxDelay
}
return secs
}

// rewrapSearchResponse re-wraps filtered items into the original search response
// envelope. GitHub search endpoints return {"total_count": N, "items": [...]};
// ToResult() returns a bare array, so we rebuild the wrapper.
Expand Down
139 changes: 139 additions & 0 deletions internal/proxy/rate_limit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package proxy

import (
"net/http"
"net/http/httptest"
"strconv"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// TestInjectRetryAfterIfRateLimited verifies Retry-After injection and logging for
// rate-limited upstream responses.
func TestInjectRetryAfterIfRateLimited(t *testing.T) {
	t.Parallel()

	// newResp builds a minimal upstream response with the given status and headers.
	newResp := func(status int, hdr map[string]string) *http.Response {
		h := make(http.Header, len(hdr))
		for k, v := range hdr {
			h.Set(k, v)
		}
		return &http.Response{StatusCode: status, Header: h}
	}
	// futureReset formats a Unix timestamp d from now, as the header carries it.
	futureReset := func(d time.Duration) string {
		return strconv.FormatInt(time.Now().Add(d).Unix(), 10)
	}

	t.Run("HTTP 429 injects Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusTooManyRequests, map[string]string{
			"X-Ratelimit-Reset": futureReset(30 * time.Second),
		}))
		retryAfter := rec.Header().Get("Retry-After")
		assert.NotEmpty(t, retryAfter, "Retry-After should be set on 429")
		secs, err := strconv.Atoi(retryAfter)
		assert.NoError(t, err)
		assert.Greater(t, secs, 0, "Retry-After should be positive")
	})

	t.Run("X-Ratelimit-Remaining 0 injects Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, map[string]string{
			"X-Ratelimit-Remaining": "0",
			"X-Ratelimit-Reset":     futureReset(60 * time.Second),
		}))
		assert.NotEmpty(t, rec.Header().Get("Retry-After"), "Retry-After should be set when remaining=0")
	})

	t.Run("non-zero remaining does not inject Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, map[string]string{
			"X-Ratelimit-Remaining": "100",
		}))
		assert.Empty(t, rec.Header().Get("Retry-After"))
	})

	t.Run("200 with no rate-limit headers does not inject Retry-After", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusOK, nil))
		assert.Empty(t, rec.Header().Get("Retry-After"))
	})

	t.Run("429 without reset header uses default delay", func(t *testing.T) {
		t.Parallel()
		rec := httptest.NewRecorder()
		injectRetryAfterIfRateLimited(rec, newResp(http.StatusTooManyRequests, nil))
		assert.Equal(t, "60", rec.Header().Get("Retry-After"), "default delay should be 60 seconds")
	})
}

// TestParseRateLimitReset verifies the Unix-timestamp header parser.
func TestParseRateLimitReset(t *testing.T) {
	t.Parallel()

	// Both the absent and the malformed header must map to the zero time.
	for _, tc := range []struct{ name, value string }{
		{"empty string returns zero", ""},
		{"invalid string returns zero", "not-a-number"},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			assert.True(t, parseRateLimitReset(tc.value).IsZero())
		})
	}

	t.Run("valid unix timestamp parses correctly", func(t *testing.T) {
		t.Parallel()
		want := time.Now().Add(60 * time.Second)
		parsed := parseRateLimitReset(strconv.FormatInt(want.Unix(), 10))
		assert.False(t, parsed.IsZero())
		assert.Equal(t, want.Unix(), parsed.Unix())
	})
}

// TestComputeRetryAfter verifies the retry-after calculation.
func TestComputeRetryAfter(t *testing.T) {
	t.Parallel()

	// Cases with an exact expected delay.
	for _, tc := range []struct {
		name    string
		resetAt time.Time
		want    int
	}{
		{"zero time returns default", time.Time{}, 60},
		{"past time returns default", time.Now().Add(-time.Minute), 60},
		{"very far future is clamped to max", time.Now().Add(24 * time.Hour), 3600},
	} {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			assert.Equal(t, tc.want, computeRetryAfter(tc.resetAt))
		})
	}

	t.Run("future time returns seconds until reset", func(t *testing.T) {
		t.Parallel()
		got := computeRetryAfter(time.Now().Add(30 * time.Second))
		// Allow ±2s for timing jitter.
		assert.GreaterOrEqual(t, got, 29)
		assert.LessOrEqual(t, got, 32)
	})
}
Loading
Loading