diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d1672bd --- /dev/null +++ b/.env.example @@ -0,0 +1,18 @@ +# EasyP API Service — Environment Variables +# Copy to .env and uncomment the values you need. + +# --- Ports (docker-compose) --- +# EASYP_TRAEFIK_PORT=80 +# EASYP_GRAFANA_PORT=3000 +# EASYP_REGISTRY_PORT=5005 +# EASYP_POSTGRES_PORT=5432 +# EASYP_GRPC_PORT=8080 +# EASYP_METRICS_PORT=8081 +# EASYP_HEALTH_PORT=8082 +# EASYP_GATEWAY_PORT=8083 + +# --- Enterprise License (uncomment both to enable) --- +# Ed25519 public key (hex) — embedded into binary at build time +# LICENSE_PUBLIC_KEY=c4c720019f4c70dcb30f3cdbac7f73689c6e027d02cb8ae8c5a3cbe654cfb6e0 +# PASETO v4.public token — passed to service at runtime (tier=enterprise, 10yr expiry) +# LICENSE_KEY=v4.public.eyJleHAiOiIyMDM2LTAzLTEyVDIwOjIwOjE0KzAzOjAwIiwiaWF0IjoiMjAyNi0wMy0xNVQyMDoyMDoxNCswMzowMCIsIm1heF9wbHVnaW5zIjoiLTEiLCJtYXhfd29ya2VycyI6IjE2Iiwic3ViIjoidGVzdC1kZXYtbGljZW5zZSIsInRpZXIiOiJlbnRlcnByaXNlIn0qc0ELh3_NyDNOPxfreo8MH2dGeaFrkax-78CSmM2B06hfjmWIU-bp5aO98TIHpMO8MXNuS9nqaXiCEGqpOXEP diff --git a/.gitignore b/.gitignore index 7628bae..6ebd5ac 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ test/generated/ # EasyP generated files easyp.lock +.kiro/ diff --git a/Dockerfile b/Dockerfile index 8d55a08..0533374 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM golang:alpine3.22 AS builder +ARG LICENSE_PUBLIC_KEY="" + RUN apk update && apk add --no-cache ca-certificates COPY go.mod go.mod @@ -10,7 +12,7 @@ COPY . /app WORKDIR /app -RUN go build -o easyp ./cmd/main.go +RUN go build -ldflags "-X main.licensePublicKey=${LICENSE_PUBLIC_KEY}" -o easyp ./cmd/main.go FROM alpine:3.22 diff --git a/Taskfile.yml b/Taskfile.yml index e6805d7..caa903a 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -3,6 +3,7 @@ version: "3" tasks: up: + desc: "Start all services. For Enterprise: LICENSE_PUBLIC_KEY=... LICENSE_KEY=... task up" dir: "{{.USER_WORKING_DIR}}" preconditions: - "test -f docker-compose.yml" diff --git a/cmd/main.go b/cmd/main.go index 96c6f2e..963f2f8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -14,6 +14,7 @@ import ( "syscall" "time" + "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/ratelimit" "github.com/hellofresh/health-go/v5" _ "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" @@ -33,8 +34,10 @@ import ( "github.com/easyp-tech/service/internal/database/migrations" "github.com/easyp-tech/service/internal/flags" "github.com/easyp-tech/service/internal/grpchelper" + "github.com/easyp-tech/service/internal/license" "github.com/easyp-tech/service/internal/mcpserver" "github.com/easyp-tech/service/internal/monitor" + "github.com/easyp-tech/service/internal/ratelimiter" "github.com/easyp-tech/service/internal/telemetry" ) @@ -50,6 +53,8 @@ type ( Registry registryConfig `yaml:"registry" env:", prefix=REGISTRY_"` Telemetry telemetryConfig `yaml:"telemetry" env:", prefix=TELEMETRY_"` WorkerPool workerPoolConfig `yaml:"worker_pool" env:", prefix=WORKER_POOL_"` + License licenseConfig `yaml:"license" env:", prefix=LICENSE_"` + RateLimit rateLimitConfig `yaml:"rate_limit" env:", prefix=RATE_LIMIT_"` } server struct { Host string `yaml:"host" env:"HOST, default=0.0.0.0"` @@ -80,11 +85,22 @@ type ( MaxRetries int `yaml:"max_retries" env:"MAX_RETRIES,default=2"` ShutdownTimeout time.Duration `yaml:"shutdown_timeout" env:"SHUTDOWN_TIMEOUT,default=30s"` } + licenseConfig struct { + Key string `yaml:"key" env:"KEY"` + File string `yaml:"file" env:"FILE"` + } + rateLimitConfig struct { + RequestsPerSecond float64 `yaml:"requests_per_second" env:"REQUESTS_PER_SECOND,default=10.0"` + Burst int `yaml:"burst" env:"BURST,default=20"` + CleanupInterval time.Duration `yaml:"cleanup_interval" env:"CLEANUP_INTERVAL,default=10m"` + } ) var ( cfgFile = &flags.File{DefaultPath: "", MaxSize: configFileSize} logLevel = &flags.Level{Level: slog.LevelDebug} + + licensePublicKey string // set via -ldflags "-X main.licensePublicKey=..." ) // grpcMetrics implements grpchelper.Metrics for the recovery interceptor. @@ -96,6 +112,23 @@ func (m *grpcMetrics) PanicsTotal() prometheus.Counter { return m.panics } +// featureGateAdapter adapts license.FeatureGate to core.FeatureGate interface. +type featureGateAdapter struct { + gate *license.FeatureGate +} + +func (a *featureGateAdapter) Enabled(feature int) bool { + return a.gate.Enabled(license.Feature(feature)) +} + +func (a *featureGateAdapter) MaxWorkers() int { + return a.gate.MaxWorkers() +} + +func (a *featureGateAdapter) MaxPlugins() int { + return a.gate.MaxPlugins() +} + func main() { flag.Var(cfgFile, "cfg", "path to config file") flag.Var(logLevel, "log_level", "log level") @@ -214,13 +247,41 @@ func run(ctx context.Context, cfg config, reg *prometheus.Registry, namespace st } }() + // Create LicenseManager + lm, err := license.NewLicenseManager(licensePublicKey, license.LicenseConfig{ + Key: cfg.License.Key, + File: cfg.License.File, + }, log, reg, namespace) + if err != nil { + log.Warn("license initialization error, continuing in community mode", "error", err) + } + lm.StartExpirationWatcher(ctx) + defer lm.Stop() + + // Create FeatureGate + gate := license.NewFeatureGate(lm) + + // Create RateLimiter + rl := ratelimiter.New(ratelimiter.Config{ + RequestsPerSecond: cfg.RateLimit.RequestsPerSecond, + Burst: cfg.RateLimit.Burst, + CleanupInterval: cfg.RateLimit.CleanupInterval, + }, gate, nil, log, reg) // nil keyExtractor → PeerIPExtractor + rl.StartCleanup(ctx) + // Wrap Registry in tracing decorator tracedRegistry := telemetry.NewTracingRegistry(r) + // Override WorkerPool workers from license limits + wpWorkers := cfg.WorkerPool.Workers + if licenseWorkers := gate.MaxWorkers(); licenseWorkers > 0 { + wpWorkers = licenseWorkers + } + // Wrap TracingRegistry in WorkerPool (limit Docker parallelism) metricsAdapter := adapter_metrics.New(reg, namespace) pool := core.NewWorkerPool(tracedRegistry, core.WorkerPoolConfig{ - Workers: cfg.WorkerPool.Workers, + Workers: wpWorkers, QueueSize: cfg.WorkerPool.QueueSize, GenerationTimeout: cfg.WorkerPool.GenerationTimeout, MaxRetries: cfg.WorkerPool.MaxRetries, @@ -236,7 +297,7 @@ func run(ctx context.Context, cfg config, reg *prometheus.Registry, namespace st }() // Core gets pool as Registry - module := core.New(metricsAdapter, pool) + module := core.New(metricsAdapter, pool, &featureGateAdapter{gate: gate}) // Wrap Core in tracing decorator, pass to API tracedCore := telemetry.NewTracingCore(module) @@ -255,14 +316,24 @@ func run(ctx context.Context, cfg config, reg *prometheus.Registry, namespace st // Create audit interceptor auditInterceptor := api.NewAuditInterceptor(auditCh, log) + // Create license interceptor (before audit in the chain) + licenseInterceptor := api.NewLicenseInterceptor(gate, log) + // Create gRPC server with full middleware stack grpcSrv, healthSrv := grpchelper.NewServer( &grpcMetrics{panics: panicsCounter}, log, serverMetrics, api.ErrorToStatus, - []grpc.UnaryServerInterceptor{auditInterceptor.UnaryServerInterceptor()}, - nil, + []grpc.UnaryServerInterceptor{ + ratelimit.UnaryServerInterceptor(rl), + licenseInterceptor.UnaryServerInterceptor(), + auditInterceptor.UnaryServerInterceptor(), + }, + []grpc.StreamServerInterceptor{ + ratelimit.StreamServerInterceptor(rl), + licenseInterceptor.StreamServerInterceptor(), + }, ) serverMetrics.InitializeMetrics(grpcSrv) diff --git a/config.yml b/config.yml index 4a22bbe..dea96d3 100644 --- a/config.yml +++ b/config.yml @@ -20,3 +20,10 @@ worker_pool: generation_timeout: 120s max_retries: 3 shutdown_timeout: 30s +license: + key: "" # inline PASETO token (takes priority over file) + file: "" # path to license file +rate_limit: + requests_per_second: 10.0 # token refill rate (tokens/sec) + burst: 20 # max tokens (burst size) + cleanup_interval: 10m # stale client cleanup interval diff --git a/docker-compose.yml b/docker-compose.yml index babf4c8..9c7c77b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -183,6 +183,11 @@ services: POSTGRES_PASSWORD: easyp_pass ports: - "${EASYP_POSTGRES_PORT:-5432}:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U easyp_svc -d easyp_db"] + interval: 2s + timeout: 3s + retries: 10 networks: - easyp_network @@ -190,11 +195,15 @@ services: build: context: . dockerfile: ./Dockerfile + args: + LICENSE_PUBLIC_KEY: "${LICENSE_PUBLIC_KEY:-}" container_name: easyp-api-service restart: always depends_on: - - postgres - - alloy + postgres: + condition: service_healthy + alloy: + condition: service_started command: ["-cfg=/config.yml", "-log_level=debug"] volumes: - "./config.yml:/config.yml:ro" @@ -203,6 +212,7 @@ services: environment: OTEL_EXPORTER_OTLP_ENDPOINT: "http://alloy:4317" OTEL_SERVICE_NAME: "easyp-api-service" + LICENSE_KEY: "${LICENSE_KEY:-}" ports: - "${EASYP_GRPC_PORT:-8080}:8080" - "${EASYP_METRICS_PORT:-8081}:8081" diff --git a/docs/architecture.md b/docs/architecture.md index fb13fe6..896930d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -8,8 +8,10 @@ EasyP API Service is a code generation service from Protocol Buffers using Docke ``` gRPC Client → API Layer (gRPC + interceptors) → Core (business logic) → WorkerPool → Registry → Docker - → PostgreSQL -MCP Client → MCP Server (HTTP) → Core + ↑ → PostgreSQL + FeatureGate + ↑ +MCP Client → MCP Server (HTTP) → Core LicenseManager ``` ## Components @@ -25,7 +27,9 @@ A gRPC server with a chain of interceptors that process each request in the foll 5. **Panic recovery** — panic interception with `panics_total` counter increment 6. **Validation** — incoming message validation 7. **Error code conversion** — conversion of internal errors to gRPC codes -8. **Audit interceptor** — non-blocking audit event recording +8. **Rate limit interceptor** — per-client rate limiting via token bucket algorithm (uses `grpc-ecosystem/go-grpc-middleware/v2/interceptors/ratelimit`). Controlled by FeatureGate; when disabled, all requests pass through +9. **License interceptor** — license-based feature gating (Enterprise methods require a valid license; Community methods pass through) +10. **Audit interceptor** — non-blocking audit event recording ### Core (`internal/core`) @@ -75,6 +79,48 @@ An asynchronous audit system. - Writes to the `audit_log` table - Non-blocking dispatch from the gRPC interceptor — when the channel overflows, the event is dropped (`audit_events_lost_total` is incremented) +### License System (`internal/license`) + +A license-based feature gating system built on PASETO v4.public tokens (Ed25519 signatures). + +**Components:** + +- **Feature enum** — typed `int` constants defined with `iota`. Each feature is classified as Community or Enterprise. Methods: `String()`, `IsEnterprise()`, `Valid()` +- **LicenseManager** — parses and validates PASETO v4.public tokens using a public key embedded at build time via `-ldflags`. Caches parsed claims in memory. Runs an expiration watcher (60s ticker) that transitions to Community mode when the license expires +- **FeatureGate** — provides `Enabled(feature)`, `MaxWorkers()`, and `MaxPlugins()` based on the current cached claims. Increments the `easyp_license_feature_denied_total` Prometheus counter when an Enterprise feature is denied + +**Tiers:** + +| Tier | Features | MaxWorkers | MaxPlugins | +|------|----------|------------|------------| +| Community (default) | Code generation, plugin listing, MCP tools, rate limiting, plugin CRUD | 4 | 10 | +| Enterprise | All | 16 | -1 (unlimited) | + +**Graceful degradation:** when no license is configured, the token is invalid, or the license expires at runtime, the system falls back to Community mode without interruption. + +**Integration:** +- `FeatureGate` is injected into `Core` via constructor. The `core.FeatureGate` interface uses `Enabled(feature int)` (not `license.Feature`) to avoid circular dependencies. A `featureGateAdapter` in `cmd/main.go` bridges `license.FeatureGate` (Feature type) to `core.FeatureGate` (int type) +- `LicenseInterceptor` in the gRPC chain checks the FeatureGate for Enterprise methods and returns `PERMISSION_DENIED` when the feature is not enabled. Community methods (those without a method-to-feature mapping) pass through without checks + +### Rate Limiter (`internal/ratelimiter`) + +A per-client rate limiting system integrated as a gRPC interceptor. + +**Algorithm:** Token bucket via `golang.org/x/time/rate`. Each client gets an independent bucket identified by IP address (extracted via `KeyExtractor` abstraction). + +**Components:** + +- **Config** — `RequestsPerSecond` (token refill rate), `Burst` (max tokens), `CleanupInterval` (stale bucket cleanup period) +- **KeyExtractor** — `func(ctx context.Context) string` abstraction for client identification. Default implementation `PeerIPExtractor` extracts IP from `peer.FromContext()`. Extensible to API key, tenant ID, etc. +- **RateLimiter** — implements `ratelimit.Limiter` interface from `grpc-ecosystem/go-grpc-middleware/v2`. Per-client buckets stored in `sync.Map`. Background goroutine cleans up stale buckets +- **Prometheus metrics** — `easyp_rate_limit_requests_total` (allowed/denied), `easyp_rate_limit_active_clients` + +**Behavior:** +- Controlled by FeatureGate (`FeatureRateLimiting`). When disabled, all requests pass through +- Empty key from KeyExtractor → fail-open (request allowed) +- Denied requests return `RESOURCE_EXHAUSTED` with `X-RateLimit-*` headers in gRPC metadata +- Allowed requests include `X-RateLimit-*` headers in response metadata + ### Metrics (`internal/adapters/metrics`) Prometheus-based metrics: @@ -116,15 +162,16 @@ An SQL wrapper over sqlx: ## Code Generation Request Flow 1. The gRPC client sends a `GenerateRequest` with protobuf files and the plugin name -2. The request passes through the interceptor chain (validation, logging, metrics, audit) +2. The request passes through the interceptor chain (validation, logging, metrics, license check, audit) 3. `Core.Generate()` parses the plugin name (`group/name:version`) -4. Core requests the plugin from the Registry (PostgreSQL) -5. The task is submitted to the WorkerPool -6. The WorkerPool checks queue availability (backpressure) -7. A worker executes `docker run --rm -i` with security constraints -8. Protobuf data is passed to the container's stdin, the result is read from stdout -9. On transient errors, a retry is performed (up to `max_retries` attempts) -10. The result is returned to the client via gRPC, metrics and audit are recorded +4. Core checks the FeatureGate for feature availability and applies license-based limits +5. Core requests the plugin from the Registry (PostgreSQL) +6. The task is submitted to the WorkerPool +7. The WorkerPool checks queue availability (backpressure) +8. A worker executes `docker run --rm -i` with security constraints +9. Protobuf data is passed to the container's stdin, the result is read from stdout +10. On transient errors, a retry is performed (up to `max_retries` attempts) +11. The result is returned to the client via gRPC, metrics and audit are recorded ## Design Patterns @@ -132,5 +179,6 @@ An SQL wrapper over sqlx: |---------|-------| | **Decorator** | Tracing: `TracingCore`, `TracingRegistry`, `TracingPlugin` wrap interfaces, adding spans | | **Worker Pool** | Limiting Docker container concurrency with a queue and backpressure | -| **Adapter** | Adapters for metrics, audit, registry — isolate infrastructure from business logic | +| **Adapter** | Adapters for metrics, audit, registry — isolate infrastructure from business logic. `featureGateAdapter` bridges `license.FeatureGate` to `core.FeatureGate` to avoid circular dependencies | | **Middleware Chain** | gRPC interceptor chain for cross-cutting concerns | +| **Feature Gate** | `FeatureGate` controls feature availability based on the current license tier | diff --git a/docs/configuration.md b/docs/configuration.md index e01fc22..dca8009 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -49,6 +49,15 @@ worker_pool: generation_timeout: 120s # Maximum generation time max_retries: 3 # Retries on transient errors shutdown_timeout: 30s # Graceful shutdown timeout + +license: + key: "" # Inline PASETO v4.public license token (takes priority over file) + file: "" # Path to a file containing the PASETO license token + +rate_limit: + requests_per_second: 10.0 # Token refill rate (tokens/sec) + burst: 20 # Max tokens (burst size) + cleanup_interval: 10m # Stale client cleanup interval ``` ## Environment Variables @@ -96,6 +105,88 @@ All configuration parameters can be overridden via environment variables. | `WORKER_POOL_MAX_RETRIES` | Maximum retries | `3` | | `WORKER_POOL_SHUTDOWN_TIMEOUT` | Shutdown timeout | `30s` | +### Rate Limiting + +| Variable | Description | Default | +|----------|-------------|---------| +| `RATE_LIMIT_REQUESTS_PER_SECOND` | Token refill rate (tokens/sec) | `10.0` | +| `RATE_LIMIT_BURST` | Max tokens (burst size) | `20` | +| `RATE_LIMIT_CLEANUP_INTERVAL` | Stale client bucket cleanup interval | `10m` | + +### License + +| Variable | Description | Default | +|----------|-------------|---------| +| `LICENSE_KEY` | Inline PASETO v4.public license token | — | +| `LICENSE_FILE` | Path to a file containing the license token | — | + +## Rate Limiting + +Per-client rate limiting using the token bucket algorithm. Each client (identified by IP address) gets an independent bucket. Controlled by FeatureGate — when `FeatureRateLimiting` is disabled, all requests pass through. + +### Configuration Parameters + +| Parameter | Env Variable | Default | Description | +|-----------|-------------|---------|-------------| +| `rate_limit.requests_per_second` | `RATE_LIMIT_REQUESTS_PER_SECOND` | `10.0` | Token refill rate per second | +| `rate_limit.burst` | `RATE_LIMIT_BURST` | `20` | Maximum tokens (burst capacity) | +| `rate_limit.cleanup_interval` | `RATE_LIMIT_CLEANUP_INTERVAL` | `10m` | Interval for cleaning up stale client buckets | + +### Behavior + +- Allowed requests receive `X-RateLimit-Limit`, `X-RateLimit-Remaining`, and `X-RateLimit-Reset` headers in gRPC response metadata +- Denied requests return gRPC `RESOURCE_EXHAUSTED` with the same headers in trailing metadata +- If the client IP cannot be determined, the request passes through (fail-open) +- The `KeyExtractor` abstraction allows switching from IP-based to API key or tenant ID-based limiting in the future + +### Example + +```yaml +rate_limit: + requests_per_second: 10.0 # token refill rate (tokens/sec) + burst: 20 # max tokens (burst size) + cleanup_interval: 10m # stale client cleanup interval +``` + +## License + +The license system controls access to Enterprise features using PASETO v4.public tokens signed with Ed25519. When no valid license is present, the service operates in Community mode with all core features enabled. + +### Configuration Parameters + +| Parameter | Env Variable | Description | +|-----------|-------------|-------------| +| `license.key` | `LICENSE_KEY` | Inline PASETO v4.public token string | +| `license.file` | `LICENSE_FILE` | Path to a file containing the PASETO token | + +### Priority and Behavior + +- If `license.key` is set, it is used regardless of `license.file` (a warning is logged when both are specified). +- If only `license.file` is set, the token is read from the specified file path. +- If neither is set, the service starts in **Community mode** — core features are available (code generation, plugin listing, MCP tools, rate limiting, plugin CRUD). Limits: `max_workers=4`, `max_plugins=10`. + +### Embedding the Public Key + +The Ed25519 public key used to verify license tokens must be embedded at build time via `-ldflags`: + +```bash +go build -ldflags "-X main.licensePublicKey=" ./cmd/ +``` + +If no public key is embedded in the binary, the service operates in Community mode regardless of any token configuration. + +### Example + +```yaml +# Enterprise license via inline token +license: + key: "v4.public.eyJ..." + +# Or via file path +license: + file: "/etc/easyp/license.key" +``` + ## Docker Compose Environment Variables The following variables are used to customize ports in `docker-compose.yml`: @@ -157,4 +248,10 @@ worker_pool: generation_timeout: 120s max_retries: 3 shutdown_timeout: 30s +rate_limit: + requests_per_second: 20.0 + burst: 40 + cleanup_interval: 10m +license: + file: "/etc/easyp/license.key" ``` diff --git a/docs/deployment.md b/docs/deployment.md index b89729d..71b5e6b 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -137,6 +137,87 @@ task local-push-registry The `push.sh` script builds images from the `registry/` directory and pushes them to the specified registry. +## License Configuration + +EasyP supports two license tiers: **Community** (free, default) and **Enterprise**. The license is a PASETO v4.public token verified against a public key embedded at build time. + +### Building with a Public Key + +Embed the Ed25519 public key into the binary at build time: + +```bash +go build -ldflags "-X main.licensePublicKey=" ./cmd/ +``` + +If no public key is embedded, the service runs in Community mode. + +### Providing the License Token + +The license token can be supplied via the config file or environment variables: + +```yaml +# config.yml +license: + key: "v4.public.xxx..." # inline token (takes priority) + file: "/path/to/license.key" # or path to a file containing the token +``` + +Environment variables: `LICENSE_KEY`, `LICENSE_FILE`. If both `key` and `file` are set, `key` takes priority. + +### Docker Example + +Mount a license file as a volume: + +```bash +docker run -d \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v ./config.yml:/config.yml \ + -v ./license.key:/license.key \ + -p 8080:8080 -p 8081:8081 -p 8082:8082 -p 8083:8083 \ + ghcr.io/easyp-tech/service:latest \ + -cfg /config.yml +``` + +Or pass the token directly via an environment variable: + +```bash +docker run -d \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v ./config.yml:/config.yml \ + -e LICENSE_KEY="v4.public.xxx..." \ + -p 8080:8080 -p 8081:8081 -p 8082:8082 -p 8083:8083 \ + ghcr.io/easyp-tech/service:latest \ + -cfg /config.yml +``` + +### Graceful Degradation + +When a license expires or is absent, the service transitions to Community mode without interruption: + +- All Community features remain fully operational (code generation, plugin listing, MCP tools, rate limiting, plugin CRUD) +- Enterprise-only features (multi-tenancy, response caching, audit) are disabled +- Community limits apply: `max_workers=4`, `max_plugins=10` +- In-progress operations complete normally; only subsequent Enterprise requests are denied +- The expiration watcher checks every 60 seconds and logs a warning on transition + +No restart is required — the service continues serving requests throughout the transition. + +### Monitoring License Status + +Watch these Prometheus metrics to stay ahead of license issues: + +| Metric | Type | Description | +|--------|------|-------------| +| `easyp_license_valid` | Gauge | `1` when the license is valid, `0` otherwise | +| `easyp_license_expiry_timestamp_seconds` | Gauge | Unix timestamp of the license expiration | +| `easyp_license_feature_denied_total` | Counter | Number of denied feature requests (label: `feature`) | + +Useful PromQL queries: + +- License status: `easyp_license_valid` +- Time until expiration: `easyp_license_expiry_timestamp_seconds - time()` +- Feature denial rate: `rate(easyp_license_feature_denied_total[5m])` + ## Production Deployment Checklist 1. Configure PostgreSQL with strong credentials and SSL (`sslmode=require`) @@ -147,3 +228,5 @@ The `push.sh` script builds images from the `registry/` directory and pushes the 6. Configure a health check for the orchestrator 7. Set resource limits for the service container 8. Configure the worker pool according to the expected load +9. Configure the license: embed the public key at build time, provide the token via `license.key`, `license.file`, or `LICENSE_KEY` env var +10. Set up alerts on `easyp_license_valid == 0` and `easyp_license_expiry_timestamp_seconds - time() < 7*24*3600` (expiring within 7 days) diff --git a/docs/development.md b/docs/development.md index 4052621..696510d 100644 --- a/docs/development.md +++ b/docs/development.md @@ -42,10 +42,15 @@ After startup: # Build the binary go build -o bin/server ./cmd/main.go +# Build with an embedded license public key (Ed25519, hex-encoded) +go build -ldflags "-X main.licensePublicKey=" -o bin/server ./cmd/main.go + # Run with configuration ./bin/server -cfg config.yml -log_level debug ``` +If `licensePublicKey` is not set, the service starts in Community mode (all community features enabled, enterprise features disabled). + ## Project Structure ``` @@ -57,6 +62,7 @@ go build -o bin/server ./cmd/main.go │ ├── api/ # gRPC transport layer (server, interceptors) │ ├── core/ # Business logic (Core, WorkerPool) │ │ └── pool.go # Worker pool with backpressure and retry +│ ├── license/ # License system (Feature enum, Claims, LicenseManager, FeatureGate, metrics) │ ├── adapters/ │ │ ├── registry/ # Plugin registry adapter (PostgreSQL + Docker) │ │ ├── audit/ # Asynchronous audit system @@ -155,6 +161,15 @@ go test ./internal/mcpserver -run TestMCPServer -count=1 task test-mcp ``` +### License Tests + +```bash +# Run all license package tests (unit + property-based) +go test ./internal/license/... -v +``` + +To generate test licenses during development, use `license.FormatToken` with a test Ed25519 private key. See `internal/license/claims_test.go` for examples. + ### MCP Smoke Tests Require a running server: @@ -184,6 +199,8 @@ Uses the `easyp.yaml` configuration with remote plugins on `localhost:8080`. | Prometheus client | v1.23.2 | Metrics export | | MCP SDK | v1.3.1 | Model Context Protocol | | Pyroscope | v1.2.7 | Continuous profiling | +| go-paseto | v1.5+ | PASETO v4.public token library (Ed25519) | +| rapid | — | Property-based testing library | ## Release diff --git a/docs/licensing.md b/docs/licensing.md new file mode 100644 index 0000000..28e75c9 --- /dev/null +++ b/docs/licensing.md @@ -0,0 +1,171 @@ +# EasyP Licensing + +## Overview + +EasyP API Service uses a two-tier licensing model to control access to features and resource limits: + +- **Community** (free, default) — includes core features: code generation, plugin listing, MCP server tools, rate limiting, and plugin CRUD. Limited to 4 workers and 10 plugins. +- **Enterprise** (paid) — unlocks all features including multi-tenancy, response caching, and audit, with higher resource limits (16 workers, unlimited plugins). + +License tokens use the [PASETO v4.public](https://paseto.io/) format with Ed25519 asymmetric signatures. Tokens are signed with a private key (held by the license issuer) and verified with a public key embedded in the service binary at build time. All validation is performed offline — no network calls are required. + +## Feature Tiers + +| # | Feature Enum | String ID | Tier | Implemented | Description | +|---|-------------|-----------|------|-------------|-------------| +| 0 | `FeatureCodeGeneration` | `code_generation` | Community | ✓ | Code generation from protobuf via Docker plugins | +| 1 | `FeaturePluginListing` | `plugin_listing` | Community | ✓ | Listing available plugins | +| 2 | `FeatureMCPServerTools` | `mcp_server_tools` | Community | ✓ | MCP server tools (plugins_list, easyp_config_describe) | +| 3 | `FeatureRateLimiting` | `rate_limiting` | Community | ✓ | Per-client rate limiting (token bucket, by IP) | +| 4 | `FeaturePluginCRUD` | `plugin_crud` | Community | ✗ | Plugin CRUD operations (add/remove/update) | +| 5 | `FeatureMultiTenancy` | `multi_tenancy` | Enterprise | ✗ | Multi-tenancy (data isolation between clients) | +| 6 | `FeatureResponseCaching` | `response_caching` | Enterprise | ✗ | Generation response caching | +| 7 | `FeatureAudit` | `audit` | Enterprise | ✓ | Operation audit logging | + +Feature enums are defined in `internal/license/features.go` as typed `int` constants using `iota`. The `IsEnterprise()` method returns `true` for features 5–7. + +## Limits + +| Limit | Community | Enterprise | +|-------|-----------|------------| +| Max workers | 4 | 16 | +| Max plugins | 10 | Unlimited | + +License limits override the corresponding values from `config.yml`. When the service operates in Community mode, the limits above are enforced regardless of the configuration file settings. + +## Configuration + +The license token can be provided in two ways: + +| Parameter | Env Variable | Description | +|-----------|-------------|-------------| +| `license.key` | `LICENSE_KEY` | Inline PASETO v4.public token string | +| `license.file` | `LICENSE_FILE` | Path to a file containing the PASETO token | + +### Priority Rules + +1. If `license.key` is set, it is used regardless of `license.file`. A warning is logged when both are specified. +2. If only `license.file` is set, the token is read from the specified file path. +3. If neither is set, the service starts in Community mode. + +### Example + +```yaml +# Enterprise license via inline token +license: + key: "v4.public.eyJ..." + +# Or via file path +license: + file: "/etc/easyp/license.key" +``` + +Environment variables work the same way: + +```bash +export LICENSE_KEY="v4.public.eyJ..." +# or +export LICENSE_FILE="/etc/easyp/license.key" +``` + +## Embedding the Public Key + +PASETO v4.public uses asymmetric cryptography (Ed25519). The license issuer signs tokens with a **private key** (kept secret). The service verifies tokens using the corresponding **public key**, which is safe to distribute. + +The public key is embedded into the service binary at build time via Go linker flags: + +```bash +go build -ldflags "-X main.licensePublicKey=" ./cmd/ +``` + +This sets the `licensePublicKey` variable in the `main` package. At startup, the `LicenseManager` decodes this hex string into an Ed25519 public key and uses it to verify token signatures. + +If no public key is embedded (the variable is empty), the service operates in Community mode regardless of any token configuration. This ensures the service always starts successfully. + +## Graceful Degradation + +The licensing system is designed to never cause service failures. All license-related errors result in a fallback to Community mode. + +| Scenario | Behavior | +|----------|----------| +| No license configured | Community mode, all core features available | +| Expired license token | Community mode, warning logged with expiration timestamp | +| Invalid token (malformed) | Community mode, error logged with parsing failure reason | +| Invalid signature (tampered) | Community mode, error logged with verification failure | +| License expires at runtime | Transition to Community mode within 60 seconds, in-progress operations complete normally | +| No public key embedded | Community mode, no error | +| Both `key` and `file` set | `key` is used, warning logged about ambiguous configuration | +| License file not found | Community mode, error logged | + +When the license expires during runtime: + +1. The expiration watcher (60-second ticker) detects the expiration +2. The service transitions to Community mode automatically +3. In-progress Enterprise operations complete normally +4. Subsequent Enterprise requests are denied with gRPC `PERMISSION_DENIED` +5. All Community features continue without interruption +6. No restart is required + +## Metrics and Monitoring + +The licensing system exposes three Prometheus metrics: + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `easyp_license_valid` | Gauge | — | `1` when the license is valid, `0` when invalid or absent | +| `easyp_license_expiry_timestamp_seconds` | Gauge | — | Unix timestamp of the license expiration | +| `easyp_license_feature_denied_total` | Counter | `feature` | Number of denied feature access requests | + +### PromQL Examples + +License status: + +```promql +easyp_license_valid +``` + +Time remaining until expiration (in seconds): + +```promql +easyp_license_expiry_timestamp_seconds - time() +``` + +Feature denial rate over the last 5 minutes: + +```promql +rate(easyp_license_feature_denied_total[5m]) +``` + +Top denied features: + +```promql +topk(5, sum by (feature) (rate(easyp_license_feature_denied_total[5m]))) +``` + +### Alerting Recommendations + +- Alert when the license becomes invalid: `easyp_license_valid == 0` +- Alert when the license expires within 7 days: `easyp_license_expiry_timestamp_seconds - time() < 7 * 24 * 3600` +- Alert on sustained feature denials: `rate(easyp_license_feature_denied_total[5m]) > 0` + +## FAQ + +**What happens when the license expires?** + +The service transitions to Community mode automatically. Core features (code generation, plugin listing, MCP tools, rate limiting, plugin CRUD) continue working. Enterprise-only features (multi-tenancy, response caching, audit) are disabled. Community limits apply (`max_workers=4`, `max_plugins=10`). No restart is needed — the transition happens within 60 seconds. + +**Can I run without a license?** + +Yes. Without a license, the service operates in Community mode with all core features enabled. Community mode is the default. + +**How do I upgrade from Community to Enterprise?** + +Obtain a PASETO v4.public license token from the license issuer. Add it to your configuration via `license.key` (inline) or `license.file` (file path), or set the `LICENSE_KEY` / `LICENSE_FILE` environment variable. Restart the service or wait for the configuration to be reloaded. + +**Is an internet connection required for license validation?** + +No. License validation is performed entirely offline using the Ed25519 public key embedded in the binary. The token optionally contains a `refresh_url` claim for automatic license renewal, but this is not required — if the refresh request fails, the current token remains valid until its expiration date. + +**What if both `license.key` and `license.file` are set?** + +The `license.key` value takes priority. The token from `license.file` is ignored, and a warning is logged about the ambiguous configuration. diff --git a/docs/monitoring.md b/docs/monitoring.md index f022b59..2e2edbb 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -99,6 +99,21 @@ Service (Pyroscope SDK) → Pyroscope server (S3 backend) | `audit_events_lost_total` | Counter | Lost audit events (channel overflow) | | `audit_queue_depth` | Gauge | Audit queue depth | +### Rate Limiting + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `easyp_rate_limit_requests_total` | Counter | `status` (allowed/denied), `client_ip` | Total requests processed by rate limiter | +| `easyp_rate_limit_active_clients` | Gauge | — | Current number of active client buckets | + +### License + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `easyp_license_valid` | Gauge | — | 1 when the license is valid, 0 when invalid or absent | +| `easyp_license_expiry_timestamp_seconds` | Gauge | — | Unix timestamp of the license expiration | +| `easyp_license_feature_denied_total` | Counter | `feature` | Number of feature access denials per feature | + ### System | Metric | Type | Description | @@ -176,3 +191,51 @@ pool_active_workers / 4 * 100 # utilization percentage (with 4 workers) ```promql db_open_connections / 50 * 100 # usage percentage ``` + +### License Status + +```promql +easyp_license_valid +``` + +### Time Until License Expiration + +```promql +easyp_license_expiry_timestamp_seconds - time() +``` + +### Feature Denial Rate + +```promql +rate(easyp_license_feature_denied_total[5m]) +``` + +### Top Denied Features + +```promql +topk(5, sum by (feature) (rate(easyp_license_feature_denied_total[5m]))) +``` + +### Rate Limit Denial Rate + +```promql +rate(easyp_rate_limit_requests_total{status="denied"}[5m]) +``` + +### Rate Limit Allow/Deny Ratio + +```promql +sum(rate(easyp_rate_limit_requests_total{status="denied"}[5m])) / sum(rate(easyp_rate_limit_requests_total[5m])) * 100 +``` + +### Top Rate-Limited Clients + +```promql +topk(10, sum by (client_ip) (rate(easyp_rate_limit_requests_total{status="denied"}[5m]))) +``` + +### Active Rate Limit Buckets + +```promql +easyp_rate_limit_active_clients +``` diff --git a/go.mod b/go.mod index bc51ecd..b5e3712 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/easyp-tech/service go 1.26.0 require ( + aidanwoods.dev/go-paseto v1.6.0 github.com/easyp-tech/easyp v0.14.1-0.20260301022854-21e6e9dbe91e github.com/easyp-tech/protoc-gen-easydoc v0.4.0 github.com/easyp-tech/protoc-gen-mcp v0.1.1 @@ -26,12 +27,14 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.40.0 go.opentelemetry.io/otel/trace v1.40.0 golang.org/x/sync v0.19.0 + golang.org/x/time v0.15.0 google.golang.org/grpc v1.79.1 google.golang.org/protobuf v1.36.11 gopkg.in/yaml.v3 v3.0.1 ) require ( + aidanwoods.dev/go-result v0.3.1 // indirect github.com/a8m/envsubst v1.4.3 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -69,6 +72,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/crypto v0.48.0 // indirect golang.org/x/mod v0.32.0 // indirect golang.org/x/net v0.51.0 // indirect golang.org/x/oauth2 v0.35.0 // indirect diff --git a/go.sum b/go.sum index e6fc141..617cdc9 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +aidanwoods.dev/go-paseto v1.6.0 h1:JA/PFk5lVsB/PakQGqnfmik/1tIHjE6F0UoPPoAO/nU= +aidanwoods.dev/go-paseto v1.6.0/go.mod h1:LdqkL0Z2mLL0kBWzmHVR1cGFniX+zyOweQmbNKYrDxQ= +aidanwoods.dev/go-result v0.3.1 h1:ee98hpohYUVYbI+pa6gUHTyoRerIudgjky/IPSowDXQ= +aidanwoods.dev/go-result v0.3.1/go.mod h1:GKnFg8p/BKulVD3wsfULiPhpPmrTWyiTIbz8EWuUqSk= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/a8m/envsubst v1.4.3 h1:kDF7paGK8QACWYaQo6KtyYBozY2jhQrTuNNuUxQkhJY= @@ -144,6 +148,8 @@ go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= @@ -154,6 +160,8 @@ golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= +golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= +golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= diff --git a/internal/api/license_interceptor.go b/internal/api/license_interceptor.go new file mode 100644 index 0000000..81a2aea --- /dev/null +++ b/internal/api/license_interceptor.go @@ -0,0 +1,97 @@ +package api + +import ( + "context" + "fmt" + "log/slog" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/easyp-tech/service/internal/license" +) + +// LicenseInterceptor проверяет лицензию на уровне gRPC-запроса. +type LicenseInterceptor struct { + gate *license.FeatureGate + logger *slog.Logger + // methodFeatures маппит gRPC full method name → Feature. + // Методы, отсутствующие в маппинге, считаются Community и пропускаются. + methodFeatures map[string]license.Feature +} + +// NewLicenseInterceptor создаёт интерсептор с маппингом method → Feature. +// Текущие методы (GenerateCode, Plugins) — Community, маппинг пуст. +// Маппинг будет расширяться по мере добавления Enterprise-методов. +func NewLicenseInterceptor(gate *license.FeatureGate, logger *slog.Logger) *LicenseInterceptor { + return &LicenseInterceptor{ + gate: gate, + logger: logger, + methodFeatures: make(map[string]license.Feature), + } +} + +// checkFeature проверяет, разрешён ли вызов метода текущей лицензией. +// Возвращает nil, если метод не требует проверки (Community) или функция разрешена. +func (li *LicenseInterceptor) checkFeature(fullMethod string) error { + feature, ok := li.methodFeatures[fullMethod] + if !ok { + // Community method — no license check needed. + return nil + } + + if li.gate.Enabled(feature) { + return nil + } + + li.logger.Warn("enterprise feature denied", + "method", fullMethod, + "feature", feature.String(), + ) + + return status.Errorf(codes.PermissionDenied, "feature %s requires enterprise license", feature) +} + +// UnaryServerInterceptor возвращает grpc.UnaryServerInterceptor. +func (li *LicenseInterceptor) UnaryServerInterceptor() grpc.UnaryServerInterceptor { + return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { + if err := li.checkFeature(info.FullMethod); err != nil { + return nil, err + } + + return handler(ctx, req) + } +} + +// wrappedStream wraps grpc.ServerStream to preserve context. +type wrappedStream struct { + grpc.ServerStream + ctx context.Context +} + +// Context returns the wrapped context. +func (w *wrappedStream) Context() context.Context { + return w.ctx +} + +// StreamServerInterceptor возвращает grpc.StreamServerInterceptor. +func (li *LicenseInterceptor) StreamServerInterceptor() grpc.StreamServerInterceptor { + return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + if err := li.checkFeature(info.FullMethod); err != nil { + return err + } + + return handler(srv, &wrappedStream{ServerStream: ss, ctx: ss.Context()}) + } +} + +// RegisterMethodFeature registers a gRPC method as requiring a specific license feature. +// This is used to extend the interceptor when new Enterprise methods are added. +func (li *LicenseInterceptor) RegisterMethodFeature(fullMethod string, feature license.Feature) { + li.methodFeatures[fullMethod] = feature + li.logger.Info("registered license check", + "method", fullMethod, + "feature", fmt.Sprintf("%s", feature), + ) +} diff --git a/internal/core/core.go b/internal/core/core.go index 2be4948..933eb86 100644 --- a/internal/core/core.go +++ b/internal/core/core.go @@ -9,15 +9,18 @@ import ( // Core defines the interface for interacting with the plugin server. type Core struct { - metrics Metrics - registry Registry + metrics Metrics + registry Registry + featureGate FeatureGate } // New creates a new Core instance. -func New(metrics Metrics, registry Registry) *Core { +// If featureGate is nil, all features are considered available (backward compatibility). +func New(metrics Metrics, registry Registry, featureGate FeatureGate) *Core { return &Core{ - metrics: metrics, - registry: registry, + metrics: metrics, + registry: registry, + featureGate: featureGate, } } diff --git a/internal/core/domain.go b/internal/core/domain.go index c7ae2f0..c45e463 100644 --- a/internal/core/domain.go +++ b/internal/core/domain.go @@ -118,6 +118,20 @@ type ( Save(ctx context.Context, entry AuditEntry) error } + // FeatureGate определяет интерфейс проверки доступности функций. + // Определён в core для избежания циклических зависимостей. + // Параметр feature имеет тип int (не license.Feature) для предотвращения + // циклической зависимости между пакетами core и license. + FeatureGate interface { + // Enabled возвращает true, если функция с указанным идентификатором разрешена текущей лицензией. + Enabled(feature int) bool + // MaxWorkers возвращает лимит воркеров из текущей лицензии. + MaxWorkers() int + // MaxPlugins возвращает лимит плагинов из текущей лицензии. + // -1 означает отсутствие ограничения. + MaxPlugins() int + } + // CoreService defines the business logic interface used by the API layer. CoreService interface { Generate(ctx context.Context, req GenerateCodeRequest) (*GenerateCodeResponse, error) diff --git a/internal/license/claims.go b/internal/license/claims.go new file mode 100644 index 0000000..d450d66 --- /dev/null +++ b/internal/license/claims.go @@ -0,0 +1,45 @@ +package license + +import "time" + +// Tier определяет уровень лицензии. +type Tier string + +const ( + TierCommunity Tier = "community" + TierEnterprise Tier = "enterprise" +) + +// Claims содержит данные из PASETO-токена лицензии. +type Claims struct { + Tier Tier `json:"tier"` + Features []Feature `json:"features"` + MaxWorkers int `json:"max_workers"` + MaxPlugins int `json:"max_plugins"` // -1 = unlimited + ExpiresAt time.Time `json:"exp"` + IssuedAt time.Time `json:"iat"` + Issuer string `json:"iss"` + Subject string `json:"sub"` + RefreshURL string `json:"refresh_url,omitempty"` +} + +// CommunityDefaults возвращает claims для Community-режима. +func CommunityDefaults() Claims { + return Claims{ + Tier: TierCommunity, + Features: communityFeatures(), + MaxWorkers: 4, + MaxPlugins: 10, + } +} + +// communityFeatures возвращает список всех не-Enterprise функций. +func communityFeatures() []Feature { + var features []Feature + for f := Feature(0); f < featureCount; f++ { + if !f.IsEnterprise() { + features = append(features, f) + } + } + return features +} diff --git a/internal/license/claims_test.go b/internal/license/claims_test.go new file mode 100644 index 0000000..46e2010 --- /dev/null +++ b/internal/license/claims_test.go @@ -0,0 +1,68 @@ +package license + +import ( + "testing" +) + +func TestCommunityDefaults(t *testing.T) { + claims := CommunityDefaults() + + if claims.Tier != TierCommunity { + t.Errorf("expected tier %q, got %q", TierCommunity, claims.Tier) + } + + if claims.MaxWorkers != 4 { + t.Errorf("expected MaxWorkers=4, got %d", claims.MaxWorkers) + } + + if claims.MaxPlugins != 10 { + t.Errorf("expected MaxPlugins=10, got %d", claims.MaxPlugins) + } + + // Verify all community features are present and no enterprise features. + for _, f := range claims.Features { + if f.IsEnterprise() { + t.Errorf("community defaults should not include enterprise feature %s", f) + } + } + + // Verify every non-enterprise feature is included. + featureSet := make(map[Feature]bool, len(claims.Features)) + for _, f := range claims.Features { + featureSet[f] = true + } + + for f := Feature(0); f < featureCount; f++ { + if !f.IsEnterprise() && !featureSet[f] { + t.Errorf("community defaults missing feature %s", f) + } + if f.IsEnterprise() && featureSet[f] { + t.Errorf("community defaults should not contain enterprise feature %s", f) + } + } +} + +func TestCommunityDefaults_FeatureCount(t *testing.T) { + claims := CommunityDefaults() + + // Count expected community features. + var expected int + for f := Feature(0); f < featureCount; f++ { + if !f.IsEnterprise() { + expected++ + } + } + + if len(claims.Features) != expected { + t.Errorf("expected %d community features, got %d", expected, len(claims.Features)) + } +} + +func TestTierConstants(t *testing.T) { + if TierCommunity != "community" { + t.Errorf("expected TierCommunity=%q, got %q", "community", TierCommunity) + } + if TierEnterprise != "enterprise" { + t.Errorf("expected TierEnterprise=%q, got %q", "enterprise", TierEnterprise) + } +} diff --git a/internal/license/errors.go b/internal/license/errors.go new file mode 100644 index 0000000..7b8a8aa --- /dev/null +++ b/internal/license/errors.go @@ -0,0 +1,11 @@ +package license + +import "errors" + +var ( + ErrInvalidToken = errors.New("license: invalid token format") + ErrSignatureInvalid = errors.New("license: signature verification failed") + ErrTokenExpired = errors.New("license: token expired") + ErrInvalidClaims = errors.New("license: invalid claims") + ErrFileNotFound = errors.New("license: license file not found") +) diff --git a/internal/license/features.go b/internal/license/features.go new file mode 100644 index 0000000..41b7e0d --- /dev/null +++ b/internal/license/features.go @@ -0,0 +1,48 @@ +package license + +// Feature определяет функцию сервиса как типизированную константу. +type Feature int + +const ( + FeatureCodeGeneration Feature = iota // Базовая генерация кода + FeaturePluginListing // Листинг плагинов + FeatureMCPServerTools // MCP server tools + FeatureRateLimiting // Rate limiting + FeaturePluginCRUD // CRUD операции с плагинами + // Enterprise-only features + FeatureMultiTenancy // Мультитенантность + FeatureResponseCaching // Кэширование ответов + FeatureAudit // Аудит + + featureCount // sentinel для валидации +) + +// featureNames содержит строковые представления Feature для метрик и логирования. +var featureNames = [featureCount]string{ + FeatureCodeGeneration: "code_generation", + FeaturePluginListing: "plugin_listing", + FeatureMCPServerTools: "mcp_server_tools", + FeatureRateLimiting: "rate_limiting", + FeaturePluginCRUD: "plugin_crud", + FeatureMultiTenancy: "multi_tenancy", + FeatureResponseCaching: "response_caching", + FeatureAudit: "audit", +} + +// String возвращает строковое представление Feature для метрик и логирования. +func (f Feature) String() string { + if !f.Valid() { + return "unknown" + } + return featureNames[f] +} + +// IsEnterprise возвращает true, если функция доступна только в Enterprise. +func (f Feature) IsEnterprise() bool { + return f == FeatureMultiTenancy || f == FeatureResponseCaching || f == FeatureAudit +} + +// Valid возвращает true, если значение Feature определено. +func (f Feature) Valid() bool { + return f >= 0 && f < featureCount +} diff --git a/internal/license/features_test.go b/internal/license/features_test.go new file mode 100644 index 0000000..f2ff55b --- /dev/null +++ b/internal/license/features_test.go @@ -0,0 +1,59 @@ +package license + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFeature_String(t *testing.T) { + tests := []struct { + feature Feature + want string + }{ + {FeatureCodeGeneration, "code_generation"}, + {FeaturePluginListing, "plugin_listing"}, + {FeatureMCPServerTools, "mcp_server_tools"}, + {FeatureRateLimiting, "rate_limiting"}, + {FeaturePluginCRUD, "plugin_crud"}, + {FeatureMultiTenancy, "multi_tenancy"}, + {FeatureResponseCaching, "response_caching"}, + {FeatureAudit, "audit"}, + {Feature(-1), "unknown"}, + {featureCount, "unknown"}, + {Feature(999), "unknown"}, + } + + for _, tt := range tests { + t.Run(tt.want, func(t *testing.T) { + assert.Equal(t, tt.want, tt.feature.String()) + }) + } +} + +func TestFeature_IsEnterprise(t *testing.T) { + enterpriseFeatures := []Feature{FeatureMultiTenancy, FeatureResponseCaching, FeatureAudit} + communityFeatures := []Feature{ + FeatureCodeGeneration, FeaturePluginListing, FeatureMCPServerTools, + FeatureRateLimiting, FeaturePluginCRUD, + } + + for _, f := range enterpriseFeatures { + assert.True(t, f.IsEnterprise(), "%s should be enterprise", f) + } + for _, f := range communityFeatures { + assert.False(t, f.IsEnterprise(), "%s should not be enterprise", f) + } +} + +func TestFeature_Valid(t *testing.T) { + // All defined features should be valid. + for f := FeatureCodeGeneration; f < featureCount; f++ { + assert.True(t, f.Valid(), "%s should be valid", f) + } + + // Out-of-range values should be invalid. + assert.False(t, Feature(-1).Valid()) + assert.False(t, featureCount.Valid()) + assert.False(t, Feature(999).Valid()) +} diff --git a/internal/license/gate.go b/internal/license/gate.go new file mode 100644 index 0000000..d9c2580 --- /dev/null +++ b/internal/license/gate.go @@ -0,0 +1,65 @@ +package license + +// FeatureGate предоставляет проверку доступности функций на основе текущей лицензии. +type FeatureGate struct { + manager *LicenseManager + metrics *LicenseMetrics +} + +// NewFeatureGate создаёт FeatureGate, привязанный к LicenseManager. +func NewFeatureGate(manager *LicenseManager) *FeatureGate { + return &FeatureGate{ + manager: manager, + metrics: manager.Metrics(), + } +} + +// Enabled возвращает true, если функция разрешена текущей лицензией. +// Для невалидных значений Feature возвращает false. +// +// Алгоритм: +// 1. Если feature невалиден → false +// 2. Получить claims из LicenseManager (потокобезопасно) +// 3. Если tier == Enterprise → true +// 4. Если feature.IsEnterprise() → инкремент метрики denied, false +// 5. Проверить наличие feature в claims.Features → результат +func (fg *FeatureGate) Enabled(feature Feature) bool { + // Step 1: Invalid feature → false. + if !feature.Valid() { + return false + } + + // Step 2: Get current claims (thread-safe). + claims := fg.manager.Claims() + + // Step 3: Enterprise tier → all features enabled. + if claims.Tier == TierEnterprise { + return true + } + + // Step 4: Enterprise-only feature in non-Enterprise mode → deny + metric. + if feature.IsEnterprise() { + fg.metrics.featureDenied.WithLabelValues(feature.String()).Inc() + return false + } + + // Step 5: Check if feature is in claims.Features. + for _, f := range claims.Features { + if f == feature { + return true + } + } + + return false +} + +// MaxWorkers возвращает лимит воркеров из текущей лицензии. +func (fg *FeatureGate) MaxWorkers() int { + return fg.manager.Claims().MaxWorkers +} + +// MaxPlugins возвращает лимит плагинов из текущей лицензии. +// -1 означает отсутствие ограничения. +func (fg *FeatureGate) MaxPlugins() int { + return fg.manager.Claims().MaxPlugins +} diff --git a/internal/license/manager.go b/internal/license/manager.go new file mode 100644 index 0000000..234090c --- /dev/null +++ b/internal/license/manager.go @@ -0,0 +1,356 @@ +package license + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + "sync" + "time" + + "aidanwoods.dev/go-paseto" + "github.com/prometheus/client_golang/prometheus" +) + +// LicenseConfig содержит параметры конфигурации лицензии. +type LicenseConfig struct { + Key string `yaml:"key" env:"LICENSE_KEY"` + File string `yaml:"file" env:"LICENSE_FILE"` +} + +// LicenseManager отвечает за парсинг, валидацию и кэширование лицензии. +type LicenseManager struct { + mu sync.RWMutex + claims Claims + valid bool + publicKey paseto.V4AsymmetricPublicKey + hasKey bool + logger *slog.Logger + metrics *LicenseMetrics + stopTicker chan struct{} +} + +// NewLicenseManager создаёт LicenseManager. +// publicKeyHex — hex-encoded Ed25519 public key, встроенный через ldflags. +// Если publicKeyHex пуст, работает в Community-режиме. +func NewLicenseManager( + publicKeyHex string, + cfg LicenseConfig, + logger *slog.Logger, + reg *prometheus.Registry, + namespace string, +) (*LicenseManager, error) { + metrics := NewLicenseMetrics(reg, namespace) + + lm := &LicenseManager{ + claims: CommunityDefaults(), + logger: logger, + metrics: metrics, + stopTicker: make(chan struct{}), + } + + // Step 1: If publicKeyHex is empty → Community mode. + if publicKeyHex == "" { + logger.Info("no public key provided, operating in community mode") + lm.updateMetrics() + return lm, nil + } + + // Step 2: Decode publicKeyHex → paseto.V4AsymmetricPublicKey. + pubKey, err := paseto.NewV4AsymmetricPublicKeyFromHex(publicKeyHex) + if err != nil { + logger.Error("failed to decode public key, operating in community mode", "error", err) + lm.updateMetrics() + return lm, fmt.Errorf("decode public key: %w", err) + } + lm.publicKey = pubKey + lm.hasKey = true + + // Step 3: Load token. + token, err := lm.loadToken(cfg) + if err != nil { + lm.updateMetrics() + return lm, err + } + if token == "" { + // No token configured → Community mode. + logger.Info("no license token configured, operating in community mode") + lm.updateMetrics() + return lm, nil + } + + // Step 4: If both key and file are specified → log warning. + if cfg.Key != "" && cfg.File != "" { + logger.Warn("both license.key and license.file specified, using license.key") + } + + // Steps 5-9: Parse, validate, cache. + if err := lm.applyToken(token); err != nil { + lm.updateMetrics() + return lm, err + } + + lm.updateMetrics() + return lm, nil +} + +// Claims возвращает текущие кэшированные claims (потокобезопасно). +func (lm *LicenseManager) Claims() Claims { + lm.mu.RLock() + defer lm.mu.RUnlock() + return lm.claims +} + +// Valid возвращает true, если лицензия валидна. +func (lm *LicenseManager) Valid() bool { + lm.mu.RLock() + defer lm.mu.RUnlock() + return lm.valid +} + +// Reload загружает и валидирует новый токен, обновляя кэш. +func (lm *LicenseManager) Reload(token string) error { + if !lm.hasKey { + return fmt.Errorf("cannot reload: no public key configured") + } + + if err := lm.applyToken(token); err != nil { + return err + } + + lm.updateMetrics() + return nil +} + +// ParseToken парсит PASETO v4.public токен и возвращает Claims. +func (lm *LicenseManager) ParseToken(token string) (Claims, error) { + if !lm.hasKey { + return Claims{}, fmt.Errorf("%w: no public key configured", ErrInvalidToken) + } + + parser := paseto.NewParserWithoutExpiryCheck() + parsed, err := parser.ParseV4Public(lm.publicKey, token, nil) + if err != nil { + return Claims{}, fmt.Errorf("%w: %s", ErrSignatureInvalid, err.Error()) + } + + claims, err := extractClaims(parsed) + if err != nil { + return Claims{}, err + } + + return claims, nil +} + +// FormatToken форматирует Claims в строковое представление PASETO-токена. +// Требует приватный ключ (используется только в тестах и CLI для генерации лицензий). +func FormatToken(claims Claims, privateKey paseto.V4AsymmetricSecretKey) (string, error) { + token := paseto.NewToken() + + if err := token.Set("tier", claims.Tier); err != nil { + return "", fmt.Errorf("set tier: %w", err) + } + if err := token.Set("features", claims.Features); err != nil { + return "", fmt.Errorf("set features: %w", err) + } + if err := token.Set("max_workers", claims.MaxWorkers); err != nil { + return "", fmt.Errorf("set max_workers: %w", err) + } + if err := token.Set("max_plugins", claims.MaxPlugins); err != nil { + return "", fmt.Errorf("set max_plugins: %w", err) + } + + token.SetExpiration(claims.ExpiresAt) + token.SetIssuedAt(claims.IssuedAt) + token.SetIssuer(claims.Issuer) + token.SetSubject(claims.Subject) + + if claims.RefreshURL != "" { + token.SetString("refresh_url", claims.RefreshURL) + } + + signed := token.V4Sign(privateKey, nil) + return signed, nil +} + +// StartExpirationWatcher запускает горутину с тикером (60s), +// проверяющую истечение лицензии. +func (lm *LicenseManager) StartExpirationWatcher(ctx context.Context) { + go func() { + ticker := time.NewTicker(60 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + lm.checkExpiration() + case <-lm.stopTicker: + return + case <-ctx.Done(): + return + } + } + }() +} + +// Stop останавливает тикер проверки истечения. +func (lm *LicenseManager) Stop() { + select { + case lm.stopTicker <- struct{}{}: + default: + } +} + +// Metrics возвращает метрики лицензирования (для использования в FeatureGate). +func (lm *LicenseManager) Metrics() *LicenseMetrics { + return lm.metrics +} + +// loadToken загружает токен из конфигурации. +func (lm *LicenseManager) loadToken(cfg LicenseConfig) (string, error) { + if cfg.Key != "" { + return cfg.Key, nil + } + + if cfg.File != "" { + data, err := os.ReadFile(cfg.File) + if err != nil { + if os.IsNotExist(err) { + lm.logger.Error("license file not found, operating in community mode", "file", cfg.File) + return "", fmt.Errorf("%w: %s", ErrFileNotFound, cfg.File) + } + return "", fmt.Errorf("read license file: %w", err) + } + return string(data), nil + } + + return "", nil +} + +// applyToken парсит и применяет токен, обновляя кэш. +func (lm *LicenseManager) applyToken(token string) error { + claims, err := lm.ParseToken(token) + if err != nil { + lm.logger.Error("failed to parse license token, operating in community mode", "error", err) + lm.mu.Lock() + lm.claims = CommunityDefaults() + lm.valid = false + lm.mu.Unlock() + return err + } + + // Check expiration. + if time.Now().After(claims.ExpiresAt) { + lm.logger.Warn("license token expired, operating in community mode", + "expired_at", claims.ExpiresAt.Format(time.RFC3339)) + lm.mu.Lock() + lm.claims = CommunityDefaults() + lm.valid = false + lm.mu.Unlock() + return ErrTokenExpired + } + + // Cache claims. + lm.mu.Lock() + lm.claims = claims + lm.valid = true + lm.mu.Unlock() + + lm.logger.Info("license loaded successfully", + "tier", string(claims.Tier), + "expires_at", claims.ExpiresAt.Format(time.RFC3339), + "features_count", len(claims.Features)) + + return nil +} + +// checkExpiration проверяет истечение лицензии. +func (lm *LicenseManager) checkExpiration() { + lm.mu.RLock() + expiresAt := lm.claims.ExpiresAt + valid := lm.valid + lm.mu.RUnlock() + + if !valid { + return + } + + if time.Now().After(expiresAt) { + lm.mu.Lock() + // Double-check after acquiring write lock. + if lm.valid { + lm.claims = CommunityDefaults() + lm.valid = false + lm.logger.Warn("license expired during runtime, transitioning to community mode", + "expired_at", expiresAt.Format(time.RFC3339)) + } + lm.mu.Unlock() + lm.updateMetrics() + } +} + +// updateMetrics обновляет Prometheus-метрики. +func (lm *LicenseManager) updateMetrics() { + lm.mu.RLock() + valid := lm.valid + expiresAt := lm.claims.ExpiresAt + lm.mu.RUnlock() + + if valid { + lm.metrics.valid.Set(1) + } else { + lm.metrics.valid.Set(0) + } + + if !expiresAt.IsZero() { + lm.metrics.expiryTS.Set(float64(expiresAt.Unix())) + } +} + +// extractClaims извлекает Claims из распарсенного PASETO-токена. +func extractClaims(token *paseto.Token) (Claims, error) { + claimsJSON := token.ClaimsJSON() + + // We use an intermediate struct for JSON unmarshalling since the PASETO + // library stores time as RFC3339 strings, not as time.Time directly. + var raw struct { + Tier Tier `json:"tier"` + Features []Feature `json:"features"` + MaxWorkers int `json:"max_workers"` + MaxPlugins int `json:"max_plugins"` + RefreshURL string `json:"refresh_url"` + } + if err := json.Unmarshal(claimsJSON, &raw); err != nil { + return Claims{}, fmt.Errorf("%w: %s", ErrInvalidClaims, err.Error()) + } + + // Extract time fields using the PASETO token's built-in methods (RFC3339). + exp, err := token.GetExpiration() + if err != nil { + return Claims{}, fmt.Errorf("%w: missing exp: %s", ErrInvalidClaims, err.Error()) + } + + iat, err := token.GetIssuedAt() + if err != nil { + // IssuedAt is optional for parsing; use zero time if missing. + iat = time.Time{} + } + + iss, _ := token.GetIssuer() + sub, _ := token.GetSubject() + + claims := Claims{ + Tier: raw.Tier, + Features: raw.Features, + MaxWorkers: raw.MaxWorkers, + MaxPlugins: raw.MaxPlugins, + ExpiresAt: exp, + IssuedAt: iat, + Issuer: iss, + Subject: sub, + RefreshURL: raw.RefreshURL, + } + + return claims, nil +} diff --git a/internal/license/metrics.go b/internal/license/metrics.go new file mode 100644 index 0000000..75e6c25 --- /dev/null +++ b/internal/license/metrics.go @@ -0,0 +1,37 @@ +package license + +import "github.com/prometheus/client_golang/prometheus" + +// LicenseMetrics содержит Prometheus-метрики лицензирования. +type LicenseMetrics struct { + valid prometheus.Gauge // {namespace}_license_valid: 1=valid, 0=invalid/absent + expiryTS prometheus.Gauge // {namespace}_license_expiry_timestamp_seconds + featureDenied *prometheus.CounterVec // {namespace}_license_feature_denied_total{feature} +} + +// NewLicenseMetrics создаёт и регистрирует метрики лицензирования в реестре. +func NewLicenseMetrics(reg *prometheus.Registry, namespace string) *LicenseMetrics { + m := &LicenseMetrics{ + valid: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "license_valid", + Help: "Whether the license is valid (1) or invalid/absent (0).", + }), + expiryTS: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "license_expiry_timestamp_seconds", + Help: "Unix timestamp of the license expiration.", + }), + featureDenied: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Name: "license_feature_denied_total", + Help: "Total number of feature access denials.", + }, []string{"feature"}), + } + + if reg != nil { + reg.MustRegister(m.valid, m.expiryTS, m.featureDenied) + } + + return m +} diff --git a/internal/ratelimiter/config.go b/internal/ratelimiter/config.go new file mode 100644 index 0000000..f3ca499 --- /dev/null +++ b/internal/ratelimiter/config.go @@ -0,0 +1,19 @@ +package ratelimiter + +import "time" + +// Config содержит параметры rate limiting. +type Config struct { + RequestsPerSecond float64 `yaml:"requests_per_second" env:"REQUESTS_PER_SECOND"` + Burst int `yaml:"burst" env:"BURST"` + CleanupInterval time.Duration `yaml:"cleanup_interval" env:"CLEANUP_INTERVAL"` +} + +// DefaultConfig возвращает конфигурацию по умолчанию. +func DefaultConfig() Config { + return Config{ + RequestsPerSecond: 10.0, + Burst: 20, + CleanupInterval: 10 * time.Minute, + } +} diff --git a/internal/ratelimiter/extractor.go b/internal/ratelimiter/extractor.go new file mode 100644 index 0000000..82092fb --- /dev/null +++ b/internal/ratelimiter/extractor.go @@ -0,0 +1,29 @@ +package ratelimiter + +import ( + "context" + "net" + + "google.golang.org/grpc/peer" +) + +// KeyExtractor определяет стратегию извлечения ключа rate limiting из контекста. +// Возвращает ключ (например, client IP) или пустую строку, если ключ не найден. +// При пустом ключе rate limiting пропускается (fail-open). +type KeyExtractor func(ctx context.Context) string + +// PeerIPExtractor извлекает client IP из peer.FromContext(). +// Это реализация по умолчанию, использующая IP, установленный realip interceptor. +func PeerIPExtractor(ctx context.Context) string { + p, ok := peer.FromContext(ctx) + if !ok || p.Addr == nil { + return "" + } + + host, _, err := net.SplitHostPort(p.Addr.String()) + if err != nil { + return p.Addr.String() + } + + return host +} diff --git a/internal/ratelimiter/ratelimiter.go b/internal/ratelimiter/ratelimiter.go new file mode 100644 index 0000000..0977d31 --- /dev/null +++ b/internal/ratelimiter/ratelimiter.go @@ -0,0 +1,190 @@ +package ratelimiter + +import ( + "context" + "log/slog" + "strconv" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/time/rate" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" + + "github.com/easyp-tech/service/internal/license" +) + +// clientBucket хранит rate.Limiter и время последнего обращения. +type clientBucket struct { + limiter *rate.Limiter + lastSeen time.Time + mu sync.Mutex // защищает lastSeen +} + +// RateLimiter реализует ratelimit.Limiter из grpc-ecosystem. +type RateLimiter struct { + cfg Config + gate *license.FeatureGate + keyExtractor KeyExtractor + logger *slog.Logger + buckets sync.Map // map[string]*clientBucket + + // Prometheus metrics + requestsTotal *prometheus.CounterVec // labels: status (allowed/denied), client_ip + activeClients prometheus.Gauge +} + +// New создаёт RateLimiter и регистрирует метрики. +// keyExtractor определяет стратегию извлечения ключа. Если nil — используется PeerIPExtractor. +func New( + cfg Config, + gate *license.FeatureGate, + keyExtractor KeyExtractor, + logger *slog.Logger, + reg *prometheus.Registry, +) *RateLimiter { + if keyExtractor == nil { + keyExtractor = PeerIPExtractor + } + + requestsTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "easyp_rate_limit_requests_total", + Help: "Total number of requests processed by rate limiter", + }, + []string{"status", "client_ip"}, + ) + + activeClients := prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "easyp_rate_limit_active_clients", + Help: "Current number of active client buckets", + }, + ) + + if reg != nil { + reg.MustRegister(requestsTotal) + reg.MustRegister(activeClients) + } + + return &RateLimiter{ + cfg: cfg, + gate: gate, + keyExtractor: keyExtractor, + logger: logger, + requestsTotal: requestsTotal, + activeClients: activeClients, + } +} + +// Limit реализует ratelimit.Limiter. +// Возвращает nil если запрос разрешён, или status.Error(RESOURCE_EXHAUSTED) если отклонён. +func (rl *RateLimiter) Limit(ctx context.Context) error { + // Step 1: Check FeatureGate — if nil (no license system), treat as enabled. + if rl.gate != nil && !rl.gate.Enabled(license.FeatureRateLimiting) { + return nil + } + + // Step 2: Extract key via keyExtractor — empty string means fail-open. + key := rl.keyExtractor(ctx) + if key == "" { + return nil + } + + // Step 3: Get or create clientBucket for this key. + newBucket := &clientBucket{ + limiter: rate.NewLimiter(rate.Limit(rl.cfg.RequestsPerSecond), rl.cfg.Burst), + lastSeen: time.Now(), + } + val, loaded := rl.buckets.LoadOrStore(key, newBucket) + bucket := val.(*clientBucket) + + if !loaded { + // New bucket created — update active clients gauge. + rl.activeClients.Inc() + } + + // Step 4: Update lastSeen. + bucket.mu.Lock() + bucket.lastSeen = time.Now() + bucket.mu.Unlock() + + // Step 5: Check token availability. + allowed := bucket.limiter.Allow() + + // Step 6: Prepare rate limit headers. + remaining := int(bucket.limiter.Tokens()) + if remaining < 0 { + remaining = 0 + } + + tokensNeeded := float64(rl.cfg.Burst) - bucket.limiter.Tokens() + if tokensNeeded < 0 { + tokensNeeded = 0 + } + resetDuration := time.Duration(tokensNeeded / rl.cfg.RequestsPerSecond * float64(time.Second)) + resetTime := time.Now().Add(resetDuration).Unix() + + md := metadata.Pairs( + "x-ratelimit-limit", strconv.Itoa(rl.cfg.Burst), + "x-ratelimit-remaining", strconv.Itoa(remaining), + "x-ratelimit-reset", strconv.FormatInt(resetTime, 10), + ) + + // Step 7: Allowed — set headers, increment metric, return nil. + if allowed { + _ = grpc.SetHeader(ctx, md) + rl.requestsTotal.WithLabelValues("allowed", key).Inc() + return nil + } + + // Step 8: Denied — set trailing metadata, increment metric, log, return error. + _ = grpc.SetTrailer(ctx, md) + rl.requestsTotal.WithLabelValues("denied", key).Inc() + rl.logger.Warn("rate limit exceeded", + slog.String("client_ip", key), + slog.Int64("reset_time", resetTime), + ) + + return status.Errorf(codes.ResourceExhausted, "rate limit exceeded for %s, retry after %d", key, resetTime) +} + +// StartCleanup запускает фоновую горутину очистки stale buckets. +// Останавливается при отмене контекста. +func (rl *RateLimiter) StartCleanup(ctx context.Context) { + go func() { + ticker := time.NewTicker(rl.cfg.CleanupInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + rl.cleanup() + } + } + }() +} + +// cleanup удаляет stale buckets и обновляет метрику activeClients. +func (rl *RateLimiter) cleanup() { + var active int + rl.buckets.Range(func(key, value any) bool { + bucket := value.(*clientBucket) + bucket.mu.Lock() + lastSeen := bucket.lastSeen + bucket.mu.Unlock() + + if time.Since(lastSeen) > rl.cfg.CleanupInterval { + rl.buckets.Delete(key) + } else { + active++ + } + return true + }) + rl.activeClients.Set(float64(active)) +}