Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apps/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
"@octokit/rest": "catalog:",
"@octokit/webhooks": "^13.7.4",
"@octokit/webhooks-types": "^7.5.1",
"@opentelemetry/api": "^1.9.0",
"@opentelemetry/exporter-metrics-otlp-http": "^0.55.0",
"@opentelemetry/resources": "^1.28.0",
"@opentelemetry/sdk-metrics": "^1.28.0",
"@t3-oss/env-core": "catalog:",
"@trpc/server": "11.0.0-rc.364",
"bcryptjs": "^2.4.3",
Expand Down
3 changes: 3 additions & 0 deletions apps/api/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ export const env = createEnv({
BASE_URL: z.string().optional(),

OTEL_SAMPLER_RATIO: z.number().optional().default(1),
OTEL_EXPORTER_OTLP_ENDPOINT: z.string().optional(),
OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: z.string().optional(),
OTEL_SERVICE_NAME: z.string().default("ctrlplane-api"),

AZURE_APP_CLIENT_ID: z.string().optional(),

Expand Down
2 changes: 2 additions & 0 deletions apps/api/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import "@/instrumentation.js";

import { env } from "@/config.js";
import { app } from "@/server.js";

Expand Down
49 changes: 49 additions & 0 deletions apps/api/src/instrumentation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { env } from "@/config.js";
import { metrics } from "@opentelemetry/api";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
import { Resource } from "@opentelemetry/resources";
import {
MeterProvider,
PeriodicExportingMetricReader,
} from "@opentelemetry/sdk-metrics";

import { logger } from "@ctrlplane/logger";

const stripTrailingSlash = (s: string) => s.replace(/\/$/, "");
const appendMetricsPath = (base: string) =>
`${stripTrailingSlash(base)}/v1/metrics`;

const metricsUrl =
env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT ??
(env.OTEL_EXPORTER_OTLP_ENDPOINT &&
appendMetricsPath(env.OTEL_EXPORTER_OTLP_ENDPOINT));

if (metricsUrl) {
const meterProvider = new MeterProvider({
resource: new Resource({ "service.name": env.OTEL_SERVICE_NAME }),
readers: [
new PeriodicExportingMetricReader({
exporter: new OTLPMetricExporter({ url: metricsUrl }),
exportIntervalMillis: 30_000,
}),
],
});

metrics.setGlobalMeterProvider(meterProvider);

for (const signal of ["SIGINT", "SIGTERM"] as const) {
process.on(signal, () => {
meterProvider
.shutdown()
.catch((err) =>
logger.error("meterProvider.shutdown error", { err }),
);
});
}

logger.info(`OTel metrics enabled (endpoint: ${metricsUrl})`);
} else {
logger.info(
"OTel metrics disabled (set OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT to enable)",
);
}
78 changes: 78 additions & 0 deletions apps/api/src/middleware/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import type { Counter } from "@opentelemetry/api";
import type { Request, RequestHandler } from "express";
import { metrics } from "@opentelemetry/api";

// Clients in this set are tagged with their version (e.g. `ctrlc/1.2.3`);
// everyone else is tagged with just the client name to keep cardinality bounded.
const VERSIONED_CLIENT_ALLOWLIST = new Set(["ctrlc"]);

const BROWSER_MATCHERS: Array<{ test: (ua: string) => boolean; name: string }> =
[
// Order matters: Edge / Opera UAs include "Chrome/" too, so they go first.
{ test: (ua) => ua.includes("Edg/") || ua.includes("Edge/"), name: "Edge" },
{
test: (ua) => ua.includes("OPR/") || ua.includes("Opera/"),
name: "Opera",
},
{ test: (ua) => ua.includes("Chrome/"), name: "Chrome" },
{ test: (ua) => ua.includes("Firefox/"), name: "Firefox" },
// Safari last: every WebKit-based browser includes "Safari/" in its UA.
{
test: (ua) => ua.startsWith("Mozilla/") && ua.includes("Safari/"),
name: "Safari",
},
];

export const simplifyUserAgent = (
userAgent: string | string[] | undefined | null,
): string => {
const ua = Array.isArray(userAgent) ? userAgent[0] : userAgent;
if (!ua || ua.trim() === "") return "unknown";
const trimmed = ua.trim();

for (const { test, name } of BROWSER_MATCHERS) {
if (test(trimmed)) return name;
}

// Generic "Mozilla/..." UA we don't recognize — still a browser-shape.
if (trimmed.startsWith("Mozilla/")) return "browser-other";

// Non-browser clients usually look like "name/version ..." — take the first
// product token, then split name/version.
const [rawName, version] = trimmed.split(/\s+/, 1)[0]!.split("/", 2);
const name = rawName?.toLowerCase();
if (!name || !/^[a-z][a-z0-9._-]{0,63}$/.test(name)) return "other";
return VERSIONED_CLIENT_ALLOWLIST.has(name) && version
? `${name}/${version}`
: name;
Comment on lines +45 to +47
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Unvalidated version string will cause metric-label cardinality explosion.

The raw version substring is passed directly to the OTel counter label for allowlisted clients:

return VERSIONED_CLIENT_ALLOWLIST.has(name) && version
  ? `${name}/${version}`
  : name;

Any client sending User-Agent: ctrlc/<UUID>, ctrlc/<unix-timestamp>, or any high-cardinality string will generate a unique metric label combination per request. Most time-series backends (Prometheus, Thanos, Victoria Metrics) have hard cardinality limits; exceeding them causes OOM crashes or label truncation silently.

Validate version against a bounded pattern (e.g. semver-like) before using it as a label:

🛡️ Proposed fix
+ const VALID_VERSION_RE = /^\d{1,4}\.\d{1,4}(?:\.\d{1,4})?(?:[-+][a-zA-Z0-9._-]{0,32})?$/;

  ...

- return VERSIONED_CLIENT_ALLOWLIST.has(name) && version
-   ? `${name}/${version}`
-   : name;
+ if (VERSIONED_CLIENT_ALLOWLIST.has(name) && version && VALID_VERSION_RE.test(version))
+   return `${name}/${version}`;
+ return name;
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@apps/api/src/middleware/metrics.ts` around lines 45 - 47, The metric label
construction that returns `${name}/${version}` for members of
VERSIONED_CLIENT_ALLOWLIST must validate the version string to prevent
high-cardinality labels; update the code around the label-building logic (the
expression that uses VERSIONED_CLIENT_ALLOWLIST.has(name) and `version`) to only
include the version when it matches a bounded pattern (e.g. semver-like regex or
a strict allowed-set) and otherwise fall back to a stable token such as just
`name` or `name/unknown`; ensure the validation is implemented as a small helper
(e.g. isValidClientVersion(version)) and used before interpolating the version
into the metric label.

};

const resolveRouteTemplate = (req: Request): string => {
const template = req.route?.path;
if (template == null) return "unmatched";
return `${req.baseUrl}${typeof template === "string" ? template : String(template)}`;
};

let counter: Counter | null = null;
const getCounter = (): Counter => {
if (counter) return counter;
counter = metrics
.getMeter("ctrlplane-api")
.createCounter("http.server.requests_by_client", {
description:
"Number of HTTP requests received, labeled by simplified client (User-Agent)",
});
return counter;
};
Comment thread
dacbd marked this conversation as resolved.

export const metricsMiddleware: RequestHandler = (req, res, next) => {
res.on("finish", () => {
getCounter().add(1, {
client: simplifyUserAgent(req.headers["user-agent"]),
method: req.method,
status_code: String(res.statusCode),
route: resolveRouteTemplate(req),
});
});
next();
};
2 changes: 2 additions & 0 deletions apps/api/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { dirname, join } from "path";
import { fileURLToPath } from "url";
import { requireAuth } from "@/middleware/auth.js";
import { errorHandler } from "@/middleware/error-handler.js";
import { metricsMiddleware } from "@/middleware/metrics.js";
import { createV1Router } from "@/routes/index.js";
import * as trpcExpress from "@trpc/server/adapters/express";
import { toNodeHandler } from "better-auth/node";
Expand Down Expand Up @@ -66,6 +67,7 @@ const app = express()
.use(express.urlencoded({ extended: true, limit: "100mb" }))
.use(express.json({ limit: "100mb" }))
.use(cookieParser())
.use(metricsMiddleware)
.use(loggerMiddleware)

// Health check endpoint (before OpenAPI validator)
Expand Down
9 changes: 8 additions & 1 deletion packages/workspace-engine-sdk/eslint.config.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
import baseConfig from "@ctrlplane/eslint-config/base";

/** @type {import('typescript-eslint').Config} */
export default [];
export default [
{
ignores: ["dist/**", "node_modules/**", "src/schema.ts"],
},
...baseConfig,
];
Loading
Loading