# Patch summary: add Serverless Inference (SI) metrics scraping to do-agent.
#
# cmd/do-agent/aggregation.go
#   Declares siLabelsToDrop (six label names) and siAggregationSpec, which maps
#   nine gen_ai_platform_inference_proxy_* metric names to that label list.
#
# cmd/do-agent/config.go
#   Adds the siMetricsPath config field and the --si-metrics-path flag that
#   fills it. Everything below is guarded by `config.siMetricsPath != ""`:
#     - initAggregatorSpecs appends each siAggregationSpec entry onto
#       aggregateSpecs under the same metric-name key.
#     - initCollectors calls collector.NewScraper("si", ...) with siWhitelist
#       and the configured scrape timeout; on error it logs and does not add
#       the collector, otherwise it appends the scraper to cols.
#
# cmd/do-agent/whitelist.go
#   Declares siWhitelist with the same nine metric names, each set to true.
#
# NOTE(review): line breaks, indentation and gofmt column alignment were
# reconstructed from a whitespace-collapsed copy of this patch. Alignment of
# the unchanged DI context lines, the indentation of the config fields, and the
# position of the blank context line in the initAggregatorSpecs hunk are best
# guesses -- confirm against the repository (or apply with whitespace
# tolerance) before relying on this file.
# NOTE(review): the DI entries visible in the context lines end in `_bucket`,
# while the SI duration metrics are listed by base name only -- confirm the
# scraper whitelist and the aggregator match the intended series names.
diff --git a/cmd/do-agent/aggregation.go b/cmd/do-agent/aggregation.go
index 9548dcd..38130aa 100644
--- a/cmd/do-agent/aggregation.go
+++ b/cmd/do-agent/aggregation.go
@@ -237,3 +237,25 @@ var diAggregationSpec = map[string][]string{
 	"gradient_infra_di_vllm:request_time_per_output_token_seconds_bucket": diLabelsToDrop,
 	"gradient_infra_di_vllm:time_to_first_token_seconds_bucket":           diLabelsToDrop,
 }
+
+// siLabelsToDrop lists the high-cardinality labels dropped from Serverless Inference (SI) metrics.
+var siLabelsToDrop = []string{
+	"container",
+	"job",
+	"otel_scope_name",
+	"otel_scope_schema_url",
+	"otel_scope_version",
+	"resource_uuid",
+}
+
+var siAggregationSpec = map[string][]string{
+	"gen_ai_platform_inference_proxy_http_requests_total":                   siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_http_request_duration_seconds":         siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_token_throughput":                      siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_rate_limit_exceeded":                   siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_cache_hit":                             siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_cache_miss":                            siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_inference_client_ttft_duration":        siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_inference_client_itl_duration_seconds": siLabelsToDrop,
+	"gen_ai_platform_inference_proxy_inference_client_requests_total":       siLabelsToDrop,
+}
diff --git a/cmd/do-agent/config.go b/cmd/do-agent/config.go
index c3c8e2c..2f7eae6 100644
--- a/cmd/do-agent/config.go
+++ b/cmd/do-agent/config.go
@@ -47,6 +47,7 @@ var (
 		defaultMaxMetricLength int
 		promAddr               string
 		diMetricsPath          string
+		siMetricsPath          string
 		gpuMetricsPath         string
 		topK                   int
 		scrapeTimeout          time.Duration
@@ -131,6 +132,9 @@ func init() {
 	kingpin.Flag("di-metrics-path", "enable Dedicated Inference (DI) metrics collection from a prometheus endpoint").
 		StringVar(&config.diMetricsPath)
 
+	kingpin.Flag("si-metrics-path", "enable Serverless Inference (SI) metrics collection from a prometheus endpoint").
+		StringVar(&config.siMetricsPath)
+
 	kingpin.Flag("web.listen", "enable a local endpoint for scrapeable prometheus metrics as well").
 		Default("false").
 		BoolVar(&config.webListen)
@@ -267,6 +271,11 @@ func initAggregatorSpecs() map[string][]string {
 			aggregateSpecs[k] = append(aggregateSpecs[k], v...)
 		}
 	}
+	if config.siMetricsPath != "" {
+		for k, v := range siAggregationSpec {
+			aggregateSpecs[k] = append(aggregateSpecs[k], v...)
+		}
+	}
 
 	return aggregateSpecs
 }
@@ -350,6 +359,15 @@ func initCollectors() []prometheus.Collector {
 		}
 	}
 
+	if config.siMetricsPath != "" {
+		si, err := collector.NewScraper("si", config.siMetricsPath, nil, siWhitelist, collector.WithTimeout(config.scrapeTimeout))
+		if err != nil {
+			log.Error("Failed to initialize SI metrics collector: %+v", err)
+		} else {
+			cols = append(cols, si)
+		}
+	}
+
 	if config.gpuMetricsPath != "" {
 		gpu, err := collector.NewScraper("gpu", config.gpuMetricsPath, nil, gpuWhitelist, collector.WithTimeout(config.scrapeTimeout))
 		if err != nil {
diff --git a/cmd/do-agent/whitelist.go b/cmd/do-agent/whitelist.go
index 0be6eee..cd133ba 100644
--- a/cmd/do-agent/whitelist.go
+++ b/cmd/do-agent/whitelist.go
@@ -274,3 +274,15 @@ var diWhitelist = map[string]bool{
 	"gradient_infra_di_vllm:request_time_per_output_token_seconds_bucket": true,
 	"gradient_infra_di_vllm:time_to_first_token_seconds_bucket":           true,
 }
+
+var siWhitelist = map[string]bool{
+	"gen_ai_platform_inference_proxy_http_requests_total":                   true,
+	"gen_ai_platform_inference_proxy_http_request_duration_seconds":         true,
+	"gen_ai_platform_inference_proxy_token_throughput":                      true,
+	"gen_ai_platform_inference_proxy_rate_limit_exceeded":                   true,
+	"gen_ai_platform_inference_proxy_cache_hit":                             true,
+	"gen_ai_platform_inference_proxy_cache_miss":                            true,
+	"gen_ai_platform_inference_proxy_inference_client_ttft_duration":        true,
+	"gen_ai_platform_inference_proxy_inference_client_itl_duration_seconds": true,
+	"gen_ai_platform_inference_proxy_inference_client_requests_total":       true,
+}