diff --git a/config/core/configmaps/observability.yaml b/config/core/configmaps/observability.yaml index 41f3bee1fd6..417bf4bb788 100644 --- a/config/core/configmaps/observability.yaml +++ b/config/core/configmaps/observability.yaml @@ -23,7 +23,7 @@ metadata: app.kubernetes.io/version: devel app.kubernetes.io/name: knative-eventing annotations: - knative.dev/example-checksum: "0270bb17" + knative.dev/example-checksum: "afa5507a" data: _example: | ################################ @@ -58,6 +58,11 @@ data: # If a zero or negative value is passed the default reporting OTel period is used (60 secs). metrics-export-interval: 60s + # metrics-attributes-deny is a comma-separated list of metric attribute keys to filter + # out from all metrics. This can help prevent OOM issues caused by unbounded + # metric cardinality in production (e.g. cloudevents.type, messaging.destination.name). + metrics-attributes-deny: "" + # sink-event-error-reporting.enable whether the adapter reports a kube event to the CRD indicating # a failure to send a cloud event to the sink. sink-event-error-reporting.enable: "false" diff --git a/pkg/observability/config.go b/pkg/observability/config.go index 9f0673caf06..ce1dcad1581 100644 --- a/pkg/observability/config.go +++ b/pkg/observability/config.go @@ -74,7 +74,9 @@ func NewFromMap(m map[string]string) (*Config, error) { c.BaseConfig.Metrics.Endpoint = fmt.Sprintf(":%d", DefaultMetricsPort) } - err := configmap.Parse(m, configmap.As(EnableSinkEventErrorReportingKey, &c.EnableSinkEventErrorReporting)) + err := configmap.Parse(m, + configmap.As(EnableSinkEventErrorReportingKey, &c.EnableSinkEventErrorReporting), + ) if err != nil { fmt.Printf("failed to parse enable-sink-error-reporting: %s\n", err.Error()) return c, err diff --git a/pkg/observability/config_test.go b/pkg/observability/config_test.go index b67d5e14eed..3b4040c56f8 100644 --- a/pkg/observability/config_test.go +++ b/pkg/observability/config_test.go @@ -23,8 +23,11 @@ import ( ) func TestNewFromMap(t *testing.T) { - configWithOverride := DefaultConfig() - configWithOverride.EnableSinkEventErrorReporting = true + configWithSinkEventErrorReporting := DefaultConfig() + configWithSinkEventErrorReporting.EnableSinkEventErrorReporting = true + + configWithDenyList := DefaultConfig() + configWithDenyList.Metrics.AttributesDeny = "cloudevents.type, messaging.destination.name" testCases := map[string]struct { m map[string]string @@ -39,7 +42,13 @@ func TestNewFromMap(t *testing.T) { m: map[string]string{ EnableSinkEventErrorReportingKey: "true", }, - want: configWithOverride, + want: configWithSinkEventErrorReporting, + }, + "metric attributes deny list": { + m: map[string]string{ + "metrics-attributes-deny": "cloudevents.type, messaging.destination.name", + }, + want: configWithDenyList, }, "valid keys, invalid sink event error reporting value": { m: map[string]string{ diff --git a/pkg/observability/otel/otel.go b/pkg/observability/otel/otel.go index cfcdc684d6e..01fbc66a725 100644 --- a/pkg/observability/otel/otel.go +++ b/pkg/observability/otel/otel.go @@ -60,10 +60,15 @@ func SetupObservabilityOrDie( otelResource := resource.Default(component) + meterOpts := []metric.Option{metric.WithResource(otelResource)} + if denyList := cfg.Metrics.AttributesDenyList(); len(denyList) > 0 { + meterOpts = append(meterOpts, metric.WithView(metrics.MetricAttributesDenyFilter(denyList))) + } + meterProvider, err := metrics.NewMeterProvider( ctx, cfg.Metrics, - metric.WithResource(otelResource), + meterOpts..., ) if err != nil { logger.Fatalw("failed to set up meter provider", zap.Error(err)) diff --git a/pkg/observability/otel/otel_test.go b/pkg/observability/otel/otel_test.go new file mode 100644 index 00000000000..95b26555d90 --- /dev/null +++ b/pkg/observability/otel/otel_test.go @@ -0,0 +1,59 @@ +/* +Copyright 2026 The Knative Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package otel + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric" + + "knative.dev/pkg/observability/metrics" +) + +func TestMetricAttributesDenyFilter(t *testing.T) { + view := metrics.MetricAttributesDenyFilter([]string{"cloudevents.type", "messaging.destination.name"}) + + stream, ok := view(metric.Instrument{Name: "kn.eventing.dispatch.duration"}) + assert.True(t, ok, "view should match kn.eventing.* instruments") + assert.NotNil(t, stream.AttributeFilter) + + denied := []attribute.KeyValue{ + attribute.String("cloudevents.type", "com.example.event"), + attribute.String("messaging.destination.name", "my-destination"), + } + for _, kv := range denied { + assert.False(t, stream.AttributeFilter(kv), "attribute %s should be denied", kv.Key) + } + + allowed := []attribute.KeyValue{ + attribute.String("messaging.system", "knative"), + attribute.Int("http.response.status_code", 200), + } + for _, kv := range allowed { + assert.True(t, stream.AttributeFilter(kv), "attribute %s should be allowed", kv.Key) + } +} + +func TestMetricAttributesDenyFilterMatchesAllInstruments(t *testing.T) { + view := metrics.MetricAttributesDenyFilter([]string{"cloudevents.type"}) + + stream, ok := view(metric.Instrument{Name: "http.server.request.duration"}) + assert.True(t, ok, "view should match all instruments") + assert.NotNil(t, stream.AttributeFilter) +}