From 515532045dab19728638966165ac1a5a7ce760ee Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Tue, 17 Feb 2026 16:07:33 +0100 Subject: [PATCH 1/6] discovery: add system-probe-lite support --- api/datadoghq/v2alpha1/datadogagent_types.go | 8 + .../v2alpha1/zz_generated.deepcopy.go | 5 + .../datadoghq.com_datadogagentinternals.yaml | 16 ++ ...hq.com_datadogagentinternals_v1alpha1.json | 8 + .../datadoghq.com_datadogagentprofiles.yaml | 8 + ...ghq.com_datadogagentprofiles_v1alpha1.json | 4 + .../bases/v1/datadoghq.com_datadogagents.yaml | 16 ++ .../datadoghq.com_datadogagents_v2alpha1.json | 8 + docs/configuration.v2alpha1.md | 1 + docs/configuration_public.md | 3 + .../datadogagent/controller_v2_test.go | 8 +- .../defaults/datadogagent_default.go | 4 +- .../defaults/datadogagent_default_test.go | 48 ++-- .../feature/servicediscovery/feature.go | 81 ++++++- .../feature/servicediscovery/feature_test.go | 208 +++++++++++++++++- 15 files changed, 391 insertions(+), 35 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index caa0f99a9a..e505f7f638 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,6 +647,14 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` + // EnabledByDefault is set by the operator when it enables this feature via default configuration, + // as opposed to an explicit user choice. When true, the system-probe binary is not used as a + // fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + // instead, to avoid unexpected resource usage on older agent images. + // This field is managed by the operator and should not be set by users. + // +optional + EnabledByDefault *bool `json:"enabledByDefault,omitempty"` + // Enables the service discovery network stats collection. // Default: true // +optional diff --git a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go index 4733c0d92d..0962f85f11 100644 --- a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go @@ -3453,6 +3453,11 @@ func (in *ServiceDiscoveryFeatureConfig) DeepCopyInto(out *ServiceDiscoveryFeatu *out = new(bool) **out = **in } + if in.EnabledByDefault != nil { + in, out := &in.EnabledByDefault, &out.EnabledByDefault + *out = new(bool) + **out = **in + } if in.NetworkStats != nil { in, out := &in.NetworkStats, &out.NetworkStats *out = new(ServiceDiscoveryNetworkStatsConfig) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index e58c5325d9..bf0ec26be1 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. @@ -10909,6 +10917,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index ea311f2d5e..c4bb96e3f9 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", @@ -10742,6 +10746,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index d6908b943c..fbaa47722b 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index ad4153ce5a..24085c65f9 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2592,6 +2592,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 0325733812..d52a723ef5 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2488,6 +2488,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. @@ -10959,6 +10967,14 @@ spec: Enables the service discovery check. Default: false type: boolean + enabledByDefault: + description: |- + EnabledByDefault is set by the operator when it enables this feature via default configuration, + as opposed to an explicit user choice. When true, the system-probe binary is not used as a + fallback if system-probe-lite is unavailable — the container falls back to sleep infinity + instead, to avoid unexpected resource usage on older agent images. + This field is managed by the operator and should not be set by users. + type: boolean networkStats: description: |- Enables the service discovery network stats collection. diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index a1e1934161..db93361c7f 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2588,6 +2588,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", @@ -10807,6 +10811,10 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, + "enabledByDefault": { + "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "type": "boolean" + }, "networkStats": { "additionalProperties": false, "description": "Enables the service discovery network stats collection.\nDefault: true", diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 07cc8e4a5a..9dab0ca530 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,6 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | +| features.serviceDiscovery.enabledByDefault | EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index c014206974..d4832b8f89 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -384,6 +384,9 @@ spec: `features.serviceDiscovery.enabled` : Enables the service discovery check. Default: false +`features.serviceDiscovery.enabledByDefault` +: EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. + `features.serviceDiscovery.networkStats.enabled` : Enables the Service Discovery Network Stats feature. Default: true diff --git a/internal/controller/datadogagent/controller_v2_test.go b/internal/controller/datadogagent/controller_v2_test.go index af15dbfdf7..b23b5f6af1 100644 --- a/internal/controller/datadogagent/controller_v2_test.go +++ b/internal/controller/datadogagent/controller_v2_test.go @@ -1752,7 +1752,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { wantFunc: func(t *testing.T, c client.Client) { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "ccac39a3a007bad81d7baf8febc6445f", + constants.MD5DDAIDeploymentAnnotationKey: "62d2822cc8547055dc8e2fca6f222a17", } verifyDDAI(t, c, []v1alpha1.DatadogAgentInternal{expectedDDAI}) @@ -1784,7 +1784,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { baseDDAI := getBaseDDAI(dda) expectedDDAI := baseDDAI.DeepCopy() expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f2aa21d0ecced63c091ca2df3d31e451", + constants.MD5DDAIDeploymentAnnotationKey: "c362b9a0aa0e2ad1a1d60f4ee8575c8f", } expectedDDAI.Spec.Features.ClusterChecks.UseClusterChecksRunners = apiutils.NewBoolPointer(true) expectedDDAI.Spec.Global.Credentials = &v2alpha1.DatadogCredentials{ @@ -1860,7 +1860,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { profileDDAI := getBaseDDAI(dda) profileDDAI.Name = "foo-profile" profileDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "73e0cc1e445001e326507ac23654104e", + constants.MD5DDAIDeploymentAnnotationKey: "2c3c1664f08fb6d6591294f2c878d1dd", } profileDDAI.Labels[constants.ProfileLabelKey] = "foo-profile" profileDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ @@ -2094,7 +2094,7 @@ func getBaseDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { func getDefaultDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "f98c0497c66e2747f6d116970ab8f0b1", + constants.MD5DDAIDeploymentAnnotationKey: "7e6c12e645247762609327ab80b63d9e", } expectedDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ v2alpha1.NodeAgentComponentName: { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 3c1324834c..20f7878720 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -42,6 +42,7 @@ const ( defaultGPUMonitoringEnabled bool = false defaultServiceDiscoveryEnabled bool = false + defaultServiceDiscoveryEnabledByDefault bool = false defaultServiceDiscoveryNetworkStatsEnabled bool = true defaultAPMEnabled bool = true @@ -304,8 +305,9 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) - if *ddaSpec.Features.ServiceDiscovery.Enabled { + if *ddaSpec.Features.ServiceDiscovery.Enabled || *ddaSpec.Features.ServiceDiscovery.EnabledByDefault { if ddaSpec.Features.ServiceDiscovery.NetworkStats == nil { ddaSpec.Features.ServiceDiscovery.NetworkStats = &v2alpha1.ServiceDiscoveryNetworkStatsConfig{} } diff --git a/internal/controller/datadogagent/defaults/datadogagent_default_test.go b/internal/controller/datadogagent/defaults/datadogagent_default_test.go index 890ae6adff..ab47528238 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default_test.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default_test.go @@ -212,7 +212,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -361,7 +362,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -460,7 +462,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -596,7 +599,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -757,7 +761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -913,7 +918,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1069,7 +1075,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1234,7 +1241,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1390,7 +1398,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1549,7 +1558,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1751,7 +1761,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -1876,7 +1887,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2033,7 +2045,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2213,7 +2226,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2372,7 +2386,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabled), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: apiutils.NewBoolPointer(defaultGPUMonitoringEnabled), @@ -2544,7 +2559,8 @@ func Test_defaultFeatures(t *testing.T) { Enabled: apiutils.NewBoolPointer(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: apiutils.NewBoolPointer(valueTrue), + Enabled: apiutils.NewBoolPointer(valueTrue), + EnabledByDefault: apiutils.NewBoolPointer(defaultServiceDiscoveryEnabledByDefault), NetworkStats: &v2alpha1.ServiceDiscoveryNetworkStatsConfig{ Enabled: apiutils.NewBoolPointer(defaultServiceDiscoveryNetworkStatsEnabled), }, diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index 214d5100c1..285d5b5b36 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -6,6 +6,8 @@ package servicediscovery import ( + "fmt" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -30,7 +32,9 @@ func buildFeature(*feature.Options) feature.Feature { } type serviceDiscoveryFeature struct { - networkStatsEnabled bool + networkStatsEnabled bool + useSystemProbeLite bool + userExplicitlyEnabled bool } // ID returns the ID of the Feature @@ -40,21 +44,66 @@ func (f *serviceDiscoveryFeature) ID() feature.IDType { // Configure is used to configure the feature from a v2alpha1.DatadogAgent instance. func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, _ *v2alpha1.RemoteConfigConfiguration) (reqComp feature.RequiredComponents) { - if ddaSpec.Features != nil && ddaSpec.Features.ServiceDiscovery != nil && apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.Enabled) { - reqComp.Agent = feature.RequiredComponent{ - IsRequired: apiutils.NewBoolPointer(true), - Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, - } + if ddaSpec.Features == nil || ddaSpec.Features.ServiceDiscovery == nil { + return reqComp + } - f.networkStatsEnabled = true - if ddaSpec.Features.ServiceDiscovery.NetworkStats != nil { - f.networkStatsEnabled = apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.NetworkStats.Enabled) - } + sd := ddaSpec.Features.ServiceDiscovery + + // Explicit Enabled=false always disables the feature, even if EnabledByDefault=true. + if sd.Enabled != nil && !*sd.Enabled { + return reqComp + } + // Feature requires either an explicit opt-in or a default enablement. + if !apiutils.BoolValue(sd.Enabled) && !apiutils.BoolValue(sd.EnabledByDefault) { + return reqComp + } + + reqComp.Agent = feature.RequiredComponent{ + IsRequired: apiutils.NewBoolPointer(true), + Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, + } + + f.networkStatsEnabled = true + if sd.NetworkStats != nil { + f.networkStatsEnabled = apiutils.BoolValue(sd.NetworkStats.Enabled) } + f.useSystemProbeLite = !hasOtherSystemProbeFeatures(ddaSpec.Features) + f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) + return reqComp } +// systemProbeLiteCommand returns the shell command for the system-probe container when +// system-probe-lite is preferred. If userOptedIn is true (user explicitly enabled discovery), +// system-probe is used as the fallback — the user has accepted the resource cost. +// Otherwise (enabled by default), the fallback is sleep infinity to avoid unexpectedly +// running system-probe on older agent images where the discovery feature may not be supported. +func systemProbeLiteCommand(socketPath string, userOptedIn bool) string { + fallback := "sleep infinity" + if userOptedIn { + fallback = "system-probe --config=/etc/datadog-agent/system-probe.yaml" + } + return fmt.Sprintf("system-probe-lite run --socket %s --log-level ${DD_LOG_LEVEL:-info} || %s", socketPath, fallback) +} + +// hasOtherSystemProbeFeatures returns true if any feature besides service discovery +// requires the full system-probe binary. When true, system-probe-lite cannot be used. +func hasOtherSystemProbeFeatures(features *v2alpha1.DatadogFeatures) bool { + if features == nil { + return false + } + return (features.NPM != nil && apiutils.BoolValue(features.NPM.Enabled)) || + (features.CWS != nil && apiutils.BoolValue(features.CWS.Enabled)) || + (features.CSPM != nil && apiutils.BoolValue(features.CSPM.Enabled) && apiutils.BoolValue(features.CSPM.RunInSystemProbe)) || + (features.USM != nil && apiutils.BoolValue(features.USM.Enabled)) || + (features.OOMKill != nil && apiutils.BoolValue(features.OOMKill.Enabled)) || + (features.TCPQueueLength != nil && apiutils.BoolValue(features.TCPQueueLength.Enabled)) || + (features.EBPFCheck != nil && apiutils.BoolValue(features.EBPFCheck.Enabled)) || + (features.GPU != nil && apiutils.BoolValue(features.GPU.Enabled) && apiutils.BoolValue(features.GPU.PrivilegedMode)) +} + // ManageDependencies allows a feature to manage its dependencies. // Feature's dependencies should be added in the store. func (f *serviceDiscoveryFeature) ManageDependencies(managers feature.ResourceManagers, provider string) error { @@ -136,6 +185,18 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.CoreAgentContainerName, socketEnvVar) managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) + // Direct PodTemplateSpec mutation: no managers API for command overrides. + if f.useSystemProbeLite { + for i := range managers.PodTemplateSpec().Spec.Containers { + c := &managers.PodTemplateSpec().Spec.Containers[i] + if c.Name == string(apicommon.SystemProbeContainerName) { + c.Command = []string{"/bin/sh", "-c"} + c.Args = []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, f.userExplicitlyEnabled)} + break + } + } + } + return nil } diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go index 8cddad23b5..06fa110c75 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go @@ -44,6 +44,42 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { ddaServiceDiscoveryEnabledWithNetStats.Spec.Features.ServiceDiscovery.NetworkStats.Enabled = apiutils.NewBoolPointer(true) } + ddaWithNPM := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + NPM: &v2alpha1.NPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaWithCWS := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + CWS: &v2alpha1.CWSFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + }, + } + + ddaEnabledByDefault := v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Features: &v2alpha1.DatadogFeatures{ + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ + EnabledByDefault: apiutils.NewBoolPointer(true), + }, + }, + }, + } + tests := test.FeatureTestSuite{ { Name: "service discovery not enabled", @@ -54,25 +90,176 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { Name: "service discovery enabled - no network stats", DDA: ddaServiceDiscoveryEnabledNoNetStats, WantConfigure: true, - Agent: test.NewDefaultComponentTest().WithWantFunc(getWantFunc(noNetStats)), + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(noNetStats, true, true)), }, { Name: "service discovery enabled - with network stats", DDA: ddaServiceDiscoveryEnabledWithNetStats, WantConfigure: true, - Agent: test.NewDefaultComponentTest().WithWantFunc(getWantFunc(withNetStats)), + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, true, true)), + }, + { + Name: "system-probe-lite not used when NPM also enabled", + DDA: &ddaWithNPM, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, false, true)), + }, + { + Name: "system-probe-lite not used when CWS also enabled", + DDA: &ddaWithCWS, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, false, true)), + }, + { + Name: "system-probe-lite enabled by default - no system-probe fallback", + DDA: &ddaEnabledByDefault, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(withNetStats, true, false)), }, } tests.Run(t, buildFeature) } +func Test_hasOtherSystemProbeFeatures(t *testing.T) { + tests := []struct { + name string + features *v2alpha1.DatadogFeatures + want bool + }{ + { + name: "nil features", + features: nil, + want: false, + }, + { + name: "no other features", + features: &v2alpha1.DatadogFeatures{}, + want: false, + }, + { + name: "NPM enabled", + features: &v2alpha1.DatadogFeatures{ + NPM: &v2alpha1.NPMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CWS enabled", + features: &v2alpha1.DatadogFeatures{ + CWS: &v2alpha1.CWSFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "USM enabled", + features: &v2alpha1.DatadogFeatures{ + USM: &v2alpha1.USMFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "OOMKill enabled", + features: &v2alpha1.DatadogFeatures{ + OOMKill: &v2alpha1.OOMKillFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "TCPQueueLength enabled", + features: &v2alpha1.DatadogFeatures{ + TCPQueueLength: &v2alpha1.TCPQueueLengthFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "EBPFCheck enabled", + features: &v2alpha1.DatadogFeatures{ + EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{Enabled: apiutils.NewBoolPointer(true)}, + }, + want: true, + }, + { + name: "CSPM enabled with RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + RunInSystemProbe: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "CSPM enabled without RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + { + name: "GPU enabled with PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + PrivilegedMode: apiutils.NewBoolPointer(true), + }, + }, + want: true, + }, + { + name: "GPU enabled without PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: apiutils.NewBoolPointer(true), + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, hasOtherSystemProbeFeatures(tt.features)) + }) + } +} + const ( noNetStats = false withNetStats = true ) -func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodTemplateManagers) { +func createFuncWithSystemProbeContainer() func(testing.TB) (feature.PodTemplateManagers, string) { + return func(t testing.TB) (feature.PodTemplateManagers, string) { + newPTS := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: string(apicommon.CoreAgentContainerName), + }, + { + Name: string(apicommon.SystemProbeContainerName), + }, + }, + }, + } + return fake.NewPodTemplateManagers(t, newPTS), "" + } +} + +func getWantFunc(withNetStats bool, useSPL bool, userOptedIn bool) func(t testing.TB, mgrInterface feature.PodTemplateManagers) { return func(t testing.TB, mgrInterface feature.PodTemplateManagers) { mgr := mgrInterface.(*fake.PodTemplateManagers) @@ -200,7 +387,6 @@ func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodT }, } - // check env vars wantSPEnvVars := []*corev1.EnvVar{ { Name: DDServiceDiscoveryEnabled, @@ -221,6 +407,20 @@ func getWantFunc(withNetStats bool) func(t testing.TB, mgrInterface feature.PodT systemProbeEnvVars := mgr.EnvVarMgr.EnvVarsByC[apicommon.SystemProbeContainerName] assert.True(t, apiutils.IsEqualStruct(systemProbeEnvVars, wantSPEnvVars), "System Probe envvars \ndiff = %s", cmp.Diff(systemProbeEnvVars, wantSPEnvVars)) + + // check system-probe container command override + for _, c := range mgr.PodTemplateSpec().Spec.Containers { + if c.Name == string(apicommon.SystemProbeContainerName) { + if useSPL { + assert.Equal(t, []string{"/bin/sh", "-c"}, c.Command, "System Probe command should be overridden for system-probe-lite") + assert.Equal(t, []string{systemProbeLiteCommand(common.DefaultSystemProbeSocketPath, userOptedIn)}, c.Args, "System Probe args mismatch") + } else { + assert.Empty(t, c.Command, "System Probe command should not be overridden") + assert.Empty(t, c.Args, "System Probe args should not be overridden") + } + break + } + } } } From 14334d12168a223fefd761c183142838250b261f Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Thu, 26 Mar 2026 11:29:30 +0100 Subject: [PATCH 2/6] docs: applied suggestions --- docs/configuration.v2alpha1.md | 2 +- docs/configuration_public.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 9dab0ca530..81e015e9ee 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,7 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | -| features.serviceDiscovery.enabledByDefault | EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. | +| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index d4832b8f89..48e82b8a71 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -385,7 +385,7 @@ spec: : Enables the service discovery check. Default: false `features.serviceDiscovery.enabledByDefault` -: EnabledByDefault is set by the operator when it enables this feature via default configuration, as opposed to an explicit user choice. When true, the system-probe binary is not used as a fallback if system-probe-lite is unavailable — the container falls back to sleep infinity instead, to avoid unexpected resource usage on older agent images. This field is managed by the operator and should not be set by users. +: Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. `features.serviceDiscovery.networkStats.enabled` : Enables the Service Discovery Network Stats feature. Default: true From cf6e1d3be2df8d1bea1e835fa248b723f958f8a4 Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Thu, 26 Mar 2026 11:31:01 +0100 Subject: [PATCH 3/6] fix unset enabled case --- .../datadogagent/defaults/datadogagent_default.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 20f7878720..aacb56cfcb 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -304,10 +304,14 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { if ddaSpec.Features.ServiceDiscovery == nil { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } - apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) + // Only default Enabled to false when not enabled-by-default, so that Enabled=nil remains + // distinguishable from Enabled=false (explicit user opt-out) when EnabledByDefault=true. + if !apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { + apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) + } - if *ddaSpec.Features.ServiceDiscovery.Enabled || *ddaSpec.Features.ServiceDiscovery.EnabledByDefault { + if apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.Enabled) || apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { if ddaSpec.Features.ServiceDiscovery.NetworkStats == nil { ddaSpec.Features.ServiceDiscovery.NetworkStats = &v2alpha1.ServiceDiscoveryNetworkStatsConfig{} } From 8b7ca14f4a4c4c62394511485dbdce43c27d4f3e Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Fri, 27 Mar 2026 10:15:08 +0100 Subject: [PATCH 4/6] fix codex CR --- .../datadogagent/feature/servicediscovery/feature.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index 285d5b5b36..b91b474c39 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -33,8 +33,11 @@ func buildFeature(*feature.Options) feature.Feature { type serviceDiscoveryFeature struct { networkStatsEnabled bool - useSystemProbeLite bool userExplicitlyEnabled bool + // features holds a pointer to the live DDA features struct so that ManageNodeAgent + // can re-evaluate hasOtherSystemProbeFeatures after Remote Config state has been + // merged by other features' Configure calls (e.g. USM merges RC state into the spec). + features *v2alpha1.DatadogFeatures } // ID returns the ID of the Feature @@ -69,7 +72,7 @@ func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.D f.networkStatsEnabled = apiutils.BoolValue(sd.NetworkStats.Enabled) } - f.useSystemProbeLite = !hasOtherSystemProbeFeatures(ddaSpec.Features) + f.features = ddaSpec.Features f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) return reqComp @@ -186,7 +189,9 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) // Direct PodTemplateSpec mutation: no managers API for command overrides. - if f.useSystemProbeLite { + // Re-evaluate here (not cached from Configure) so that RC state merged by other + // features' Configure calls (e.g. USM) is taken into account. + if !hasOtherSystemProbeFeatures(f.features) { for i := range managers.PodTemplateSpec().Spec.Containers { c := &managers.PodTemplateSpec().Spec.Containers[i] if c.Name == string(apicommon.SystemProbeContainerName) { From 550f2e79b40ad018087d23529c071a0f16acb2cc Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Fri, 27 Mar 2026 11:07:59 +0100 Subject: [PATCH 5/6] regenerate docs --- api/datadoghq/v2alpha1/datadogagent_types.go | 9 ++++----- .../datadoghq.com_datadogagentinternals.yaml | 18 ++++++++---------- ...ghq.com_datadogagentinternals_v1alpha1.json | 4 ++-- .../v1/datadoghq.com_datadogagentprofiles.yaml | 9 ++++----- ...oghq.com_datadogagentprofiles_v1alpha1.json | 2 +- .../bases/v1/datadoghq.com_datadogagents.yaml | 18 ++++++++---------- .../datadoghq.com_datadogagents_v2alpha1.json | 4 ++-- docs/configuration.v2alpha1.md | 2 +- 8 files changed, 30 insertions(+), 36 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index e505f7f638..e99e95bbc7 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,11 +647,10 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` - // EnabledByDefault is set by the operator when it enables this feature via default configuration, - // as opposed to an explicit user choice. When true, the system-probe binary is not used as a - // fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - // instead, to avoid unexpected resource usage on older agent images. - // This field is managed by the operator and should not be set by users. + // Indicates that the operator enabled this feature automatically rather than in response to an + // explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + // back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + // older agent images. This field is managed by the operator and must not be set by users. // +optional EnabledByDefault *bool `json:"enabledByDefault,omitempty"` diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index bf0ec26be1..97b8db5c96 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- @@ -10919,11 +10918,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index c4bb96e3f9..3feea4293b 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2589,7 +2589,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { @@ -10747,7 +10747,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index fbaa47722b..2e67e2f1f7 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index 24085c65f9..afc7ff06ea 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2593,7 +2593,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index d52a723ef5..faad8df420 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2490,11 +2490,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- @@ -10969,11 +10968,10 @@ spec: type: boolean enabledByDefault: description: |- - EnabledByDefault is set by the operator when it enables this feature via default configuration, - as opposed to an explicit user choice. When true, the system-probe binary is not used as a - fallback if system-probe-lite is unavailable — the container falls back to sleep infinity - instead, to avoid unexpected resource usage on older agent images. - This field is managed by the operator and should not be set by users. + Indicates that the operator enabled this feature automatically rather than in response to an + explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls + back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on + older agent images. This field is managed by the operator and must not be set by users. type: boolean networkStats: description: |- diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index db93361c7f..115e2bb07c 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2589,7 +2589,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { @@ -10812,7 +10812,7 @@ "type": "boolean" }, "enabledByDefault": { - "description": "EnabledByDefault is set by the operator when it enables this feature via default configuration,\nas opposed to an explicit user choice. When true, the system-probe binary is not used as a\nfallback if system-probe-lite is unavailable — the container falls back to sleep infinity\ninstead, to avoid unexpected resource usage on older agent images.\nThis field is managed by the operator and should not be set by users.", + "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", "type": "boolean" }, "networkStats": { diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 81e015e9ee..1ac3acbe76 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -200,7 +200,7 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | -| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | +| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | Enables the Service Discovery Network Stats feature. Default: true | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | From 04225c52d4b02287d1e1e84ff17ba92e899cdd87 Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Wed, 8 Apr 2026 16:23:13 +0200 Subject: [PATCH 6/6] discovery: remove EnabledByDefault from CRD, use nil tri-state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove EnabledByDefault from the CRD and instead use the nil/false/true tri-state on Enabled: - Enabled=nil → not configured by user (operator may auto-enable later) - Enabled=false → explicit user opt-out - Enabled=true → explicit user opt-in (system-probe fallback allowed) The fallback distinction is preserved: userExplicitlyEnabled is only true when Enabled=*true, so a future operator auto-enable path (nil) would still use the conservative sleep-infinity fallback. Enabled is intentionally not defaulted to false so that nil remains distinguishable from an explicit opt-out. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- api/datadoghq/v2alpha1/datadogagent_types.go | 7 - .../v2alpha1/zz_generated.deepcopy.go | 5 - .../datadoghq.com_datadogagentinternals.yaml | 14 -- ...hq.com_datadogagentinternals_v1alpha1.json | 8 - .../datadoghq.com_datadogagentprofiles.yaml | 7 - ...ghq.com_datadogagentprofiles_v1alpha1.json | 4 - .../bases/v1/datadoghq.com_datadogagents.yaml | 14 -- .../datadoghq.com_datadogagents_v2alpha1.json | 8 - docs/configuration.v2alpha1.md | 1 - docs/configuration_public.md | 3 - .../datadogagent/controller_v2_test.go | 8 +- .../defaults/datadogagent_default.go | 12 +- .../defaults/datadogagent_default_test.go | 78 ++-------- .../datadogagent/feature/factory.go | 27 ---- .../feature/servicediscovery/feature.go | 49 +++--- .../feature/servicediscovery/feature_test.go | 147 +++++++++++++----- 16 files changed, 163 insertions(+), 229 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index ddc739f386..3929616e1d 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -647,13 +647,6 @@ type ServiceDiscoveryFeatureConfig struct { // +optional Enabled *bool `json:"enabled,omitempty"` - // Indicates that the operator enabled this feature automatically rather than in response to an - // explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - // back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - // older agent images. This field is managed by the operator and must not be set by users. - // +optional - EnabledByDefault *bool `json:"enabledByDefault,omitempty"` - // DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28. // +deprecated // +optional diff --git a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go index 7d5a912a22..9c768f10ab 100644 --- a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go @@ -3503,11 +3503,6 @@ func (in *ServiceDiscoveryFeatureConfig) DeepCopyInto(out *ServiceDiscoveryFeatu *out = new(bool) **out = **in } - if in.EnabledByDefault != nil { - in, out := &in.EnabledByDefault, &out.EnabledByDefault - *out = new(bool) - **out = **in - } if in.NetworkStats != nil { in, out := &in.NetworkStats, &out.NetworkStats *out = new(ServiceDiscoveryNetworkStatsConfig) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml index c30bc441d2..30ad410214 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals.yaml @@ -2503,13 +2503,6 @@ spec: Enables the service discovery check. Default: false type: boolean - enabledByDefault: - description: |- - Indicates that the operator enabled this feature automatically rather than in response to an - explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - older agent images. This field is managed by the operator and must not be set by users. - type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: @@ -10942,13 +10935,6 @@ spec: Enables the service discovery check. Default: false type: boolean - enabledByDefault: - description: |- - Indicates that the operator enabled this feature automatically rather than in response to an - explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - older agent images. This field is managed by the operator and must not be set by users. - type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json index 6ac575cdb0..b6e0f69009 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentinternals_v1alpha1.json @@ -2603,10 +2603,6 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, - "enabledByDefault": { - "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", - "type": "boolean" - }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", @@ -10776,10 +10772,6 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, - "enabledByDefault": { - "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", - "type": "boolean" - }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml index 5ac3d70335..afd1b0e507 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml @@ -2503,13 +2503,6 @@ spec: Enables the service discovery check. Default: false type: boolean - enabledByDefault: - description: |- - Indicates that the operator enabled this feature automatically rather than in response to an - explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - older agent images. This field is managed by the operator and must not be set by users. - type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json index 5f828ca2f3..3185fb8533 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagentprofiles_v1alpha1.json @@ -2607,10 +2607,6 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, - "enabledByDefault": { - "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", - "type": "boolean" - }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 1b26b14720..22ecfd013a 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -2507,13 +2507,6 @@ spec: Enables the service discovery check. Default: false type: boolean - enabledByDefault: - description: |- - Indicates that the operator enabled this feature automatically rather than in response to an - explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - older agent images. This field is managed by the operator and must not be set by users. - type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: @@ -11022,13 +11015,6 @@ spec: Enables the service discovery check. Default: false type: boolean - enabledByDefault: - description: |- - Indicates that the operator enabled this feature automatically rather than in response to an - explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls - back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on - older agent images. This field is managed by the operator and must not be set by users. - type: boolean networkStats: description: 'DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.' properties: diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json index c1efe0ed71..84ba9c4e7d 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogagents_v2alpha1.json @@ -2603,10 +2603,6 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, - "enabledByDefault": { - "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", - "type": "boolean" - }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", @@ -10869,10 +10865,6 @@ "description": "Enables the service discovery check.\nDefault: false", "type": "boolean" }, - "enabledByDefault": { - "description": "Indicates that the operator enabled this feature automatically rather than in response to an\nexplicit user setting. When true, if `system-probe-lite` is unavailable, the container falls\nback to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on\nolder agent images. This field is managed by the operator and must not be set by users.", - "type": "boolean" - }, "networkStats": { "additionalProperties": false, "description": "DEPRECATED: NetworkStats is no longer configurable and will be ignored. Scheduled for removal in v1.28.", diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 9db7355340..3705d38cdd 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -202,7 +202,6 @@ spec: | features.sbom.host.analyzers | To use for SBOM collection. | | features.sbom.host.enabled | Enable this option to activate SBOM collection. Default: false | | features.serviceDiscovery.enabled | Enables the service discovery check. Default: false | -| features.serviceDiscovery.enabledByDefault | Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. | | features.serviceDiscovery.networkStats.enabled | DEPRECATED: this field is ignored. | | features.tcpQueueLength.enabled | Enables the TCP queue length eBPF-based check. Default: false | | features.usm.enabled | Enables Universal Service Monitoring. Default: false | diff --git a/docs/configuration_public.md b/docs/configuration_public.md index 3fd7f241c9..c536cfd615 100644 --- a/docs/configuration_public.md +++ b/docs/configuration_public.md @@ -384,9 +384,6 @@ spec: `features.serviceDiscovery.enabled` : Enables the service discovery check. Default: false -`features.serviceDiscovery.enabledByDefault` -: Indicates that the operator enabled this feature automatically rather than in response to an explicit user setting. When true, if `system-probe-lite` is unavailable, the container falls back to `sleep infinity` rather than `system-probe`, to avoid unexpected resource usage on older agent images. This field is managed by the operator and must not be set by users. - `features.serviceDiscovery.networkStats.enabled` : DEPRECATED: this field is ignored. diff --git a/internal/controller/datadogagent/controller_v2_test.go b/internal/controller/datadogagent/controller_v2_test.go index 5a359e1bc4..ec4b8c92f4 100644 --- a/internal/controller/datadogagent/controller_v2_test.go +++ b/internal/controller/datadogagent/controller_v2_test.go @@ -1753,7 +1753,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { wantFunc: func(t *testing.T, c client.Client) { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "62d2822cc8547055dc8e2fca6f222a17", + constants.MD5DDAIDeploymentAnnotationKey: "d472c741d84f93e553772d29ba5ea914", } verifyDDAI(t, c, []v1alpha1.DatadogAgentInternal{expectedDDAI}) @@ -1785,7 +1785,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { baseDDAI := getBaseDDAI(dda) expectedDDAI := baseDDAI.DeepCopy() expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "c362b9a0aa0e2ad1a1d60f4ee8575c8f", + constants.MD5DDAIDeploymentAnnotationKey: "9d6165c057934517d4c0623699a4257a", } expectedDDAI.Spec.Features.ClusterChecks.UseClusterChecksRunners = ptr.To(true) expectedDDAI.Spec.Global.Credentials = &v2alpha1.DatadogCredentials{ @@ -1861,7 +1861,7 @@ func Test_DDAI_ReconcileV3(t *testing.T) { profileDDAI := getBaseDDAI(dda) profileDDAI.Name = "foo-profile" profileDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "2c3c1664f08fb6d6591294f2c878d1dd", + constants.MD5DDAIDeploymentAnnotationKey: "d868cbefaa12de8dfc8e94552dcf8528", } profileDDAI.Labels[constants.ProfileLabelKey] = "foo-profile" profileDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ @@ -2095,7 +2095,7 @@ func getBaseDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { func getDefaultDDAI(dda *v2alpha1.DatadogAgent) v1alpha1.DatadogAgentInternal { expectedDDAI := getBaseDDAI(dda) expectedDDAI.Annotations = map[string]string{ - constants.MD5DDAIDeploymentAnnotationKey: "7e6c12e645247762609327ab80b63d9e", + constants.MD5DDAIDeploymentAnnotationKey: "1a68caa8fd645f09d034b2b97a61f543", } expectedDDAI.Spec.Override = map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ v2alpha1.NodeAgentComponentName: { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default.go b/internal/controller/datadogagent/defaults/datadogagent_default.go index 079d2d4164..faa470fca8 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default.go @@ -43,9 +43,6 @@ const ( defaultGPUMonitoringEnabled bool = false - defaultServiceDiscoveryEnabled bool = false - defaultServiceDiscoveryEnabledByDefault bool = false - defaultAPMEnabled bool = true defaultAPMHostPortEnabled bool = false defaultAPMHostPort int32 = 8126 @@ -305,12 +302,9 @@ func defaultFeaturesConfig(ddaSpec *v2alpha1.DatadogAgentSpec) { if ddaSpec.Features.ServiceDiscovery == nil { ddaSpec.Features.ServiceDiscovery = &v2alpha1.ServiceDiscoveryFeatureConfig{} } - apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.EnabledByDefault, defaultServiceDiscoveryEnabledByDefault) - // Only default Enabled to false when not enabled-by-default, so that Enabled=nil remains - // distinguishable from Enabled=false (explicit user opt-out) when EnabledByDefault=true. - if !apiutils.BoolValue(ddaSpec.Features.ServiceDiscovery.EnabledByDefault) { - apiutils.DefaultBooleanIfUnset(&ddaSpec.Features.ServiceDiscovery.Enabled, defaultServiceDiscoveryEnabled) - } + // Enabled is intentionally not defaulted to false — Enabled=nil (not configured by user) + // must remain distinguishable from Enabled=false (explicit opt-out) so that the feature + // can be auto-activated by the operator in the future without exposing a CRD field. // GPU monitoring feature if ddaSpec.Features.GPU == nil { diff --git a/internal/controller/datadogagent/defaults/datadogagent_default_test.go b/internal/controller/datadogagent/defaults/datadogagent_default_test.go index b1706a484f..e4528cf222 100644 --- a/internal/controller/datadogagent/defaults/datadogagent_default_test.go +++ b/internal/controller/datadogagent/defaults/datadogagent_default_test.go @@ -213,10 +213,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -363,10 +360,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -463,10 +457,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -600,10 +591,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -762,10 +750,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -919,10 +904,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1076,10 +1058,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1242,10 +1221,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1399,10 +1375,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1559,10 +1532,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1762,10 +1732,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -1888,10 +1855,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -2046,10 +2010,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -2227,10 +2188,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -2387,10 +2345,7 @@ func Test_defaultFeatures(t *testing.T) { EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{ Enabled: ptr.To(defaultEBPFCheckEnabled), }, - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(defaultServiceDiscoveryEnabled), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), - }, + ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{}, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), }, @@ -2561,8 +2516,7 @@ func Test_defaultFeatures(t *testing.T) { Enabled: ptr.To(defaultEBPFCheckEnabled), }, ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - Enabled: ptr.To(valueTrue), - EnabledByDefault: ptr.To(defaultServiceDiscoveryEnabledByDefault), + Enabled: ptr.To(valueTrue), }, GPU: &v2alpha1.GPUFeatureConfig{ Enabled: ptr.To(defaultGPUMonitoringEnabled), diff --git a/internal/controller/datadogagent/feature/factory.go b/internal/controller/datadogagent/feature/factory.go index 3086c2beae..7299683e0e 100644 --- a/internal/controller/datadogagent/feature/factory.go +++ b/internal/controller/datadogagent/feature/factory.go @@ -20,33 +20,6 @@ func init() { featureBuilders = map[IDType]BuildFunc{} } -// SystemProbeContainerRequiredByFeatures returns true if any registered feature -// other than excludeID declares SystemProbeContainerName in its RequiredComponents -// for the given DDA configuration. -// -// Because it iterates the feature registry, it is automatically correct when new -// system-probe features are added — they self-register via their package init(). -func SystemProbeContainerRequiredByFeatures(dda metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, ddaRCStatus *v2alpha1.RemoteConfigConfiguration, excludeID IDType) bool { - builderMutex.RLock() - defer builderMutex.RUnlock() - - // Deep-copy the spec so that side-effecting Configure calls (e.g. USM merging - // Remote Config state) do not mutate the live spec. - specCopy := ddaSpec.DeepCopy() - - for id, buildFunc := range featureBuilders { - if id == excludeID { - continue - } - feat := buildFunc(&Options{}) - reqComp := feat.Configure(dda, specCopy, ddaRCStatus) - if slices.Contains(reqComp.Agent.Containers, common.SystemProbeContainerName) { - return true - } - } - return false -} - // Register use to register a Feature to the Feature factory. func Register(id IDType, buildFunc BuildFunc) error { builderMutex.Lock() diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature.go b/internal/controller/datadogagent/feature/servicediscovery/feature.go index b6574823a4..c741a08e7e 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature.go @@ -33,12 +33,10 @@ func buildFeature(*feature.Options) feature.Feature { type serviceDiscoveryFeature struct { userExplicitlyEnabled bool - // dda, ddaSpec, ddaRCStatus are stored so that ManageNodeAgent can call - // feature.SystemProbeContainerRequiredByFeatures after all Configure calls have - // completed (including Remote Config state merges by other features such as USM). - dda metav1.Object - ddaSpec *v2alpha1.DatadogAgentSpec - ddaRCStatus *v2alpha1.RemoteConfigConfiguration + // features holds a pointer to the live DDA features struct so that ManageNodeAgent + // can re-evaluate hasOtherSystemProbeFeatures after Remote Config state has been + // merged by other features' Configure calls (e.g. USM merges RC state into the spec). + features *v2alpha1.DatadogFeatures } // ID returns the ID of the Feature @@ -47,19 +45,15 @@ func (f *serviceDiscoveryFeature) ID() feature.IDType { } // Configure is used to configure the feature from a v2alpha1.DatadogAgent instance. -func (f *serviceDiscoveryFeature) Configure(dda metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, ddaRCStatus *v2alpha1.RemoteConfigConfiguration) (reqComp feature.RequiredComponents) { +func (f *serviceDiscoveryFeature) Configure(_ metav1.Object, ddaSpec *v2alpha1.DatadogAgentSpec, _ *v2alpha1.RemoteConfigConfiguration) (reqComp feature.RequiredComponents) { if ddaSpec.Features == nil || ddaSpec.Features.ServiceDiscovery == nil { return reqComp } sd := ddaSpec.Features.ServiceDiscovery - // Explicit Enabled=false always disables the feature, even if EnabledByDefault=true. - if sd.Enabled != nil && !*sd.Enabled { - return reqComp - } - // Feature requires either an explicit opt-in or a default enablement. - if !apiutils.BoolValue(sd.Enabled) && !apiutils.BoolValue(sd.EnabledByDefault) { + // Enabled=nil (not configured) or Enabled=false (explicit opt-out) → disabled. + if !apiutils.BoolValue(sd.Enabled) { return reqComp } @@ -68,10 +62,9 @@ func (f *serviceDiscoveryFeature) Configure(dda metav1.Object, ddaSpec *v2alpha1 Containers: []apicommon.AgentContainerName{apicommon.CoreAgentContainerName, apicommon.SystemProbeContainerName}, } - f.dda = dda - f.ddaSpec = ddaSpec - f.ddaRCStatus = ddaRCStatus - f.userExplicitlyEnabled = apiutils.BoolValue(sd.Enabled) + f.features = ddaSpec.Features + // True only when Enabled=true (user opted in); nil means operator auto-enabled (future path). + f.userExplicitlyEnabled = sd.Enabled != nil && *sd.Enabled return reqComp } @@ -89,6 +82,22 @@ func systemProbeLiteCommand(socketPath string, userOptedIn bool) string { return fmt.Sprintf("system-probe-lite run --socket %s --log-level ${DD_LOG_LEVEL:-info} || %s", socketPath, fallback) } +// hasOtherSystemProbeFeatures returns true if any feature besides service discovery +// requires the full system-probe binary. When true, system-probe-lite cannot be used. +func hasOtherSystemProbeFeatures(features *v2alpha1.DatadogFeatures) bool { + if features == nil { + return false + } + return (features.NPM != nil && apiutils.BoolValue(features.NPM.Enabled)) || + (features.CWS != nil && apiutils.BoolValue(features.CWS.Enabled)) || + (features.CSPM != nil && apiutils.BoolValue(features.CSPM.Enabled) && apiutils.BoolValue(features.CSPM.RunInSystemProbe)) || + (features.USM != nil && apiutils.BoolValue(features.USM.Enabled)) || + (features.OOMKill != nil && apiutils.BoolValue(features.OOMKill.Enabled)) || + (features.TCPQueueLength != nil && apiutils.BoolValue(features.TCPQueueLength.Enabled)) || + (features.EBPFCheck != nil && apiutils.BoolValue(features.EBPFCheck.Enabled)) || + (features.GPU != nil && apiutils.BoolValue(features.GPU.Enabled) && apiutils.BoolValue(features.GPU.PrivilegedMode)) +} + // ManageDependencies allows a feature to manage its dependencies. // Feature's dependencies should be added in the store. func (f *serviceDiscoveryFeature) ManageDependencies(managers feature.ResourceManagers, provider string) error { @@ -145,9 +154,9 @@ func (f *serviceDiscoveryFeature) ManageNodeAgent(managers feature.PodTemplateMa managers.EnvVar().AddEnvVarToContainer(apicommon.SystemProbeContainerName, socketEnvVar) // Direct PodTemplateSpec mutation: no managers API for command overrides. - // Evaluated here (not during Configure) so that Remote Config state merged by - // other features' Configure calls (e.g. USM) is taken into account. - if !feature.SystemProbeContainerRequiredByFeatures(f.dda, f.ddaSpec, f.ddaRCStatus, feature.ServiceDiscoveryType) { + // Re-evaluate here (not cached from Configure) so that RC state merged by other + // features' Configure calls (e.g. USM) is taken into account. + if !hasOtherSystemProbeFeatures(f.features) { for i := range managers.PodTemplateSpec().Spec.Containers { c := &managers.PodTemplateSpec().Spec.Containers[i] if c.Name == string(apicommon.SystemProbeContainerName) { diff --git a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go index f2c8d7544f..9a1a8c8ca5 100644 --- a/internal/controller/datadogagent/feature/servicediscovery/feature_test.go +++ b/internal/controller/datadogagent/feature/servicediscovery/feature_test.go @@ -22,12 +22,6 @@ import ( "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature/fake" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature/test" - - // Blank imports trigger the init() of these packages, registering them in the - // feature factory. Tests that check SPL is suppressed when these features are - // active require them to be registered. - _ "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature/cws" - _ "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature/npm" ) func Test_serviceDiscoveryFeature_Configure(t *testing.T) { @@ -69,16 +63,6 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { }, } - ddaEnabledByDefault := v2alpha1.DatadogAgent{ - Spec: v2alpha1.DatadogAgentSpec{ - Features: &v2alpha1.DatadogFeatures{ - ServiceDiscovery: &v2alpha1.ServiceDiscoveryFeatureConfig{ - EnabledByDefault: ptr.To(true), - }, - }, - }, - } - tests := test.FeatureTestSuite{ { Name: "service discovery not enabled", @@ -94,36 +78,127 @@ func Test_serviceDiscoveryFeature_Configure(t *testing.T) { WithWantFunc(getWantFunc(true, true)), }, { - Name: "system-probe-lite enabled by default - no system-probe fallback", - DDA: &ddaEnabledByDefault, + Name: "system-probe-lite not used when NPM also enabled", + DDA: &ddaWithNPM, WantConfigure: true, Agent: test.NewDefaultComponentTest(). WithCreateFunc(createFuncWithSystemProbeContainer()). - WithWantFunc(getWantFunc(true, false)), + WithWantFunc(getWantFunc(false, true)), + }, + { + Name: "system-probe-lite not used when CWS also enabled", + DDA: &ddaWithCWS, + WantConfigure: true, + Agent: test.NewDefaultComponentTest(). + WithCreateFunc(createFuncWithSystemProbeContainer()). + WithWantFunc(getWantFunc(false, true)), }, } tests.Run(t, buildFeature) +} - // These cases involve multiple registered features (npm, cws are imported via blank imports - // so they register in the factory). FeatureTestSuite is designed for one feature at a time, - // so we test service discovery's ManageNodeAgent output directly here. - for _, tc := range []struct { - name string - dda *v2alpha1.DatadogAgent +func Test_hasOtherSystemProbeFeatures(t *testing.T) { + tests := []struct { + name string + features *v2alpha1.DatadogFeatures + want bool }{ - {"system-probe-lite not used when NPM also enabled", &ddaWithNPM}, - {"system-probe-lite not used when CWS also enabled", &ddaWithCWS}, - } { - t.Run(tc.name, func(t *testing.T) { - feat := buildFeature(nil) - reqComp := feat.Configure(tc.dda, &tc.dda.Spec, tc.dda.Status.RemoteConfigConfiguration) - assert.True(t, reqComp.IsEnabled()) - - tplManager, provider := createFuncWithSystemProbeContainer()(t) - assert.NoError(t, feat.ManageNodeAgent(tplManager, provider)) + { + name: "nil features", + features: nil, + want: false, + }, + { + name: "no other features", + features: &v2alpha1.DatadogFeatures{}, + want: false, + }, + { + name: "NPM enabled", + features: &v2alpha1.DatadogFeatures{ + NPM: &v2alpha1.NPMFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "CWS enabled", + features: &v2alpha1.DatadogFeatures{ + CWS: &v2alpha1.CWSFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "USM enabled", + features: &v2alpha1.DatadogFeatures{ + USM: &v2alpha1.USMFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "OOMKill enabled", + features: &v2alpha1.DatadogFeatures{ + OOMKill: &v2alpha1.OOMKillFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "TCPQueueLength enabled", + features: &v2alpha1.DatadogFeatures{ + TCPQueueLength: &v2alpha1.TCPQueueLengthFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "EBPFCheck enabled", + features: &v2alpha1.DatadogFeatures{ + EBPFCheck: &v2alpha1.EBPFCheckFeatureConfig{Enabled: ptr.To(true)}, + }, + want: true, + }, + { + name: "CSPM enabled with RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: ptr.To(true), + RunInSystemProbe: ptr.To(true), + }, + }, + want: true, + }, + { + name: "CSPM enabled without RunInSystemProbe", + features: &v2alpha1.DatadogFeatures{ + CSPM: &v2alpha1.CSPMFeatureConfig{ + Enabled: ptr.To(true), + }, + }, + want: false, + }, + { + name: "GPU enabled with PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: ptr.To(true), + PrivilegedMode: ptr.To(true), + }, + }, + want: true, + }, + { + name: "GPU enabled without PrivilegedMode", + features: &v2alpha1.DatadogFeatures{ + GPU: &v2alpha1.GPUFeatureConfig{ + Enabled: ptr.To(true), + }, + }, + want: false, + }, + } - getWantFunc(false, true)(t, tplManager) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, hasOtherSystemProbeFeatures(tt.features)) }) } }