From d6fcf3e19f9a24317977867ce6ae495c3c79dd2f Mon Sep 17 00:00:00 2001 From: kristofhetenyi Date: Tue, 27 Feb 2024 13:09:47 +0100 Subject: [PATCH 1/4] Add GetEnvOrDefault utils fnc --- internal/utils/env.go | 11 +++++++++++ internal/utils/env_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 internal/utils/env.go create mode 100644 internal/utils/env_test.go diff --git a/internal/utils/env.go b/internal/utils/env.go new file mode 100644 index 0000000..0604c10 --- /dev/null +++ b/internal/utils/env.go @@ -0,0 +1,11 @@ +package utils + +import "os" + +// GetEnvOrDefault returns the value of the environment variable with the given key, or the given default value if the env is not set. +func GetEnvOrDefault(key, defaultValue string) string { + if value, ok := os.LookupEnv(key); ok { + return value + } + return defaultValue +} diff --git a/internal/utils/env_test.go b/internal/utils/env_test.go new file mode 100644 index 0000000..2fa75cd --- /dev/null +++ b/internal/utils/env_test.go @@ -0,0 +1,29 @@ +package utils + +import ( + "github.com/stretchr/testify/assert" + "os" + "testing" +) + +func TestGetEnvOrDefault(t *testing.T) { + // Set an environment variable for testing + err := os.Setenv("TEST_KEY", "TEST_VALUE") + assert.Nil(t, err) + + // Test case when the environment variable exists + value := GetEnvOrDefault("TEST_KEY", "DEFAULT_VALUE") + if value != "TEST_VALUE" { + t.Errorf("Expected TEST_VALUE, but got %s", value) + } + + // Test case when the environment variable does not exist + value = GetEnvOrDefault("NON_EXISTENT_KEY", "DEFAULT_VALUE") + if value != "DEFAULT_VALUE" { + t.Errorf("Expected DEFAULT_VALUE, but got %s", value) + } + + // Unset the environment variable after testing + err = os.Unsetenv("TEST_KEY") + assert.Nil(t, err) +} From 209448eebb3093899f1b5940b4f067a548bf56e2 Mon Sep 17 00:00:00 2001 From: kristofhetenyi Date: Tue, 27 Feb 2024 13:24:00 +0100 Subject: [PATCH 2/4] Add Custom label and pool 
name option --- internal/status/status.go | 15 ++++++++++++++- internal/status/status_test.go | 9 +++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/internal/status/status.go b/internal/status/status.go index 3cd046b..fa1b34b 100644 --- a/internal/status/status.go +++ b/internal/status/status.go @@ -2,6 +2,8 @@ package status import ( "fmt" + "github.com/xenitab/node-ttl/internal/utils" + "os" "regexp" "strconv" "strings" @@ -15,6 +17,8 @@ const ( KubemarkNodePoolLabelKey = "autoscaling.k8s.io/nodegroup" ) +var CustomNodeLabelKey = os.Getenv("CUSTOM_NODE_POOL_LABEL_KEY") + func HasScaleDownCapacity(status string, node *corev1.Node) (bool, error) { nodePoolName, err := getNodePoolName(node) if err != nil { @@ -31,7 +35,11 @@ func HasScaleDownCapacity(status string, node *corev1.Node) (bool, error) { } func getNodePoolLabelKeys() []string { - return []string{AzureNodePoolLabelKey, AWSNodePoolLabelKey, KubemarkNodePoolLabelKey} + defaultKey := []string{AzureNodePoolLabelKey, AWSNodePoolLabelKey, KubemarkNodePoolLabelKey} + if CustomNodeLabelKey != "" { + return append(defaultKey, CustomNodeLabelKey) + } + return defaultKey } func getNodePoolName(node *corev1.Node) (string, error) { @@ -54,6 +62,11 @@ func getNodePoolName(node *corev1.Node) (string, error) { // The name is however, predicatable as it will be the same as the EKS node pool name with an additional UUID as a // suffix. This is why the UUID regex has to be appended to the end. nodePoolName = fmt.Sprintf("eks-%s-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", nodePoolName) + case CustomNodeLabelKey: + // To be able to configure any kind of node pool label and name, a custom label format can be set. + //That is using Sprintf formatting to provide the regex. 
+ nodePoolName = fmt.Sprintf(utils.GetEnvOrDefault("CUSTOM_NODE_POOL_NAME_FORMAT", "%s"), nodePoolName) + } return nodePoolName, nil } diff --git a/internal/status/status_test.go b/internal/status/status_test.go index 571bce8..6663a6e 100644 --- a/internal/status/status_test.go +++ b/internal/status/status_test.go @@ -172,6 +172,15 @@ func getNodePoolNameAndNode(t *testing.T, cp string, name string) (*corev1.Node, }, }, }, name + case CustomNodeLabelKey: + return &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + CustomNodeLabelKey: name, + }, + }, + }, fmt.Sprintf("custom-%s", name) default: t.Fatal("unknown key") return nil, "" From 297c436e4bf68026b883745203e6161eaa46a1b6 Mon Sep 17 00:00:00 2001 From: kristofhetenyi Date: Tue, 27 Feb 2024 13:41:27 +0100 Subject: [PATCH 3/4] Add Custom label and pool name docs --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 7b643ef..4f9d7af 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,23 @@ metadata: cluster-autoscaler.kubernetes.io/safe-to-evict: false ``` +### Adding Custom Node Pool Label key And Pool name + +If the node pool label key and pool name are different from the pre-defined values, +you can set the `customNodeLabelKey` and `customNodePoolNameFormat` values in the `values.yaml` file. + +```yaml +customNodeLabelKey: "cloud.google.com/gke-nodepool" # Required +customNodePoolNameFormat: "example-%s" # Optional (default: "%s") +``` + +or use the env variables `CUSTOM_NODE_POOL_LABEL_KEY` and `CUSTOM_NODE_POOL_NAME_FORMAT` to set the values. + +```shell +CUSTOM_NODE_POOL_LABEL_KEY=cloud.google.com/gke-nodepool +CUSTOM_NODE_POOL_NAME_FORMAT=example-%s +``` + ### Cluster Autoscaler Status A node pool where the min count is equal to the current node count will node be scaled down by cluster autoscaler. Even if the node is completely unused and a scale down candidate. 
This is because the cluster autoscaler has to fulfill the minimum count requirement. This is an issue for Node TTL as it relies on cluster autoscaler node removal to replace nodes. If a node in this case were to be cordoned and drained the node would get stuck forever without any Pods scheduled to it. In a perfect world cluster autoscaler would allow the node removal and create a new node or alternatively preemptively add a new node to the node pool. From fab096588c06831381b0f3a92f866dcf2186b95f Mon Sep 17 00:00:00 2001 From: kristofhetenyi Date: Tue, 27 Feb 2024 13:52:01 +0100 Subject: [PATCH 4/4] Add Custom label and pool name to Helm --- charts/node-ttl/Chart.yaml | 4 +-- charts/node-ttl/README.md | 40 ++++++++++++----------- charts/node-ttl/templates/deployment.yaml | 11 +++++++ charts/node-ttl/values.yaml | 7 +++- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/charts/node-ttl/Chart.yaml b/charts/node-ttl/Chart.yaml index 1cbd145..20069a4 100644 --- a/charts/node-ttl/Chart.yaml +++ b/charts/node-ttl/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: node-ttl description: Enforces a time to live (TTL) on Kubernetes nodes and evicts nodes which have expired. 
type: application -version: 0.1.0 -appVersion: 0.1.0 +version: 0.1.1 +appVersion: 0.1.1 diff --git a/charts/node-ttl/README.md b/charts/node-ttl/README.md index 40fbdf8..947ce8e 100644 --- a/charts/node-ttl/README.md +++ b/charts/node-ttl/README.md @@ -4,22 +4,24 @@ Enforces a time to live (TTL) on Kubernetes nodes and evicts nodes which have ex ## Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| affinity | object | `{}` | | -| fullnameOverride | string | `""` | | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"ghcr.io/xenitab/node-ttl"` | | -| image.tag | string | `""` | | -| imagePullSecrets | list | `[]` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| nodeTtl.interval | string | `"10m"` | | -| podAnnotations | object | `{}` | | -| podSecurityContext.seccompProfile.type | string | `"RuntimeDefault"` | | -| resources | object | `{}` | | -| securityContext.capabilities.drop[0] | string | `"ALL"` | | -| securityContext.readOnlyRootFilesystem | bool | `true` | | -| securityContext.runAsNonRoot | bool | `true` | | -| securityContext.runAsUser | int | `65532` | | -| tolerations | list | `[]` | | +| Key | Type | Default | Description | +|----------------------------------------|--------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | | +| fullnameOverride | string | `""` | | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"ghcr.io/xenitab/node-ttl"` | | +| image.tag | string | `""` | | +| imagePullSecrets | list | `[]` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| nodeTtl.interval | string | `"10m"` | | +| podAnnotations | object | `{}` | | +| podSecurityContext.seccompProfile.type | string | 
`"RuntimeDefault"` | | +| resources | object | `{}` | | +| securityContext.capabilities.drop[0] | string | `"ALL"` | | +| securityContext.readOnlyRootFilesystem | bool | `true` | | +| securityContext.runAsNonRoot | bool | `true` | | +| securityContext.runAsUser | int | `65532` | | +| tolerations | list | `[]` | | +| customNodeLabelKey | string | `` | Must be set to a non-empty value to enable custom node label | +| customNodePoolNameFormat | string | `` | It should be a format string with one %s to be replaced by the custom node label value (e.g. "node-pool-%s") with optional regexp to match the node pool name against (e.g. "node-pool-(.*)") | diff --git a/charts/node-ttl/templates/deployment.yaml b/charts/node-ttl/templates/deployment.yaml index 610bd29..5d2d798 100644 --- a/charts/node-ttl/templates/deployment.yaml +++ b/charts/node-ttl/templates/deployment.yaml @@ -28,6 +28,17 @@ spec: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.customNodeLabelKey }} + env: + {{- if .Values.customNodeLabelKey }} + - name: CUSTOM_NODE_POOL_LABEL_KEY + value: {{ .Values.customNodeLabelKey }} + {{- end }} + {{- if .Values.customNodePoolNameFormat }} + - name: CUSTOM_NODE_POOL_NAME_FORMAT + value: {{ .Values.customNodePoolNameFormat }} + {{- end }} + {{- end }} args: - --probe-addr=:{{ .Values.service.probe.port }} - --metrics-addr=:{{ .Values.service.metrics.port }} diff --git a/charts/node-ttl/values.yaml b/charts/node-ttl/values.yaml index 3738140..3c6dcf7 100644 --- a/charts/node-ttl/values.yaml +++ b/charts/node-ttl/values.yaml @@ -45,4 +45,9 @@ networkPolicy: nodeTtl: interval: 10m statusConfigMapName: cluster-autoscaler-status - statusConfigMapNamespace: cluster-autoscaler \ No newline at end of file + statusConfigMapNamespace: cluster-autoscaler + +customNodeLabelKey: "" # Must be set to a non-empty value to enable custom node label +# it 
should be a format string with one %s to be replaced by the custom node label value (e.g. "node-pool-%s") +# with optional regexp to match the node pool name against (e.g. "node-pool-(.*)") +customNodePoolNameFormat: "" # default: "%s"