Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,23 @@ metadata:
cluster-autoscaler.kubernetes.io/safe-to-evict: false
```

### Adding a Custom Node Pool Label Key and Pool Name

If your node pool label key or node pool name differs from the pre-defined values,
you can set the `customNodeLabelKey` and `customNodePoolNameFormat` values in the `values.yaml` file.

```yaml
customNodeLabelKey: "cloud.google.com/gke-nodepool" # Required
customNodePoolNameFormat: "example-%s" # Optional (default: "%s")
```

Alternatively, set the environment variables `CUSTOM_NODE_POOL_LABEL_KEY` and `CUSTOM_NODE_POOL_NAME_FORMAT` directly.

```shell
CUSTOM_NODE_POOL_LABEL_KEY=cloud.google.com/gke-nodepool
CUSTOM_NODE_POOL_NAME_FORMAT=example-%s
```

### Cluster Autoscaler Status

A node pool where the min count is equal to the current node count will not be scaled down by cluster autoscaler, even if the node is completely unused and a scale down candidate. This is because the cluster autoscaler has to fulfill the minimum count requirement. This is an issue for Node TTL as it relies on cluster autoscaler node removal to replace nodes. If a node in this case were to be cordoned and drained, the node would get stuck forever without any Pods scheduled to it. In a perfect world cluster autoscaler would allow the node removal and create a new node, or alternatively preemptively add a new node to the node pool.
Expand Down
4 changes: 2 additions & 2 deletions charts/node-ttl/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: node-ttl
description: Enforces a time to live (TTL) on Kubernetes nodes and evicts nodes which have expired.
type: application
version: 0.1.0
appVersion: 0.1.0
version: 0.1.1
appVersion: 0.1.1
40 changes: 21 additions & 19 deletions charts/node-ttl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,24 @@ Enforces a time to live (TTL) on Kubernetes nodes and evicts nodes which have ex

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| fullnameOverride | string | `""` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"ghcr.io/xenitab/node-ttl"` | |
| image.tag | string | `""` | |
| imagePullSecrets | list | `[]` | |
| nameOverride | string | `""` | |
| nodeSelector | object | `{}` | |
| nodeTtl.interval | string | `"10m"` | |
| podAnnotations | object | `{}` | |
| podSecurityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
| resources | object | `{}` | |
| securityContext.capabilities.drop[0] | string | `"ALL"` | |
| securityContext.readOnlyRootFilesystem | bool | `true` | |
| securityContext.runAsNonRoot | bool | `true` | |
| securityContext.runAsUser | int | `65532` | |
| tolerations | list | `[]` | |
| Key | Type | Default | Description |
|----------------------------------------|--------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| affinity | object | `{}` | |
| fullnameOverride | string | `""` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"ghcr.io/xenitab/node-ttl"` | |
| image.tag | string | `""` | |
| imagePullSecrets | list | `[]` | |
| nameOverride | string | `""` | |
| nodeSelector | object | `{}` | |
| nodeTtl.interval | string | `"10m"` | |
| podAnnotations | object | `{}` | |
| podSecurityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
| resources | object | `{}` | |
| securityContext.capabilities.drop[0] | string | `"ALL"` | |
| securityContext.readOnlyRootFilesystem | bool | `true` | |
| securityContext.runAsNonRoot | bool | `true` | |
| securityContext.runAsUser | int | `65532` | |
| tolerations | list | `[]` | |
| customNodeLabelKey | string | `""` | Node label key used to look up the node pool. Must be set to a non-empty value to enable the custom node label. |
| customNodePoolNameFormat | string | `""` | Format string with exactly one `%s`, which is replaced by the custom node label value (e.g. `"node-pool-%s"`). It may optionally contain a regular expression to match the node pool name against (e.g. `"node-pool-(.*)"`). Falls back to `"%s"` when left empty. |
11 changes: 11 additions & 0 deletions charts/node-ttl/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ spec:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- /* Export the custom node pool settings only when a label key is configured. Both values are quoted so that formats such as "%s" or regular expression fragments render as valid YAML strings. */}}
{{- if .Values.customNodeLabelKey }}
env:
- name: CUSTOM_NODE_POOL_LABEL_KEY
  value: {{ .Values.customNodeLabelKey | quote }}
{{- if .Values.customNodePoolNameFormat }}
- name: CUSTOM_NODE_POOL_NAME_FORMAT
  value: {{ .Values.customNodePoolNameFormat | quote }}
{{- end }}
{{- end }}
args:
- --probe-addr=:{{ .Values.service.probe.port }}
- --metrics-addr=:{{ .Values.service.metrics.port }}
Expand Down
7 changes: 6 additions & 1 deletion charts/node-ttl/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,9 @@ networkPolicy:
nodeTtl:
interval: 10m
statusConfigMapName: cluster-autoscaler-status
statusConfigMapNamespace: cluster-autoscaler
statusConfigMapNamespace: cluster-autoscaler

customNodeLabelKey: "" # Must be set to a non-empty value to enable custom node label
# customNodePoolNameFormat should be a format string with exactly one %s, which is replaced by the
# custom node label value (e.g. "node-pool-%s"). It may optionally contain a regexp to match the
# node pool name against (e.g. "node-pool-(.*)").
customNodePoolNameFormat: "" # default: "%s"
15 changes: 14 additions & 1 deletion internal/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package status

import (
"fmt"
"github.com/xenitab/node-ttl/internal/utils"
"os"
"regexp"
"strconv"
"strings"
Expand All @@ -15,6 +17,8 @@ const (
KubemarkNodePoolLabelKey = "autoscaling.k8s.io/nodegroup"
)

var CustomNodeLabelKey = os.Getenv("CUSTOM_NODE_POOL_LABEL_KEY")

func HasScaleDownCapacity(status string, node *corev1.Node) (bool, error) {
nodePoolName, err := getNodePoolName(node)
if err != nil {
Expand All @@ -31,7 +35,11 @@ func HasScaleDownCapacity(status string, node *corev1.Node) (bool, error) {
}

func getNodePoolLabelKeys() []string {
return []string{AzureNodePoolLabelKey, AWSNodePoolLabelKey, KubemarkNodePoolLabelKey}
defaultKey := []string{AzureNodePoolLabelKey, AWSNodePoolLabelKey, KubemarkNodePoolLabelKey}
if CustomNodeLabelKey != "" {
return append(defaultKey, CustomNodeLabelKey)
}
return defaultKey
}

func getNodePoolName(node *corev1.Node) (string, error) {
Expand All @@ -54,6 +62,11 @@ func getNodePoolName(node *corev1.Node) (string, error) {
// The name is however, predicatable as it will be the same as the EKS node pool name with an additional UUID as a
// suffix. This is why the UUID regex has to be appended to the end.
nodePoolName = fmt.Sprintf("eks-%s-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", nodePoolName)
case CustomNodeLabelKey:
// To be able to configure any kind of node pool label and name, a custom label format can be set.
//That is using Sprintf formatting to provide the regex.
nodePoolName = fmt.Sprintf(utils.GetEnvOrDefault("CUSTOM_NODE_POOL_NAME_FORMAT", "%s"), nodePoolName)

}
return nodePoolName, nil
}
Expand Down
9 changes: 9 additions & 0 deletions internal/status/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ func getNodePoolNameAndNode(t *testing.T, cp string, name string) (*corev1.Node,
},
},
}, name
case CustomNodeLabelKey:
return &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: map[string]string{
CustomNodeLabelKey: name,
},
},
}, fmt.Sprintf("custom-%s", name)
default:
t.Fatal("unknown key")
return nil, ""
Expand Down
11 changes: 11 additions & 0 deletions internal/utils/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package utils

import "os"

// GetEnvOrDefault looks up the environment variable named key and returns its
// value whenever the variable is set, even if that value is the empty string.
// defaultValue is returned only when the variable is not set at all.
func GetEnvOrDefault(key, defaultValue string) string {
	value, found := os.LookupEnv(key)
	if !found {
		return defaultValue
	}
	return value
}
29 changes: 29 additions & 0 deletions internal/utils/env_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package utils

import (
"github.com/stretchr/testify/assert"
"os"
"testing"
)

// TestGetEnvOrDefault checks that GetEnvOrDefault returns the value of a
// variable that is set and falls back to the supplied default for one that is
// not set.
func TestGetEnvOrDefault(t *testing.T) {
	const (
		key      = "TEST_KEY"
		want     = "TEST_VALUE"
		fallback = "DEFAULT_VALUE"
	)

	// Make the variable visible to the function under test.
	assert.Nil(t, os.Setenv(key, want))

	// A variable that is set must win over the default.
	if got := GetEnvOrDefault(key, fallback); got != want {
		t.Errorf("Expected TEST_VALUE, but got %s", got)
	}

	// A variable that is not set must yield the default.
	if got := GetEnvOrDefault("NON_EXISTENT_KEY", fallback); got != fallback {
		t.Errorf("Expected DEFAULT_VALUE, but got %s", got)
	}

	// Remove the variable again so it does not leak into other tests.
	assert.Nil(t, os.Unsetenv(key))
}