From 21e664ed2c5e7476ea9617077056c68133c6d7ac Mon Sep 17 00:00:00 2001 From: daniel-gines Date: Wed, 6 May 2026 07:56:18 -0300 Subject: [PATCH] feat(snapshot-controller): VolumeSnapshotClass component for AWS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New `components/snapshot-controller/` chart that renders a single VolumeSnapshotClass `ebs-csi-aws` with the `velero.io/csi-volumesnapshot-class: "true"` discovery label. Required for Velero to take CSI snapshots of EBS-backed PVs (without the label, backups skip PV snapshotting entirely → PartiallyFailed phase, observed on cortex prd 2026-05-05). The snapshot-controller deployment + CRDs are installed via the EKS managed addon (provisioned by estabilis-platform v0.46.0 in providers/aws/eks.tf — paired with this chart). Component is consumed by bootstrap/platform-root/templates/snapshot-controller.yaml in estabilis-platform v0.46.0+ (AWS-only, sync wave 6). --- CHANGELOG.md | 21 +++++++++++++++++++ components/snapshot-controller/Chart.yaml | 5 +++++ .../templates/volumesnapshotclasses.yaml | 21 +++++++++++++++++++ .../snapshot-controller/values-aws.yaml | 21 +++++++++++++++++++ components/snapshot-controller/values.yaml | 15 +++++++++++++ workload-bootstrap/Chart.yaml | 4 ++-- workload-bootstrap/values.yaml | 2 +- 7 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 components/snapshot-controller/Chart.yaml create mode 100644 components/snapshot-controller/templates/volumesnapshotclasses.yaml create mode 100644 components/snapshot-controller/values-aws.yaml create mode 100644 components/snapshot-controller/values.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b7ba1f..c2cf32b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,27 @@ and the corresponding commit messages. 
## [Unreleased] +## [0.39.10] — 2026-05-06 + +### Added — `components/snapshot-controller/` chart with VolumeSnapshotClass + +New optional component that ships VolumeSnapshotClass resources for the +in-cluster CSI snapshot-controller. The controller and CRDs themselves +are installed via cloud-managed mechanisms (AWS: EKS managed addon +`snapshot-controller`, provisioned in +`estabilis-platform/providers/aws/eks.tf`). + +`values-aws.yaml` ships one VolumeSnapshotClass `ebs-csi-aws` with the +`velero.io/csi-volumesnapshot-class: "true"` discovery label so Velero +backups can snapshot EBS PVs (previously fell back to no snapshot → +`PartiallyFailed` backups, observed on cortex prd 2026-05-05). Default +class annotation set so workloads requesting `kind: VolumeSnapshot` +without `snapshotClassName` resolve to this VSC. + +Consumed by `bootstrap/platform-root/templates/snapshot-controller.yaml` +in estabilis-platform v0.46.0+ (AWS-only, sync wave 6 — before +velero in wave 7). + ## [0.39.9] — 2026-05-04 ### Fixed — Workload Alloy `metric_pods` relabel for annotation scrape diff --git a/components/snapshot-controller/Chart.yaml b/components/snapshot-controller/Chart.yaml new file mode 100644 index 0000000..8963dcb --- /dev/null +++ b/components/snapshot-controller/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: snapshot-controller +description: VolumeSnapshotClasses for the in-cluster CSI snapshot-controller +type: application +version: 0.1.0 diff --git a/components/snapshot-controller/templates/volumesnapshotclasses.yaml b/components/snapshot-controller/templates/volumesnapshotclasses.yaml new file mode 100644 index 0000000..ff18afc --- /dev/null +++ b/components/snapshot-controller/templates/volumesnapshotclasses.yaml @@ -0,0 +1,21 @@ +{{- range .Values.volumeSnapshotClasses }} +--- +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: {{ required "volumeSnapshotClasses[].name is required" .name | quote }} + {{- with .annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} + {{- with .labels }} + labels: + {{- toYaml . | nindent 4 }} + {{- end }} +driver: {{ required "volumeSnapshotClasses[].driver is required" .driver | quote }} +deletionPolicy: {{ required "volumeSnapshotClasses[].deletionPolicy is required" .deletionPolicy | quote }} +{{- with .parameters }} +parameters: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- end }} diff --git a/components/snapshot-controller/values-aws.yaml b/components/snapshot-controller/values-aws.yaml new file mode 100644 index 0000000..d2767a5 --- /dev/null +++ b/components/snapshot-controller/values-aws.yaml @@ -0,0 +1,21 @@ +# AWS-specific VolumeSnapshotClass for the EBS CSI driver. +# +# Why label `velero.io/csi-volumesnapshot-class: "true"`: +# Velero discovers which VolumeSnapshotClass to use for a given +# StorageClass by matching this label. Without it, Velero falls back +# to no snapshot — backup completes with phase=PartiallyFailed and +# the source PV is skipped (observed on cortex 2026-05-05 prior to +# this component shipping). +# +# Why `is-default-class: "true"`: +# When a workload requests `kind: VolumeSnapshot` without specifying +# a snapshotClassName, k8s picks the default. Single VSC per cluster +# today — safe to flag as default. +volumeSnapshotClasses: + - name: ebs-csi-aws + annotations: + snapshot.storage.kubernetes.io/is-default-class: "true" + labels: + velero.io/csi-volumesnapshot-class: "true" + driver: ebs.csi.aws.com + deletionPolicy: Delete diff --git a/components/snapshot-controller/values.yaml b/components/snapshot-controller/values.yaml new file mode 100644 index 0000000..b0dfad4 --- /dev/null +++ b/components/snapshot-controller/values.yaml @@ -0,0 +1,15 @@ +# VolumeSnapshotClass declarations for the CSI snapshot-controller. +# +# The snapshot-controller and its CRDs (volumesnapshots.snapshot.storage.k8s.io +# et al.) 
are installed by the cloud-managed mechanism for each provider: +# • AWS — EKS managed addon `snapshot-controller` (provisioned in +# estabilis-platform/providers/aws/eks.tf) +# • Azure — TBD (deferred until a workload demands CSI snapshots) +# +# This component only ships the VolumeSnapshotClass(es) consumed by Velero +# and other workloads. AWS values live in `values-aws.yaml` (loaded by the +# Application via `valueFiles` against `global.provider`). +# +# Default: empty list — no VolumeSnapshotClass rendered when no provider +# overlay applies. +volumeSnapshotClasses: [] diff --git a/workload-bootstrap/Chart.yaml b/workload-bootstrap/Chart.yaml index 274750b..4055a0c 100644 --- a/workload-bootstrap/Chart.yaml +++ b/workload-bootstrap/Chart.yaml @@ -5,5 +5,5 @@ description: | workload cluster registered by the estabilis-workload-operator. Rendered by the hub's ArgoCD. See ADR 0001. type: application -version: 0.39.9 -appVersion: "0.39.9" +version: 0.39.10 +appVersion: "0.39.10" diff --git a/workload-bootstrap/values.yaml b/workload-bootstrap/values.yaml index 4eb5994..7c7ca22 100644 --- a/workload-bootstrap/values.yaml +++ b/workload-bootstrap/values.yaml @@ -14,7 +14,7 @@ # should pin to when reading $values. Kept in sync with the hub Application's # targetRevision so $values always matches the rendered templates. repoURL: https://github.com/Estabilis/estabilis-platform-gitops.git -repoVersion: v0.39.9 +repoVersion: v0.39.10 # Version of estabilis-platform (the HUB-side repo) that rendered this chart. # Passed as a helm parameter by the parent Application