Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions .github/workflows/postgresql-backup-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# Restore test for the PostgreSQL backups: provisions a throwaway DOKS
# cluster, installs ArgoCD, syncs the PostgreSQL application at the recovery
# tag, checks the expected databases, and finally destroys the cluster.
name: PostgreSQL Backup Test

on:
  schedule:
    # 03:00 on the 1st and 15th of every month (GitHub evaluates cron in UTC).
    - cron: "0 3 1,15 * *"
  # Manual runs may override the cluster sizing; each input falls back to its
  # default when left empty.
  workflow_dispatch:
    inputs:
      region:
        description: "DOKS region"
        default: "nyc3"
        required: false
      node_size:
        description: "DOKS node size slug"
        default: "s-4vcpu-8gb"
        required: false
      node_count:
        description: "Number of DOKS nodes"
        default: "2"
        required: false

# Least privilege: the job only needs to read the repository (checkout). All
# cloud access goes through dedicated secrets, not the GITHUB_TOKEN.
permissions:
  contents: read

# Workflow-wide settings shared by every step of the job.
env:
  # Helm chart version used to install ArgoCD.
  ARGOCD_CHART_VERSION: "9.4.15"

  # Throwaway cluster: the run id suffix gives each workflow run its own name.
  CLUSTER_NAME: "pg-backup-test-${{ github.run_id }}"
  # Sizing comes from the manual-dispatch inputs; scheduled runs have no
  # inputs, so the literals after `||` apply.
  REGION: "${{ inputs.region || 'nyc3' }}"
  NODE_SIZE: "${{ inputs.node_size || 's-4vcpu-8gb' }}"
  NODE_COUNT: "${{ inputs.node_count || '2' }}"

  # Namespace that receives the PostgreSQL cluster and its secrets.
  NAMESPACE: "prod-postgresql"
  # Git revision the PostgreSQL application is pinned to for the restore.
  RECOVERY_TAG: "postgresql-first-recovery-test"

jobs:
backup-test:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
# The working tree is needed later for the local app-of-app Helm chart.
- uses: actions/checkout@v4
  name: Checkout repository

# Sets up the DigitalOcean CLI, handing it the API token from the repository
# secrets.
- uses: digitalocean/action-doctl@v2
  name: Install doctl
  with:
    token: "${{ secrets.DIGITALOCEAN_TOKEN }}"

# Downloads a pinned release of the ArgoCD CLI and puts it on the PATH.
- name: Install ArgoCD CLI
  env:
    ARGOCD_CLI_VERSION: "v3.3.4"
  run: |
    set -euo pipefail
    # --fail: exit non-zero on an HTTP error instead of saving the error page
    # as the "binary"; --retry: ride out transient download failures.
    curl --fail --silent --show-error --location --retry 3 \
      --output argocd \
      "https://github.com/argoproj/argo-cd/releases/download/${ARGOCD_CLI_VERSION}/argocd-linux-amd64"
    # install(1) copies the file and sets the executable mode in one step.
    sudo install -m 0755 argocd /usr/local/bin/argocd
    rm -f argocd
    # Fail here, not several steps later, if the download is not a usable CLI.
    argocd version --client

# Provisions the temporary Kubernetes cluster and blocks until doctl reports
# it as ready (--wait).
- name: Create DOKS cluster
  run: |
    create_args=(
      --region "$REGION"
      --size "$NODE_SIZE"
      --count "$NODE_COUNT"
      --wait
    )
    doctl kubernetes cluster create "$CLUSTER_NAME" "${create_args[@]}"

# Writes the new cluster's credentials into the local kubeconfig so kubectl,
# helm and argocd can reach it.
- name: Save kubeconfig
  run: |
    set -euo pipefail
    doctl kubernetes cluster kubeconfig save "$CLUSTER_NAME"
    # Later steps run `argocd ... --core`, which talks to the Kubernetes API
    # directly and looks for Argo CD's resources in the namespace of the
    # current kube context (see the Argo CD Core docs). Point the context at
    # the namespace ArgoCD is installed into; every kubectl/helm call in this
    # workflow passes its namespace explicitly, so nothing else is affected.
    kubectl config set-context --current --namespace=argocd

# Installs the pinned ArgoCD chart into its own namespace and waits (up to
# five minutes) for the release to become ready.
- name: Install ArgoCD via Helm
  run: |
    helm repo add argo https://argoproj.github.io/argo-helm
    helm repo update
    helm install argocd argo/argo-cd \
      --namespace argocd --create-namespace \
      --version "$ARGOCD_CHART_VERSION" \
      --set 'configs.params.server\.insecure=true' \
      --wait --timeout 5m

# Blocks until the ArgoCD deployments used by the following steps have
# finished rolling out; each one gets up to 120 seconds.
- name: Wait for ArgoCD to be ready
  run: |
    for component in server repo-server applicationset-controller; do
      kubectl rollout status "deployment/argocd-${component}" -n argocd --timeout=120s
    done

# Installs the repository's app-of-app chart with an explicit on/off switch
# for every application it can create.
- name: Install app-of-app chart
  run: |
    # One "<values key>=<bool>" pair per application toggle.
    toggles=(
      metallb.enabled=false
      traefik.enabled=false
      openebs.enabled=false
      postgresql.enabled=true
      certManager.enabled=true
      kubePrometheusStack.enabled=true
      customManifest.enabled=false
      loki.enabled=true
      alloy.enabled=true
      pgadmin4.enabled=true
      sonarqube.enabled=false
      harbor.enabled=false
      velero.enabled=false
      mongoOperator.enabled=false
      kafkaOperator.enabled=false
      juicefs.enabled=false
      vaultwarden.enabled=true
    )

    # Expand every toggle into its own `--set` flag.
    set_flags=()
    for toggle in "${toggles[@]}"; do
      set_flags+=(--set "$toggle")
    done

    helm install app-of-app ./kubernetes/argocd/app-of-app \
      --namespace argocd \
      "${set_flags[@]}"

# Syncs the cert-manager application and waits until ArgoCD reports it
# healthy (120 second limit for each command).
- name: Sync cert-manager
  run: |
    app=cert-manager
    argocd app sync "$app" --core --timeout 120
    argocd app wait "$app" --core --health --timeout 120

# Creates the target namespace plus the two secrets provided to the
# PostgreSQL deployment: object-storage credentials and the admin login.
- name: Create namespace and secrets
  env:
    R2_ACCESS_KEY: "${{ secrets.R2_ACCESS_KEY }}"
    R2_SECRET_KEY: "${{ secrets.R2_SECRET_KEY }}"
  run: |
    # Render the object client-side and pipe it through `kubectl apply`, so
    # the call also succeeds when the object already exists.
    apply_rendered() {
      kubectl create "$@" --dry-run=client -o yaml | kubectl apply -f -
    }

    apply_rendered namespace "$NAMESPACE"

    apply_rendered secret generic cloudflare-r2 \
      -n "$NAMESPACE" \
      --from-literal=ACCESS_KEY="$R2_ACCESS_KEY" \
      --from-literal=SECRET_KEY="$R2_SECRET_KEY"

    # Placeholder admin credentials for the throwaway test cluster.
    apply_rendered secret generic postgres-admin \
      -n "$NAMESPACE" \
      --from-literal=username=postgres \
      --from-literal=password=backup-test-dummy

# Pins the source at position 2 of the prod-postgresql application to the
# recovery tag, so the sync below deploys that revision.
- name: Override PostgreSQL app revision to recovery tag
  run: |
    argocd app set prod-postgresql --core \
      --revision "$RECOVERY_TAG" \
      --source-position 2

# Informational only: prints what the sync is about to change. The exit
# status is discarded so this step never fails the job.
- name: Show ArgoCD diff
  run: |
    argocd app diff prod-postgresql --core || :

# Deploys the PostgreSQL application at the pinned revision; the sync is
# given up to 300 seconds.
- name: Sync PostgreSQL
  run: |
    argocd app sync prod-postgresql --core --timeout 300

# Polls the `postgresql` Cluster resource every 10 seconds, 90 times at most
# (15 minutes), until its status phase reports a healthy cluster. On timeout
# the cluster manifest and its pods are dumped for debugging and the step
# fails.
- name: Wait for cluster healthy state
  run: |
    max_attempts=90
    echo "Waiting for CloudNativePG cluster to reach healthy state..."

    attempt=1
    while [ "$attempt" -le "$max_attempts" ]; do
      # A failed lookup (e.g. resource not created yet) is reported as
      # "unknown" rather than aborting the loop.
      phase=$(kubectl get cluster -n "$NAMESPACE" postgresql \
        -o jsonpath='{.status.phase}' 2>/dev/null || echo "unknown")
      echo " Attempt $attempt/$max_attempts: phase=$phase"

      case "$phase" in
        "Cluster in healthy state")
          echo "Cluster is healthy."
          exit 0
          ;;
      esac

      sleep 10
      attempt=$((attempt + 1))
    done

    echo "ERROR: Cluster did not reach healthy state within 15 minutes."
    kubectl get cluster -n "$NAMESPACE" postgresql -o yaml || true
    kubectl get pods -n "$NAMESPACE" -l cnpg.io/cluster=postgresql || true
    exit 1

# Verifies the restore from inside the primary pod: connectivity, presence of
# every expected database (hard failure), and a per-database user-table count
# (reported, with a warning annotation when it is zero or cannot be read).
- name: Validate restored data
  run: |
    set -euo pipefail

    # NOTE(review): newer CloudNativePG releases document the `role` label as
    # deprecated in favour of `cnpg.io/instanceRole` - confirm against the
    # operator version deployed here before switching.
    POD=$(kubectl get pods -n "$NAMESPACE" \
      -l cnpg.io/cluster=postgresql,role=primary \
      -o jsonpath='{.items[0].metadata.name}' || true)
    # Without this guard a missing primary only shows up as a jsonpath
    # "index out of bounds" error with no context.
    if [ -z "$POD" ]; then
      echo "FAIL: No primary pod found for cluster 'postgresql' in namespace '$NAMESPACE'."
      kubectl get pods -n "$NAMESPACE" --show-labels || true
      exit 1
    fi
    echo "Primary pod: $POD"

    echo "--- Connectivity check ---"
    kubectl exec -n "$NAMESPACE" "$POD" -- \
      psql -U postgres -c "SELECT 1 AS connectivity_check;"

    echo "--- Database listing ---"
    kubectl exec -n "$NAMESPACE" "$POD" -- \
      psql -U postgres -c "\l"

    echo "--- Verify expected databases exist ---"
    EXPECTED_DBS="gitlab sonarqube vaultwarden nextcloud"
    for db in $EXPECTED_DBS; do
      count=$(kubectl exec -n "$NAMESPACE" "$POD" -- \
        psql -U postgres -tAc "SELECT count(*) FROM pg_database WHERE datname = '$db';")
      # Anything other than exactly "1" (including empty or non-numeric
      # output) counts as a missing database.
      if [ "$count" != "1" ]; then
        echo "FAIL: Database '$db' not found."
        exit 1
      fi
      echo "OK: Database '$db' exists."
    done

    echo "--- Count user tables across databases ---"
    for db in $EXPECTED_DBS; do
      # Still non-fatal, but a query failure is no longer disguised as
      # "0 user table(s)": psql's stderr stays visible and a warning
      # annotation is raised for both failure and empty databases.
      if table_count=$(kubectl exec -n "$NAMESPACE" "$POD" -- \
        psql -U postgres -d "$db" -tAc \
        "SELECT count(*) FROM pg_catalog.pg_tables WHERE schemaname NOT IN ('pg_catalog','information_schema');"); then
        echo "Database '$db': $table_count user table(s)"
        if [ "$table_count" = "0" ]; then
          echo "::warning::Database '$db' contains no user tables after restore."
        fi
      else
        echo "::warning::Could not count user tables in database '$db'."
      fi
    done

    echo "All validation checks passed."

# Always runs, even after a failed or cancelled job, so the throwaway cluster
# does not keep accruing cost.
- name: Destroy DOKS cluster
  if: always()
  run: |
    # Teardown stays best-effort (the cluster may never have been created),
    # but a failure is no longer silent: doctl's error output is kept and a
    # warning annotation points at the possibly leaked cluster.
    if ! doctl kubernetes cluster delete "$CLUSTER_NAME" --force --dangerous; then
      echo "::warning::Could not delete DOKS cluster '$CLUSTER_NAME'. If it was created, delete it manually to avoid ongoing charges."
    fi
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ credentials.json
**/**.log
change.diff
*secret.yaml
*secret
*secret
.secrets
2 changes: 1 addition & 1 deletion disaster-recovery/vaultwarden/Backup/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ requires-python = ">=3.12"
dependencies = [
"boto3>=1.42.17",
"boto3-stubs~=1.42.17",
"cryptography>=46.0.3",
"cryptography>=46.0.7",
"docker>=7.0.0",
"python-dotenv>=1.2.1",
"zstandard>=0.25.0",
Expand Down
Loading