From adb9a18c4627ca6dddf03d273824c9cea53a8cca Mon Sep 17 00:00:00 2001
From: Ngo Vu Minh Dat
Date: Sun, 12 Apr 2026 14:56:36 +0700
Subject: [PATCH 1/2] feat(ci): add automated postgresql backup recovery test

- Create a GitHub Actions workflow to spin up an ephemeral DOKS cluster.
- Bootstrap ArgoCD and sync the `prod-postgresql` app from the recovery tag.
- Validate data restoration for critical databases (GitLab, SonarQube, Vaultwarden, Nextcloud).
- Automatically destroy the cluster post-validation.
- Add `.secrets` to `.gitignore`.
---
Review note: the workflow below was revised during review, so the blob id on
its "index" line still refers to the pre-review content.

 .github/workflows/postgresql-backup-test.yml | 232 +++++++++++++++++++
 .gitignore                                   |   3 +-
 2 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/postgresql-backup-test.yml

diff --git a/.github/workflows/postgresql-backup-test.yml b/.github/workflows/postgresql-backup-test.yml
new file mode 100644
index 00000000..93fd6141
--- /dev/null
+++ b/.github/workflows/postgresql-backup-test.yml
@@ -0,0 +1,232 @@
+# Proves that the PostgreSQL backups are restorable: creates a throwaway DOKS
+# cluster, bootstraps Argo CD, syncs the prod-postgresql app at the recovery
+# tag, checks the restored databases, and always destroys the cluster.
+name: PostgreSQL Backup Test
+
+on:
+  schedule:
+    # 03:00 UTC on the 1st and 15th of every month.
+    - cron: "0 3 1,15 * *"
+  workflow_dispatch:
+    inputs:
+      region:
+        description: "DOKS region"
+        default: "nyc3"
+        required: false
+      node_size:
+        description: "DOKS node size slug"
+        default: "s-4vcpu-8gb"
+        required: false
+      node_count:
+        description: "Number of DOKS nodes"
+        default: "2"
+        required: false
+
+# The job only needs to read the repository contents.
+permissions:
+  contents: read
+
+env:
+  CLUSTER_NAME: pg-backup-test-${{ github.run_id }}
+  # Inputs are empty on scheduled runs, so fall back to the dispatch defaults.
+  REGION: ${{ inputs.region || 'nyc3' }}
+  NODE_SIZE: ${{ inputs.node_size || 's-4vcpu-8gb' }}
+  NODE_COUNT: ${{ inputs.node_count || '2' }}
+  NAMESPACE: prod-postgresql
+  RECOVERY_TAG: postgresql-first-recovery-test
+  ARGOCD_CHART_VERSION: "9.4.15"
+
+jobs:
+  backup-test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install doctl
+        uses: digitalocean/action-doctl@v2
+        with:
+          token: ${{ secrets.DIGITALOCEAN_TOKEN }}
+
+      - name: Install ArgoCD CLI
+        run: |
+          # -f: fail on an HTTP error instead of saving the error page as the binary.
+          curl -fsSL -o argocd https://github.com/argoproj/argo-cd/releases/download/v3.3.4/argocd-linux-amd64
+          chmod +x argocd
+          sudo mv argocd /usr/local/bin/
+
+      - name: Create DOKS cluster
+        run: |
+          doctl kubernetes cluster create "$CLUSTER_NAME" \
+            --region "$REGION" \
+            --size "$NODE_SIZE" \
+            --count "$NODE_COUNT" \
+            --wait
+
+      - name: Save kubeconfig
+        run: |
+          doctl kubernetes cluster kubeconfig save "$CLUSTER_NAME"
+          # `argocd --core` takes the Argo CD namespace from the current kube context.
+          kubectl config set-context --current --namespace=argocd
+
+      - name: Install ArgoCD via Helm
+        run: |
+          helm repo add argo https://argoproj.github.io/argo-helm
+          helm repo update
+          helm install argocd argo/argo-cd \
+            --version "$ARGOCD_CHART_VERSION" \
+            --namespace argocd \
+            --create-namespace \
+            --wait \
+            --timeout 5m \
+            --set 'configs.params.server\.insecure=true'
+
+      - name: Wait for ArgoCD to be ready
+        run: |
+          kubectl rollout status deployment/argocd-server -n argocd --timeout=120s
+          kubectl rollout status deployment/argocd-repo-server -n argocd --timeout=120s
+          kubectl rollout status deployment/argocd-applicationset-controller -n argocd --timeout=120s
+
+      - name: Install app-of-app chart
+        run: |
+          helm install app-of-app ./kubernetes/argocd/app-of-app \
+            --namespace argocd \
+            --set metallb.enabled=false \
+            --set traefik.enabled=false \
+            --set openebs.enabled=false \
+            --set postgresql.enabled=true \
+            --set certManager.enabled=true \
+            --set kubePrometheusStack.enabled=true \
+            --set customManifest.enabled=false \
+            --set loki.enabled=true \
+            --set alloy.enabled=true \
+            --set pgadmin4.enabled=true \
+            --set sonarqube.enabled=false \
+            --set harbor.enabled=false \
+            --set velero.enabled=false \
+            --set mongoOperator.enabled=false \
+            --set kafkaOperator.enabled=false \
+            --set juicefs.enabled=false \
+            --set vaultwarden.enabled=true
+
+      - name: Sync cert-manager
+        run: |
+          argocd app sync cert-manager \
+            --core \
+            --timeout 120
+          argocd app wait cert-manager \
+            --core \
+            --health \
+            --timeout 120
+
+      - name: Create namespace and secrets
+        env:
+          R2_ACCESS_KEY: ${{ secrets.R2_ACCESS_KEY }}
+          R2_SECRET_KEY: ${{ secrets.R2_SECRET_KEY }}
+        run: |
+          kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
+          kubectl create secret generic cloudflare-r2 \
+            -n "$NAMESPACE" \
+            --from-literal=ACCESS_KEY="$R2_ACCESS_KEY" \
+            --from-literal=SECRET_KEY="$R2_SECRET_KEY" \
+            --dry-run=client -o yaml | kubectl apply -f -
+          kubectl create secret generic postgres-admin \
+            -n "$NAMESPACE" \
+            --from-literal=username=postgres \
+            --from-literal=password=backup-test-dummy \
+            --dry-run=client -o yaml | kubectl apply -f -
+
+      - name: Override PostgreSQL app revision to recovery tag
+        run: |
+          argocd app set prod-postgresql \
+            --core \
+            --source-position 2 \
+            --revision "$RECOVERY_TAG"
+
+      - name: Show ArgoCD diff
+        run: |
+          # Informational only: `app diff` exits non-zero when it finds differences.
+          argocd app diff prod-postgresql --core || true
+
+      - name: Sync PostgreSQL
+        run: |
+          argocd app sync prod-postgresql \
+            --core \
+            --timeout 300
+
+      - name: Wait for cluster healthy state
+        run: |
+          echo "Waiting for CloudNativePG cluster to reach healthy state..."
+          for i in $(seq 1 90); do
+            phase=$(kubectl get cluster -n "$NAMESPACE" postgresql \
+              -o jsonpath='{.status.phase}' 2>/dev/null || echo "unknown")
+            echo "  Attempt $i/90: phase=$phase"
+            if [ "$phase" = "Cluster in healthy state" ]; then
+              echo "Cluster is healthy."
+              exit 0
+            fi
+            sleep 10
+          done
+          echo "ERROR: Cluster did not reach healthy state within 15 minutes."
+          kubectl get cluster -n "$NAMESPACE" postgresql -o yaml || true
+          kubectl get pods -n "$NAMESPACE" -l cnpg.io/cluster=postgresql || true
+          exit 1
+
+      - name: Validate restored data
+        run: |
+          POD=$(kubectl get pods -n "$NAMESPACE" \
+            -l cnpg.io/cluster=postgresql,role=primary \
+            -o jsonpath='{.items[0].metadata.name}')
+          echo "Primary pod: $POD"
+
+          echo "--- Connectivity check ---"
+          kubectl exec -n "$NAMESPACE" "$POD" -- \
+            psql -U postgres -c "SELECT 1 AS connectivity_check;"
+
+          echo "--- Database listing ---"
+          kubectl exec -n "$NAMESPACE" "$POD" -- \
+            psql -U postgres -c "\l"
+
+          echo "--- Verify expected databases exist ---"
+          EXPECTED_DBS="gitlab sonarqube vaultwarden nextcloud"
+          for db in $EXPECTED_DBS; do
+            count=$(kubectl exec -n "$NAMESPACE" "$POD" -- \
+              psql -U postgres -tAc "SELECT count(*) FROM pg_database WHERE datname = '$db';")
+            # String comparison so an empty or garbled result fails instead of erroring inside `[`.
+            if [ "$count" != "1" ]; then
+              echo "FAIL: Database '$db' not found."
+              exit 1
+            fi
+            echo "OK: Database '$db' exists."
+          done
+
+          echo "--- Count user tables across databases ---"
+          for db in $EXPECTED_DBS; do
+            # No stderr suppression and no "0" fallback: a failed query must fail the test.
+            if ! table_count=$(kubectl exec -n "$NAMESPACE" "$POD" -- \
+              psql -U postgres -d "$db" -tAc \
+              "SELECT count(*) FROM pg_catalog.pg_tables WHERE schemaname NOT IN ('pg_catalog','information_schema');"); then
+              echo "FAIL: Could not count tables in database '$db'."
+              exit 1
+            fi
+            echo "Database '$db': $table_count user table(s)"
+            # Zero user tables means nothing was restored (assumes each database had tables when backed up).
+            case "$table_count" in
+              ''|*[!0-9]*|0)
+                echo "FAIL: Database '$db' has no restored user tables."
+                exit 1
+                ;;
+            esac
+          done
+
+          echo "All validation checks passed."
+
+      - name: Destroy DOKS cluster
+        if: always()
+        run: |
+          # Do not hide a failed delete: a leaked cluster keeps running until someone removes it.
+          if ! doctl kubernetes cluster delete "$CLUSTER_NAME" --force --dangerous; then
+            echo "::error::Could not delete DOKS cluster '$CLUSTER_NAME'; verify it is gone and delete it manually if needed."
+            exit 1
+          fi
diff --git a/.gitignore b/.gitignore
index 7d4eaf2d..61d16e52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,4 +53,5 @@ credentials.json
 **/**.log
 change.diff
 *secret.yaml
-*secret
\ No newline at end of file
+*secret
+.secrets
\ No newline at end of file
From 652f87c108e70e1c6a828dcdb12792f3f70a559b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 12 Apr 2026 08:00:07 +0000
Subject: [PATCH 2/2] chore(deps): bump urllib3 in /disaster-recovery/vaultwarden/Backup

Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.6.2 to 2.6.3.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/2.6.2...2.6.3)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-version: 2.6.3
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
---
 disaster-recovery/vaultwarden/Backup/uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/disaster-recovery/vaultwarden/Backup/uv.lock b/disaster-recovery/vaultwarden/Backup/uv.lock
index 9d6d6e9a..b3054980 100644
--- a/disaster-recovery/vaultwarden/Backup/uv.lock
+++ b/disaster-recovery/vaultwarden/Backup/uv.lock
@@ -368,11 +368,11 @@ wheels = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" },
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
 ]
 
 [[package]]