From ae35168187194807ec7dc2dfbd5e7e93bf284463 Mon Sep 17 00:00:00 2001 From: rishabkumar7 Date: Fri, 13 Feb 2026 17:52:18 -0600 Subject: [PATCH] feat: Initialize DevOps Lab Application with FastAPI and Redis - Added main application file (app.py) with health and status endpoints. - Created requirements.txt for FastAPI, Uvicorn, and Redis dependencies. - Implemented unit tests for health and status endpoints in test_app.py. - Added Dockerfile for containerizing the application. - Created docker-compose.yml for managing application and Redis services. - Set up CI/CD pipelines with GitHub Actions for building, testing, and deploying to GKE. - Defined Kubernetes deployment and service configurations for the application and Redis. - Added monitoring alerts configuration in alerts.json. - Implemented setup and destroy scripts for managing GCP resources. - Created validation script for incident resolution tracking. - Defined Terraform configuration for GCP infrastructure provisioning. --- README.md | 6 +- gcp/README.md | 170 ++++++++++- gcp/app/app.py | 39 +++ gcp/app/requirements.txt | 3 + gcp/app/tests/test_app.py | 19 ++ gcp/docker/Dockerfile | 14 + gcp/docker/docker-compose.yml | 26 ++ gcp/github-actions/cd.yml | 59 ++++ gcp/github-actions/ci.yml | 33 ++ gcp/kubernetes/app-deployment.yaml | 35 +++ gcp/kubernetes/app-service.yaml | 13 + gcp/kubernetes/namespace.yaml | 4 + gcp/kubernetes/redis-deployment.yaml | 20 ++ gcp/kubernetes/redis-service.yaml | 12 + gcp/monitoring/alerts.json | 36 +++ gcp/scripts/destroy.sh | 51 ++++ gcp/scripts/setup.sh | 103 +++++++ gcp/scripts/validate.sh | 437 +++++++++++++++++++++++++++ gcp/terraform/main.tf | 85 ++++++ gcp/terraform/outputs.tf | 23 ++ gcp/terraform/variables.tf | 16 + 21 files changed, 1192 insertions(+), 12 deletions(-) create mode 100644 gcp/app/app.py create mode 100644 gcp/app/requirements.txt create mode 100644 gcp/app/tests/test_app.py create mode 100644 gcp/docker/Dockerfile create mode 100644 
gcp/docker/docker-compose.yml create mode 100644 gcp/github-actions/cd.yml create mode 100644 gcp/github-actions/ci.yml create mode 100644 gcp/kubernetes/app-deployment.yaml create mode 100644 gcp/kubernetes/app-service.yaml create mode 100644 gcp/kubernetes/namespace.yaml create mode 100644 gcp/kubernetes/redis-deployment.yaml create mode 100644 gcp/kubernetes/redis-service.yaml create mode 100644 gcp/monitoring/alerts.json create mode 100755 gcp/scripts/destroy.sh create mode 100755 gcp/scripts/setup.sh create mode 100755 gcp/scripts/validate.sh create mode 100644 gcp/terraform/main.tf create mode 100644 gcp/terraform/outputs.tf create mode 100644 gcp/terraform/variables.tf diff --git a/README.md b/README.md index 31b4894..a2cb589 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ You've just joined a startup as the DevOps engineer. The previous engineer left, |----------|--------|-------| | Azure | ✅ Available | [azure/README.md](azure/README.md) | | AWS | 🚧 Coming soon | — | -| GCP | 🚧 Coming soon | — | +| GCP | ✅ Available | [gcp/README.md](gcp/README.md) | ## How It Works @@ -42,7 +42,7 @@ You've just joined a startup as the DevOps engineer. The previous engineer left, After resolving all incidents, generate your completion token: ```bash -cd azure/scripts +cd <provider>/scripts ./validate.sh export ``` @@ -50,7 +50,7 @@ Submit your token at [learntocloud.guide](https://learntocloud.guide) to verify ## Cost -~$3-5 per session (Azure). **Always destroy resources when done.** +~$3-5 per session. **Always destroy resources when done.** ## License diff --git a/gcp/README.md b/gcp/README.md index 9e8c708..6c2d110 100644 --- a/gcp/README.md +++ b/gcp/README.md @@ -1,14 +1,166 @@ # GCP DevOps Lab -🚧 **Coming soon.** +Fix a broken DevOps pipeline deployed to Google Cloud. Work through 7 incidents to get the application running. 
-The GCP version of this lab will use equivalent services: +``` +┌─────────────────────────────────────────────────────────────┐ +│ GCP Resources │ +│ │ +│ ┌──────────┐ ┌─────────────────┐ ┌──────────────────┐ │ +│ │ VPC │ │ Artifact Reg. │ │ GKE │ │ +│ │ │ │ (images) │──▶│ ┌─────┐ ┌──────┐ │ │ +│ │ Subnet │ │ │ │ │ App │ │Redis │ │ │ +│ │ │ └─────────────────┘ │ └─────┘ └──────┘ │ │ +│ └──────────┘ └──────────────────┘ │ +│ │ +│ ┌──────────────────┐ ┌───────────────────────────────┐ │ +│ │ Cloud Logging │ │ Cloud Monitoring │ │ +│ │ Workspace │ │ Metrics + Alerts │ │ +│ └──────────────────┘ └───────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` -| Azure | GCP Equivalent | -|-------|---------------| -| ACR | Artifact Registry | -| AKS | GKE | -| Azure Monitor | Cloud Monitoring | -| VNet | VPC | +## Prerequisites -Want to help build it? See our [Contributing Guide](../CONTRIBUTING.md). 
+- [Google Cloud CLI](https://cloud.google.com/sdk/docs/install) +- [Terraform](https://developer.hashicorp.com/terraform/install) (v1.0+) +- [Docker](https://docs.docker.com/get-docker/) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) + +## Getting Started + +1. Clone this repo and navigate to the GCP scripts: + ```bash + git clone https://github.com/learntocloud/devops-lab + cd devops-lab/gcp/scripts + ``` + +2. Log in to Google Cloud: + ```bash + gcloud auth login + gcloud auth application-default login + ``` + +3. Run the setup script: + ```bash + chmod +x *.sh + ./setup.sh + ``` + +**Cost**: ~$3-5/session. Destroy resources when done. + +--- + +## Incident Queue + +You're the new DevOps engineer. Seven incidents are waiting. Diagnose and fix each one. + +--- + +### INC-001: Container Image Won't Build + +**Priority:** High +**Reported by:** Development Team +**Tools:** `docker` CLI + +> "We can't build the app's Docker image. The `docker build` command fails immediately with errors. The Dockerfile is at `gcp/docker/Dockerfile`. We need the image to build successfully and the container to start and respond on the correct port." + +**What to fix:** `gcp/docker/Dockerfile` + +--- + +### INC-002: Local Dev Environment Broken + +**Priority:** High +**Reported by:** Development Team +**Tools:** `docker compose` CLI + +> "Docker Compose won't bring up our local environment. The app can't connect to Redis, and the port mapping seems wrong. The compose file is at `gcp/docker/docker-compose.yml`. We need both services (app + redis) to start and communicate." + +**What to fix:** `gcp/docker/docker-compose.yml` + +--- + +### INC-003: CI Pipeline is Broken + +**Priority:** High +**Reported by:** Engineering Manager +**Tools:** GitHub Actions YAML reference + +> "Our CI workflow has YAML errors and the steps are in the wrong order. Tests run before dependencies are installed, and some action versions look wrong. The workflow is at `gcp/github-actions/ci.yml`." 
+ +**What to fix:** `gcp/github-actions/ci.yml` + +--- + +### INC-004: Terraform Can't Provision Infrastructure + +**Priority:** Critical +**Reported by:** Platform Team +**Tools:** `terraform` CLI, `gcloud` CLI + +> "Terraform plan fails with multiple errors. There are typos in resource names, IAM role bindings are wrong, and network ranges conflict. The config is at `gcp/terraform/`. We need the VPC, Artifact Registry, GKE cluster, and monitoring workspace to all deploy successfully." + +**What to fix:** `gcp/terraform/main.tf`, `gcp/terraform/outputs.tf` + +--- + +### INC-005: Deployment Pipeline Failing + +**Priority:** High +**Reported by:** Release Team +**Tools:** GitHub Actions YAML reference, `gcloud` CLI + +> "The CD pipeline can't deploy to GKE. The GCP auth action is misconfigured, and deployment steps are incomplete. The workflow is at `gcp/github-actions/cd.yml`." + +**What to fix:** `gcp/github-actions/cd.yml` + +--- + +### INC-006: Kubernetes Deployment Crashing + +**Priority:** Critical +**Reported by:** SRE Team +**Tools:** `kubectl` CLI + +> "Pods won't start in GKE. The deployments have wrong API versions, label selectors don't match between deployments and services, container ports are wrong, and the readiness probe is hitting an endpoint that doesn't exist. Manifests are in `gcp/kubernetes/`." + +**What to fix:** `gcp/kubernetes/app-deployment.yaml`, `gcp/kubernetes/app-service.yaml`, `gcp/kubernetes/redis-deployment.yaml`, `gcp/kubernetes/redis-service.yaml` + +--- + +### INC-007: Monitoring Not Working + +**Priority:** Medium +**Reported by:** Observability Team +**Tools:** Cloud Monitoring alert policy JSON + +> "The pod restart alert is disabled and should be enabled. Our monitoring config at `gcp/monitoring/alerts.json` needs fixing. The alert should be severity 2 (not 1), and should evaluate every 60 seconds (not 300)." 
+ +**What to fix:** `gcp/monitoring/alerts.json` + +--- + +## Verify Your Fixes + +Check incident status anytime: + +```bash +cd gcp/scripts +./validate.sh +``` + +Generate your completion token after all incidents are resolved: + +```bash +./validate.sh export +``` + +## Clean Up + +**Always destroy resources when done to avoid charges:** + +```bash +cd gcp/scripts +./destroy.sh +``` diff --git a/gcp/app/app.py b/gcp/app/app.py new file mode 100644 index 0000000..0e5a439 --- /dev/null +++ b/gcp/app/app.py @@ -0,0 +1,39 @@ +from fastapi import FastAPI +import redis +import os + +app = FastAPI(title="DevOps Lab App") + +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) + + +def get_redis(): + try: + r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True) + r.ping() + return r + except redis.ConnectionError: + return None + + +@app.get("/health") +def health(): + r = get_redis() + redis_status = "connected" if r else "disconnected" + return {"status": "healthy", "redis": redis_status} + + +@app.get("/api/status") +def status(): + r = get_redis() + if r: + visits = r.incr("visits") + else: + visits = -1 + return { + "app": "devops-lab", + "version": "1.0.0", + "visits": visits, + "redis": "connected" if r else "disconnected", + } diff --git a/gcp/app/requirements.txt b/gcp/app/requirements.txt new file mode 100644 index 0000000..eceecf6 --- /dev/null +++ b/gcp/app/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.115.0 +uvicorn==0.30.0 +redis==5.0.0 diff --git a/gcp/app/tests/test_app.py b/gcp/app/tests/test_app.py new file mode 100644 index 0000000..5a3202e --- /dev/null +++ b/gcp/app/tests/test_app.py @@ -0,0 +1,19 @@ +from fastapi.testclient import TestClient +from app import app + +client = TestClient(app) + + +def test_health(): + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + + +def test_status(): + response = 
client.get("/api/status") + assert response.status_code == 200 + data = response.json() + assert data["app"] == "devops-lab" + assert data["version"] == "1.0.0" diff --git a/gcp/docker/Dockerfile b/gcp/docker/Dockerfile new file mode 100644 index 0000000..0fdbc85 --- /dev/null +++ b/gcp/docker/Dockerfile @@ -0,0 +1,14 @@ +# Dockerfile for DevOps Lab App +FROM python:3.11-slm + +WORKDIR /src + +COPY app/requirements.txt . + +RUN pip install -r requirements.txt + +COPY app/ . + +EXPOSE 5000 + +CMD ["python", "app.py"] diff --git a/gcp/docker/docker-compose.yml b/gcp/docker/docker-compose.yml new file mode 100644 index 0000000..fa40f27 --- /dev/null +++ b/gcp/docker/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3.8" + +services: + app: + build: + context: . + dockerfile: Dockerfile + ports: + - "8000:5000" + environment: + - REDIS_HOST=cache + - REDIS_PORT=6379 + depends_on: + - cache + networks: + - backend + + redis: + image: redis:alpine + ports: + - "6379:6379" + volumes: + - redis_data:/data + +volumes: + redis_data: diff --git a/gcp/github-actions/cd.yml b/gcp/github-actions/cd.yml new file mode 100644 index 0000000..756c551 --- /dev/null +++ b/gcp/github-actions/cd.yml @@ -0,0 +1,59 @@ +name: CD Pipeline + +on: + workflow_dispatch: + push: + branches: [main] + +env: + GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} + GKE_CLUSTER: ${{ secrets.GKE_CLUSTER_NAME }} + GKE_LOCATION: ${{ secrets.GKE_LOCATION }} + GAR_LOCATION: ${{ secrets.GAR_LOCATION }} + GAR_REPOSITORY: ${{ secrets.GAR_REPOSITORY }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + + - name: Set up gcloud CLI + uses: google-github-actions/setup-gcloud@v2 + + - name: Configure Docker for Artifact Registry + run: gcloud auth configure-docker ${{ env.GAR_LOCATION }}-docker.pkg.dev --quiet + + - name: Build 
and push image + run: | + docker build -f gcp/docker/Dockerfile -t ${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.GAR_REPOSITORY }}/devops-lab-app:${{ github.sha }} . + docker push ${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.GAR_REPOSITORY }}/devops-lab-app:${{ github.sha }} + + deploy-to-gke: + runs-on: ubuntu-latest + needs: build-and-push + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + + - name: Set up gcloud CLI + uses: google-github-actions/setup-gcloud@v2 + + - name: Get GKE credentials + run: | + gcloud container clusters get-credentials ${{ env.GKE_CLUSTER }} --region ${{ env.GKE_LOCATION }} --project ${{ env.GCP_PROJECT_ID }} + + - name: Deploy to GKE + run: | + kubectl set image deployment/devops-lab-app app=${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.GAR_REPOSITORY }}/devops-lab-app:${{ github.sha }} -n devops-lab diff --git a/gcp/github-actions/ci.yml b/gcp/github-actions/ci.yml new file mode 100644 index 0000000..3279da2 --- /dev/null +++ b/gcp/github-actions/ci.yml @@ -0,0 +1,33 @@ +name: CI Pipeline + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v99 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Run tests + working-directory: ./gcp/app + run: | + python -m pytest tests/ -v + + - name: Install dependencies + working-directory: ./gcp/app + run: | + pip install -r requirements.txt + + - name: Build Docker image + run: | + docker build -f gcp/docker/Dockerfile -t devops-lab-app . 
diff --git a/gcp/kubernetes/app-deployment.yaml b/gcp/kubernetes/app-deployment.yaml new file mode 100644 index 0000000..430ca3c --- /dev/null +++ b/gcp/kubernetes/app-deployment.yaml @@ -0,0 +1,35 @@ +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: devops-lab-app + namespace: devops-lab +spec: + replicas: 2 + selector: + matchLabels: + app: devopslab-app + template: + metadata: + labels: + app: devops-lab-app + spec: + containers: + - name: app + image: ARTIFACT_REGISTRY/devops-lab-app:latest + ports: + - containerPort: 5000 + env: + - name: REDIS_HOST + value: "redis" + - name: REDIS_PORT + value: "6379" + livenessProbe: + httpGet: + path: /health + port: 5000 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 5000 + initialDelaySeconds: 5 diff --git a/gcp/kubernetes/app-service.yaml b/gcp/kubernetes/app-service.yaml new file mode 100644 index 0000000..0cf9091 --- /dev/null +++ b/gcp/kubernetes/app-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-lab-app + namespace: devops-lab +spec: + type: LoadBalancer + selector: + app: devopslab-app + ports: + - protocol: TCP + port: 80 + targetPort: 5000 diff --git a/gcp/kubernetes/namespace.yaml b/gcp/kubernetes/namespace.yaml new file mode 100644 index 0000000..5dedf0b --- /dev/null +++ b/gcp/kubernetes/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: devops-lab diff --git a/gcp/kubernetes/redis-deployment.yaml b/gcp/kubernetes/redis-deployment.yaml new file mode 100644 index 0000000..2fbea7c --- /dev/null +++ b/gcp/kubernetes/redis-deployment.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + namespace: devops-lab +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis-cache + spec: + containers: + - name: redis + image: redis:alpine + ports: + - containerPort: 6380 diff --git a/gcp/kubernetes/redis-service.yaml 
b/gcp/kubernetes/redis-service.yaml new file mode 100644 index 0000000..153300b --- /dev/null +++ b/gcp/kubernetes/redis-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: redis + namespace: devops-lab +spec: + selector: + app: redis + ports: + - protocol: TCP + port: 6379 + targetPort: 6380 diff --git a/gcp/monitoring/alerts.json b/gcp/monitoring/alerts.json new file mode 100644 index 0000000..c7f411c --- /dev/null +++ b/gcp/monitoring/alerts.json @@ -0,0 +1,36 @@ +{ + "alertPolicies": [ + { + "displayName": "high-cpu-alert", + "enabled": true, + "severity": 2, + "conditions": [ + { + "displayName": "cpu-check", + "conditionThreshold": { + "filter": "metric.type=\"kubernetes.io/node/cpu/core_usage_time\"", + "comparison": "COMPARISON_GT", + "thresholdValue": 0.9, + "duration": "60s" + } + } + ] + }, + { + "displayName": "pod-restart-alert", + "enabled": false, + "severity": 1, + "conditions": [ + { + "displayName": "restart-check", + "conditionThreshold": { + "filter": "metric.type=\"kubernetes.io/container/restart_count\"", + "comparison": "COMPARISON_GT", + "thresholdValue": 3, + "duration": "300s" + } + } + ] + } + ] +} diff --git a/gcp/scripts/destroy.sh b/gcp/scripts/destroy.sh new file mode 100755 index 0000000..0dc7597 --- /dev/null +++ b/gcp/scripts/destroy.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# ============================================================================= +# DEVOPS LAB - DESTROY SCRIPT +# Tears down all GCP resources +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TERRAFORM_DIR="${SCRIPT_DIR}/../terraform" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +echo "" +echo -e "${RED}============================================${NC}" +echo -e "${RED} DEVOPS LAB - DESTROY RESOURCES${NC}" +echo -e "${RED}============================================${NC}" +echo "" + +if [ -f 
"${TERRAFORM_DIR}/terraform.tfstate" ]; then + echo "Found Terraform state. Attempting terraform destroy..." + echo "This will destroy ALL resources created by the lab." + echo "" + read -p "Continue? (y/N) " -n 1 -r + echo "" + + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Aborted." + exit 0 + fi + + cd "$TERRAFORM_DIR" + terraform destroy -auto-approve || true + + rm -f terraform.tfstate terraform.tfstate.backup + rm -rf .terraform .terraform.lock.hcl + echo -e "${GREEN}Terraform state cleaned up.${NC}" +else + echo "No Terraform state found." + echo "" + echo "To manually check for resources:" + echo " gcloud container clusters list" + echo " gcloud artifacts repositories list --location " +fi + +echo "" +echo -e "${GREEN}Cleanup complete.${NC}" +echo "" diff --git a/gcp/scripts/setup.sh b/gcp/scripts/setup.sh new file mode 100755 index 0000000..74a7271 --- /dev/null +++ b/gcp/scripts/setup.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# ============================================================================= +# DEVOPS LAB - SETUP SCRIPT +# Initializes the lab and optionally deploys infrastructure +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +echo "" +echo -e "${BLUE}============================================${NC}" +echo -e "${BLUE} DEVOPS LAB - GCP SETUP${NC}" +echo -e "${BLUE}============================================${NC}" +echo "" + +echo "Checking prerequisites..." + +if ! command -v gcloud &> /dev/null; then + echo -e "${RED}Error: Google Cloud CLI not found.${NC}" + echo "Install: https://cloud.google.com/sdk/docs/install" + exit 1 +fi +echo -e " ${GREEN}βœ“${NC} Google Cloud CLI found" + +if ! gcloud auth list --filter=status:ACTIVE --format="value(account)" | grep -q .; then + echo -e "${YELLOW}Not logged in to Google Cloud. 
Running 'gcloud auth login'...${NC}" + gcloud auth login +fi + +if ! gcloud auth application-default print-access-token > /dev/null 2>&1; then + echo -e "${YELLOW}Application default credentials not configured. Running 'gcloud auth application-default login'...${NC}" + gcloud auth application-default login +fi + +ACCOUNT=$(gcloud auth list --filter=status:ACTIVE --format="value(account)" | head -1) +PROJECT_ID=$(gcloud config get-value project 2>/dev/null || true) + +echo -e " ${GREEN}βœ“${NC} Logged in: $ACCOUNT" +if [ -n "$PROJECT_ID" ]; then + echo -e " ${GREEN}βœ“${NC} Active project: $PROJECT_ID" +else + echo -e "${YELLOW}No active gcloud project set.${NC}" + echo "Set one with: gcloud config set project " +fi + +if ! command -v terraform &> /dev/null; then + echo -e "${RED}Error: Terraform not found.${NC}" + echo "Install: https://www.terraform.io/downloads" + exit 1 +fi +echo -e " ${GREEN}βœ“${NC} Terraform found" + +if ! command -v docker &> /dev/null; then + echo -e "${YELLOW}Warning: Docker not found. You'll need it for INC-001 and INC-002.${NC}" +else + echo -e " ${GREEN}βœ“${NC} Docker found" +fi + +if ! command -v kubectl &> /dev/null; then + echo -e "${YELLOW}Warning: kubectl not found. You'll need it for INC-006.${NC}" +else + echo -e " ${GREEN}βœ“${NC} kubectl found" +fi + +echo "" +echo -e "${YELLOW}This lab deploys GCP resources that cost ~\$3-5/session.${NC}" +echo "" +read -p "Continue? (y/N) " -n 1 -r +echo "" + +if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Aborted." + exit 0 +fi + +echo "" +echo "Note: Before deploying, you need to fix the Terraform configuration (INC-004)." +echo "The Terraform files have intentional errors that must be fixed first." +echo "" +echo "To start working on the lab:" +echo " 1. Fix INC-001 (Dockerfile): gcp/docker/Dockerfile" +echo " 2. Fix INC-002 (Compose): gcp/docker/docker-compose.yml" +echo " 3. Fix INC-003 (CI): gcp/github-actions/ci.yml" +echo " 4. 
Fix INC-004 (Terraform): gcp/terraform/main.tf" +echo "" +echo "Once INC-004 is fixed, deploy infrastructure:" +echo " cd gcp/terraform" +echo " terraform init" +echo " terraform plan -var project_id=\"\"" +echo " terraform apply -var project_id=\"\"" +echo "" +echo "Then continue with INC-005 through INC-007." +echo "" +echo "Validate progress: ./validate.sh" +echo "Destroy resources: ./destroy.sh" +echo "" diff --git a/gcp/scripts/validate.sh b/gcp/scripts/validate.sh new file mode 100755 index 0000000..a32809e --- /dev/null +++ b/gcp/scripts/validate.sh @@ -0,0 +1,437 @@ +#!/usr/bin/env bash +# ============================================================================= +# DEVOPS LAB - VALIDATION SCRIPT +# Validates incident resolution and generates completion tokens +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +GCP_DIR="${SCRIPT_DIR}/.." +TERRAFORM_DIR="${GCP_DIR}/terraform" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +INC_001="pending" +INC_002="pending" +INC_003="pending" +INC_004="pending" +INC_005="pending" +INC_006="pending" +INC_007="pending" + +MASTER_SECRET="L2C_CTF_MASTER_2024" +PYTHON_CMD="python3" + +# ============================================================================= +# Runtime Dependencies +# ============================================================================= +ensure_python_yaml() { + if ! command -v python3 > /dev/null 2>&1; then + echo -e "${RED}Error: python3 is required for validation.${NC}" + exit 1 + fi + + if python3 -c "import yaml" > /dev/null 2>&1; then + PYTHON_CMD="python3" + return + fi + + local VENV_DIR="${SCRIPT_DIR}/.venv" + local VENV_PY="${VENV_DIR}/bin/python" + + if [ -x "$VENV_PY" ] && "$VENV_PY" -c "import yaml" > /dev/null 2>&1; then + PYTHON_CMD="$VENV_PY" + return + fi + + echo -e "${YELLOW}PyYAML not found. 
Creating local venv and installing dependencies...${NC}" + python3 -m venv "$VENV_DIR" + + if ! "$VENV_PY" -m pip install --quiet --upgrade pip pyyaml; then + echo -e "${RED}Error: Failed to install PyYAML in ${VENV_DIR}.${NC}" + echo "Install manually: python3 -m pip install pyyaml" + exit 1 + fi + + PYTHON_CMD="$VENV_PY" +} + +# ============================================================================= +# INC-001: Dockerfile +# ============================================================================= +validate_inc_001() { + local DOCKERFILE="${GCP_DIR}/docker/Dockerfile" + [ ! -f "$DOCKERFILE" ] && return + + if docker build -f "$DOCKERFILE" -t devops-lab-app-test "${GCP_DIR}" > /dev/null 2>&1; then + local CID + CID=$(docker run -d -p 18000:8000 -e REDIS_HOST=localhost devops-lab-app-test 2>/dev/null || echo "") + if [ -n "$CID" ]; then + sleep 3 + local RESP + RESP=$(curl -s --max-time 5 http://localhost:18000/health 2>/dev/null || echo "") + docker rm -f "$CID" > /dev/null 2>&1 || true + if echo "$RESP" | grep -q "healthy"; then + INC_001="resolved" + fi + fi + docker rmi devops-lab-app-test > /dev/null 2>&1 || true + fi +} + +# ============================================================================= +# INC-002: Docker Compose +# ============================================================================= +validate_inc_002() { + local COMPOSE="${GCP_DIR}/docker/docker-compose.yml" + [ ! -f "$COMPOSE" ] && return + + if ! 
docker compose -f "$COMPOSE" config > /dev/null 2>&1; then + return + fi + + docker compose -f "$COMPOSE" up -d --build > /dev/null 2>&1 || true + sleep 5 + + local RESP + RESP=$(curl -s --max-time 5 http://localhost:8000/health 2>/dev/null || echo "") + docker compose -f "$COMPOSE" down > /dev/null 2>&1 || true + + if echo "$RESP" | grep -q '"redis":"connected"'; then + INC_002="resolved" + fi +} + +# ============================================================================= +# INC-003: CI Workflow +# ============================================================================= +validate_inc_003() { + local CI="${GCP_DIR}/github-actions/ci.yml" + [ ! -f "$CI" ] && return + + if ! "$PYTHON_CMD" -c "import yaml; yaml.safe_load(open('$CI'))" 2>/dev/null; then + return + fi + + if grep -q "@v99" "$CI" 2>/dev/null; then return; fi + if ! grep -q "runs-on:" "$CI" 2>/dev/null; then return; fi + + local INSTALL_LINE TEST_LINE + INSTALL_LINE=$(grep -n "Install dependencies" "$CI" 2>/dev/null | head -1 | cut -d: -f1) + TEST_LINE=$(grep -n "Run tests" "$CI" 2>/dev/null | head -1 | cut -d: -f1) + + if [ -n "$INSTALL_LINE" ] && [ -n "$TEST_LINE" ]; then + if [ "$TEST_LINE" -lt "$INSTALL_LINE" ]; then return; fi + fi + + INC_003="resolved" +} + +# ============================================================================= +# INC-004: Terraform +# ============================================================================= +validate_inc_004() { + local TF="${GCP_DIR}/terraform" + [ ! 
-f "${TF}/main.tf" ] && return + + if grep -q "google_compute_netwrok" "${TF}/main.tf" 2>/dev/null; then return; fi + if grep -q "roles/artifactregsitry.reader" "${TF}/main.tf" 2>/dev/null; then return; fi + + if grep -q 'cluster_ipv4_cidr_block.*10\.1\.0\.0/16' "${TF}/main.tf" 2>/dev/null && \ + grep -q 'services_ipv4_cidr_block.*10\.1\.0\.0/20' "${TF}/main.tf" 2>/dev/null; then + return + fi + + cd "$TF" + if terraform init -backend=false > /dev/null 2>&1 && terraform validate > /dev/null 2>&1; then + INC_004="resolved" + fi + cd "$SCRIPT_DIR" +} + +# ============================================================================= +# INC-005: CD Workflow +# ============================================================================= +validate_inc_005() { + local CD="${GCP_DIR}/github-actions/cd.yml" + [ ! -f "$CD" ] && return + + if ! "$PYTHON_CMD" -c "import yaml; yaml.safe_load(open('$CD'))" 2>/dev/null; then return; fi + if ! grep -q "google-github-actions/auth@v2" "$CD" 2>/dev/null; then return; fi + if grep -q "credentials_json" "$CD" 2>/dev/null; then return; fi + if ! grep -q "kubectl" "$CD" 2>/dev/null; then return; fi + + INC_005="resolved" +} + +# ============================================================================= +# INC-006: Kubernetes +# ============================================================================= +validate_inc_006() { + local K="${GCP_DIR}/kubernetes" + [ ! 
-f "${K}/app-deployment.yaml" ] && return + + if grep -q "v1beta1" "${K}/app-deployment.yaml" 2>/dev/null; then return; fi + + local SEL TMPL + SEL=$(grep -A2 "matchLabels" "${K}/app-deployment.yaml" 2>/dev/null | grep "app:" | head -1 | awk '{print $2}') + TMPL=$(sed -n '/template:/,$ p' "${K}/app-deployment.yaml" 2>/dev/null | grep -A2 "labels:" | grep "app:" | head -1 | awk '{print $2}') + if [ "$SEL" != "$TMPL" ]; then return; fi + + local RSEL RTMPL + RSEL=$(grep -A2 "matchLabels" "${K}/redis-deployment.yaml" 2>/dev/null | grep "app:" | head -1 | awk '{print $2}') + RTMPL=$(sed -n '/template:/,$ p' "${K}/redis-deployment.yaml" 2>/dev/null | grep -A2 "labels:" | grep "app:" | head -1 | awk '{print $2}') + if [ "$RSEL" != "$RTMPL" ]; then return; fi + + if grep -q "containerPort: 6380" "${K}/redis-deployment.yaml" 2>/dev/null; then return; fi + if grep -q "ARTIFACT_REGISTRY" "${K}/app-deployment.yaml" 2>/dev/null; then return; fi + if grep -q "/ready" "${K}/app-deployment.yaml" 2>/dev/null; then return; fi + if grep -q "containerPort: 5000" "${K}/app-deployment.yaml" 2>/dev/null; then return; fi + + INC_006="resolved" +} + +# ============================================================================= +# INC-007: Monitoring +# ============================================================================= +validate_inc_007() { + local ALERTS="${GCP_DIR}/monitoring/alerts.json" + [ ! 
-f "$ALERTS" ] && return + + local ENABLED SEVERITY FREQ + ENABLED=$(python3 -c " +import json +with open('$ALERTS') as f: + data = json.load(f) +for p in data.get('alertPolicies', []): + if p.get('displayName') == 'pod-restart-alert': + print(p.get('enabled')) +" 2>/dev/null || echo "false") + + SEVERITY=$(python3 -c " +import json +with open('$ALERTS') as f: + data = json.load(f) +for p in data.get('alertPolicies', []): + if p.get('displayName') == 'pod-restart-alert': + print(p.get('severity')) +" 2>/dev/null || echo "") + + FREQ=$(python3 -c " +import json +with open('$ALERTS') as f: + data = json.load(f) +for p in data.get('alertPolicies', []): + if p.get('displayName') == 'pod-restart-alert': + print(p.get('conditions', [{}])[0].get('conditionThreshold', {}).get('duration')) +" 2>/dev/null || echo "") + + if [ "$ENABLED" = "True" ] && [ "$SEVERITY" = "2" ] && [ "$FREQ" = "60s" ]; then + INC_007="resolved" + fi +} + +# ============================================================================= +# Token Generation +# ============================================================================= +get_deployment_id() { + if [ -f "${TERRAFORM_DIR}/terraform.tfstate" ]; then + cd "$TERRAFORM_DIR" + local DID + DID=$(terraform output -raw deployment_id 2>/dev/null || echo "") + cd "$SCRIPT_DIR" + if [ -n "$DID" ]; then + echo "$DID" + return + fi + fi + echo "local-$(date +%s | shasum -a 256 | head -c 16)" +} + +generate_verification_token() { + local GITHUB_USER="$1" + local DEPLOYMENT_ID + DEPLOYMENT_ID=$(get_deployment_id) + + local TIMESTAMP=$(date +%s) + local COMPLETION_DATE=$(date -u +"%Y-%m-%d") + local COMPLETION_TIME=$(date -u +"%H:%M:%S") + + local VERIFICATION_SECRET + VERIFICATION_SECRET=$(echo -n "${MASTER_SECRET}:${DEPLOYMENT_ID}" | shasum -a 256 | cut -d' ' -f1) + + local 
PAYLOAD='{"github_username":"'"$GITHUB_USER"'","date":"'"$COMPLETION_DATE"'","time":"'"$COMPLETION_TIME"'","timestamp":'"$TIMESTAMP"',"challenge":"devops-lab-gcp","challenges":7,"instance_id":"'"$DEPLOYMENT_ID"'"}' + + local SIGNATURE + SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$VERIFICATION_SECRET" | sed 's/^.* //') + + local TOKEN_DATA='{"payload":'"$PAYLOAD"',"signature":"'"$SIGNATURE"'"}' + echo -n "$TOKEN_DATA" | base64 +} + +# ============================================================================= +# Display +# ============================================================================= +show_status() { + echo "" + echo "============================================" + echo " DevOps Lab - Incident Status" + echo "============================================" + + local RESOLVED=0 + + for INC_VAR in INC_001 INC_002 INC_003 INC_004 INC_005 INC_006 INC_007; do + local NUM="${INC_VAR#INC_}" + local LABEL="" + case "$NUM" in + 001) LABEL="Dockerfile" ;; + 002) LABEL="Docker Compose" ;; + 003) LABEL="CI Pipeline" ;; + 004) LABEL="Terraform" ;; + 005) LABEL="CD Pipeline" ;; + 006) LABEL="Kubernetes" ;; + 007) LABEL="Monitoring" ;; + esac + + local STATUS="${!INC_VAR}" + if [ "$STATUS" = "resolved" ]; then + echo -e " ${GREEN}βœ“${NC} INC-${NUM} - ${LABEL}" + RESOLVED=$((RESOLVED + 1)) + else + echo -e " ${RED}βœ—${NC} INC-${NUM} - ${LABEL}" + fi + done + + echo "" + echo " Resolved: $RESOLVED / 7" + echo "" + + if [ $RESOLVED -eq 7 ]; then + echo -e "${GREEN}============================================${NC}" + echo -e "${GREEN} ALL INCIDENTS RESOLVED${NC}" + echo -e "${GREEN}============================================${NC}" + echo "" + echo -e " Run ${CYAN}./validate.sh export${NC} to generate" + echo " your completion token." 
+ echo "" + fi +} + +export_token() { + validate_inc_001 + validate_inc_002 + validate_inc_003 + validate_inc_004 + validate_inc_005 + validate_inc_006 + validate_inc_007 + + local RESOLVED=0 + for INC_VAR in INC_001 INC_002 INC_003 INC_004 INC_005 INC_006 INC_007; do + [ "${!INC_VAR}" = "resolved" ] && RESOLVED=$((RESOLVED + 1)) + done + + if [ $RESOLVED -ne 7 ]; then + show_status + echo -e "${RED}Error: Not all incidents resolved.${NC}" + exit 1 + fi + + echo "" + echo -e "${GREEN}============================================${NC}" + echo -e "${GREEN} DEVOPS LAB - EXPORT TOKEN${NC}" + echo -e "${GREEN}============================================${NC}" + echo "" + echo "Enter your GitHub username:" + echo -n "> " + read GITHUB_USER + + if [ -z "$GITHUB_USER" ]; then + echo -e "${RED}Error: GitHub username required.${NC}" + exit 1 + fi + + echo "" + echo "Generating completion token..." + echo "" + + local TOKEN + TOKEN=$(generate_verification_token "$GITHUB_USER") + + echo -e "${GREEN}Your completion token:${NC}" + echo "" + echo "--- BEGIN L2C DEVOPS LAB TOKEN ---" + echo "$TOKEN" + echo "--- END L2C DEVOPS LAB TOKEN ---" + echo "" + echo "Token details:" + echo " GitHub User: $GITHUB_USER" + echo " Challenge: devops-lab-gcp" + echo " Completed: $(date -u +"%Y-%m-%d %H:%M:%S UTC")" + echo "" + echo -e "${CYAN}Submit this token at: https://learntocloud.guide${NC}" + echo "" +} + +verify_token() { + local TOKEN="$1" + [ -z "$TOKEN" ] && echo "Usage: $0 verify " && exit 1 + + echo "" + echo "Verifying token..." 
+ + local DECODED + DECODED=$(echo "$TOKEN" | base64 -d 2>/dev/null || echo "") + [ -z "$DECODED" ] && echo -e "${RED}Error: Invalid token.${NC}" && exit 1 + + local PAYLOAD PROVIDED_SIG INSTANCE_ID + PAYLOAD=$(echo "$DECODED" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps(d['payload'],separators=(',',':')))" 2>/dev/null) + PROVIDED_SIG=$(echo "$DECODED" | python3 -c "import sys,json; print(json.load(sys.stdin)['signature'])" 2>/dev/null) + INSTANCE_ID=$(echo "$DECODED" | python3 -c "import sys,json; print(json.load(sys.stdin)['payload']['instance_id'])" 2>/dev/null) + + [ -z "$PAYLOAD" ] || [ -z "$PROVIDED_SIG" ] && echo -e "${RED}Error: Parse failed.${NC}" && exit 1 + + local VSECRET EXPECTED_SIG + VSECRET=$(echo -n "${MASTER_SECRET}:${INSTANCE_ID}" | shasum -a 256 | cut -d' ' -f1) + EXPECTED_SIG=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$VSECRET" | sed 's/^.* //') + + if [ "$PROVIDED_SIG" = "$EXPECTED_SIG" ]; then + echo -e "${GREEN}βœ“ Token is VALID${NC}" + echo "" + echo "$DECODED" | python3 -c "import sys,json; d=json.load(sys.stdin); [print(f' {k}: {v}') for k,v in d['payload'].items()]" + else + echo -e "${RED}βœ— Token is INVALID${NC}" + exit 1 + fi + echo "" +} + +# ============================================================================= +# Main +# ============================================================================= +main() { + local CMD="${1:-status}" + ensure_python_yaml + case "$CMD" in + status|all) + validate_inc_001 + validate_inc_002 + validate_inc_003 + validate_inc_004 + validate_inc_005 + validate_inc_006 + validate_inc_007 + show_status + ;; + export) export_token ;; + verify) verify_token "$2" ;; + *) echo "Usage: $0 [status|export|verify ]" ;; + esac +} + +main "$@" diff --git a/gcp/terraform/main.tf b/gcp/terraform/main.tf new file mode 100644 index 0000000..a8c35ab --- /dev/null +++ b/gcp/terraform/main.tf @@ -0,0 +1,85 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + 
google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+  zone    = var.zone
+}
+
+resource "random_id" "deployment" { # suffix interpolated into the VPC, registry, cluster and service-account names
+  byte_length = 4
+}
+
+resource "google_compute_netwrok" "main" {
+  name                    = "vpc-devopslab-${random_id.deployment.hex}"
+  auto_create_subnetworks = false
+}
+
+resource "google_compute_subnetwork" "gke" {
+  name          = "snet-gke"
+  ip_cidr_range = "10.0.0.0/16"
+  region        = var.region
+  network       = google_compute_netwrok.main.id
+}
+
+resource "google_artifact_registry_repository" "main" {
+  location      = var.region
+  repository_id = "devopslab-${random_id.deployment.hex}"
+  description   = "DevOps Lab container registry"
+  format        = "DOCKER"
+}
+
+resource "google_container_cluster" "main" {
+  name               = "gke-devopslab-${random_id.deployment.hex}"
+  location           = var.zone
+  initial_node_count = 1
+
+  network    = google_compute_netwrok.main.name
+  subnetwork = google_compute_subnetwork.gke.name
+
+  ip_allocation_policy {
+    cluster_ipv4_cidr_block  = "10.1.0.0/16"
+    services_ipv4_cidr_block = "10.1.0.0/20"
+  }
+
+  deletion_protection = false # presumably so the lab's destroy step can remove the cluster; confirm against provider docs
+}
+
+resource "google_service_account" "gke_nodes" { # identity referenced by the node pool's node_config.service_account
+  account_id   = "gke-nodes-${random_id.deployment.hex}"
+  display_name = "GKE node pool service account"
+}
+
+resource "google_container_node_pool" "primary" {
+  name       = "primary-nodes"
+  location   = var.zone
+  cluster    = google_container_cluster.main.name
+  node_count = 1
+
+  node_config {
+    machine_type    = "e2-standard-2"
+    service_account = google_service_account.gke_nodes.email
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform"
+    ]
+  }
+}
+
+resource "google_project_iam_member" "gke_artifact_reader" { # project-level role binding for the node service account
+  project = var.project_id
+  role    = "roles/artifactregsitry.reader"
+  member  = "serviceAccount:${google_service_account.gke_nodes.email}"
+}
diff --git a/gcp/terraform/outputs.tf b/gcp/terraform/outputs.tf
new file 
mode 100644
index 0000000..618950e
--- /dev/null
+++ b/gcp/terraform/outputs.tf
@@ -0,0 +1,23 @@
+output "project_id" { # echoes the input variable back as an output
+  value = var.project_id
+}
+
+output "region" { # echoes the input variable back as an output
+  value = var.region
+}
+
+output "artifact_registry_repository" {
+  value = google_artifact_registry_repository.main.repository_id
+}
+
+output "gke_cluster_name" {
+  value = google_container_cluster.main.name
+}
+
+output "deployment_id" { # the value validate.sh reads via `terraform output -raw deployment_id`
+  value = random_id.deployment.hex
+}
+
+output "vpc_name" {
+  value = google_compute_netwrok.main.name
+}
diff --git a/gcp/terraform/variables.tf b/gcp/terraform/variables.tf
new file mode 100644
index 0000000..e7a94a8
--- /dev/null
+++ b/gcp/terraform/variables.tf
@@ -0,0 +1,16 @@
+variable "project_id" { # declared without a default, so a value must be supplied
+  description = "GCP project ID"
+  type        = string
+}
+
+variable "region" { # used by the provider, the subnetwork and the Artifact Registry repository
+  description = "GCP region"
+  type        = string
+  default     = "us-central1"
+}
+
+variable "zone" { # used by the provider, the GKE cluster and its node pool
+  description = "GCP zone"
+  type        = string
+  default     = "us-central1-a"
+}