From 00621be633b927e612aaf9065fa0ee47bb0592fa Mon Sep 17 00:00:00 2001
From: Jashwanth
Date: Fri, 26 Jun 2026 19:25:55 +0530
Subject: [PATCH] =?UTF-8?q?feat:=20Prometheus=20+=20Grafana=20observabilit?=
 =?UTF-8?q?y=20=E2=80=94=20Week=2011?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend:
- prometheus-fastapi-instrumentator added to requirements
- /metrics endpoint exposed on FastAPI app
- Tracks: request rate, latency, error rate per endpoint

Kubernetes:
- prometheus-values.yaml: kube-prometheus-stack Helm values
- Scrapes crms-backend:8000/metrics automatically
- Grafana dashboard: API rate, p99 latency, CPU, memory panels

Alerts:
- CRMSHighErrorRate: fires if error rate > 10% for 2 mins
- CRMSHighLatency: fires if p99 > 1s for 5 mins
- CRMSPodDown: fires if no backend pods available

Week 11 - Observability phase
---
 backend/app/main.py                           |  9 ++-
 backend/requirements.txt                      |  1 +
 observability/deploy-monitoring.sh            | 45 +++++++++++++++
 .../grafana-dashboards/crms-dashboard.json    | 58 +++++++++++++++++++
 observability/prometheus-rules.yaml           | 53 +++++++++++++++++
 observability/prometheus-values.yaml          | 54 ++++++++++++++++++
 6 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 observability/deploy-monitoring.sh
 create mode 100644 observability/grafana-dashboards/crms-dashboard.json
 create mode 100644 observability/prometheus-rules.yaml
 create mode 100644 observability/prometheus-values.yaml

diff --git a/backend/app/main.py b/backend/app/main.py
index 9db7e41..607af1e 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.api import auth, results
+from prometheus_fastapi_instrumentator import Instrumentator
 
 app = FastAPI(
     title="CRMS API",
@@ -10,12 +11,18 @@
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["http://localhost:5173"],
+    allow_origins=[
+        "http://localhost:5173",
+        "http://localhost",
+        "http://localhost:80",
+    ],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
+Instrumentator().instrument(app).expose(app)
+
 app.include_router(auth.router)
 app.include_router(results.router)
 
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 5ace69c..03aa608 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -13,3 +13,4 @@ anyio==4.9.0
 httpx==0.28.1
 pytest==8.3.5
 pytest-asyncio==0.26.0
+prometheus-fastapi-instrumentator==7.0.0
diff --git a/observability/deploy-monitoring.sh b/observability/deploy-monitoring.sh
new file mode 100644
index 0000000..2bad36f
--- /dev/null
+++ b/observability/deploy-monitoring.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Install or upgrade the CRMS monitoring stack (kube-prometheus-stack plus the
+# CRMS dashboard and alert rules) in the "monitoring" namespace.
+# Requires helm and kubectl configured for the target cluster.
+
+# -e: stop on errors, -u: reject unset variables, pipefail: a failing
+# "kubectl create ... | kubectl apply" pipeline must not be masked.
+set -euo pipefail
+
+# Resolve files relative to this script so it can be run from any directory.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "Adding Helm repos..."
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+
+echo "Creating monitoring namespace..."
+kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
+
+# Grafana mounts this ConfigMap for the "crms" dashboard provider
+# (grafana.dashboardsConfigMaps in prometheus-values.yaml), so it has to exist
+# before the chart is installed.
+echo "Publishing CRMS Grafana dashboard..."
+kubectl create configmap crms-grafana-dashboard \
+  --namespace monitoring \
+  --from-file="${SCRIPT_DIR}/grafana-dashboards/crms-dashboard.json" \
+  --dry-run=client -o yaml | kubectl apply -f -
+
+echo "Installing kube-prometheus-stack..."
+helm upgrade --install kube-prometheus-stack \
+  prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  --values "${SCRIPT_DIR}/prometheus-values.yaml" \
+  --wait \
+  --timeout 5m
+
+echo "Applying PrometheusRules..."
+kubectl apply -f "${SCRIPT_DIR}/prometheus-rules.yaml"
+
+echo "Done! Access Grafana:"
+echo "kubectl port-forward svc/kube-prometheus-stack-grafana -n monitoring 3000:80"
+echo "Username: admin"
+# Point at the chart-managed secret instead of printing the password itself.
+echo "Password: kubectl get secret kube-prometheus-stack-grafana -n monitoring -o jsonpath='{.data.admin-password}' | base64 -d"
diff --git a/observability/grafana-dashboards/crms-dashboard.json b/observability/grafana-dashboards/crms-dashboard.json
new file mode 100644
index 0000000..1822ad5
--- /dev/null
+++ b/observability/grafana-dashboards/crms-dashboard.json
@@ -0,0 +1,58 @@
+{
+  "title": "CRMS Application Dashboard",
+  "uid": "crms-main",
+  "tags": ["crms", "fastapi"],
+  "timezone": "browser",
+  "panels": [
+    {
+      "id": 1,
+      "title": "API Request Rate",
+      "type": "graph",
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+      "targets": [
+        {
+          "expr": "sum by (method, handler) (rate(http_requests_total{job=\"crms-backend\"}[5m]))",
+          "legendFormat": "{{method}} {{handler}}"
+        }
+      ]
+    },
+    {
+      "id": 2,
+      "title": "API Response Time p99",
+      "type": "graph",
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.99, sum by (le) (rate(http_request_duration_highr_seconds_bucket{job=\"crms-backend\"}[5m])))",
+          "legendFormat": "p99 latency"
+        }
+      ]
+    },
+    {
+      "id": 3,
+      "title": "Pod CPU Usage",
+      "type": "graph",
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+      "targets": [
+        {
+          "expr": "rate(container_cpu_usage_seconds_total{namespace=\"crms\"}[5m])",
+          "legendFormat": "{{pod}}"
+        }
+      ]
+    },
+    {
+      "id": 4,
+      "title": "Pod Memory Usage",
+      "type": "graph",
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
+      "targets": [
+        {
+          "expr": "container_memory_usage_bytes{namespace=\"crms\"}",
+          "legendFormat": "{{pod}}"
+        }
+      ]
+    }
+  ],
+  "schemaVersion": 27,
+  "version": 1
+}
diff --git a/observability/prometheus-rules.yaml b/observability/prometheus-rules.yaml
new file mode 100644
index 0000000..a528078
--- /dev/null
+++ b/observability/prometheus-rules.yaml
@@ -0,0 +1,53 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: crms-alerts
+  namespace: monitoring
+  labels:
+    release: kube-prometheus-stack
+spec:
+  groups:
+    - name: crms.rules
+      rules:
+        # Backend series are selected with job="crms-backend": the static
+        # scrape config in prometheus-values.yaml adds no "app" label.
+        - alert: CRMSHighErrorRate
+          # Ratio of 5xx responses to all requests, not the raw 5xx rate.
+          expr: |
+            sum(rate(http_requests_total{job="crms-backend",status=~"5.."}[5m]))
+              /
+            sum(rate(http_requests_total{job="crms-backend"}[5m]))
+              > 0.1
+          for: 2m
+          labels:
+            severity: critical
+          annotations:
+            summary: "CRMS API error rate above 10%"
+
+        - alert: CRMSHighLatency
+          # NOTE(review): uses the instrumentator's high-resolution histogram;
+          # the per-handler one has few buckets (largest finite bucket 1s by
+          # default), which would cap p99 at 1.0 - confirm no custom buckets.
+          expr: |
+            histogram_quantile(0.99,
+              sum by (le) (
+                rate(http_request_duration_highr_seconds_bucket{job="crms-backend"}[5m])
+              )
+            ) > 1.0
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: "CRMS API p99 latency above 1 second"
+
+        - alert: CRMSPodDown
+          expr: |
+            kube_deployment_status_replicas_available{
+              namespace="crms",
+              deployment="crms-backend"
+            } < 1
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: "No CRMS backend pods available"
diff --git a/observability/prometheus-values.yaml b/observability/prometheus-values.yaml
new file mode 100644
index 0000000..7307cec
--- /dev/null
+++ b/observability/prometheus-values.yaml
@@ -0,0 +1,54 @@
+prometheus:
+  prometheusSpec:
+    retention: 24h
+    resources:
+      requests:
+        memory: 256Mi
+        cpu: 100m
+      limits:
+        memory: 512Mi
+        cpu: 200m
+    # Static target, so scraped series carry only job="crms-backend" and
+    # instance labels; dashboards and alert rules must select on job.
+    additionalScrapeConfigs:
+      - job_name: crms-backend
+        static_configs:
+          - targets:
+              - crms-backend-service.crms.svc.cluster.local:8000
+        metrics_path: /metrics
+
+grafana:
+  enabled: true
+  # NOTE(review): plaintext credential committed to the repo; prefer
+  # grafana.admin.existingSecret for anything beyond a local cluster.
+  adminPassword: crms-grafana-admin
+  resources:
+    requests:
+      memory: 128Mi
+      cpu: 100m
+    limits:
+      memory: 256Mi
+      cpu: 200m
+  dashboardProviders:
+    dashboardproviders.yaml:
+      apiVersion: 1
+      providers:
+        - name: crms
+          folder: CRMS
+          type: file
+          options:
+            path: /var/lib/grafana/dashboards/crms
+  # Provider name -> ConfigMap holding its dashboards. deploy-monitoring.sh
+  # creates crms-grafana-dashboard; without this the "crms" provider path
+  # above is never populated and the dashboard does not appear.
+  dashboardsConfigMaps:
+    crms: crms-grafana-dashboard
+
+alertmanager:
+  enabled: false
+
+nodeExporter:
+  enabled: true
+
+kubeStateMetrics:
+  enabled: true