From ee4f6cf47e0ec8a2b3f60ca7a687ef73e79b169e Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 13:01:26 -0300 Subject: [PATCH 01/16] infra(aggregation_mode): deploy services with ansible --- .gitignore | 4 + Makefile | 186 +++++ infra/aggregation_mode/ansible/README.md | 659 ++++++++++++++++++ .../ansible/hoodi-inventory.yaml | 73 ++ .../ansible/mainnet-inventory.yaml | 73 ++ .../ansible/playbooks/deploy_all.yaml | 22 + .../ansible/playbooks/gateway.yaml | 143 ++++ .../ansible/playbooks/gateway_stack.yaml | 13 + .../ansible/playbooks/grafana_agg_mode.yaml | 109 +++ .../ansible/playbooks/ini/config-hoodi.ini | 88 +++ .../ansible/playbooks/ini/config-mainnet.ini | 91 +++ .../ansible/playbooks/metrics_stack.yaml | 13 + .../playbooks/pg_autofailover_common.yaml | 100 +++ .../ansible/playbooks/pg_monitor.yaml | 101 +++ .../ansible/playbooks/pg_node.yaml | 110 +++ .../ansible/playbooks/poller.yaml | 107 +++ .../ansible/playbooks/postgres_cluster.yaml | 24 + .../playbooks/postgres_migrations.yaml | 40 ++ .../playbooks/prometheus_agg_mode.yaml | 84 +++ .../ansible/playbooks/rust.yaml | 39 ++ .../ansible/playbooks/setup.yaml | 40 ++ .../config-agg-mode-gateway.yaml.j2 | 17 + .../config-agg-mode-poller.yaml.j2 | 11 + .../templates/grafana/grafana_env.j2 | 9 + .../prometheus/prometheus_agg_mode.yaml.j2 | 51 ++ .../templates/services/gateway.service.j2 | 18 + .../templates/services/poller.service.j2 | 15 + .../services/prometheus_agg_mode.service.j2 | 16 + .../templates/sudoers/gateway-service.j2 | 3 + 29 files changed, 2259 insertions(+) create mode 100644 infra/aggregation_mode/ansible/README.md create mode 100644 infra/aggregation_mode/ansible/hoodi-inventory.yaml create mode 100644 infra/aggregation_mode/ansible/mainnet-inventory.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/deploy_all.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/gateway.yaml create mode 100644 
infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini create mode 100644 infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini create mode 100644 infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/pg_node.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/poller.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/rust.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/setup.yaml create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 diff --git a/.gitignore b/.gitignore index 041927db8..a156fb7b1 100644 --- 
a/.gitignore +++ b/.gitignore @@ -46,3 +46,7 @@ docs/dead_links_report.txt terraform.tfstate terraform.tfstate.backup + +# Aggregation Mode Ansible INI files (track config-*.ini templates, ignore others) +infra/aggregation_mode/ansible/playbooks/ini/*.ini +!infra/aggregation_mode/ansible/playbooks/ini/config-*.ini diff --git a/Makefile b/Makefile index 27dcb36e1..9dce05b8b 100644 --- a/Makefile +++ b/Makefile @@ -1658,3 +1658,189 @@ __NODE_EXPORTER_: install_node_exporter: @./scripts/install_node_exporter.sh + +# ============================================================================== +# Aggregation Mode Ansible Deployment +# ============================================================================== + +AGG_MODE_ANSIBLE_DIR = infra/aggregation_mode/ansible +AGG_MODE_PLAYBOOKS_DIR = $(AGG_MODE_ANSIBLE_DIR)/playbooks +AGG_MODE_INI_DIR = $(AGG_MODE_PLAYBOOKS_DIR)/ini + +# ------------------------------------------------------------------------------ +# Setup: Create INI configuration files +# ------------------------------------------------------------------------------ + +# ------------------------------------------------------------------------------ +# PostgreSQL Cluster Deployment +# ------------------------------------------------------------------------------ + +.PHONY: postgres_deploy +postgres_deploy: ## Deploy PostgreSQL Auto-Failover Cluster. Usage: make postgres_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/postgres_cluster.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "env=$(ENV)" + +.PHONY: postgres_monitor_deploy +postgres_monitor_deploy: ## Deploy PostgreSQL Monitor only. 
Usage: make postgres_monitor_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_monitor.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=$(ENV)" + +.PHONY: postgres_nodes_deploy +postgres_nodes_deploy: ## Deploy PostgreSQL Primary & Secondary. Usage: make postgres_nodes_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_node.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_primary" \ + -e "env=$(ENV)" + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_node.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_secondary" \ + -e "env=$(ENV)" + +.PHONY: postgres_migrations +postgres_migrations: ## Run database migrations. Usage: make postgres_migrations ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/postgres_migrations.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_primary" \ + -e "env=$(ENV)" + +.PHONY: postgres_status +postgres_status: ## Check PostgreSQL cluster status. Usage: make postgres_status ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible postgres_monitor -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -m shell -a "sudo -u postgres pg_autoctl show state --monitor postgres://autoctl_node@localhost:5432/pg_auto_failover" --become + +# ------------------------------------------------------------------------------ +# Gateway & Poller Deployment +# ------------------------------------------------------------------------------ + +.PHONY: gateway_deploy +gateway_deploy: ## Deploy Gateway & Poller on both servers. 
Usage: make gateway_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=$(ENV)" + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_secondary" \ + -e "env=$(ENV)" + +.PHONY: gateway_primary_deploy +gateway_primary_deploy: ## Deploy Gateway & Poller on primary only. Usage: make gateway_primary_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=$(ENV)" + +.PHONY: gateway_secondary_deploy +gateway_secondary_deploy: ## Deploy Gateway & Poller on secondary only. Usage: make gateway_secondary_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_secondary" \ + -e "env=$(ENV)" + +# ------------------------------------------------------------------------------ +# Metrics Deployment +# ------------------------------------------------------------------------------ + +.PHONY: metrics_deploy +metrics_deploy: ## Deploy Prometheus & Grafana. Usage: make metrics_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/metrics_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +.PHONY: prometheus_deploy +prometheus_deploy: ## Deploy Prometheus only. 
Usage: make prometheus_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/prometheus_agg_mode.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +.PHONY: grafana_deploy +grafana_deploy: ## Deploy Grafana only. Usage: make grafana_deploy ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/grafana_agg_mode.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +# ------------------------------------------------------------------------------ +# Full Deployment +# ------------------------------------------------------------------------------ + +.PHONY: agg_mode_deploy_all +agg_mode_deploy_all: ## Deploy entire aggregation mode stack. Usage: make agg_mode_deploy_all ENV=hoodi + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/deploy_all.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "env=$(ENV)" + +# ------------------------------------------------------------------------------ +# Service Management +# ------------------------------------------------------------------------------ + +.PHONY: gateway_restart +gateway_restart: ## Restart gateway service. Usage: make gateway_restart ENV=hoodi HOST=gateway_primary + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ] || [ -z "$(HOST)" ]; then \ + echo "Error: ENV (hoodi or mainnet) and HOST must be set"; \ + exit 1; \ + fi + @ansible $(HOST) -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -m shell -a "sudo systemctl restart gateway" --become + +.PHONY: poller_restart +poller_restart: ## Restart poller service. 
Usage: make poller_restart ENV=hoodi HOST=gateway_primary + @if [ ! -f "$(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml" ] || [ -z "$(HOST)" ]; then \ + echo "Error: ENV (hoodi or mainnet) and HOST must be set"; \ + exit 1; \ + fi + @ansible $(HOST) -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -m shell -a "systemctl --user restart poller" diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md new file mode 100644 index 000000000..24b04bbac --- /dev/null +++ b/infra/aggregation_mode/ansible/README.md @@ -0,0 +1,659 @@ +# Aggregation Mode Ansible Automation + +This directory contains Ansible playbooks and configuration for automating the deployment and management of the Aligned Layer aggregation mode infrastructure. + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Initial Setup](#initial-setup) +- [Deployment](#deployment) +- [Service Management](#service-management) +- [Verification](#verification) +- [Troubleshooting](#troubleshooting) +- [Advanced Usage](#advanced-usage) + +## Overview + +The Ansible automation deploys a complete aggregation mode stack consisting of: + +1. **PostgreSQL Auto-Failover Cluster** (3 servers) + - 1 Monitor node (EC2) + - 2 Data nodes (Primary + Secondary) with automatic failover (Scaleway Elastic Metal) + - Password authentication with scram-sha-256 + +2. **Gateway Service** (2 servers) + - Rust-based gateway with TLS support + - Runs on port 8080 (non-TLS) and port 443 (TLS) + - Systemd service with automatic restart + +3. **Poller Service** (2 servers, colocated with gateway) + - Payment poller service + - User-level systemd service + +4. 
**Metrics Stack** (1 server) + - Prometheus for metrics collection + - Grafana for visualization + - 90-day retention + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Tailscale VPN │ +│ (100.64.0.0/10) │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ PG Monitor │ │ PG Primary │ │ PG Secondary │ │ +│ │ (EC2) │ │ (Scaleway) │ │ (Scaleway) │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ │ +│ └──────────────────┴──────────────────┘ │ +│ pg_auto_failover │ +│ │ +│ ┌────────────────────────┐ ┌────────────────────────┐ │ +│ │ Gateway Primary │ │ Gateway Secondary │ │ +│ │ ├─ Gateway (8080+443)│ │ ├─ Gateway (8080+443) │ │ +│ │ └─ Poller │ │ └─ Poller │ │ +│ └────────────────────────┘ └────────────────────────┘ │ +│ │ +│ ┌────────────────────────┐ │ +│ │ Metrics Server │ │ +│ │ ├─ Prometheus (9090) │ │ +│ │ └─ Grafana (3000) │ │ +│ └────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Prerequisites + +### Local Machine + +1. **Ansible** (version 2.9 or higher) + ```bash + pip install ansible + ``` + +2. **SSH access** to all servers via Tailscale + - Ensure you're connected to the Tailscale VPN + - SSH keys configured for `admin` user on all servers + +3. **TLS Certificates** for Gateway + - Valid TLS certificate and key files + - Can be Let's Encrypt, CA-issued, or self-signed + +### Remote Servers + +All servers are provisioned via Terraform and connected via Tailscale VPN. They should have: +- Ubuntu/Debian-based OS +- `admin` user with sudo privileges +- `app` user for application services (gateway servers) +- `postgres` user will be created automatically for PostgreSQL services +- Tailscale VPN configured + +## Initial Setup + +All configuration is consolidated into environment-specific files with predefined values. 
You only need to fill in sensitive values (passwords, certificate paths). + +### 1. Configure Hoodi Environment + +Edit `playbooks/ini/config-hoodi.ini`: + +All non-sensitive values are already pre-filled. You only need to set: + +```ini +[DEFAULT] +# ... (all values pre-filled) ... + +# REQUIRED: Set a strong password before deploying +db_password=your_secure_password_here + +# REQUIRED: Same password for gateway/poller database access +gateway_db_password=your_secure_password_here + +# REQUIRED: Provide local paths to your TLS certificate files +tls_cert_source_path=/path/to/your/cert.pem +tls_key_source_path=/path/to/your/key.pem + +# REQUIRED: Same password for Grafana Postgres datasource +grafana_postgres_password=your_secure_password_here +``` + +**⚠️ CRITICAL**: All three password fields must be set to the same value before deploying! + +### 2. Configure Mainnet Environment (if needed) + +Edit `playbooks/ini/config-mainnet.ini`: + +Similar to Hoodi, fill in the required values: + +```ini +[DEFAULT] +# ... (most values pre-filled) ... 
+ +# REQUIRED: Set passwords (same as above) +db_password=your_secure_password_here +gateway_db_password=your_secure_password_here +grafana_postgres_password=your_secure_password_here + +# REQUIRED: TLS certificate paths +tls_cert_source_path=/path/to/your/cert.pem +tls_key_source_path=/path/to/your/key.pem + +# TODO: Update these for mainnet deployment +gateway_payment_service_address=0xYourMainnetPaymentServiceAddress +gateway_eth_rpc_url=https://your-mainnet-rpc-url +grafana_rpc_url=https://your-mainnet-rpc-url +``` + +### Configuration File Structure + +The consolidated config files contain all settings organized by component: + +```ini +# config-hoodi.ini structure: +[DEFAULT] +environment=hoodi +git_branch=staging + +# PostgreSQL Configuration +postgres_monitor_hostname=agg-mode-hoodi-postgres-monitor +postgres_primary_hostname=agg-mode-hoodi-postgres-1 +postgres_secondary_hostname=agg-mode-hoodi-postgres-2 +db_name=agg_mode +db_user=autoctl_node +db_password= # ← FILL THIS IN + +# Gateway & Poller Configuration +gateway_network=Hoodi +gateway_payment_service_address=0x7222E0183cE1A96619d0c883e9bfc6b76D4e780e +gateway_eth_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net +gateway_db_password= # ← FILL THIS IN (same as db_password) +# ... other gateway settings ... + +# TLS Certificate Management +tls_cert_source_path= # ← FILL THIS IN +tls_key_source_path= # ← FILL THIS IN + +# Metrics Configuration +grafana_postgres_password= # ← FILL THIS IN (same as db_password) +# ... other metrics settings ... 
+``` + +The Ansible templates will automatically generate two separate database connection URLs for failover: +- `postgres://autoctl_node:password@agg-mode-hoodi-postgres-1:5432/agg_mode` +- `postgres://autoctl_node:password@agg-mode-hoodi-postgres-2:5432/agg_mode` + +The sqlx driver will try them in order for automatic failover + +## Deployment + +### Full Stack Deployment + +To deploy everything in one command: + +```bash +make agg_mode_deploy_all ENV=hoodi +``` + +This will: +1. Deploy PostgreSQL cluster (monitor, primary, secondary) +2. Run database migrations +3. Deploy gateway and poller on both servers +4. Deploy Prometheus and Grafana + +### Step-by-Step Deployment + +For more control, deploy each component separately: + +#### 1. Deploy PostgreSQL Cluster + +```bash +# Deploy complete postgres cluster with password authentication +make postgres_deploy ENV=hoodi +``` + +This will: +- Deploy monitor with scram-sha-256 auth +- Set password for autoctl_node user +- Deploy primary and secondary nodes +- Configure replication with password auth +- Run database migrations + +**Verify cluster status:** +```bash +make postgres_status ENV=hoodi +``` + +Expected output: +``` + Name | Node | Host:Port | TLI: LSN | Connection | Reported State | Assigned State +----------+-------+--------------------+----------------+--------------+---------------------+-------------------- +monitor | 1 | 100.x.x.x:5432 | | | | +node_1 | 2 | 100.x.x.x:5432 | 1: 0/... | read-write | primary | primary +node_2 | 3 | 100.x.x.x:5432 | 1: 0/... | read-only | secondary | secondary +``` + +#### 2. 
Deploy Gateway & Poller + +```bash +# Deploy on both servers +make gateway_deploy ENV=hoodi + +# Or deploy individually +make gateway_primary_deploy ENV=hoodi +make gateway_secondary_deploy ENV=hoodi +``` + +**Verify gateway is running:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" +``` + +**Test endpoint:** +```bash +curl -k https://agg-mode-hoodi-gateway-1/health +``` + +#### 3. Deploy Metrics Stack + +```bash +# Deploy both Prometheus and Grafana +make metrics_deploy ENV=hoodi + +# Or deploy individually +make prometheus_deploy ENV=hoodi +make grafana_deploy ENV=hoodi +``` + +**Access dashboards:** +- Prometheus: `http://agg-mode-hoodi-metrics:9090` +- Grafana: `http://agg-mode-hoodi-metrics:3000` (default credentials: admin/admin) + +## Service Management + +### Restart Services + +**Gateway:** +```bash +make gateway_restart ENV=hoodi HOST=gateway_primary +make gateway_restart ENV=hoodi HOST=gateway_secondary +``` + +**Poller:** +```bash +make poller_restart ENV=hoodi HOST=gateway_primary +make poller_restart ENV=hoodi HOST=gateway_secondary +``` + +### Check Service Status + +**PostgreSQL Cluster:** +```bash +make postgres_status ENV=hoodi +``` + +**Gateway:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 50" +``` + +**Poller:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 50" +``` + +**Prometheus:** +```bash +ssh admin@agg-mode-hoodi-metrics "systemctl --user status prometheus" +``` + +**Grafana:** +```bash +ssh admin@agg-mode-hoodi-metrics "sudo systemctl status grafana-server" +``` + +### View Logs + +**Gateway:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -f" +``` + +**Poller:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -f" +``` + +**PostgreSQL:** +```bash 
+ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -f" +``` + +## Verification + +### PostgreSQL Cluster Health + +1. **Check cluster state:** + ```bash + make postgres_status ENV=hoodi + ``` + +2. **Test password authentication:** + ```bash + ssh admin@agg-mode-hoodi-postgres-1 "PGPASSWORD='your_password' psql -U autoctl_node -h localhost -d agg_mode -c 'SELECT 1'" + ``` + +3. **Verify replication:** + ```bash + ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres psql -d agg_mode -c 'SELECT * FROM pg_stat_replication'" + ``` + +4. **Test failover (optional):** + ```bash + # Stop primary + ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl stop pgautofailover" + + # Wait 30 seconds, check status + make postgres_status ENV=hoodi + # Secondary should now be primary + + # Restart original primary + ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl start pgautofailover" + ``` + +### Gateway Health + +1. **Check HTTP health endpoint:** + ```bash + curl -k https://agg-mode-hoodi-gateway-1/health + ``` + +2. **Check metrics:** + ```bash + curl http://agg-mode-hoodi-gateway-1:9094/metrics + ``` + +3. **Verify database connectivity:** + ```bash + ssh app@agg-mode-hoodi-gateway-1 + PGPASSWORD='your_password' psql -U autoctl_node -h agg-mode-hoodi-postgres-1 -d agg_mode -c "SELECT 1" + ``` + +### Poller Health + +1. **Check last processed block:** + ```bash + ssh app@agg-mode-hoodi-gateway-1 "cat ~/config/proof-aggregator.last_block_fetched.json" + ``` + + The block number should increase over time. + +2. **Check metrics:** + ```bash + curl http://agg-mode-hoodi-gateway-1:9095/metrics + ``` + +### Metrics Stack + +1. **Prometheus targets:** + - Navigate to `http://agg-mode-hoodi-metrics:9090/targets` + - All targets should show as "UP" + +2. 
**Grafana datasources:** + - Navigate to `http://agg-mode-hoodi-metrics:3000` + - Go to Configuration → Data Sources + - Verify Prometheus and PostgreSQL datasources are connected + +## Troubleshooting + +### PostgreSQL Issues + +**Problem: Node fails to join cluster** + +Check monitor logs: +```bash +ssh admin@agg-mode-hoodi-postgres-monitor "sudo journalctl -u pgautofailover -n 100" +``` + +Check node logs: +```bash +ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -n 100" +``` + +**Problem: Password authentication fails** + +Verify password is set correctly in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`). All three password fields must match: +- `db_password` +- `gateway_db_password` +- `grafana_postgres_password` + +Check pg_hba.conf: +```bash +ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres cat /var/lib/postgresql/node/pg_hba.conf" +``` + +Should contain: +``` +host all all 100.64.0.0/10 scram-sha-256 +``` + +### Gateway Issues + +**Problem: Gateway won't start** + +Check logs for errors: +```bash +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 100" +``` + +Common issues: +- Missing TLS certificates → Check paths in `config-{{ env }}.ini` (tls_cert_source_path, tls_key_source_path) +- Database connection failed → Verify password in `config-{{ env }}.ini` (gateway_db_password) +- Port 443 already in use → Check with `sudo lsof -i :443` + +**Problem: TLS certificate errors** + +Verify certificates exist: +```bash +ssh app@agg-mode-hoodi-gateway-1 "ls -la ~/.ssl/" +``` + +Check certificate validity: +```bash +ssh app@agg-mode-hoodi-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" +``` + +### Poller Issues + +**Problem: Poller not syncing blocks** + +Check logs: +```bash +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 100" +``` + +Verify RPC connectivity: +```bash +ssh app@agg-mode-hoodi-gateway-1 "curl -X POST -H 'Content-Type: application/json' --data 
'{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' https://aligned-hoodi-rpc-geth.tail665ae.ts.net" +``` + +### Metrics Issues + +**Problem: Prometheus not scraping targets** + +Check Prometheus logs: +```bash +ssh admin@agg-mode-hoodi-metrics "journalctl --user -u prometheus -n 100" +``` + +Verify targets are reachable from metrics server: +```bash +ssh admin@agg-mode-hoodi-metrics "curl http://agg-mode-hoodi-gateway-1:9094/metrics" +``` + +Check Prometheus config: +```bash +ssh admin@agg-mode-hoodi-metrics "cat ~/config/prometheus.yaml" +``` + +### General Debugging + +**Check Tailscale connectivity:** +```bash +tailscale status +``` + +**Test SSH access to servers:** +```bash +ssh admin@agg-mode-hoodi-postgres-monitor "echo 'Connection successful'" +ssh app@agg-mode-hoodi-gateway-1 "echo 'Connection successful'" +``` + +**Verify Ansible inventory:** +```bash +ansible-inventory -i infra/aggregation_mode/ansible/hoodi-inventory.yaml --list +``` + +## Advanced Usage + +### Running Individual Playbooks + +You can run any playbook directly with ansible-playbook: + +```bash +# Deploy only postgres monitor +ansible-playbook infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=hoodi" + +# Deploy only gateway (no poller) +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=hoodi" +``` + +### Updating Services + +**Update gateway code:** +```bash +ssh app@agg-mode-hoodi-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked +sudo systemctl restart gateway +``` + +**Update poller code:** +```bash +ssh app@agg-mode-hoodi-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin staging +cargo install --path 
aggregation_mode/payments_poller --bin payments_poller --locked +systemctl --user restart poller +``` + +### Redeploy with Latest Code + +To redeploy with the latest code from git, simply run the deployment again: + +```bash +make gateway_deploy ENV=hoodi +``` + +The playbooks will: +1. Pull latest code from the configured branch +2. Rebuild the binaries +3. Restart the services + +### Changing Configuration + +1. Update INI files in `playbooks/ini/` +2. Redeploy the affected service: + ```bash + make gateway_deploy ENV=hoodi + # or + make postgres_deploy ENV=hoodi + ``` + +### Rotating Passwords + +1. Update all three password fields in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`): + - `db_password` + - `gateway_db_password` + - `grafana_postgres_password` +2. Run password update on PostgreSQL: + ```bash + ssh admin@agg-mode-hoodi-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" + ``` +3. Redeploy gateway and metrics: + ```bash + make gateway_deploy ENV=hoodi + make metrics_deploy ENV=hoodi + ``` + +## File Structure + +``` +infra/aggregation_mode/ansible/ +├── README.md # This file +├── hoodi-inventory.yaml # Hoodi environment inventory +├── mainnet-inventory.yaml # Mainnet environment inventory +└── playbooks/ + ├── ini/ # Configuration files + │ ├── config-hoodi.ini # Hoodi config (tracked, fill in passwords) + │ └── config-mainnet.ini # Mainnet config (tracked, fill in passwords) + ├── templates/ # Jinja2 templates + │ ├── config-files/ # Service config templates + │ ├── services/ # Systemd service templates + │ ├── sudoers/ # Sudoers templates + │ ├── prometheus/ # Prometheus config templates + │ └── grafana/ # Grafana config templates + ├── rust.yaml # Rust installation + ├── pg_autofailover_common.yaml # PostgreSQL + pg_auto_failover setup + ├── pg_monitor.yaml # PostgreSQL monitor deployment + ├── pg_node.yaml # PostgreSQL node deployment + ├── 
postgres_migrations.yaml # Database migrations + ├── gateway.yaml # Gateway deployment + ├── poller.yaml # Poller deployment + ├── prometheus_agg_mode.yaml # Prometheus deployment + ├── grafana_agg_mode.yaml # Grafana deployment + ├── postgres_cluster.yaml # Postgres orchestration + ├── gateway_stack.yaml # Gateway + poller orchestration + ├── metrics_stack.yaml # Metrics orchestration + └── deploy_all.yaml # Full stack orchestration +``` + +## Security Notes + +1. **Passwords**: Config files are tracked in git with empty password fields. Fill in passwords locally. Use `git update-index --assume-unchanged config-*.ini` after filling passwords to prevent accidentally committing them. + +2. **TLS Certificates**: Keep private keys secure. The playbooks set appropriate permissions (0600). + +3. **SSH Access**: All servers are only accessible via Tailscale VPN (100.64.0.0/10). + +4. **PostgreSQL**: Uses scram-sha-256 password authentication, not trust mode. + +5. **Firewall**: UFW is configured on all servers with deny-by-default policy. 
+ +## Support + +For issues or questions: +- Check the [Troubleshooting](#troubleshooting) section +- Review logs on the affected server +- Contact the infrastructure team + +## References + +- [PostgreSQL Auto-Failover Documentation](https://pg-auto-failover.readthedocs.io/) +- [Ansible Documentation](https://docs.ansible.com/) +- [Prometheus Documentation](https://prometheus.io/docs/) +- [Grafana Documentation](https://grafana.com/docs/) diff --git a/infra/aggregation_mode/ansible/hoodi-inventory.yaml b/infra/aggregation_mode/ansible/hoodi-inventory.yaml new file mode 100644 index 000000000..4e2f1c91b --- /dev/null +++ b/infra/aggregation_mode/ansible/hoodi-inventory.yaml @@ -0,0 +1,73 @@ +# PostgreSQL Monitor +postgres_monitor: + hosts: + agg-mode-hoodi-postgres-monitor: + ansible_host: agg-mode-hoodi-postgres-monitor + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Primary +postgres_primary: + hosts: + agg-mode-hoodi-postgres-1: + ansible_host: agg-mode-hoodi-postgres-1 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Secondary +postgres_secondary: + hosts: + agg-mode-hoodi-postgres-2: + ansible_host: agg-mode-hoodi-postgres-2 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Cluster (all postgres nodes) +postgres_cluster: + children: + postgres_monitor: + postgres_primary: + postgres_secondary: + +# Gateway Primary +gateway_primary: + hosts: + agg-mode-hoodi-gateway-1: + ansible_host: agg-mode-hoodi-gateway-1 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Secondary +gateway_secondary: + hosts: + agg-mode-hoodi-gateway-2: + ansible_host: agg-mode-hoodi-gateway-2 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Cluster (all gateway nodes) +gateway_cluster: + children: + gateway_primary: + 
gateway_secondary: + +# Metrics Server +metrics: + hosts: + agg-mode-hoodi-metrics: + ansible_host: agg-mode-hoodi-metrics + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# All aggregation mode servers +aggregation_mode: + children: + postgres_cluster: + gateway_cluster: + metrics: diff --git a/infra/aggregation_mode/ansible/mainnet-inventory.yaml b/infra/aggregation_mode/ansible/mainnet-inventory.yaml new file mode 100644 index 000000000..489cc9d8d --- /dev/null +++ b/infra/aggregation_mode/ansible/mainnet-inventory.yaml @@ -0,0 +1,73 @@ +# PostgreSQL Monitor +postgres_monitor: + hosts: + agg-mode-mainnet-postgres-monitor: + ansible_host: agg-mode-mainnet-postgres-monitor + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Primary +postgres_primary: + hosts: + agg-mode-mainnet-postgres-1: + ansible_host: agg-mode-mainnet-postgres-1 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Secondary +postgres_secondary: + hosts: + agg-mode-mainnet-postgres-2: + ansible_host: agg-mode-mainnet-postgres-2 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Cluster (all postgres nodes) +postgres_cluster: + children: + postgres_monitor: + postgres_primary: + postgres_secondary: + +# Gateway Primary +gateway_primary: + hosts: + agg-mode-mainnet-gateway-1: + ansible_host: agg-mode-mainnet-gateway-1 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Secondary +gateway_secondary: + hosts: + agg-mode-mainnet-gateway-2: + ansible_host: agg-mode-mainnet-gateway-2 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Cluster (all gateway nodes) +gateway_cluster: + children: + gateway_primary: + gateway_secondary: + +# Metrics Server +metrics: + hosts: + agg-mode-mainnet-metrics: + 
ansible_host: agg-mode-mainnet-metrics + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# All aggregation mode servers +aggregation_mode: + children: + postgres_cluster: + gateway_cluster: + metrics: diff --git a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml new file mode 100644 index 000000000..160cfe4f1 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml @@ -0,0 +1,19 @@ +# Full aggregation mode stack: PostgreSQL cluster, gateway + poller on both gateways, metrics. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Deploy PostgreSQL Cluster + ansible.builtin.import_playbook: postgres_cluster.yaml + +- name: Deploy Gateway and Poller on Primary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_primary + +- name: Deploy Gateway and Poller on Secondary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_secondary + +- name: Deploy Metrics Stack + ansible.builtin.import_playbook: metrics_stack.yaml + vars: + host: metrics diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml new file mode 100644 index 000000000..04d257a23 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -0,0 +1,141 @@ +# Gateway Setup: import setup.yaml and rust.yaml, then run the gateway play below. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" + gateway_port: "{{ lookup('ini', 'gateway_port', file='ini/config-{{ env }}.ini', default='8080') }}" + gateway_tls_enabled: "{{ lookup('ini', 'gateway_tls_enabled', file='ini/config-{{ env }}.ini', default='true') }}" + gateway_tls_port: "{{ lookup('ini',
'gateway_tls_port', file='ini/config-{{ env }}.ini', default='443') }}" + gateway_tls_cert_path: "{{ lookup('ini', 'gateway_tls_cert_path', file='ini/config-{{ env }}.ini') }}" + gateway_tls_key_path: "{{ lookup('ini', 'gateway_tls_key_path', file='ini/config-{{ env }}.ini') }}" + gateway_db_user: "{{ lookup('ini', 'gateway_db_user', file='ini/config-{{ env }}.ini') }}" + gateway_db_password: "{{ lookup('ini', 'gateway_db_password', file='ini/config-{{ env }}.ini') }}" + gateway_db_name: "{{ lookup('ini', 'gateway_db_name', file='ini/config-{{ env }}.ini') }}" + gateway_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file='ini/config-{{ env }}.ini') }}" + gateway_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file='ini/config-{{ env }}.ini') }}" + gateway_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file='ini/config-{{ env }}.ini', default='5432') }}" + gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file='ini/config-{{ env }}.ini') }}" + gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file='ini/config-{{ env }}.ini') }}" + gateway_network: "{{ lookup('ini', 'gateway_network', file='ini/config-{{ env }}.ini') }}" + gateway_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" + gateway_last_block_fetched_filepath: "{{ lookup('ini', 'gateway_last_block_fetched_filepath', file='ini/config-{{ env }}.ini') }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" + tls_cert_source_path: "{{ lookup('ini', 'tls_cert_source_path', file='ini/config-{{ env }}.ini') }}" + tls_key_source_path: "{{ lookup('ini', 'tls_key_source_path', file='ini/config-{{ env }}.ini') }}" + + tasks: + - name: Install required system packages + become: 
true + apt: + pkg: + - pkg-config + - libssl-dev + - build-essential + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create SSL directory + file: + path: /home/{{ ansible_user }}/.ssl + state: directory + mode: '0700' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Copy TLS certificate + copy: + src: "{{ tls_cert_source_path }}" + dest: "{{ gateway_tls_cert_path }}" + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Copy TLS key + copy: + src: "{{ tls_key_source_path }}" + dest: "{{ gateway_tls_key_path }}" + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + no_log: true + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/gateway/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Build gateway with TLS + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo install --path /home/{{ ansible_user }}/repos/gateway/aligned_layer/aggregation_mode/gateway --bin gateway --features tls --locked + args: + creates: /home/{{ ansible_user }}/.cargo/bin/gateway + + - name: Create config directory + file: + path: /home/{{ ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template gateway config file + template: + src: config-files/config-agg-mode-gateway.yaml.j2 + dest: /home/{{ ansible_user }}/config/config-agg-mode-gateway.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template gateway systemd service + become: true + template: + src: services/gateway.service.j2 + dest: /etc/systemd/system/gateway.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Allow the configured gateway TLS port through UFW + become: true + ufw: + rule: allow + port: "{{ gateway_tls_port }}" + proto: tcp + vars: + ansible_ssh_user: "{{ admin_user
}}" + + - name: Create sudoers file for gateway service management + become: true + template: + src: sudoers/gateway-service.j2 + dest: /etc/sudoers.d/gateway-service + mode: '0440' + validate: 'visudo -cf %s' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start gateway service + become: true + systemd_service: + name: gateway + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml new file mode 100644 index 000000000..eceba3b91 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml @@ -0,0 +1,11 @@ +# Gateway + poller orchestration for the hosts selected by `host`. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Deploy Gateway + ansible.builtin.import_playbook: gateway.yaml + vars: + host: "{{ host }}" + +- name: Deploy Poller + ansible.builtin.import_playbook: poller.yaml + vars: + host: "{{ host }}" diff --git a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml new file mode 100644 index 000000000..388d53ab5 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml @@ -0,0 +1,111 @@ +# Grafana Setup for Aggregation Mode: import setup.yaml, then run the Grafana play below. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + grafana_prometheus_url: "{{ lookup('ini', 'grafana_prometheus_url', file='ini/config-{{ env }}.ini') }}" + grafana_rpc_url: "{{ lookup('ini', 'grafana_rpc_url', file='ini/config-{{ env }}.ini') }}" + grafana_postgres_host: "{{ lookup('ini', 'grafana_postgres_host', file='ini/config-{{ env }}.ini') }}" + grafana_postgres_port: "{{ lookup('ini', 'grafana_postgres_port', file='ini/config-{{ env }}.ini', default='5432') }}" + grafana_postgres_db: "{{ lookup('ini',
'grafana_postgres_db', file='ini/config-{{ env }}.ini') }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file='ini/config-{{ env }}.ini') }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file='ini/config-{{ env }}.ini') }}" + + tasks: + - name: Install required packages + become: true + apt: + pkg: + - apt-transport-https + - software-properties-common + - wget + - gnupg + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create apt keyrings directory + become: true + file: + path: /etc/apt/keyrings + state: directory + mode: '0755' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Download Grafana GPG key + become: true + get_url: + url: https://apt.grafana.com/gpg.key + dest: /tmp/grafana.key + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add Grafana GPG key + become: true + shell: | + gpg --dearmor < /tmp/grafana.key > /etc/apt/keyrings/grafana.gpg + args: + creates: /etc/apt/keyrings/grafana.gpg + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add Grafana APT repository + become: true + lineinfile: + path: /etc/apt/sources.list.d/grafana.list + line: "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Update apt cache + become: true + apt: + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Install Grafana + become: true + apt: + name: grafana + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Template Grafana environment configuration + become: true + template: + src: grafana/grafana_env.j2 + dest: /etc/default/grafana-server-custom + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + # Managed as a marked block so re-runs replace the settings instead of + # appending another copy to the file on every deployment. + - name: Add custom environment to grafana-server defaults + become: true + blockinfile: + path: /etc/default/grafana-server + marker: "# {mark} ANSIBLE MANAGED BLOCK - aggregation mode" + block: "{{ lookup('template', 'grafana/grafana_env.j2') }}" + vars: +
ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start Grafana service + become: true + systemd_service: + name: grafana-server + state: restarted + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini new file mode 100644 index 000000000..d40277519 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -0,0 +1,88 @@ +[DEFAULT] +# ============================================ +# Hoodi Environment Configuration +# ============================================ +# This file contains all configuration for the Hoodi environment. +# Only sensitive values (passwords, certificate paths) need to be filled in. + +# Environment +environment=hoodi +git_branch=staging + +# ============================================ +# PostgreSQL Configuration +# ============================================ +postgres_monitor_hostname=agg-mode-hoodi-postgres-monitor +postgres_primary_hostname=agg-mode-hoodi-postgres-1 +postgres_secondary_hostname=agg-mode-hoodi-postgres-2 +db_name=agg_mode +db_user=autoctl_node +# REQUIRED: Set this password before deploying postgres +db_password= + +# PostgreSQL Monitor +monitor_pgdata=/var/lib/postgresql/monitor +monitor_port=5432 + +# PostgreSQL Nodes +node_pgdata=/var/lib/postgresql/node +node_port=5432 +backup_dir=/var/lib/backup + +# ============================================ +# Gateway & Poller Configuration +# ============================================ +gateway_network=hoodi +gateway_max_daily_proofs=100 +gateway_payment_service_address=0x7222E0183cE1A96619d0c883e9bfc6b76D4e780e +gateway_eth_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net + +# Database connection (uses same credentials as postgres) +gateway_db_user=autoctl_node +# REQUIRED: Set to same password as db_password +gateway_db_password= +gateway_db_name=agg_mode 
+gateway_postgres_primary=agg-mode-hoodi-postgres-1 +gateway_postgres_secondary=agg-mode-hoodi-postgres-2 +gateway_postgres_port=5432 + +# Metrics ports +gateway_metrics_port=9094 +poller_metrics_port=9095 + +# Gateway Service Settings (same for all gateways) +gateway_port=8080 +gateway_tls_enabled=true +gateway_tls_port=443 +gateway_tls_cert_path=/home/app/.ssl/cert.pem +gateway_tls_key_path=/home/app/.ssl/key.pem +gateway_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json + +# Poller Service Settings (same for all pollers) +poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json +last_block_fetched_initial_value=0 + +# ============================================ +# TLS Certificate Management +# ============================================ +# REQUIRED: Provide paths to existing certificates on your local machine +# These will be copied to the gateway servers +tls_cert_source_path= +tls_key_source_path= + +# ============================================ +# Metrics Configuration +# ============================================ +prometheus_version=3.6.0 +gateway_primary_hostname=agg-mode-hoodi-gateway-1 +gateway_secondary_hostname=agg-mode-hoodi-gateway-2 + +# Grafana Configuration +grafana_prometheus_url=http://localhost:9090 +grafana_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net +grafana_postgres_host=agg-mode-hoodi-postgres-1 +grafana_postgres_port=5432 +grafana_postgres_db=agg_mode +grafana_postgres_user=autoctl_node +# REQUIRED: Set to same password as db_password +grafana_postgres_password= diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini new file mode 100644 index 000000000..70d25c24f --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -0,0 +1,91 @@ +[DEFAULT] +# ============================================ +# Mainnet Environment Configuration +# 
============================================ +# This file contains all configuration for the Mainnet environment. +# Only sensitive values (passwords, certificate paths) need to be filled in. + +# Environment +environment=mainnet +git_branch=staging + +# ============================================ +# PostgreSQL Configuration +# ============================================ +postgres_monitor_hostname=agg-mode-mainnet-postgres-monitor +postgres_primary_hostname=agg-mode-mainnet-postgres-1 +postgres_secondary_hostname=agg-mode-mainnet-postgres-2 +db_name=agg_mode +db_user=autoctl_node +# REQUIRED: Set this password before deploying postgres +db_password= + +# PostgreSQL Monitor +monitor_pgdata=/var/lib/postgresql/monitor +monitor_port=5432 + +# PostgreSQL Nodes +node_pgdata=/var/lib/postgresql/node +node_port=5432 +backup_dir=/var/lib/backup + +# ============================================ +# Gateway & Poller Configuration +# ============================================ +gateway_network=mainnet +gateway_max_daily_proofs=100 +# TODO: Update with mainnet payment service address +gateway_payment_service_address= +# TODO: Update with mainnet RPC URL +gateway_eth_rpc_url= + +# Database connection (uses same credentials as postgres) +gateway_db_user=autoctl_node +# REQUIRED: Set to same password as db_password +gateway_db_password= +gateway_db_name=agg_mode +gateway_postgres_primary=agg-mode-mainnet-postgres-1 +gateway_postgres_secondary=agg-mode-mainnet-postgres-2 +gateway_postgres_port=5432 + +# Metrics ports +gateway_metrics_port=9094 +poller_metrics_port=9095 + +# Gateway Service Settings (same for all gateways) +gateway_port=8080 +gateway_tls_enabled=true +gateway_tls_port=443 +gateway_tls_cert_path=/home/app/.ssl/cert.pem +gateway_tls_key_path=/home/app/.ssl/key.pem +gateway_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json + +# Poller Service Settings (same for all pollers) 
+poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json +last_block_fetched_initial_value=0 + +# ============================================ +# TLS Certificate Management +# ============================================ +# REQUIRED: Provide paths to existing certificates on your local machine +# These will be copied to the gateway servers +tls_cert_source_path= +tls_key_source_path= + +# ============================================ +# Metrics Configuration +# ============================================ +prometheus_version=3.6.0 +gateway_primary_hostname=agg-mode-mainnet-gateway-1 +gateway_secondary_hostname=agg-mode-mainnet-gateway-2 + +# Grafana Configuration +grafana_prometheus_url=http://localhost:9090 +# TODO: Update with mainnet RPC URL +grafana_rpc_url= +grafana_postgres_host=agg-mode-mainnet-postgres-1 +grafana_postgres_port=5432 +grafana_postgres_db=agg_mode +grafana_postgres_user=autoctl_node +# REQUIRED: Set to same password as db_password +grafana_postgres_password= diff --git a/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml new file mode 100644 index 000000000..7303a0320 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml @@ -0,0 +1,11 @@ +# Metrics orchestration (Prometheus + Grafana) for the hosts selected by `host`. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Deploy Prometheus + ansible.builtin.import_playbook: prometheus_agg_mode.yaml + vars: + host: "{{ host }}" + +- name: Deploy Grafana + ansible.builtin.import_playbook: grafana_agg_mode.yaml + vars: + host: "{{ host }}" diff --git a/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml new file mode 100644 index 000000000..655637704 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml @@ -0,0 +1,100 @@ +- name: PostgreSQL Auto-Failover Common Setup + hosts: "{{ host }}" + + tasks: + -
name: Install postgresql-common and ca-certificates + become: true + apt: + pkg: + - postgresql-common + - ca-certificates + - curl + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Download PostgreSQL APT repository signing key + become: true + get_url: + url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + dest: /tmp/postgresql.asc + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add PostgreSQL APT key + become: true + shell: | + cat /tmp/postgresql.asc | gpg --dearmor | tee /etc/apt/keyrings/postgresql.gpg > /dev/null + args: + creates: /etc/apt/keyrings/postgresql.gpg + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add PostgreSQL APT repository + become: true + lineinfile: + path: /etc/apt/sources.list.d/pgdg.list + line: "deb [signed-by=/etc/apt/keyrings/postgresql.gpg] https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Set create_main_cluster to false + become: true + lineinfile: + path: /etc/postgresql-common/createcluster.conf + line: "create_main_cluster = false" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Update apt cache + become: true + apt: + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Install postgresql-16 and pg-auto-failover-cli + become: true + apt: + pkg: + - postgresql-16 + - pg-auto-failover-cli + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Stop and disable default PostgreSQL service + become: true + systemd_service: + name: "{{ item }}" + state: stopped + enabled: false + with_items: + - postgresql + - postgresql@16-main + ignore_errors: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Remove only the default main cluster (keeps pg_autoctl data on re-runs) + become: true + file: + path: /var/lib/postgresql/16/main + state: absent + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create PostgreSQL data
directory + become: true + file: + path: /var/lib/postgresql + state: directory + owner: postgres + group: postgres + mode: '0700' + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml new file mode 100644 index 000000000..36c19f039 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml @@ -0,0 +1,99 @@ +# PostgreSQL Monitor Setup: import pg_autofailover_common.yaml, then run the monitor play below. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" + monitor_pgdata: "{{ lookup('ini', 'monitor_pgdata', file='ini/config-{{ env }}.ini', default='/var/lib/postgresql/monitor') }}" + monitor_port: "{{ lookup('ini', 'monitor_port', file='ini/config-{{ env }}.ini', default='5432') }}" + + tasks: + - name: Install postgresql-16-auto-failover + become: true + apt: + pkg: + - postgresql-16-auto-failover + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Check if monitor is already initialized + stat: + path: "{{ monitor_pgdata }}/PG_VERSION" + register: monitor_initialized + become: true + become_user: postgres + + - name: Get Tailscale IP + shell: tailscale ip --4 + register: tailscale_ip + when: not monitor_initialized.stat.exists + + - name: Create pg_autoctl monitor + become: true + become_user: postgres + shell: | + pg_autoctl create monitor \ + --pgdata {{ monitor_pgdata }} \ + --pgctl /usr/lib/postgresql/16/bin/pg_ctl \ + --auth scram-sha-256 \ + --ssl-self-signed \ + --pgport {{ monitor_port }} \ + --hostname {{ tailscale_ip.stdout }} + when: not monitor_initialized.stat.exists + args: + creates: "{{ monitor_pgdata }}/PG_VERSION" + + - name: Wait for monitor to be ready + wait_for: + port: "{{ monitor_port }}" + delay: 5 +
timeout: 60 + + - name: Set password for autoctl_node user + become: true + become_user: postgres + shell: | + psql -d pg_auto_failover -c "ALTER USER autoctl_node PASSWORD '{{ db_password }}';" + no_log: true + + - name: Update pg_hba.conf for Tailscale network + become: true + become_user: postgres + blockinfile: + path: "{{ monitor_pgdata }}/pg_hba.conf" + marker: "# {mark} ANSIBLE MANAGED BLOCK - Tailscale" + block: | + # Tailscale network connections + host all all 100.64.0.0/10 scram-sha-256 + host replication all 100.64.0.0/10 scram-sha-256 + + - name: Generate systemd service file + become: true + become_user: postgres + shell: | + pg_autoctl show systemd --pgdata {{ monitor_pgdata }} + register: systemd_service_content + + - name: Create pgautofailover systemd service + become: true + copy: + content: "{{ systemd_service_content.stdout }}" + dest: /etc/systemd/system/pgautofailover.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start pgautofailover service + become: true + systemd_service: + name: pgautofailover + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml new file mode 100644 index 000000000..b271d4d10 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml @@ -0,0 +1,108 @@ +# PostgreSQL Node Setup: import pg_autofailover_common.yaml, then run the node play below. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + db_name: "{{ lookup('ini', 'db_name', file='ini/config-{{ env }}.ini') }}" + db_user: "{{ lookup('ini', 'db_user', file='ini/config-{{ env }}.ini') }}" + db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" + postgres_monitor_hostname: "{{ lookup('ini',
'postgres_monitor_hostname', file='ini/config-{{ env }}.ini') }}" + node_pgdata: "{{ lookup('ini', 'node_pgdata', file='ini/config-{{ env }}.ini', default='/var/lib/postgresql/node') }}" + node_port: "{{ lookup('ini', 'node_port', file='ini/config-{{ env }}.ini', default='5432') }}" + backup_dir: "{{ lookup('ini', 'backup_dir', file='ini/config-{{ env }}.ini', default='/var/lib/backup') }}" + + tasks: + - name: Create backup directory + become: true + file: + path: "{{ backup_dir }}" + state: directory + owner: postgres + group: postgres + mode: '0700' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Check if node is already initialized + stat: + path: "{{ node_pgdata }}/PG_VERSION" + register: node_initialized + become: true + become_user: postgres + + - name: Get Tailscale IP + shell: tailscale ip --4 + register: tailscale_ip + when: not node_initialized.stat.exists + + - name: Create pg_autoctl postgres node + become: true + become_user: postgres + shell: | + PGPASSWORD='{{ db_password }}' pg_autoctl create postgres \ + --pgdata {{ node_pgdata }} \ + --pgctl /usr/lib/postgresql/16/bin/pg_ctl \ + --auth scram-sha-256 \ + --ssl-self-signed \ + --username {{ db_user }} \ + --dbname {{ db_name }} \ + --pgport {{ node_port }} \ + --hostname {{ tailscale_ip.stdout }} \ + --monitor 'postgres://{{ db_user }}:{{ db_password }}@{{ postgres_monitor_hostname }}:5432/pg_auto_failover?sslmode=require' + when: not node_initialized.stat.exists + no_log: true + args: + creates: "{{ node_pgdata }}/PG_VERSION" + + - name: Set replication password + become: true + become_user: postgres + shell: | + pg_autoctl config set replication.password '{{ db_password }}' --pgdata {{ node_pgdata }} + no_log: true + + - name: Update pg_hba.conf for Tailscale network + become: true + become_user: postgres + blockinfile: + path: "{{ node_pgdata }}/pg_hba.conf" + marker: "# {mark} ANSIBLE MANAGED BLOCK - Tailscale" + block: | + # Tailscale network connections + host all all 100.64.0.0/10 
scram-sha-256 + + - name: Generate systemd service file + become: true + become_user: postgres + shell: | + pg_autoctl show systemd --pgdata {{ node_pgdata }} + register: systemd_service_content + + - name: Create pgautofailover systemd service + become: true + copy: + content: "{{ systemd_service_content.stdout }}" + dest: /etc/systemd/system/pgautofailover.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start pgautofailover service + become: true + systemd_service: + name: pgautofailover + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Wait for node to join cluster + wait_for: + port: "{{ node_port }}" + delay: 5 + timeout: 60 diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml b/infra/aggregation_mode/ansible/playbooks/poller.yaml new file mode 100644 index 000000000..7c1de48c0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -0,0 +1,105 @@ +# Poller Setup: import setup.yaml and rust.yaml, then run the poller play below. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" + poller_db_user: "{{ lookup('ini', 'gateway_db_user', file='ini/config-{{ env }}.ini') }}" + poller_db_password: "{{ lookup('ini', 'gateway_db_password', file='ini/config-{{ env }}.ini') }}" + poller_db_name: "{{ lookup('ini', 'gateway_db_name', file='ini/config-{{ env }}.ini') }}" + poller_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file='ini/config-{{ env }}.ini') }}" + poller_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file='ini/config-{{ env }}.ini') }}" + poller_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file='ini/config-{{ env }}.ini',
default='5432') }}" + poller_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file='ini/config-{{ env }}.ini') }}" + poller_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file='ini/config-{{ env }}.ini') }}" + poller_network: "{{ lookup('ini', 'gateway_network', file='ini/config-{{ env }}.ini') }}" + poller_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" + poller_last_block_fetched_filepath: "{{ lookup('ini', 'poller_last_block_fetched_filepath', file='ini/config-{{ env }}.ini') }}" + last_block_fetched_initial_value: "{{ lookup('ini', 'last_block_fetched_initial_value', file='ini/config-{{ env }}.ini', default='0') }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" + + tasks: + - name: Install required system packages + become: true + apt: + pkg: + - pkg-config + - libssl-dev + - build-essential + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/poller/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Build poller + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo install --path /home/{{ ansible_user }}/repos/poller/aligned_layer/aggregation_mode/payments_poller --bin payments_poller --locked + args: + creates: /home/{{ ansible_user }}/.cargo/bin/payments_poller + + - name: Create config directory + file: + path: /home/{{ ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create last_block_fetched file + copy: + content: '{"last_block_fetched":{{ last_block_fetched_initial_value }}}' + dest: "{{ 
poller_last_block_fetched_filepath }}" + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + force: no + + - name: Template poller config file + template: + src: config-files/config-agg-mode-poller.yaml.j2 + dest: /home/{{ ansible_user }}/config/config-agg-mode-poller.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create systemd user directory + file: + path: /home/{{ ansible_user }}/.config/systemd/user + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template poller systemd service + template: + src: services/poller.service.j2 + dest: /home/{{ ansible_user }}/.config/systemd/user/poller.service + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Enable and start poller service + systemd_service: + name: poller + state: started + enabled: true + scope: user + daemon_reload: true diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml new file mode 100644 index 000000000..24b033d5e --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml @@ -0,0 +1,21 @@ +# PostgreSQL auto-failover cluster: monitor, primary node, secondary node, then migrations. +# import_playbook is only valid at the top level of a playbook, not inside a play's tasks. +- name: Deploy PostgreSQL Monitor + ansible.builtin.import_playbook: pg_monitor.yaml + vars: + host: postgres_monitor + +- name: Deploy PostgreSQL Primary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_primary + +- name: Deploy PostgreSQL Secondary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_secondary + +- name: Run Database Migrations + ansible.builtin.import_playbook: postgres_migrations.yaml + vars: + host: postgres_primary diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml new file mode
100644 index 000000000..a6c526009 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml @@ -0,0 +1,40 @@ +- name: PostgreSQL Migrations Setup + hosts: "{{ host }}" + + tasks: + - name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + + - name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + db_name: "{{ lookup('ini', 'db_name', file='ini/config-{{ env }}.ini') }}" + db_user: "{{ lookup('ini', 'db_user', file='ini/config-{{ env }}.ini') }}" + db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" + postgres_primary_hostname: "{{ lookup('ini', 'postgres_primary_hostname', file='ini/config-{{ env }}.ini') }}" + git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" + + tasks: + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/migrations/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Run database migrations + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo run --manifest-path /home/{{ ansible_user }}/repos/migrations/aligned_layer/aggregation_mode/Cargo.toml --release --bin migrate -- "postgres://{{ db_user }}:{{ db_password }}@{{ postgres_primary_hostname }}:5432/{{ db_name }}" + register: migration_result + no_log: true + + - name: Display migration result + debug: + msg: "{{ migration_result.stdout_lines }}" diff --git a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml new file mode 100644 index 000000000..c7f82c747 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml @@ -0,0 +1,84 @@ +- name: Prometheus Setup for Aggregation Mode + hosts: "{{ host }}" + + tasks: + - name: Import setup playbook + 
ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + +- hosts: "{{ host }}" + vars: + prometheus_version: "{{ lookup('ini', 'prometheus_version', file='ini/config-{{ env }}.ini', default='3.6.0') }}" + gateway_primary_hostname: "{{ lookup('ini', 'gateway_primary_hostname', file='ini/config-{{ env }}.ini') }}" + gateway_secondary_hostname: "{{ lookup('ini', 'gateway_secondary_hostname', file='ini/config-{{ env }}.ini') }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" + + tasks: + - name: Check if Prometheus is installed + stat: + path: /home/{{ ansible_user }}/prometheus-{{ prometheus_version }}.linux-amd64/prometheus + register: prometheus_exists + + - name: Download Prometheus + when: not prometheus_exists.stat.exists + get_url: + url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz" + dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + mode: '0644' + + - name: Extract Prometheus + when: not prometheus_exists.stat.exists + unarchive: + src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + dest: /home/{{ ansible_user }}/ + remote_src: yes + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create config directory + file: + path: /home/{{ ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template Prometheus config file + template: + src: prometheus/prometheus_agg_mode.yaml.j2 + dest: /home/{{ ansible_user }}/config/prometheus.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create systemd user directory + file: + path: /home/{{ ansible_user }}/.config/systemd/user + state: directory + mode: '0755' + owner: "{{ 
ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template Prometheus systemd service + template: + src: services/prometheus_agg_mode.service.j2 + dest: /home/{{ ansible_user }}/.config/systemd/user/prometheus.service + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Enable and start Prometheus service + systemd_service: + name: prometheus + state: started + enabled: true + scope: user + daemon_reload: true + + - name: Clean up Prometheus tar + when: not prometheus_exists.stat.exists + file: + path: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + state: absent diff --git a/infra/aggregation_mode/ansible/playbooks/rust.yaml b/infra/aggregation_mode/ansible/playbooks/rust.yaml new file mode 100644 index 000000000..c3eac848f --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/rust.yaml @@ -0,0 +1,39 @@ +- name: Rust Setup + hosts: "{{ host }}" + vars: + rust_version: 1.83.0 + + tasks: + - name: Check if cargo is installed + stat: + path: /home/{{ ansible_user }}/.cargo/bin/cargo + register: cargo_exists + + - name: Download Rust installer + when: not cargo_exists.stat.exists + get_url: + url: https://sh.rustup.rs + dest: /tmp/rustup-init.sh + mode: '0755' + + - name: Install Rust + when: not cargo_exists.stat.exists + shell: | + /tmp/rustup-init.sh -y --default-toolchain {{ rust_version }} + args: + creates: /home/{{ ansible_user }}/.cargo/bin/cargo + + - name: Update user PATH + when: not cargo_exists.stat.exists + lineinfile: + path: "/home/{{ ansible_user }}/.bashrc" + line: "{{ item }}" + state: present + with_items: + - 'export PATH=$HOME/.cargo/bin:$PATH' + + - name: Clean up Rust installer + when: not cargo_exists.stat.exists + file: + path: /tmp/rustup-init.sh + state: absent diff --git a/infra/aggregation_mode/ansible/playbooks/setup.yaml b/infra/aggregation_mode/ansible/playbooks/setup.yaml new file mode 100644 index 000000000..a521324d3 --- /dev/null +++ 
b/infra/aggregation_mode/ansible/playbooks/setup.yaml @@ -0,0 +1,40 @@ +- name: Server setup + hosts: "{{ host }}" + + tasks: + # Install required packages + - name: Update apt and install required system packages + become: true + apt: + pkg: + - curl + - vim + - git + - make + - gcc + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + # Create basic directories + - name: Create basic directories if do not exist + ansible.builtin.file: + path: /home/{{ ansible_user }}/{{ item }} + state: directory + mode: '0755' + owner: '{{ ansible_user }}' + group: '{{ ansible_user }}' + loop: + - repos + - config + - services + - .keystores + + - name: Enable linger for {{ ansible_user }} + become: true + command: sudo loginctl enable-linger {{ ansible_user }} + args: + creates: /var/lib/systemd/linger/{{ ansible_user }} + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 new file mode 100644 index 000000000..fab21b85b --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 @@ -0,0 +1,17 @@ +ip: "0.0.0.0" +port: {{ gateway_port }} +{% if gateway_tls_enabled == 'true' %} +tls_cert_path: "{{ gateway_tls_cert_path }}" +tls_key_path: "{{ gateway_tls_key_path }}" +tls_port: {{ gateway_tls_port }} +{% endif %} +db_connection_urls: + - "postgres://{{ gateway_db_user }}:{{ gateway_db_password }}@{{ gateway_postgres_primary }}:{{ gateway_postgres_port }}/{{ gateway_db_name }}" + - "postgres://{{ gateway_db_user }}:{{ gateway_db_password }}@{{ gateway_postgres_secondary }}:{{ gateway_postgres_port }}/{{ gateway_db_name }}" +eth_rpc_url: "{{ gateway_eth_rpc_url }}" +payment_service_address: "{{ gateway_payment_service_address }}" +network: "{{ gateway_network }}" +max_daily_proofs_per_user: {{ 
gateway_max_daily_proofs }} + +# Metrics +gateway_metrics_port: {{ gateway_metrics_port }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 new file mode 100644 index 000000000..8f8114287 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 @@ -0,0 +1,11 @@ +db_connection_urls: + - "postgres://{{ poller_db_user }}:{{ poller_db_password }}@{{ poller_postgres_primary }}:{{ poller_postgres_port }}/{{ poller_db_name }}" + - "postgres://{{ poller_db_user }}:{{ poller_db_password }}@{{ poller_postgres_secondary }}:{{ poller_postgres_port }}/{{ poller_db_name }}" +eth_rpc_url: "{{ poller_eth_rpc_url }}" +payment_service_address: "{{ poller_payment_service_address }}" +network: "{{ poller_network }}" +max_daily_proofs_per_user: {{ poller_max_daily_proofs }} +last_block_fetched_filepath: "{{ poller_last_block_fetched_filepath }}" + +# Metrics +poller_metrics_port: {{ poller_metrics_port }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 new file mode 100644 index 000000000..f1a809492 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 @@ -0,0 +1,9 @@ +GF_USERS_ALLOW_SIGN_UP=false +GF_INSTALL_PLUGINS=yesoreyeram-infinity-datasource +PROMETHEUS_URL={{ grafana_prometheus_url }} +RPC_URL={{ grafana_rpc_url }} +POSTGRES_HOST={{ grafana_postgres_host }} +POSTGRES_PORT={{ grafana_postgres_port }} +POSTGRES_DB={{ grafana_postgres_db }} +POSTGRES_USER={{ grafana_postgres_user }} +POSTGRES_PASSWORD={{ grafana_postgres_password }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 
b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 new file mode 100644 index 000000000..2886c16fc --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 @@ -0,0 +1,51 @@ +global: + scrape_interval: 15s + +scrape_configs: +- job_name: "gateway-primary-http" + scrape_interval: 60s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:8080"] + labels: + service: "gateway" + instance: "primary" + +- job_name: "gateway-secondary-http" + scrape_interval: 60s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:8080"] + labels: + service: "gateway" + instance: "secondary" + +- job_name: "gateway-primary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:{{ gateway_metrics_port }}"] + labels: + service: "gateway" + instance: "primary" + +- job_name: "gateway-secondary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:{{ gateway_metrics_port }}"] + labels: + service: "gateway" + instance: "secondary" + +- job_name: "poller-primary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:{{ poller_metrics_port }}"] + labels: + service: "poller" + instance: "primary" + +- job_name: "poller-secondary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:{{ poller_metrics_port }}"] + labels: + service: "poller" + instance: "secondary" diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 new file mode 100644 index 000000000..4d9724047 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 @@ -0,0 +1,18 @@ +[Unit] +Description=Aggregation Mode Gateway +After=network.target + +[Service] +Type=simple +User={{ ansible_user }} +Group={{ ansible_user }} 
+WorkingDirectory=/home/{{ ansible_user }}/repos/gateway/aligned_layer/aggregation_mode +ExecStart=/home/{{ ansible_user }}/.cargo/bin/gateway /home/{{ ansible_user }}/config/config-agg-mode-gateway.yaml +Restart=always +RestartSec=1 +StartLimitBurst=100 +LimitNOFILE=100000 +AmbientCapabilities=CAP_NET_BIND_SERVICE + +[Install] +WantedBy=multi-user.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 new file mode 100644 index 000000000..a1baeffac --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 @@ -0,0 +1,15 @@ +[Unit] +Description=Aggregation Mode Payments Poller +After=network.target + +[Service] +Type=simple +WorkingDirectory=/home/{{ ansible_user }}/repos/poller/aligned_layer/aggregation_mode +ExecStart=/home/{{ ansible_user }}/.cargo/bin/payments_poller /home/{{ ansible_user }}/config/config-agg-mode-poller.yaml +Restart=always +RestartSec=1 +StartLimitBurst=100 +LimitNOFILE=100000 + +[Install] +WantedBy=multi-user.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 new file mode 100644 index 000000000..cf1563018 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=Prometheus +After=network.target + +[Service] +Type=simple +WorkingDirectory=/home/{{ ansible_user }} +ExecStart=/home/{{ ansible_user }}/prometheus-{{ prometheus_version }}.linux-amd64/prometheus \ + --config.file=/home/{{ ansible_user }}/config/prometheus.yaml \ + --storage.tsdb.retention.time=90d +Restart=always +RestartSec=1 +StartLimitBurst=100 + +[Install] +WantedBy=multi-user.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 
b/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 new file mode 100644 index 000000000..192f06f07 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 @@ -0,0 +1,3 @@ +{{ ansible_user }} ALL=(ALL) NOPASSWD: /bin/systemctl start gateway, /bin/systemctl stop gateway, /bin/systemctl restart gateway, /bin/systemctl status gateway +{{ ansible_user }} ALL=(ALL) NOPASSWD: /bin/journalctl -u gateway* +{{ ansible_user }} ALL=(ALL) NOPASSWD: /bin/journalctl --user-unit=gateway* From 1bb05d0a87217051b1f6ddd11dc0c59202496b7c Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 13:08:36 -0300 Subject: [PATCH 02/16] remove unused variables --- infra/aggregation_mode/ansible/playbooks/gateway.yaml | 2 -- infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini | 1 - infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini | 1 - infra/aggregation_mode/ansible/playbooks/poller.yaml | 1 - 4 files changed, 5 deletions(-) diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml index 04d257a23..d553d0cdb 100644 --- a/infra/aggregation_mode/ansible/playbooks/gateway.yaml +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -30,9 +30,7 @@ gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file='ini/config-{{ env }}.ini') }}" gateway_network: "{{ lookup('ini', 'gateway_network', file='ini/config-{{ env }}.ini') }}" gateway_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" - gateway_last_block_fetched_filepath: "{{ lookup('ini', 'gateway_last_block_fetched_filepath', file='ini/config-{{ env }}.ini') }}" gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" - poller_metrics_port: "{{ lookup('ini', 
'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" tls_cert_source_path: "{{ lookup('ini', 'tls_cert_source_path', file='ini/config-{{ env }}.ini') }}" tls_key_source_path: "{{ lookup('ini', 'tls_key_source_path', file='ini/config-{{ env }}.ini') }}" diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index d40277519..9a7e907f1 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -56,7 +56,6 @@ gateway_tls_enabled=true gateway_tls_port=443 gateway_tls_cert_path=/home/app/.ssl/cert.pem gateway_tls_key_path=/home/app/.ssl/key.pem -gateway_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json # Poller Service Settings (same for all pollers) poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 70d25c24f..268091b6a 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -58,7 +58,6 @@ gateway_tls_enabled=true gateway_tls_port=443 gateway_tls_cert_path=/home/app/.ssl/cert.pem gateway_tls_key_path=/home/app/.ssl/key.pem -gateway_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json # Poller Service Settings (same for all pollers) poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml b/infra/aggregation_mode/ansible/playbooks/poller.yaml index 7c1de48c0..26dbb03f1 100644 --- a/infra/aggregation_mode/ansible/playbooks/poller.yaml +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -27,7 +27,6 @@ poller_max_daily_proofs: 
"{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" poller_last_block_fetched_filepath: "{{ lookup('ini', 'poller_last_block_fetched_filepath', file='ini/config-{{ env }}.ini') }}" last_block_fetched_initial_value: "{{ lookup('ini', 'last_block_fetched_initial_value', file='ini/config-{{ env }}.ini', default='0') }}" - gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" tasks: From df26df2841fb5805cb72f79b75559d5cde5b5f98 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:49:36 -0300 Subject: [PATCH 03/16] fix postgres cluster --- .../ansible/playbooks/ini/config-hoodi.ini | 2 +- .../ansible/playbooks/ini/config-mainnet.ini | 2 +- .../playbooks/pg_autofailover_common.yaml | 1 + .../ansible/playbooks/pg_monitor.yaml | 60 ++++++++++++------- .../ansible/playbooks/pg_node.yaml | 55 ++++++++++++----- .../ansible/playbooks/postgres_cluster.yaml | 41 +++++++------ .../playbooks/postgres_migrations.yaml | 40 +++++++------ .../ansible/playbooks/rust.yaml | 2 +- 8 files changed, 124 insertions(+), 79 deletions(-) diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index 9a7e907f1..aca8d8743 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -1,4 +1,4 @@ -[DEFAULT] +[global] # ============================================ # Hoodi Environment Configuration # ============================================ diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 268091b6a..17dacb20f 100644 --- 
a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -1,4 +1,4 @@ -[DEFAULT] +[global] # ============================================ # Mainnet Environment Configuration # ============================================ diff --git a/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml index 655637704..5d3382613 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml +++ b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml @@ -9,6 +9,7 @@ - postgresql-common - ca-certificates - curl + - acl state: latest update_cache: true vars: diff --git a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml index 36c19f039..3a7da6aa7 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml +++ b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml @@ -1,19 +1,33 @@ +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + - name: PostgreSQL Monitor Setup hosts: "{{ host }}" + vars: + ansible_common_remote_group: postgres tasks: - - name: Import pg_autofailover_common playbook - ansible.builtin.import_playbook: pg_autofailover_common.yaml - vars: - host: "{{ host }}" + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" -- hosts: "{{ host }}" - vars: - db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" - monitor_pgdata: "{{ lookup('ini', 'monitor_pgdata', file='ini/config-{{ env }}.ini', default='/var/lib/postgresql/monitor') }}" - monitor_port: "{{ lookup('ini', 'monitor_port', file='ini/config-{{ env }}.ini', default='5432') }}" + - name: Debug config file + debug: + msg: "Using config file: {{ config_file }}" + + - name: Set config vars from INI 
file + set_fact: + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + monitor_pgdata: "{{ lookup('ini', 'monitor_pgdata', file=config_file, default='/var/lib/postgresql/monitor') }}" + monitor_port: "{{ lookup('ini', 'monitor_port', file=config_file, default='5432') }}" + no_log: true + + - name: Debug vars + debug: + msg: "env={{ env }}, monitor_pgdata={{ monitor_pgdata }}" - tasks: - name: Install postgresql-16-auto-failover become: true apt: @@ -50,19 +64,6 @@ args: creates: "{{ monitor_pgdata }}/PG_VERSION" - - name: Wait for monitor to be ready - wait_for: - port: "{{ monitor_port }}" - delay: 5 - timeout: 60 - - - name: Set password for autoctl_node user - become: true - become_user: postgres - shell: | - psql -d pg_auto_failover -c "ALTER USER autoctl_node PASSWORD '{{ db_password }}';" - no_log: true - - name: Update pg_hba.conf for Tailscale network become: true become_user: postgres @@ -99,3 +100,16 @@ daemon_reload: true vars: ansible_ssh_user: "{{ admin_user }}" + + - name: Wait for monitor to be ready + wait_for: + port: "{{ monitor_port }}" + delay: 5 + timeout: 60 + + - name: Set password for autoctl_node user + become: true + become_user: postgres + shell: | + psql -d pg_auto_failover -c "ALTER USER autoctl_node PASSWORD '{{ db_password }}';" + no_log: true diff --git a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml index b271d4d10..9b394de62 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml +++ b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml @@ -1,23 +1,29 @@ +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + - name: PostgreSQL Node Setup hosts: "{{ host }}" + vars: + ansible_common_remote_group: postgres tasks: - - name: Import pg_autofailover_common playbook - ansible.builtin.import_playbook: pg_autofailover_common.yaml - vars: - host: "{{ host }}" + - 
name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" -- hosts: "{{ host }}" - vars: - db_name: "{{ lookup('ini', 'db_name', file='ini/config-{{ env }}.ini') }}" - db_user: "{{ lookup('ini', 'db_user', file='ini/config-{{ env }}.ini') }}" - db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" - postgres_monitor_hostname: "{{ lookup('ini', 'postgres_monitor_hostname', file='ini/config-{{ env }}.ini') }}" - node_pgdata: "{{ lookup('ini', 'node_pgdata', file='ini/config-{{ env }}.ini', default='/var/lib/postgresql/node') }}" - node_port: "{{ lookup('ini', 'node_port', file='ini/config-{{ env }}.ini', default='5432') }}" - backup_dir: "{{ lookup('ini', 'backup_dir', file='ini/config-{{ env }}.ini', default='/var/lib/backup') }}" + - name: Set config vars from INI file + set_fact: + db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + postgres_monitor_hostname: "{{ lookup('ini', 'postgres_monitor_hostname', file=config_file) }}" + node_pgdata: "{{ lookup('ini', 'node_pgdata', file=config_file, default='/var/lib/postgresql/node') }}" + node_port: "{{ lookup('ini', 'node_port', file=config_file, default='5432') }}" + backup_dir: "{{ lookup('ini', 'backup_dir', file=config_file, default='/var/lib/backup') }}" + no_log: true - tasks: - name: Create backup directory become: true file: @@ -36,6 +42,10 @@ become: true become_user: postgres + - name: Debug node_initialized + debug: + msg: "Node initialized: {{ node_initialized.stat.exists }}, path: {{ node_pgdata }}/PG_VERSION" + - name: Get Tailscale IP shell: tailscale ip --4 register: tailscale_ip @@ -56,7 +66,6 @@ --hostname {{ tailscale_ip.stdout }} \ --monitor 'postgres://{{ db_user }}:{{ db_password }}@{{ postgres_monitor_hostname }}:5432/pg_auto_failover?sslmode=require' when: not node_initialized.stat.exists - no_log: true 
args: creates: "{{ node_pgdata }}/PG_VERSION" @@ -108,3 +117,19 @@ port: "{{ node_port }}" delay: 5 timeout: 60 + + - name: Check if node is writable (primary) + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -tAc "SELECT NOT pg_is_in_recovery();" + register: is_writable + changed_when: false + + - name: Set password for pgautofailover_replicator user + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -c "ALTER USER pgautofailover_replicator PASSWORD '{{ db_password }}';" + when: is_writable.stdout == 't' + no_log: true diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml index 24b033d5e..9cc845212 100644 --- a/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml +++ b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml @@ -1,24 +1,23 @@ -- name: Deploy PostgreSQL Auto-Failover Cluster - hosts: localhost - gather_facts: no +- name: Deploy PostgreSQL Monitor + ansible.builtin.import_playbook: pg_monitor.yaml + vars: + host: postgres_monitor + env: "{{ env }}" - tasks: - - name: Deploy PostgreSQL Monitor - ansible.builtin.import_playbook: pg_monitor.yaml - vars: - host: postgres_monitor +- name: Deploy PostgreSQL Primary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_primary + env: "{{ env }}" - - name: Deploy PostgreSQL Primary Node - ansible.builtin.import_playbook: pg_node.yaml - vars: - host: postgres_primary +- name: Deploy PostgreSQL Secondary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_secondary + env: "{{ env }}" - - name: Deploy PostgreSQL Secondary Node - ansible.builtin.import_playbook: pg_node.yaml - vars: - host: postgres_secondary - - - name: Run Database Migrations - ansible.builtin.import_playbook: postgres_migrations.yaml - vars: - host: postgres_primary +- name: Run Database Migrations + ansible.builtin.import_playbook: 
postgres_migrations.yaml + vars: + host: postgres_primary + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml index a6c526009..add485060 100644 --- a/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml +++ b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml @@ -1,26 +1,32 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + - name: PostgreSQL Migrations Setup hosts: "{{ host }}" tasks: - - name: Import setup playbook - ansible.builtin.import_playbook: setup.yaml - vars: - host: "{{ host }}" + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" - - name: Import rust playbook - ansible.builtin.import_playbook: rust.yaml - vars: - host: "{{ host }}" - -- hosts: "{{ host }}" - vars: - db_name: "{{ lookup('ini', 'db_name', file='ini/config-{{ env }}.ini') }}" - db_user: "{{ lookup('ini', 'db_user', file='ini/config-{{ env }}.ini') }}" - db_password: "{{ lookup('ini', 'db_password', file='ini/config-{{ env }}.ini') }}" - postgres_primary_hostname: "{{ lookup('ini', 'postgres_primary_hostname', file='ini/config-{{ env }}.ini') }}" - git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" + - name: Set config vars from INI file + set_fact: + db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + postgres_primary_hostname: "{{ lookup('ini', 'postgres_primary_hostname', file=config_file) }}" + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + no_log: true - tasks: - name: Clone aligned_layer repository git: repo: 
https://github.com/yetanotherco/aligned_layer.git diff --git a/infra/aggregation_mode/ansible/playbooks/rust.yaml b/infra/aggregation_mode/ansible/playbooks/rust.yaml index c3eac848f..b74c764b4 100644 --- a/infra/aggregation_mode/ansible/playbooks/rust.yaml +++ b/infra/aggregation_mode/ansible/playbooks/rust.yaml @@ -1,7 +1,7 @@ - name: Rust Setup hosts: "{{ host }}" vars: - rust_version: 1.83.0 + rust_version: 1.92.0 tasks: - name: Check if cargo is installed From f5e2995f9b3704e598b168ba87602f15da1743b0 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:51:28 -0300 Subject: [PATCH 04/16] fix load variables correctly --- .../ansible/playbooks/deploy_all.yaml | 38 +++++------ .../ansible/playbooks/gateway.yaml | 68 ++++++++++--------- .../ansible/playbooks/gateway_stack.yaml | 22 +++--- .../ansible/playbooks/grafana_agg_mode.yaml | 33 +++++---- .../ansible/playbooks/metrics_stack.yaml | 22 +++--- .../ansible/playbooks/poller.yaml | 58 +++++++++------- .../playbooks/prometheus_agg_mode.yaml | 28 ++++---- 7 files changed, 143 insertions(+), 126 deletions(-) diff --git a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml index 160cfe4f1..d708c6a79 100644 --- a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml +++ b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml @@ -1,22 +1,22 @@ -- name: Deploy Full Aggregation Mode Stack - hosts: localhost - gather_facts: no +- name: Deploy PostgreSQL Cluster + ansible.builtin.import_playbook: postgres_cluster.yaml + vars: + env: "{{ env }}" - tasks: - - name: Deploy PostgreSQL Cluster - ansible.builtin.import_playbook: postgres_cluster.yaml +- name: Deploy Gateway and Poller on Primary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_primary + env: "{{ env }}" - - name: Deploy Gateway and Poller on Primary - ansible.builtin.import_playbook: gateway_stack.yaml - vars: - 
host: gateway_primary +- name: Deploy Gateway and Poller on Secondary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_secondary + env: "{{ env }}" - - name: Deploy Gateway and Poller on Secondary - ansible.builtin.import_playbook: gateway_stack.yaml - vars: - host: gateway_secondary - - - name: Deploy Metrics Stack - ansible.builtin.import_playbook: metrics_stack.yaml - vars: - host: metrics +- name: Deploy Metrics Stack + ansible.builtin.import_playbook: metrics_stack.yaml + vars: + host: metrics + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml index d553d0cdb..775f092a6 100644 --- a/infra/aggregation_mode/ansible/playbooks/gateway.yaml +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -1,40 +1,46 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + - name: Gateway Setup hosts: "{{ host }}" tasks: - - name: Import setup playbook - ansible.builtin.import_playbook: setup.yaml - vars: - host: "{{ host }}" + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" - - name: Import rust playbook - ansible.builtin.import_playbook: rust.yaml - vars: - host: "{{ host }}" - -- hosts: "{{ host }}" - vars: - git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" - gateway_port: "{{ lookup('ini', 'gateway_port', file='ini/config-{{ env }}.ini', default='8080') }}" - gateway_tls_enabled: "{{ lookup('ini', 'gateway_tls_enabled', file='ini/config-{{ env }}.ini', default='true') }}" - gateway_tls_port: "{{ lookup('ini', 'gateway_tls_port', file='ini/config-{{ env }}.ini', default='443') }}" - gateway_tls_cert_path: "{{ lookup('ini', 'gateway_tls_cert_path', file='ini/config-{{ env }}.ini') }}" - 
gateway_tls_key_path: "{{ lookup('ini', 'gateway_tls_key_path', file='ini/config-{{ env }}.ini') }}" - gateway_db_user: "{{ lookup('ini', 'gateway_db_user', file='ini/config-{{ env }}.ini') }}" - gateway_db_password: "{{ lookup('ini', 'gateway_db_password', file='ini/config-{{ env }}.ini') }}" - gateway_db_name: "{{ lookup('ini', 'gateway_db_name', file='ini/config-{{ env }}.ini') }}" - gateway_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file='ini/config-{{ env }}.ini') }}" - gateway_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file='ini/config-{{ env }}.ini') }}" - gateway_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file='ini/config-{{ env }}.ini', default='5432') }}" - gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file='ini/config-{{ env }}.ini') }}" - gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file='ini/config-{{ env }}.ini') }}" - gateway_network: "{{ lookup('ini', 'gateway_network', file='ini/config-{{ env }}.ini') }}" - gateway_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" - gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" - tls_cert_source_path: "{{ lookup('ini', 'tls_cert_source_path', file='ini/config-{{ env }}.ini') }}" - tls_key_source_path: "{{ lookup('ini', 'tls_key_source_path', file='ini/config-{{ env }}.ini') }}" + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + gateway_port: "{{ lookup('ini', 'gateway_port', file=config_file, default='8080') }}" + gateway_tls_enabled: "{{ lookup('ini', 'gateway_tls_enabled', file=config_file, default='true') }}" + gateway_tls_port: "{{ lookup('ini', 'gateway_tls_port', file=config_file, default='443') }}" + gateway_tls_cert_path: "{{ lookup('ini', 'gateway_tls_cert_path', file=config_file) }}" + 
gateway_tls_key_path: "{{ lookup('ini', 'gateway_tls_key_path', file=config_file) }}" + gateway_db_user: "{{ lookup('ini', 'gateway_db_user', file=config_file) }}" + gateway_db_password: "{{ lookup('ini', 'gateway_db_password', file=config_file) }}" + gateway_db_name: "{{ lookup('ini', 'gateway_db_name', file=config_file) }}" + gateway_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" + gateway_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" + gateway_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" + gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + gateway_network: "{{ lookup('ini', 'gateway_network', file=config_file) }}" + gateway_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file=config_file, default='100') }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file=config_file, default='9094') }}" + tls_cert_source_path: "{{ lookup('ini', 'tls_cert_source_path', file=config_file) }}" + tls_key_source_path: "{{ lookup('ini', 'tls_key_source_path', file=config_file) }}" + no_log: true - tasks: - name: Install required system packages become: true apt: diff --git a/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml index eceba3b91..10ab7cc5b 100644 --- a/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml +++ b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml @@ -1,13 +1,11 @@ -- name: Deploy Gateway and Poller Stack - hosts: "{{ host }}" +- name: Deploy Gateway + ansible.builtin.import_playbook: gateway.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" - tasks: - - name: Deploy Gateway - ansible.builtin.import_playbook: gateway.yaml - vars: - host: "{{ host }}" - - - 
name: Deploy Poller - ansible.builtin.import_playbook: poller.yaml - vars: - host: "{{ host }}" +- name: Deploy Poller + ansible.builtin.import_playbook: poller.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml index 388d53ab5..d293077ff 100644 --- a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml +++ b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml @@ -1,23 +1,28 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + - name: Grafana Setup for Aggregation Mode hosts: "{{ host }}" tasks: - - name: Import setup playbook - ansible.builtin.import_playbook: setup.yaml - vars: - host: "{{ host }}" + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" -- hosts: "{{ host }}" - vars: - grafana_prometheus_url: "{{ lookup('ini', 'grafana_prometheus_url', file='ini/config-{{ env }}.ini') }}" - grafana_rpc_url: "{{ lookup('ini', 'grafana_rpc_url', file='ini/config-{{ env }}.ini') }}" - grafana_postgres_host: "{{ lookup('ini', 'grafana_postgres_host', file='ini/config-{{ env }}.ini') }}" - grafana_postgres_port: "{{ lookup('ini', 'grafana_postgres_port', file='ini/config-{{ env }}.ini', default='5432') }}" - grafana_postgres_db: "{{ lookup('ini', 'grafana_postgres_db', file='ini/config-{{ env }}.ini') }}" - grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file='ini/config-{{ env }}.ini') }}" - grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file='ini/config-{{ env }}.ini') }}" + - name: Set config vars from INI file + set_fact: + grafana_prometheus_url: "{{ lookup('ini', 'grafana_prometheus_url', file=config_file) }}" + grafana_rpc_url: "{{ lookup('ini', 'grafana_rpc_url', file=config_file) }}" + grafana_postgres_host: "{{ lookup('ini', 'grafana_postgres_host', 
file=config_file) }}" + grafana_postgres_port: "{{ lookup('ini', 'grafana_postgres_port', file=config_file, default='5432') }}" + grafana_postgres_db: "{{ lookup('ini', 'grafana_postgres_db', file=config_file) }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" + no_log: true - tasks: - name: Install required packages become: true apt: diff --git a/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml index 7303a0320..242362595 100644 --- a/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml +++ b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml @@ -1,13 +1,11 @@ -- name: Deploy Metrics Stack - hosts: "{{ host }}" +- name: Deploy Prometheus + ansible.builtin.import_playbook: prometheus_agg_mode.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" - tasks: - - name: Deploy Prometheus - ansible.builtin.import_playbook: prometheus_agg_mode.yaml - vars: - host: "{{ host }}" - - - name: Deploy Grafana - ansible.builtin.import_playbook: grafana_agg_mode.yaml - vars: - host: "{{ host }}" +- name: Deploy Grafana + ansible.builtin.import_playbook: grafana_agg_mode.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml b/infra/aggregation_mode/ansible/playbooks/poller.yaml index 26dbb03f1..0c9e3da80 100644 --- a/infra/aggregation_mode/ansible/playbooks/poller.yaml +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -1,35 +1,41 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + - name: Poller Setup hosts: "{{ host }}" tasks: - - name: Import setup playbook - 
ansible.builtin.import_playbook: setup.yaml - vars: - host: "{{ host }}" + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" - - name: Import rust playbook - ansible.builtin.import_playbook: rust.yaml - vars: - host: "{{ host }}" - -- hosts: "{{ host }}" - vars: - git_branch: "{{ lookup('ini', 'git_branch', file='ini/config-{{ env }}.ini') }}" - poller_db_user: "{{ lookup('ini', 'gateway_db_user', file='ini/config-{{ env }}.ini') }}" - poller_db_password: "{{ lookup('ini', 'gateway_db_password', file='ini/config-{{ env }}.ini') }}" - poller_db_name: "{{ lookup('ini', 'gateway_db_name', file='ini/config-{{ env }}.ini') }}" - poller_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file='ini/config-{{ env }}.ini') }}" - poller_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file='ini/config-{{ env }}.ini') }}" - poller_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file='ini/config-{{ env }}.ini', default='5432') }}" - poller_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file='ini/config-{{ env }}.ini') }}" - poller_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file='ini/config-{{ env }}.ini') }}" - poller_network: "{{ lookup('ini', 'gateway_network', file='ini/config-{{ env }}.ini') }}" - poller_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file='ini/config-{{ env }}.ini', default='100') }}" - poller_last_block_fetched_filepath: "{{ lookup('ini', 'poller_last_block_fetched_filepath', file='ini/config-{{ env }}.ini') }}" - last_block_fetched_initial_value: "{{ lookup('ini', 'last_block_fetched_initial_value', file='ini/config-{{ env }}.ini', default='0') }}" - poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + poller_db_user: "{{ lookup('ini', 
'gateway_db_user', file=config_file) }}" + poller_db_password: "{{ lookup('ini', 'gateway_db_password', file=config_file) }}" + poller_db_name: "{{ lookup('ini', 'gateway_db_name', file=config_file) }}" + poller_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" + poller_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" + poller_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" + poller_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + poller_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + poller_network: "{{ lookup('ini', 'gateway_network', file=config_file) }}" + poller_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file=config_file, default='100') }}" + poller_last_block_fetched_filepath: "{{ lookup('ini', 'poller_last_block_fetched_filepath', file=config_file) }}" + last_block_fetched_initial_value: "{{ lookup('ini', 'last_block_fetched_initial_value', file=config_file, default='0') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file=config_file, default='9095') }}" + no_log: true - tasks: - name: Install required system packages become: true apt: diff --git a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml index c7f82c747..1375a1318 100644 --- a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml +++ b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml @@ -1,21 +1,25 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + - name: Prometheus Setup for Aggregation Mode hosts: "{{ host }}" tasks: - - name: Import setup playbook - ansible.builtin.import_playbook: setup.yaml - vars: - host: "{{ host }}" + - name: Set config file path + 
set_fact: + config_file: "ini/config-{{ env }}.ini" -- hosts: "{{ host }}" - vars: - prometheus_version: "{{ lookup('ini', 'prometheus_version', file='ini/config-{{ env }}.ini', default='3.6.0') }}" - gateway_primary_hostname: "{{ lookup('ini', 'gateway_primary_hostname', file='ini/config-{{ env }}.ini') }}" - gateway_secondary_hostname: "{{ lookup('ini', 'gateway_secondary_hostname', file='ini/config-{{ env }}.ini') }}" - gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file='ini/config-{{ env }}.ini', default='9094') }}" - poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file='ini/config-{{ env }}.ini', default='9095') }}" + - name: Set config vars from INI file + set_fact: + prometheus_version: "{{ lookup('ini', 'prometheus_version', file=config_file, default='3.6.0') }}" + gateway_primary_hostname: "{{ lookup('ini', 'gateway_primary_hostname', file=config_file) }}" + gateway_secondary_hostname: "{{ lookup('ini', 'gateway_secondary_hostname', file=config_file) }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file=config_file, default='9094') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file=config_file, default='9095') }}" - tasks: - name: Check if Prometheus is installed stat: path: /home/{{ ansible_user }}/prometheus-{{ prometheus_version }}.linux-amd64/prometheus From a7dfd2da17d6864d2c4381afd07a08082b5cc47d Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 18:27:05 -0300 Subject: [PATCH 05/16] update config ini --- .../ansible/playbooks/ini/config-mainnet.ini | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 17dacb20f..2695699f0 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ 
-34,10 +34,8 @@ backup_dir=/var/lib/backup # ============================================ gateway_network=mainnet gateway_max_daily_proofs=100 -# TODO: Update with mainnet payment service address -gateway_payment_service_address= -# TODO: Update with mainnet RPC URL -gateway_eth_rpc_url= +gateway_payment_service_address=0xc8631Bc1E60c20db40e474F791126212fA8255F4 +gateway_eth_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net # Database connection (uses same credentials as postgres) gateway_db_user=autoctl_node @@ -81,7 +79,7 @@ gateway_secondary_hostname=agg-mode-mainnet-gateway-2 # Grafana Configuration grafana_prometheus_url=http://localhost:9090 # TODO: Update with mainnet RPC URL -grafana_rpc_url= +grafana_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net grafana_postgres_host=agg-mode-mainnet-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode From 510465ebe994f17fb764739418eba5763f38c859 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Wed, 14 Jan 2026 20:08:58 -0300 Subject: [PATCH 06/16] fixes --- Makefile | 42 ++++++++++---- infra/aggregation_mode/ansible/README.md | 57 +++++++++++++++---- .../ansible/playbooks/gateway.yaml | 6 ++ .../ansible/playbooks/ini/config-mainnet.ini | 2 +- .../ansible/playbooks/pg_node.yaml | 8 +++ .../ansible/playbooks/poller.yaml | 17 +++++- 6 files changed, 107 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 9dce05b8b..204017bab 100644 --- a/Makefile +++ b/Makefile @@ -1736,41 +1736,61 @@ postgres_status: ## Check PostgreSQL cluster status. Usage: make postgres_status # ------------------------------------------------------------------------------ .PHONY: gateway_deploy -gateway_deploy: ## Deploy Gateway & Poller on both servers. Usage: make gateway_deploy ENV=hoodi +gateway_deploy: ## Deploy Gateway & Poller on both servers. 
Usage: make gateway_deploy ENV=hoodi [FORCE_REBUILD=true] @if [ -z "$(ENV)" ]; then \ echo "Error: ENV must be set (hoodi or mainnet)"; \ exit 1; \ fi - @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + @EXTRA_VARS=""; \ + if [ -n "$(FORCE_REBUILD)" ]; then \ + EXTRA_VARS="-e force_rebuild=true"; \ + fi; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ -e "host=gateway_primary" \ - -e "env=$(ENV)" - @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -e "env=$(ENV)" \ + $$EXTRA_VARS + @EXTRA_VARS=""; \ + if [ -n "$(FORCE_REBUILD)" ]; then \ + EXTRA_VARS="-e force_rebuild=true"; \ + fi; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ -e "host=gateway_secondary" \ - -e "env=$(ENV)" + -e "env=$(ENV)" \ + $$EXTRA_VARS .PHONY: gateway_primary_deploy -gateway_primary_deploy: ## Deploy Gateway & Poller on primary only. Usage: make gateway_primary_deploy ENV=hoodi +gateway_primary_deploy: ## Deploy Gateway & Poller on primary only. Usage: make gateway_primary_deploy ENV=hoodi [FORCE_REBUILD=true] @if [ -z "$(ENV)" ]; then \ echo "Error: ENV must be set (hoodi or mainnet)"; \ exit 1; \ fi - @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + @EXTRA_VARS=""; \ + if [ -n "$(FORCE_REBUILD)" ]; then \ + EXTRA_VARS="-e force_rebuild=true"; \ + fi; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ -e "host=gateway_primary" \ - -e "env=$(ENV)" + -e "env=$(ENV)" \ + $$EXTRA_VARS .PHONY: gateway_secondary_deploy -gateway_secondary_deploy: ## Deploy Gateway & Poller on secondary only. Usage: make gateway_secondary_deploy ENV=hoodi +gateway_secondary_deploy: ## Deploy Gateway & Poller on secondary only. 
Usage: make gateway_secondary_deploy ENV=hoodi [FORCE_REBUILD=true] @if [ -z "$(ENV)" ]; then \ echo "Error: ENV must be set (hoodi or mainnet)"; \ exit 1; \ fi - @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + @EXTRA_VARS=""; \ + if [ -n "$(FORCE_REBUILD)" ]; then \ + EXTRA_VARS="-e force_rebuild=true"; \ + fi; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ -e "host=gateway_secondary" \ - -e "env=$(ENV)" + -e "env=$(ENV)" \ + $$EXTRA_VARS # ------------------------------------------------------------------------------ # Metrics Deployment diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index 24b04bbac..06913363e 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -246,8 +246,13 @@ make gateway_deploy ENV=hoodi # Or deploy individually make gateway_primary_deploy ENV=hoodi make gateway_secondary_deploy ENV=hoodi + +# Force rebuild (always rebuild binaries from latest code) +make gateway_deploy ENV=hoodi FORCE_REBUILD=true ``` +**Note:** By default, the deployment is idempotent and skips building if the binary already exists. Use `FORCE_REBUILD=true` to always rebuild from the latest code in the repository. 
+ **Verify gateway is running:** ```bash ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" @@ -539,22 +544,49 @@ ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ -e "host=gateway_primary" \ -e "env=hoodi" + +# Deploy gateway with forced rebuild +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=hoodi" \ + -e "force_rebuild=true" ``` ### Updating Services -**Update gateway code:** +**Update gateway and poller with latest code:** + +The easiest way to update services is using the `FORCE_REBUILD` parameter: + +```bash +# Update both primary and secondary +make gateway_deploy ENV=hoodi FORCE_REBUILD=true + +# Or update individually +make gateway_primary_deploy ENV=hoodi FORCE_REBUILD=true +make gateway_secondary_deploy ENV=hoodi FORCE_REBUILD=true +``` + +This will: +1. Pull latest code from the configured branch (staging for hoodi, main for mainnet) +2. Delete existing binaries +3. Rebuild gateway and poller from source +4. Restart the services + +**Manual update (alternative):** + +If you prefer to update manually: + ```bash +# Gateway ssh app@agg-mode-hoodi-gateway-1 cd ~/repos/gateway/aligned_layer git pull origin staging cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked sudo systemctl restart gateway -``` -**Update poller code:** -```bash -ssh app@agg-mode-hoodi-gateway-1 +# Poller cd ~/repos/poller/aligned_layer git pull origin staging cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked @@ -563,16 +595,21 @@ systemctl --user restart poller ### Redeploy with Latest Code -To redeploy with the latest code from git, simply run the deployment again: +**Idempotent deployment (skip if binary exists):** ```bash make gateway_deploy ENV=hoodi ``` -The playbooks will: -1. 
Pull latest code from the configured branch -2. Rebuild the binaries -3. Restart the services +This pulls the latest code but skips building if the binary already exists. Use this when you only want to update configuration files. + +**Force rebuild (always rebuild binaries):** + +```bash +make gateway_deploy ENV=hoodi FORCE_REBUILD=true +``` + +This always rebuilds binaries from the latest code, even if they already exist. Use this when you want to deploy code changes. ### Changing Configuration diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml index 775f092a6..f06ec16ea 100644 --- a/infra/aggregation_mode/ansible/playbooks/gateway.yaml +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -85,6 +85,12 @@ version: "{{ git_branch }}" update: yes + - name: Remove existing gateway binary (if force rebuild) + file: + path: /home/{{ ansible_user }}/.cargo/bin/gateway + state: absent + when: force_rebuild | default(false) | bool + - name: Build gateway with TLS shell: | export PATH=$HOME/.cargo/bin:$PATH diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 2695699f0..e302da513 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -59,7 +59,7 @@ gateway_tls_key_path=/home/app/.ssl/key.pem # Poller Service Settings (same for all pollers) poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json -last_block_fetched_initial_value=0 +last_block_fetched_initial_value=24235289 # ============================================ # TLS Certificate Management diff --git a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml index 9b394de62..a754a0601 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml +++ 
b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml @@ -126,6 +126,14 @@ register: is_writable changed_when: false + - name: Set password for autoctl_node user in agg_mode database + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -c "ALTER USER {{ db_user }} PASSWORD '{{ db_password }}';" + when: is_writable.stdout == 't' + no_log: true + - name: Set password for pgautofailover_replicator user become: true become_user: postgres diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml b/infra/aggregation_mode/ansible/playbooks/poller.yaml index 0c9e3da80..ee18f91df 100644 --- a/infra/aggregation_mode/ansible/playbooks/poller.yaml +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -55,6 +55,12 @@ version: "{{ git_branch }}" update: yes + - name: Remove existing poller binary (if force rebuild) + file: + path: /home/{{ ansible_user }}/.cargo/bin/payments_poller + state: absent + when: force_rebuild | default(false) | bool + - name: Build poller shell: | export PATH=$HOME/.cargo/bin:$PATH @@ -70,14 +76,19 @@ owner: "{{ ansible_user }}" group: "{{ ansible_user }}" - - name: Create last_block_fetched file + - name: Check if last_block_fetched file exists and get size + stat: + path: "{{ poller_last_block_fetched_filepath }}" + register: last_block_file + + - name: Create or fix last_block_fetched file if empty or missing copy: - content: '{"last_block_fetched":{{ last_block_fetched_initial_value }}}' + content: '{"last_block_fetched": {{ last_block_fetched_initial_value }}}' dest: "{{ poller_last_block_fetched_filepath }}" mode: '0644' owner: "{{ ansible_user }}" group: "{{ ansible_user }}" - force: no + when: not last_block_file.stat.exists or last_block_file.stat.size == 0 - name: Template poller config file template: From 67c8f0fbc050055bd8db2a6f92fce3e01f920dc6 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Thu, 15 Jan 2026 11:46:36 -0300 Subject: [PATCH 07/16] add read only for 
grafana --- .../ansible/playbooks/pg_monitor.yaml | 21 ++++++++++++++++++ .../ansible/playbooks/pg_node.yaml | 22 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml index 3a7da6aa7..7fc9af380 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml +++ b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml @@ -22,6 +22,8 @@ db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" monitor_pgdata: "{{ lookup('ini', 'monitor_pgdata', file=config_file, default='/var/lib/postgresql/monitor') }}" monitor_port: "{{ lookup('ini', 'monitor_port', file=config_file, default='5432') }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" no_log: true - name: Debug vars @@ -113,3 +115,22 @@ shell: | psql -d pg_auto_failover -c "ALTER USER autoctl_node PASSWORD '{{ db_password }}';" no_log: true + + - name: Create Grafana read-only user on monitor + become: true + become_user: postgres + shell: | + psql -d pg_auto_failover << 'EOF' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_user WHERE usename = '{{ grafana_postgres_user }}') THEN + CREATE USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + ELSE + ALTER USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + END IF; + END + $$; + GRANT CONNECT ON DATABASE pg_auto_failover TO {{ grafana_postgres_user }}; + GRANT pg_read_all_data TO {{ grafana_postgres_user }}; + EOF + no_log: true diff --git a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml index a754a0601..75d85c9b0 100644 --- a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml +++ b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml @@ -22,6 
+22,8 @@ node_pgdata: "{{ lookup('ini', 'node_pgdata', file=config_file, default='/var/lib/postgresql/node') }}" node_port: "{{ lookup('ini', 'node_port', file=config_file, default='5432') }}" backup_dir: "{{ lookup('ini', 'backup_dir', file=config_file, default='/var/lib/backup') }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" no_log: true - name: Create backup directory @@ -141,3 +143,23 @@ psql -d {{ db_name }} -c "ALTER USER pgautofailover_replicator PASSWORD '{{ db_password }}';" when: is_writable.stdout == 't' no_log: true + + - name: Create Grafana read-only user + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} << 'EOF' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_user WHERE usename = '{{ grafana_postgres_user }}') THEN + CREATE USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + ELSE + ALTER USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + END IF; + END + $$; + GRANT CONNECT ON DATABASE {{ db_name }} TO {{ grafana_postgres_user }}; + GRANT pg_read_all_data TO {{ grafana_postgres_user }}; + EOF + when: is_writable.stdout == 't' + no_log: true From 882d9e56b07e4f4dca1ebc6f98c9c066f0abe7ae Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Thu, 15 Jan 2026 12:39:45 -0300 Subject: [PATCH 08/16] update ini and set grafana provisioning --- .../ansible/playbooks/grafana_agg_mode.yaml | 24 +++++++++++++++++++ .../ansible/playbooks/ini/config-hoodi.ini | 3 ++- .../ansible/playbooks/ini/config-mainnet.ini | 3 ++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml index d293077ff..a0174f3a4 100644 --- 
a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml +++ b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml @@ -14,6 +14,7 @@ - name: Set config vars from INI file set_fact: + grafana_admin_password: "{{ lookup('ini', 'grafana_admin_password', file=config_file) }}" grafana_prometheus_url: "{{ lookup('ini', 'grafana_prometheus_url', file=config_file) }}" grafana_rpc_url: "{{ lookup('ini', 'grafana_rpc_url', file=config_file) }}" grafana_postgres_host: "{{ lookup('ini', 'grafana_postgres_host', file=config_file) }}" @@ -103,6 +104,29 @@ vars: ansible_ssh_user: "{{ admin_user }}" + - name: Set Grafana admin password + become: true + lineinfile: + path: /etc/grafana/grafana.ini + regexp: '^;?admin_password\s*=' + line: 'admin_password = {{ grafana_admin_password }}' + insertafter: '^\[security\]' + vars: + ansible_ssh_user: "{{ admin_user }}" + no_log: true + + - name: Copy provisioning directory + become: true + copy: + src: ../../../../grafana/provisioning/ + dest: /etc/grafana/provisioning/ + owner: grafana + group: grafana + mode: '0644' + directory_mode: '0755' + vars: + ansible_ssh_user: "{{ admin_user }}" + - name: Enable and start Grafana service become: true systemd_service: diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index aca8d8743..91279b061 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -77,11 +77,12 @@ gateway_primary_hostname=agg-mode-hoodi-gateway-1 gateway_secondary_hostname=agg-mode-hoodi-gateway-2 # Grafana Configuration +grafana_admin_password= grafana_prometheus_url=http://localhost:9090 grafana_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net grafana_postgres_host=agg-mode-hoodi-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode -grafana_postgres_user=autoctl_node +grafana_postgres_user=grafana # REQUIRED: 
Set to same password as db_password grafana_postgres_password= diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index e302da513..e65143f79 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -77,12 +77,13 @@ gateway_primary_hostname=agg-mode-mainnet-gateway-1 gateway_secondary_hostname=agg-mode-mainnet-gateway-2 # Grafana Configuration +grafana_admin_password= grafana_prometheus_url=http://localhost:9090 # TODO: Update with mainnet RPC URL grafana_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net grafana_postgres_host=agg-mode-mainnet-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode -grafana_postgres_user=autoctl_node +grafana_postgres_user=grafana # REQUIRED: Set to same password as db_password grafana_postgres_password= From cbc34bf4809fdaf48ddcbd87867d9c23c8c57784 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:59:09 -0300 Subject: [PATCH 09/16] task sender ansible --- Makefile | 46 ++++ infra/aggregation_mode/ansible/README.md | 194 +++++++++++++++- .../ansible/hoodi-inventory.yaml | 10 + .../ansible/mainnet-inventory.yaml | 10 + .../ansible/playbooks/deploy_all.yaml | 6 + .../ansible/playbooks/ini/config-hoodi.ini | 10 + .../ansible/playbooks/ini/config-mainnet.ini | 10 + .../ansible/playbooks/task_sender.yaml | 211 ++++++++++++++++++ 8 files changed, 493 insertions(+), 4 deletions(-) create mode 100644 infra/aggregation_mode/ansible/playbooks/task_sender.yaml diff --git a/Makefile b/Makefile index 204017bab..e915de8ab 100644 --- a/Makefile +++ b/Makefile @@ -1829,6 +1829,52 @@ grafana_deploy: ## Deploy Grafana only. 
Usage: make grafana_deploy ENV=hoodi -e "host=metrics" \ -e "env=$(ENV)" +# ------------------------------------------------------------------------------ +# Task Sender Deployment +# ------------------------------------------------------------------------------ + +.PHONY: task_sender_deploy +task_sender_deploy: ## Deploy task sender. Usage: make task_sender_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/task_sender.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=task_sender" \ + -e "env=$(ENV)" + +.PHONY: task_sender_status +task_sender_status: ## Check task sender status. Usage: make task_sender_status ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @echo "Checking task sender tmux session..." + @ansible -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml task_sender \ + -m shell \ + -a "tmux has-session -t task_sender && echo 'Task sender is running' || echo 'Task sender is not running'" + +.PHONY: task_sender_logs +task_sender_logs: ## View task sender logs. Usage: make task_sender_logs ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @echo "Use: ssh app@agg-mode-$(ENV)-sender 'tmux attach -t task_sender'" + @echo "Or: ssh app@agg-mode-$(ENV)-sender 'tmux capture-pane -t task_sender -p'" + +.PHONY: task_sender_restart +task_sender_restart: ## Restart task sender.
Usage: make task_sender_restart ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/task_sender.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=task_sender" \ + -e "env=$(ENV)" + # ------------------------------------------------------------------------------ # Full Deployment # ------------------------------------------------------------------------------ diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index 06913363e..bcd921c50 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -37,6 +37,11 @@ The Ansible automation deploys a complete aggregation mode stack consisting of: - Grafana for visualization - 90-day retention +5. **Task Sender** (1 server) + - Automated proof submission service + - Runs continuously in tmux session + - Configurable interval and proof files + ## Architecture ``` @@ -65,6 +70,11 @@ The Ansible automation deploys a complete aggregation mode stack consisting of: │ │ └─ Grafana (3000) │ │ │ └────────────────────────┘ │ │ │ +│ ┌────────────────────────┐ │ +│ │ Task Sender │ │ +│ │ (tmux session) │ │ +│ └────────────────────────┘ │ +│ │ └─────────────────────────────────────────────────────────────────┘ ``` @@ -120,6 +130,9 @@ tls_key_source_path=/path/to/your/key.pem # REQUIRED: Same password for Grafana Postgres datasource grafana_postgres_password=your_secure_password_here + +# REQUIRED: Private key for task sender (sends proofs to network) +task_sender_private_key=0xYourPrivateKeyHere ``` **⚠️ CRITICAL**: All three password fields must be set to the same value before deploying! 
@@ -143,6 +156,9 @@ grafana_postgres_password=your_secure_password_here tls_cert_source_path=/path/to/your/cert.pem tls_key_source_path=/path/to/your/key.pem +# REQUIRED: Private key for task sender +task_sender_private_key=0xYourPrivateKeyHere + # TODO: Update these for mainnet deployment gateway_payment_service_address=0xYourMainnetPaymentServiceAddress gateway_eth_rpc_url=https://your-mainnet-rpc-url @@ -181,6 +197,13 @@ tls_key_source_path= # ← FILL THIS IN # Metrics Configuration grafana_postgres_password= # ← FILL THIS IN (same as db_password) # ... other metrics settings ... + +# Task Sender Configuration +task_sender_interval_hours=1 +task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof +task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin +task_sender_private_key= # ← FILL THIS IN +task_sender_network=hoodi ``` The Ansible templates will automatically generate two separate database connection URLs for failover: @@ -204,6 +227,7 @@ This will: 2. Run database migrations 3. Deploy gateway and poller on both servers 4. Deploy Prometheus and Grafana +5. Deploy task sender ### Step-by-Step Deployment @@ -279,6 +303,49 @@ make grafana_deploy ENV=hoodi - Prometheus: `http://:9090` - Grafana: `http://:3000` (default credentials: admin/admin) +#### 4. Deploy Task Sender + +```bash +# Deploy task sender +make task_sender_deploy ENV=hoodi +``` + +The task sender runs in a tmux session and continuously sends proofs to the network at the configured interval (default: 1 hour). + +**Automatic Deposit Check:** + +The deployment automatically: +1. Derives the wallet address from the configured private key +2. Checks if the address has an active subscription on the payment contract +3. If not subscribed or expired, automatically deposits 0.0035 ETH to the payment contract +4. 
Waits for transaction confirmation before starting the task sender + +**Requirements:** +- The account must have sufficient ETH for: + - Payment deposit: **0.0035 ETH** + - Gas fees: ~**0.001 ETH** (estimated) +- Foundry (cast) will be automatically installed if not present + +**Verify task sender is running:** +```bash +make task_sender_status ENV=hoodi +``` + +**View task sender logs:** +```bash +# Show how to view logs +make task_sender_logs ENV=hoodi + +# Or directly attach to the tmux session +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' +# Press Ctrl+B then D to detach without stopping +``` + +**Restart task sender:** +```bash +make task_sender_restart ENV=hoodi +``` + ## Service Management ### Restart Services @@ -295,6 +362,11 @@ make poller_restart ENV=hoodi HOST=gateway_primary make poller_restart ENV=hoodi HOST=gateway_secondary ``` +**Task Sender:** +```bash +make task_sender_restart ENV=hoodi +``` + ### Check Service Status **PostgreSQL Cluster:** @@ -324,6 +396,13 @@ ssh admin@agg-mode-hoodi-metrics "systemctl --user status prometheus" ssh admin@agg-mode-hoodi-metrics "sudo systemctl status grafana-server" ``` +**Task Sender:** +```bash +make task_sender_status ENV=hoodi +# Or check tmux session directly +ssh app@agg-mode-hoodi-sender "tmux has-session -t task_sender && echo 'Running' || echo 'Not running'" +``` + ### View Logs **Gateway:** @@ -341,6 +420,16 @@ ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -f" ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -f" ``` +**Task Sender:** +```bash +# Attach to tmux session to view live logs +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' +# Press Ctrl+B then D to detach + +# Or capture current pane output +ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' +``` + ## Verification ### PostgreSQL Cluster Health @@ -416,6 +505,23 @@ ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -f" - Go to Configuration → 
Data Sources - Verify Prometheus and PostgreSQL datasources are connected +### Task Sender + +1. **Check tmux session is running:** + ```bash + make task_sender_status ENV=hoodi + ``` + +2. **View recent logs:** + ```bash + ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' + ``` + +3. **Verify proof submissions:** + - Check logs for successful proof submissions + - Look for transaction hashes in the output + - Verify proofs are appearing on the network + ## Troubleshooting ### PostgreSQL Issues @@ -508,6 +614,83 @@ Check Prometheus config: ssh admin@agg-mode-hoodi-metrics "cat ~/config/prometheus.yaml" ``` +### Task Sender Issues + +**Problem: Task sender not running** + +Check if tmux session exists: +```bash +ssh app@agg-mode-hoodi-sender "tmux list-sessions" +``` + +If missing, redeploy: +```bash +make task_sender_deploy ENV=hoodi +``` + +**Problem: Task sender crashes or exits** + +Check logs for errors: +```bash +ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p -S -100' +``` + +Common issues: +- Invalid private key → Check `task_sender_private_key` in `config-{{ env }}.ini` +- Missing proof/vk files → Verify files exist: `task_sender_proof_path`, `task_sender_vk_path` +- Network connectivity → Test RPC: `curl https://aligned-hoodi-rpc-geth.tail665ae.ts.net` +- Insufficient balance → Check account has ETH for gas fees + +**Problem: Proofs not being submitted** + +Check interval configuration: +```bash +ssh app@agg-mode-hoodi-sender "cat ~/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env" +``` + +Verify `INTERVAL_HOURS` is set correctly (default: 1 hour). Attach to session to see live activity: +```bash +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' +``` + +**Problem: Deployment fails with insufficient balance** + +The automatic deposit check requires the account to have at least **0.0045 ETH** (0.0035 for deposit + ~0.001 for gas). 
+ +Check account balance: +```bash +ssh app@agg-mode-hoodi-sender +export PATH=$HOME/.foundry/bin:$PATH +cast balance <WALLET_ADDRESS> --rpc-url <RPC_URL> +``` + +If balance is insufficient, send ETH to the account and redeploy: +```bash +make task_sender_deploy ENV=hoodi +``` + +**Problem: Automatic deposit fails** + +If the automatic deposit fails during deployment, check the Ansible output for error messages. Common issues: +- Insufficient ETH balance in the account +- RPC connection issues +- Gas price too high + +To manually deposit after fixing the issue: +```bash +ssh app@agg-mode-hoodi-sender +export PATH=$HOME/.cargo/bin:$PATH +agg_mode_cli deposit \ + --network hoodi \ + --rpc-url https://aligned-hoodi-rpc-geth.tail665ae.ts.net \ + --private-key <PRIVATE_KEY> +``` + +Then restart the task sender: +```bash +make task_sender_restart ENV=hoodi +``` + ### General Debugging **Check Tailscale connectivity:** @@ -663,6 +846,7 @@ infra/aggregation_mode/ansible/ ├── poller.yaml # Poller deployment ├── prometheus_agg_mode.yaml # Prometheus deployment ├── grafana_agg_mode.yaml # Grafana deployment + ├── task_sender.yaml # Task sender deployment ├── postgres_cluster.yaml # Postgres orchestration ├── gateway_stack.yaml # Gateway + poller orchestration ├── metrics_stack.yaml # Metrics orchestration @@ -673,13 +857,15 @@ infra/aggregation_mode/ansible/ 1. **Passwords**: Config files are tracked in git with empty password fields. Fill in passwords locally. Use `git update-index --assume-unchanged config-*.ini` after filling passwords to prevent accidentally committing them. -2. **TLS Certificates**: Keep private keys secure. The playbooks set appropriate permissions (0600). +2. **Private Keys**: The `task_sender_private_key` field must be filled with a valid Ethereum private key. Never commit this value to git. The playbook sets appropriate permissions (0600) on the environment file. + +3. **TLS Certificates**: Keep private keys secure. The playbooks set appropriate permissions (0600). -3.
**SSH Access**: All servers are only accessible via Tailscale VPN (100.64.0.0/10). +4. **SSH Access**: All servers are only accessible via Tailscale VPN (100.64.0.0/10). -4. **PostgreSQL**: Uses scram-sha-256 password authentication, not trust mode. +5. **PostgreSQL**: Uses scram-sha-256 password authentication, not trust mode. -5. **Firewall**: UFW is configured on all servers with deny-by-default policy. +6. **Firewall**: UFW is configured on all servers with deny-by-default policy. ## Support diff --git a/infra/aggregation_mode/ansible/hoodi-inventory.yaml b/infra/aggregation_mode/ansible/hoodi-inventory.yaml index 4e2f1c91b..f08c3dd5d 100644 --- a/infra/aggregation_mode/ansible/hoodi-inventory.yaml +++ b/infra/aggregation_mode/ansible/hoodi-inventory.yaml @@ -65,9 +65,19 @@ metrics: ansible_user: admin ansible_python_interpreter: /usr/bin/python3 +# Task Sender +task_sender: + hosts: + agg-mode-hoodi-sender: + ansible_host: agg-mode-hoodi-sender + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + # All aggregation mode servers aggregation_mode: children: postgres_cluster: gateway_cluster: metrics: + task_sender: diff --git a/infra/aggregation_mode/ansible/mainnet-inventory.yaml b/infra/aggregation_mode/ansible/mainnet-inventory.yaml index 489cc9d8d..48ca062c8 100644 --- a/infra/aggregation_mode/ansible/mainnet-inventory.yaml +++ b/infra/aggregation_mode/ansible/mainnet-inventory.yaml @@ -65,9 +65,19 @@ metrics: ansible_user: admin ansible_python_interpreter: /usr/bin/python3 +# Task Sender +task_sender: + hosts: + agg-mode-mainnet-sender: + ansible_host: agg-mode-mainnet-sender + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + # All aggregation mode servers aggregation_mode: children: postgres_cluster: gateway_cluster: metrics: + task_sender: diff --git a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml index 
d708c6a79..51e23a8a9 100644 --- a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml +++ b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml @@ -20,3 +20,9 @@ vars: host: metrics env: "{{ env }}" + +- name: Deploy Task Sender + ansible.builtin.import_playbook: task_sender.yaml + vars: + host: task_sender + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index 91279b061..d7d07726e 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -86,3 +86,13 @@ grafana_postgres_db=agg_mode grafana_postgres_user=grafana # REQUIRED: Set to same password as db_password grafana_postgres_password= + +# ============================================ +# Task Sender Configuration +# ============================================ +task_sender_interval_hours=1 +task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof +task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin +# REQUIRED: Set private key for sending proofs +task_sender_private_key= +task_sender_network=hoodi diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index e65143f79..99c0c3bd3 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -87,3 +87,13 @@ grafana_postgres_db=agg_mode grafana_postgres_user=grafana # REQUIRED: Set to same password as db_password grafana_postgres_password= + +# ============================================ +# Task Sender Configuration +# ============================================ +task_sender_interval_hours=1 +task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof +task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin +# REQUIRED: Set private 
key for sending proofs +task_sender_private_key= +task_sender_network=mainnet diff --git a/infra/aggregation_mode/ansible/playbooks/task_sender.yaml b/infra/aggregation_mode/ansible/playbooks/task_sender.yaml new file mode 100644 index 000000000..f7fa523d0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/task_sender.yaml @@ -0,0 +1,211 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Task Sender Setup + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + task_sender_interval_hours: "{{ lookup('ini', 'task_sender_interval_hours', file=config_file, default='1') }}" + task_sender_proof_path: "{{ lookup('ini', 'task_sender_proof_path', file=config_file) }}" + task_sender_vk_path: "{{ lookup('ini', 'task_sender_vk_path', file=config_file) }}" + task_sender_private_key: "{{ lookup('ini', 'task_sender_private_key', file=config_file) }}" + task_sender_network: "{{ lookup('ini', 'task_sender_network', file=config_file) }}" + gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + no_log: true + + - name: Install tmux + become: true + apt: + pkg: + - tmux + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create sender directory + file: + path: /home/{{ ansible_user }}/repos/sender + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + 
dest: /home/{{ ansible_user }}/repos/sender/aligned_layer + version: "{{ git_branch }}" + update: yes + force: yes + + - name: Build CLI + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cd /home/{{ ansible_user }}/repos/sender/aligned_layer + make agg_mode_install_cli + args: + creates: /home/{{ ansible_user }}/.cargo/bin/agg_mode_cli + + - name: Check if foundry (cast) is installed + shell: export PATH=$HOME/.foundry/bin:$PATH && command -v cast + register: cast_installed + failed_when: false + changed_when: false + + - name: Install foundry (for cast tool) + shell: | + curl -L https://foundry.paradigm.xyz | bash + export PATH=$HOME/.foundry/bin:$PATH + foundryup + when: cast_installed.rc != 0 + + - name: Derive wallet address from private key + shell: | + export PATH=$HOME/.foundry/bin:$PATH + cast wallet address {{ task_sender_private_key }} + register: wallet_address_output + no_log: true + changed_when: false + + - name: Set wallet address variable + set_fact: + wallet_address: "{{ wallet_address_output.stdout | trim }}" + no_log: true + + - name: Check subscription status on payment contract + shell: | + export PATH=$HOME/.foundry/bin:$PATH + cast call {{ gateway_payment_service_address }} \ + "subscribedAddresses(address)(uint256)" \ + {{ wallet_address }} \ + --rpc-url {{ gateway_eth_rpc_url }} + register: subscription_expiration_output + changed_when: false + + - name: Get current timestamp + shell: date +%s + register: current_timestamp + changed_when: false + + - name: Set subscription status variables + set_fact: + subscription_expiration: "{{ subscription_expiration_output.stdout | regex_replace('\\s*\\[.*\\]', '') | trim }}" + current_time: "{{ current_timestamp.stdout | trim }}" + + - name: Display subscription status + debug: + msg: | + Wallet address: {{ wallet_address }} + Subscription expiration timestamp: {{ subscription_expiration }} + Current timestamp: {{ current_time }} + Is subscribed: {{ (subscription_expiration | int) > (current_time | int) }} + + - name: Deposit to payment
contract if not subscribed + shell: | + export PATH=$HOME/.cargo/bin:$PATH + agg_mode_cli deposit \ + --network {{ task_sender_network }} \ + --rpc-url {{ gateway_eth_rpc_url }} \ + --private-key {{ task_sender_private_key }} + when: (subscription_expiration | int) <= (current_time | int) + no_log: true + register: deposit_result + + - name: Display deposit result + debug: + msg: "Deposit successful. Transaction receipt: {{ deposit_result.stdout }}" + when: (subscription_expiration | int) <= (current_time | int) and deposit_result is defined + + - name: Wait for deposit transaction to confirm + pause: + seconds: 10 + when: (subscription_expiration | int) <= (current_time | int) + + - name: Create scripts directory + file: + path: /home/{{ ansible_user }}/repos/sender/aligned_layer/scripts + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template task sender env file + template: + src: config-files/agg_mode.task_sender.env.j2 + dest: /home/{{ ansible_user }}/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + no_log: true + + - name: Check if tmux session exists + shell: tmux has-session -t task_sender 2>/dev/null + register: tmux_session_exists + failed_when: false + changed_when: false + + - name: Kill existing tmux session + shell: tmux kill-session -t task_sender + when: tmux_session_exists.rc == 0 + + - name: Start task sender in tmux session + shell: | + cd /home/{{ ansible_user }}/repos/sender/aligned_layer + tmux new-session -d -s task_sender 'bash -c "export PATH=$HOME/.cargo/bin:$PATH && make agg_mode_task_sender_start 2>&1 | tee /tmp/task_sender.log; exec bash"' + register: tmux_start + changed_when: true + + - name: Wait for task sender to initialize + pause: + seconds: 5 + + - name: Check if tmux session is still running + shell: tmux has-session -t task_sender 2>/dev/null + register: verify_tmux + failed_when: 
false + changed_when: false + + - name: Capture tmux pane content if session exists + shell: tmux capture-pane -t task_sender -p + register: tmux_output + when: verify_tmux.rc == 0 + failed_when: false + + - name: Display tmux output + debug: + msg: "Tmux session output: {{ tmux_output.stdout }}" + when: verify_tmux.rc == 0 and tmux_output is defined + + - name: Check error log if session failed + shell: cat /tmp/task_sender.log 2>/dev/null || echo "No log file found" + register: error_log + when: verify_tmux.rc != 0 + failed_when: false + + - name: Display error log if session failed + debug: + msg: "Task sender failed. Log: {{ error_log.stdout }}" + when: verify_tmux.rc != 0 + + - name: Fail if tmux session is not running + fail: + msg: "Task sender tmux session failed to start. Check the output above for details." + when: verify_tmux.rc != 0 From 315f24f4831b0346821de314a0dcd2abd6bd8b80 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Thu, 15 Jan 2026 18:14:46 -0300 Subject: [PATCH 10/16] missing template --- .../templates/config-files/agg_mode.task_sender.env.j2 | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 new file mode 100644 index 000000000..2ceab4fe0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 @@ -0,0 +1,5 @@ +INTERVAL_HOURS={{ task_sender_interval_hours }} +PROOF_PATH={{ task_sender_proof_path }} +VK_PATH={{ task_sender_vk_path }} +PRIVATE_KEY={{ task_sender_private_key }} +NETWORK={{ task_sender_network }} From 40889c396660701c79b63ef7d8a6dab093e199ec Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 
2026 11:57:55 -0300 Subject: [PATCH 11/16] improve variables and README.md --- infra/aggregation_mode/ansible/README.md | 121 +++--------------- .../ansible/playbooks/gateway.yaml | 6 +- .../ansible/playbooks/ini/config-hoodi.ini | 38 +++--- .../ansible/playbooks/ini/config-mainnet.ini | 39 +++--- .../ansible/playbooks/poller.yaml | 6 +- 5 files changed, 61 insertions(+), 149 deletions(-) diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index bcd921c50..76005c6ee 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -106,112 +106,37 @@ All servers are provisioned via Terraform and connected via Tailscale VPN. They ## Initial Setup -All configuration is consolidated into environment-specific files with predefined values. You only need to fill in sensitive values (passwords, certificate paths). +All configuration is consolidated into environment-specific files with predefined values. You only need to fill in sensitive values at the top of each config file. -### 1. Configure Hoodi Environment +### Configure Environment -Edit `playbooks/ini/config-hoodi.ini`: +Edit the config file for your environment: +- **Hoodi**: `playbooks/ini/config-hoodi.ini` +- **Mainnet**: `playbooks/ini/config-mainnet.ini` -All non-sensitive values are already pre-filled. You only need to set: +All non-sensitive values are already pre-filled. Fill in the required values at the top of the file: ```ini -[DEFAULT] -# ... (all values pre-filled) ... 
- -# REQUIRED: Set a strong password before deploying +# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) db_password=your_secure_password_here -# REQUIRED: Same password for gateway/poller database access -gateway_db_password=your_secure_password_here - -# REQUIRED: Provide local paths to your TLS certificate files -tls_cert_source_path=/path/to/your/cert.pem -tls_key_source_path=/path/to/your/key.pem - -# REQUIRED: Same password for Grafana Postgres datasource +# Grafana read-only database user password grafana_postgres_password=your_secure_password_here -# REQUIRED: Private key for task sender (sends proofs to network) -task_sender_private_key=0xYourPrivateKeyHere -``` - -**⚠️ CRITICAL**: All three password fields must be set to the same value before deploying! - -### 2. Configure Mainnet Environment (if needed) - -Edit `playbooks/ini/config-mainnet.ini`: - -Similar to Hoodi, fill in the required values: - -```ini -[DEFAULT] -# ... (most values pre-filled) ... 
- -# REQUIRED: Set passwords (same as above) -db_password=your_secure_password_here -gateway_db_password=your_secure_password_here -grafana_postgres_password=your_secure_password_here - -# REQUIRED: TLS certificate paths +# TLS certificates (local paths to copy from) tls_cert_source_path=/path/to/your/cert.pem tls_key_source_path=/path/to/your/key.pem -# REQUIRED: Private key for task sender -task_sender_private_key=0xYourPrivateKeyHere +# Grafana admin password +grafana_admin_password=your_grafana_admin_password -# TODO: Update these for mainnet deployment -gateway_payment_service_address=0xYourMainnetPaymentServiceAddress -gateway_eth_rpc_url=https://your-mainnet-rpc-url -grafana_rpc_url=https://your-mainnet-rpc-url +# Task sender private key (for sending proofs) +task_sender_private_key=0xYourPrivateKeyHere ``` -### Configuration File Structure - -The consolidated config files contain all settings organized by component: - -```ini -# config-hoodi.ini structure: -[DEFAULT] -environment=hoodi -git_branch=staging - -# PostgreSQL Configuration -postgres_monitor_hostname=agg-mode-hoodi-postgres-monitor -postgres_primary_hostname=agg-mode-hoodi-postgres-1 -postgres_secondary_hostname=agg-mode-hoodi-postgres-2 -db_name=agg_mode -db_user=autoctl_node -db_password= # ← FILL THIS IN - -# Gateway & Poller Configuration -gateway_network=Hoodi -gateway_payment_service_address=0x7222E0183cE1A96619d0c883e9bfc6b76D4e780e -gateway_eth_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net -gateway_db_password= # ← FILL THIS IN (same as db_password) -# ... other gateway settings ... - -# TLS Certificate Management -tls_cert_source_path= # ← FILL THIS IN -tls_key_source_path= # ← FILL THIS IN - -# Metrics Configuration -grafana_postgres_password= # ← FILL THIS IN (same as db_password) -# ... other metrics settings ... 
- -# Task Sender Configuration -task_sender_interval_hours=1 -task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof -task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin -task_sender_private_key= # ← FILL THIS IN -task_sender_network=hoodi -``` - -The Ansible templates will automatically generate two separate database connection URLs for failover: -- `postgres://autoctl_node:password@agg-mode-hoodi-postgres-1:5432/agg_mode` -- `postgres://autoctl_node:password@agg-mode-hoodi-postgres-2:5432/agg_mode` - -The sqlx driver will try them in order for automatic failover - ## Deployment ### Full Stack Deployment @@ -540,10 +465,7 @@ ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -n 100" **Problem: Password authentication fails** -Verify password is set correctly in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`). All three password fields must match: -- `db_password` -- `gateway_db_password` -- `grafana_postgres_password` +Verify `db_password` is set correctly in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`). Check pg_hba.conf: ```bash @@ -566,7 +488,7 @@ ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 100" Common issues: - Missing TLS certificates → Check paths in `config-{{ env }}.ini` (tls_cert_source_path, tls_key_source_path) -- Database connection failed → Verify password in `config-{{ env }}.ini` (gateway_db_password) +- Database connection failed → Verify `db_password` in `config-{{ env }}.ini` - Port 443 already in use → Check with `sudo lsof -i :443` **Problem: TLS certificate errors** @@ -806,10 +728,9 @@ This always rebuilds binaries from the latest code, even if they already exist. ### Rotating Passwords -1. Update all three password fields in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`): - - `db_password` - - `gateway_db_password` - - `grafana_postgres_password` +1. 
Update password fields in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`): + - `db_password` (used by postgres, gateway, and poller) + - `grafana_postgres_password` (separate read-only user) 2. Run password update on PostgreSQL: ```bash ssh admin@agg-mode-hoodi-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml index f06ec16ea..98f73e418 100644 --- a/infra/aggregation_mode/ansible/playbooks/gateway.yaml +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -26,9 +26,9 @@ gateway_tls_port: "{{ lookup('ini', 'gateway_tls_port', file=config_file, default='443') }}" gateway_tls_cert_path: "{{ lookup('ini', 'gateway_tls_cert_path', file=config_file) }}" gateway_tls_key_path: "{{ lookup('ini', 'gateway_tls_key_path', file=config_file) }}" - gateway_db_user: "{{ lookup('ini', 'gateway_db_user', file=config_file) }}" - gateway_db_password: "{{ lookup('ini', 'gateway_db_password', file=config_file) }}" - gateway_db_name: "{{ lookup('ini', 'gateway_db_name', file=config_file) }}" + gateway_db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + gateway_db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + gateway_db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" gateway_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" gateway_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" gateway_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index d7d07726e..aad6296c9 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ 
b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -3,9 +3,25 @@ # Hoodi Environment Configuration # ============================================ # This file contains all configuration for the Hoodi environment. -# Only sensitive values (passwords, certificate paths) need to be filled in. +# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) +db_password= +# Grafana read-only database user password +grafana_postgres_password= +# TLS certificates (local paths to copy from) +tls_cert_source_path= +tls_key_source_path= +# Grafana admin password +grafana_admin_password= +# Task sender private key (for sending proofs) +task_sender_private_key= + +# ============================================ # Environment +# ============================================ environment=hoodi git_branch=staging @@ -17,8 +33,6 @@ postgres_primary_hostname=agg-mode-hoodi-postgres-1 postgres_secondary_hostname=agg-mode-hoodi-postgres-2 db_name=agg_mode db_user=autoctl_node -# REQUIRED: Set this password before deploying postgres -db_password= # PostgreSQL Monitor monitor_pgdata=/var/lib/postgresql/monitor @@ -37,11 +51,6 @@ gateway_max_daily_proofs=100 gateway_payment_service_address=0x7222E0183cE1A96619d0c883e9bfc6b76D4e780e gateway_eth_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net -# Database connection (uses same credentials as postgres) -gateway_db_user=autoctl_node -# REQUIRED: Set to same password as db_password -gateway_db_password= -gateway_db_name=agg_mode gateway_postgres_primary=agg-mode-hoodi-postgres-1 gateway_postgres_secondary=agg-mode-hoodi-postgres-2 gateway_postgres_port=5432 @@ -61,14 +70,6 @@ gateway_tls_key_path=/home/app/.ssl/key.pem poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json last_block_fetched_initial_value=0 -# 
============================================ -# TLS Certificate Management -# ============================================ -# REQUIRED: Provide paths to existing certificates on your local machine -# These will be copied to the gateway servers -tls_cert_source_path= -tls_key_source_path= - # ============================================ # Metrics Configuration # ============================================ @@ -77,15 +78,12 @@ gateway_primary_hostname=agg-mode-hoodi-gateway-1 gateway_secondary_hostname=agg-mode-hoodi-gateway-2 # Grafana Configuration -grafana_admin_password= grafana_prometheus_url=http://localhost:9090 grafana_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net grafana_postgres_host=agg-mode-hoodi-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode grafana_postgres_user=grafana -# REQUIRED: Set to same password as db_password -grafana_postgres_password= # ============================================ # Task Sender Configuration @@ -93,6 +91,4 @@ grafana_postgres_password= task_sender_interval_hours=1 task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin -# REQUIRED: Set private key for sending proofs -task_sender_private_key= task_sender_network=hoodi diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 99c0c3bd3..587bd187f 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -3,9 +3,25 @@ # Mainnet Environment Configuration # ============================================ # This file contains all configuration for the Mainnet environment. -# Only sensitive values (passwords, certificate paths) need to be filled in. 
+# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) +db_password= +# Grafana read-only database user password +grafana_postgres_password= +# TLS certificates (local paths to copy from) +tls_cert_source_path= +tls_key_source_path= +# Grafana admin password +grafana_admin_password= +# Task sender private key (for sending proofs) +task_sender_private_key= + +# ============================================ # Environment +# ============================================ environment=mainnet git_branch=staging @@ -17,8 +33,6 @@ postgres_primary_hostname=agg-mode-mainnet-postgres-1 postgres_secondary_hostname=agg-mode-mainnet-postgres-2 db_name=agg_mode db_user=autoctl_node -# REQUIRED: Set this password before deploying postgres -db_password= # PostgreSQL Monitor monitor_pgdata=/var/lib/postgresql/monitor @@ -37,11 +51,6 @@ gateway_max_daily_proofs=100 gateway_payment_service_address=0xc8631Bc1E60c20db40e474F791126212fA8255F4 gateway_eth_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net -# Database connection (uses same credentials as postgres) -gateway_db_user=autoctl_node -# REQUIRED: Set to same password as db_password -gateway_db_password= -gateway_db_name=agg_mode gateway_postgres_primary=agg-mode-mainnet-postgres-1 gateway_postgres_secondary=agg-mode-mainnet-postgres-2 gateway_postgres_port=5432 @@ -61,14 +70,6 @@ gateway_tls_key_path=/home/app/.ssl/key.pem poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json last_block_fetched_initial_value=24235289 -# ============================================ -# TLS Certificate Management -# ============================================ -# REQUIRED: Provide paths to existing certificates on your local machine -# These will be copied to the gateway servers -tls_cert_source_path= -tls_key_source_path= - # 
============================================ # Metrics Configuration # ============================================ @@ -77,16 +78,12 @@ gateway_primary_hostname=agg-mode-mainnet-gateway-1 gateway_secondary_hostname=agg-mode-mainnet-gateway-2 # Grafana Configuration -grafana_admin_password= grafana_prometheus_url=http://localhost:9090 -# TODO: Update with mainnet RPC URL grafana_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net grafana_postgres_host=agg-mode-mainnet-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode grafana_postgres_user=grafana -# REQUIRED: Set to same password as db_password -grafana_postgres_password= # ============================================ # Task Sender Configuration @@ -94,6 +91,4 @@ grafana_postgres_password= task_sender_interval_hours=1 task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin -# REQUIRED: Set private key for sending proofs -task_sender_private_key= task_sender_network=mainnet diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml b/infra/aggregation_mode/ansible/playbooks/poller.yaml index ee18f91df..ee0e05953 100644 --- a/infra/aggregation_mode/ansible/playbooks/poller.yaml +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -21,9 +21,9 @@ - name: Set config vars from INI file set_fact: git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" - poller_db_user: "{{ lookup('ini', 'gateway_db_user', file=config_file) }}" - poller_db_password: "{{ lookup('ini', 'gateway_db_password', file=config_file) }}" - poller_db_name: "{{ lookup('ini', 'gateway_db_name', file=config_file) }}" + poller_db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + poller_db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + poller_db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" poller_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" 
poller_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" poller_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" From ae3c344e9d6625a6cc44ef5b056d65495f63a692 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:55:44 -0300 Subject: [PATCH 12/16] add commands for both networks --- infra/aggregation_mode/ansible/README.md | 351 ++++++++++++++++++++--- 1 file changed, 316 insertions(+), 35 deletions(-) diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index 76005c6ee..5e73acfcd 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -144,7 +144,11 @@ task_sender_private_key=0xYourPrivateKeyHere To deploy everything in one command: ```bash +# For Hoodi make agg_mode_deploy_all ENV=hoodi + +# For Mainnet +make agg_mode_deploy_all ENV=mainnet ``` This will: @@ -161,8 +165,11 @@ For more control, deploy each component separately: #### 1. Deploy PostgreSQL Cluster ```bash -# Deploy complete postgres cluster with password authentication +# For Hoodi make postgres_deploy ENV=hoodi + +# For Mainnet +make postgres_deploy ENV=mainnet ``` This will: @@ -174,7 +181,11 @@ This will: **Verify cluster status:** ```bash +# For Hoodi make postgres_status ENV=hoodi + +# For Mainnet +make postgres_status ENV=mainnet ``` Expected output: @@ -189,15 +200,17 @@ node_2 | 3 | 100.x.x.x:5432 | 1: 0/... | read-only | seconda #### 2. 
Deploy Gateway & Poller ```bash -# Deploy on both servers +# For Hoodi make gateway_deploy ENV=hoodi - -# Or deploy individually make gateway_primary_deploy ENV=hoodi make gateway_secondary_deploy ENV=hoodi - -# Force rebuild (always rebuild binaries from latest code) make gateway_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet +make gateway_primary_deploy ENV=mainnet +make gateway_secondary_deploy ENV=mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true ``` **Note:** By default, the deployment is idempotent and skips building if the binary already exists. Use `FORCE_REBUILD=true` to always rebuild from the latest code in the repository. @@ -210,18 +223,21 @@ ssh app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" **Test endpoint:** ```bash -curl -k https://agg-mode-hoodi-gateway-1/health +curl -k https://agg-mode-hoodi-gateway-1/ ``` #### 3. Deploy Metrics Stack ```bash -# Deploy both Prometheus and Grafana +# For Hoodi make metrics_deploy ENV=hoodi - -# Or deploy individually make prometheus_deploy ENV=hoodi make grafana_deploy ENV=hoodi + +# For Mainnet +make metrics_deploy ENV=mainnet +make prometheus_deploy ENV=mainnet +make grafana_deploy ENV=mainnet ``` **Access dashboards:** @@ -231,8 +247,11 @@ make grafana_deploy ENV=hoodi #### 4. Deploy Task Sender ```bash -# Deploy task sender +# For Hoodi make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet ``` The task sender runs in a tmux session and continuously sends proofs to the network at the configured interval (default: 1 hour). 
@@ -253,22 +272,33 @@ The deployment automatically: **Verify task sender is running:** ```bash +# For Hoodi make task_sender_status ENV=hoodi + +# For Mainnet +make task_sender_status ENV=mainnet ``` **View task sender logs:** ```bash -# Show how to view logs +# For Hoodi make task_sender_logs ENV=hoodi - -# Or directly attach to the tmux session ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' + +# For Mainnet +make task_sender_logs ENV=mainnet +ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' + # Press Ctrl+B then D to detach without stopping ``` **Restart task sender:** ```bash +# For Hoodi make task_sender_restart ENV=hoodi + +# For Mainnet +make task_sender_restart ENV=mainnet ``` ## Service Management @@ -277,82 +307,137 @@ make task_sender_restart ENV=hoodi **Gateway:** ```bash +# For Hoodi make gateway_restart ENV=hoodi HOST=gateway_primary make gateway_restart ENV=hoodi HOST=gateway_secondary + +# For Mainnet +make gateway_restart ENV=mainnet HOST=gateway_primary +make gateway_restart ENV=mainnet HOST=gateway_secondary ``` **Poller:** ```bash +# For Hoodi make poller_restart ENV=hoodi HOST=gateway_primary make poller_restart ENV=hoodi HOST=gateway_secondary + +# For Mainnet +make poller_restart ENV=mainnet HOST=gateway_primary +make poller_restart ENV=mainnet HOST=gateway_secondary ``` **Task Sender:** ```bash +# For Hoodi make task_sender_restart ENV=hoodi + +# For Mainnet +make task_sender_restart ENV=mainnet ``` ### Check Service Status **PostgreSQL Cluster:** ```bash +# For Hoodi make postgres_status ENV=hoodi + +# For Mainnet +make postgres_status ENV=mainnet ``` **Gateway:** ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 50" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -n 50" ``` **Poller:** ```bash +# For Hoodi ssh 
app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 50" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "systemctl --user status poller" +ssh app@agg-mode-mainnet-gateway-1 "journalctl --user -u poller -n 50" ``` **Prometheus:** ```bash +# For Hoodi ssh admin@agg-mode-hoodi-metrics "systemctl --user status prometheus" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "systemctl --user status prometheus" ``` **Grafana:** ```bash +# For Hoodi ssh admin@agg-mode-hoodi-metrics "sudo systemctl status grafana-server" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "sudo systemctl status grafana-server" ``` **Task Sender:** ```bash +# For Hoodi make task_sender_status ENV=hoodi -# Or check tmux session directly ssh app@agg-mode-hoodi-sender "tmux has-session -t task_sender && echo 'Running' || echo 'Not running'" + +# For Mainnet +make task_sender_status ENV=mainnet +ssh app@agg-mode-mainnet-sender "tmux has-session -t task_sender && echo 'Running' || echo 'Not running'" ``` ### View Logs **Gateway:** ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -f" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -f" ``` **Poller:** ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -f" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "journalctl --user -u poller -f" ``` **PostgreSQL:** ```bash +# For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -f" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo journalctl -u pgautofailover -f" ``` **Task Sender:** ```bash -# Attach to tmux session to view live logs +# For Hoodi ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' -# Press Ctrl+B then D to detach - -# Or capture current pane output ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' + +# For Mainnet +ssh 
app@agg-mode-mainnet-sender 'tmux attach -t task_sender' +ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p' + +# Press Ctrl+B then D to detach ``` ## Verification @@ -361,62 +446,98 @@ ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' 1. **Check cluster state:** ```bash + # For Hoodi make postgres_status ENV=hoodi + + # For Mainnet + make postgres_status ENV=mainnet ``` 2. **Test password authentication:** ```bash + # For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "PGPASSWORD='your_password' psql -U autoctl_node -h localhost -d agg_mode -c 'SELECT 1'" + # For Mainnet + ssh admin@agg-mode-mainnet-postgres-1 "PGPASSWORD='your_password' psql -U autoctl_node -h localhost -d agg_mode -c 'SELECT 1'" ``` 3. **Verify replication:** ```bash + # For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres psql -d agg_mode -c 'SELECT * FROM pg_stat_replication'" + + # For Mainnet + ssh admin@agg-mode-mainnet-postgres-1 "sudo -u postgres psql -d agg_mode -c 'SELECT * FROM pg_stat_replication'" ``` 4. **Test failover (optional):** ```bash - # Stop primary + # For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl stop pgautofailover" - # Wait 30 seconds, check status make postgres_status ENV=hoodi # Secondary should now be primary - - # Restart original primary ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl start pgautofailover" + + # For Mainnet + ssh admin@agg-mode-mainnet-postgres-1 "sudo systemctl stop pgautofailover" + # Wait 30 seconds, check status + make postgres_status ENV=mainnet + # Secondary should now be primary + ssh admin@agg-mode-mainnet-postgres-1 "sudo systemctl start pgautofailover" ``` ### Gateway Health 1. **Check HTTP health endpoint:** ```bash - curl -k https://agg-mode-hoodi-gateway-1/health + # For Hoodi + curl -k https://agg-mode-hoodi-gateway-1/ + + # For Mainnet + curl -k https://agg-mode-mainnet-gateway-1/ ``` 2. 
**Check metrics:** ```bash + # For Hoodi curl http://agg-mode-hoodi-gateway-1:9094/metrics + + # For Mainnet + curl http://agg-mode-mainnet-gateway-1:9094/metrics ``` 3. **Verify database connectivity:** ```bash + # For Hoodi ssh app@agg-mode-hoodi-gateway-1 PGPASSWORD='your_password' psql -U autoctl_node -h agg-mode-hoodi-postgres-1 -d agg_mode -c "SELECT 1" + + # For Mainnet + ssh app@agg-mode-mainnet-gateway-1 + PGPASSWORD='your_password' psql -U autoctl_node -h agg-mode-mainnet-postgres-1 -d agg_mode -c "SELECT 1" ``` ### Poller Health 1. **Check last processed block:** ```bash + # For Hoodi ssh app@agg-mode-hoodi-gateway-1 "cat ~/config/proof-aggregator.last_block_fetched.json" + + # For Mainnet + ssh app@agg-mode-mainnet-gateway-1 "cat ~/config/proof-aggregator.last_block_fetched.json" ``` The block number should increase over time. 2. **Check metrics:** ```bash + # For Hoodi curl http://agg-mode-hoodi-gateway-1:9095/metrics + + # For Mainnet + curl http://agg-mode-mainnet-gateway-1:9095/metrics ``` ### Metrics Stack @@ -434,12 +555,20 @@ ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' 1. **Check tmux session is running:** ```bash + # For Hoodi make task_sender_status ENV=hoodi + + # For Mainnet + make task_sender_status ENV=mainnet ``` 2. **View recent logs:** ```bash + # For Hoodi ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' + + # For Mainnet + ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p' ``` 3. 
**Verify proof submissions:** @@ -455,12 +584,20 @@ ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' Check monitor logs: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-postgres-monitor "sudo journalctl -u pgautofailover -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-monitor "sudo journalctl -u pgautofailover -n 100" ``` Check node logs: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo journalctl -u pgautofailover -n 100" ``` **Problem: Password authentication fails** @@ -469,7 +606,11 @@ Verify `db_password` is set correctly in your environment config file (`config-h Check pg_hba.conf: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres cat /var/lib/postgresql/node/pg_hba.conf" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo -u postgres cat /var/lib/postgresql/node/pg_hba.conf" ``` Should contain: @@ -483,7 +624,11 @@ host all all 100.64.0.0/10 scram-sha-256 Check logs for errors: ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 100" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -n 100" ``` Common issues: @@ -495,12 +640,20 @@ Common issues: Verify certificates exist: ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "ls -la ~/.ssl/" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "ls -la ~/.ssl/" ``` Check certificate validity: ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" ``` ### Poller Issues @@ -509,12 +662,20 @@ ssh app@agg-mode-hoodi-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" Check logs: ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 100" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 
"journalctl --user -u poller -n 100" ``` Verify RPC connectivity: ```bash +# For Hoodi ssh app@agg-mode-hoodi-gateway-1 "curl -X POST -H 'Content-Type: application/json' --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' https://aligned-hoodi-rpc-geth.tail665ae.ts.net" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "curl -X POST -H 'Content-Type: application/json' --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' https://aligned-mainnet-rpc-1.tail665ae.ts.net" ``` ### Metrics Issues @@ -523,17 +684,29 @@ ssh app@agg-mode-hoodi-gateway-1 "curl -X POST -H 'Content-Type: application/jso Check Prometheus logs: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-metrics "journalctl --user -u prometheus -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "journalctl --user -u prometheus -n 100" ``` Verify targets are reachable from metrics server: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-metrics "curl http://agg-mode-hoodi-gateway-1:9094/metrics" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "curl http://agg-mode-mainnet-gateway-1:9094/metrics" ``` Check Prometheus config: ```bash +# For Hoodi ssh admin@agg-mode-hoodi-metrics "cat ~/config/prometheus.yaml" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "cat ~/config/prometheus.yaml" ``` ### Task Sender Issues @@ -542,37 +715,57 @@ ssh admin@agg-mode-hoodi-metrics "cat ~/config/prometheus.yaml" Check if tmux session exists: ```bash +# For Hoodi ssh app@agg-mode-hoodi-sender "tmux list-sessions" + +# For Mainnet +ssh app@agg-mode-mainnet-sender "tmux list-sessions" ``` If missing, redeploy: ```bash +# For Hoodi make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet ``` **Problem: Task sender crashes or exits** Check logs for errors: ```bash +# For Hoodi ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p -S -100' + +# For Mainnet +ssh app@agg-mode-mainnet-sender 'tmux capture-pane 
-t task_sender -p -S -100' ``` Common issues: - Invalid private key → Check `task_sender_private_key` in `config-{{ env }}.ini` - Missing proof/vk files → Verify files exist: `task_sender_proof_path`, `task_sender_vk_path` -- Network connectivity → Test RPC: `curl https://aligned-hoodi-rpc-geth.tail665ae.ts.net` +- Network connectivity → Test RPC: `curl https://aligned-hoodi-rpc-geth.tail665ae.ts.net` (Hoodi) or `curl https://aligned-mainnet-rpc-1.tail665ae.ts.net` (Mainnet) - Insufficient balance → Check account has ETH for gas fees **Problem: Proofs not being submitted** Check interval configuration: ```bash +# For Hoodi ssh app@agg-mode-hoodi-sender "cat ~/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env" + +# For Mainnet +ssh app@agg-mode-mainnet-sender "cat ~/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env" ``` Verify `INTERVAL_HOURS` is set correctly (default: 1 hour). Attach to session to see live activity: ```bash +# For Hoodi ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' + +# For Mainnet +ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' ``` **Problem: Deployment fails with insufficient balance** @@ -581,14 +774,24 @@ The automatic deposit check requires the account to have at least **0.0045 ETH** Check account balance: ```bash +# For Hoodi ssh app@agg-mode-hoodi-sender export PATH=$HOME/.foundry/bin:$PATH -cast balance --rpc-url +cast balance <ADDRESS> --rpc-url https://aligned-hoodi-rpc-geth.tail665ae.ts.net + +# For Mainnet +ssh app@agg-mode-mainnet-sender +export PATH=$HOME/.foundry/bin:$PATH +cast balance <ADDRESS> --rpc-url https://aligned-mainnet-rpc-1.tail665ae.ts.net ``` If balance is insufficient, send ETH to the account and redeploy: ```bash +# For Hoodi make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet ``` **Problem: Automatic deposit fails** @@ -602,15 +805,27 @@ To manually deposit after fixing the issue: ```bash ssh app@agg-mode-hoodi-sender export PATH=$HOME/.cargo/bin:$PATH
+ +# For Hoodi agg_mode_cli deposit \ --network hoodi \ --rpc-url https://aligned-hoodi-rpc-geth.tail665ae.ts.net \ --private-key + +# For Mainnet +agg_mode_cli deposit \ + --network mainnet \ + --rpc-url https://aligned-mainnet-rpc-1.tail665ae.ts.net \ + --private-key ``` Then restart the task sender: ```bash +# For Hoodi make task_sender_restart ENV=hoodi + +# For Mainnet +make task_sender_restart ENV=mainnet ``` ### General Debugging @@ -622,13 +837,22 @@ tailscale status **Test SSH access to servers:** ```bash +# For Hoodi ssh admin@agg-mode-hoodi-postgres-monitor "echo 'Connection successful'" ssh app@agg-mode-hoodi-gateway-1 "echo 'Connection successful'" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-monitor "echo 'Connection successful'" +ssh app@agg-mode-mainnet-gateway-1 "echo 'Connection successful'" ``` **Verify Ansible inventory:** ```bash +# For Hoodi ansible-inventory -i infra/aggregation_mode/ansible/hoodi-inventory.yaml --list + +# For Mainnet +ansible-inventory -i infra/aggregation_mode/ansible/mainnet-inventory.yaml --list ``` ## Advanced Usage @@ -638,24 +862,43 @@ ansible-inventory -i infra/aggregation_mode/ansible/hoodi-inventory.yaml --list You can run any playbook directly with ansible-playbook: ```bash -# Deploy only postgres monitor +# Deploy only postgres monitor (Hoodi) ansible-playbook infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml \ -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ -e "host=postgres_monitor" \ -e "env=hoodi" -# Deploy only gateway (no poller) +# Deploy only postgres monitor (Mainnet) +ansible-playbook infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=mainnet" + +# Deploy only gateway (no poller) - Hoodi ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ -e "host=gateway_primary" \ -e "env=hoodi" -# Deploy 
gateway with forced rebuild +# Deploy only gateway (no poller) - Mainnet +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=mainnet" + +# Deploy gateway with forced rebuild (Hoodi) ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ -e "host=gateway_primary" \ -e "env=hoodi" \ -e "force_rebuild=true" + +# Deploy gateway with forced rebuild (Mainnet) +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=mainnet" \ + -e "force_rebuild=true" ``` ### Updating Services @@ -665,12 +908,15 @@ ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ The easiest way to update services is using the `FORCE_REBUILD` parameter: ```bash -# Update both primary and secondary +# For Hoodi make gateway_deploy ENV=hoodi FORCE_REBUILD=true - -# Or update individually make gateway_primary_deploy ENV=hoodi FORCE_REBUILD=true make gateway_secondary_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_primary_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_secondary_deploy ENV=mainnet FORCE_REBUILD=true ``` This will: @@ -684,14 +930,29 @@ This will: If you prefer to update manually: ```bash -# Gateway +# Gateway (Hoodi) ssh app@agg-mode-hoodi-gateway-1 cd ~/repos/gateway/aligned_layer git pull origin staging cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked sudo systemctl restart gateway -# Poller +# Gateway (Mainnet) +ssh app@agg-mode-mainnet-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked +sudo systemctl restart gateway + +# Poller (Hoodi) +ssh 
app@agg-mode-hoodi-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked +systemctl --user restart poller + +# Poller (Mainnet) +ssh app@agg-mode-mainnet-gateway-1 cd ~/repos/poller/aligned_layer git pull origin staging cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked @@ -703,7 +964,11 @@ systemctl --user restart poller **Idempotent deployment (skip if binary exists):** ```bash +# For Hoodi make gateway_deploy ENV=hoodi + +# For Mainnet +make gateway_deploy ENV=mainnet ``` This pulls the latest code but skips building if the binary already exists. Use this when you only want to update configuration files. @@ -711,7 +976,11 @@ This pulls the latest code but skips building if the binary already exists. Use **Force rebuild (always rebuild binaries):** ```bash +# For Hoodi make gateway_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true ``` This always rebuilds binaries from the latest code, even if they already exist. Use this when you want to deploy code changes. @@ -721,9 +990,13 @@ This always rebuilds binaries from the latest code, even if they already exist. 1. Update INI files in `playbooks/ini/` 2. Redeploy the affected service: ```bash + # For Hoodi make gateway_deploy ENV=hoodi - # or make postgres_deploy ENV=hoodi + + # For Mainnet + make gateway_deploy ENV=mainnet + make postgres_deploy ENV=mainnet ``` ### Rotating Passwords @@ -733,12 +1006,20 @@ This always rebuilds binaries from the latest code, even if they already exist. - `grafana_postgres_password` (separate read-only user) 2. 
Run password update on PostgreSQL: ```bash + # For Hoodi ssh admin@agg-mode-hoodi-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" + # For Mainnet + ssh admin@agg-mode-mainnet-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" ``` 3. Redeploy gateway and metrics: ```bash + # For Hoodi make gateway_deploy ENV=hoodi make metrics_deploy ENV=hoodi + + # For Mainnet + make gateway_deploy ENV=mainnet + make metrics_deploy ENV=mainnet ``` ## File Structure From 3bda57e3215c3563bf1d1ecd54e22c27bcf93c34 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:01:51 -0300 Subject: [PATCH 13/16] remove restart section --- Makefile | 33 --------------- infra/aggregation_mode/ansible/README.md | 51 ------------------------ 2 files changed, 84 deletions(-) diff --git a/Makefile b/Makefile index e915de8ab..619fbd6c4 100644 --- a/Makefile +++ b/Makefile @@ -1864,17 +1864,6 @@ task_sender_logs: ## View task sender logs. Usage: make task_sender_logs ENV=hoo @echo "Use: ssh app@agg-mode-$(ENV)-task-sender 'tmux attach -t task_sender'" @echo "Or: ssh app@agg-mode-$(ENV)-task-sender 'tmux capture-pane -t task_sender -p'" -.PHONY: task_sender_restart -task_sender_restart: ## Restart task sender. Usage: make task_sender_restart ENV=hoodi - @if [ -z "$(ENV)" ]; then \ - echo "Error: ENV must be set (hoodi or mainnet)"; \ - exit 1; \ - fi - @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/task_sender.yaml \ - -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ - -e "host=task_sender" \ - -e "env=$(ENV)" - # ------------------------------------------------------------------------------ # Full Deployment # ------------------------------------------------------------------------------ @@ -1888,25 +1877,3 @@ agg_mode_deploy_all: ## Deploy entire aggregation mode stack. 
Usage: make agg_mo @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/deploy_all.yaml \ -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ -e "env=$(ENV)" - -# ------------------------------------------------------------------------------ -# Service Management -# ------------------------------------------------------------------------------ - -.PHONY: gateway_restart -gateway_restart: ## Restart gateway service. Usage: make gateway_restart ENV=hoodi HOST=gateway_primary - @if [ -z "$(ENV)" ] || [ -z "$(HOST)" ]; then \ - echo "Error: ENV and HOST must be set"; \ - exit 1; \ - fi - @ansible $(HOST) -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ - -m shell -a "sudo systemctl restart gateway" --become - -.PHONY: poller_restart -poller_restart: ## Restart poller service. Usage: make poller_restart ENV=hoodi HOST=gateway_primary - @if [ -z "$(ENV)" ] || [ -z "$(HOST)" ]; then \ - echo "Error: ENV and HOST must be set"; \ - exit 1; \ - fi - @ansible $(HOST) -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ - -m shell -a "systemctl --user restart poller" diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index 5e73acfcd..b478e8b26 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -292,50 +292,8 @@ ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' # Press Ctrl+B then D to detach without stopping ``` -**Restart task sender:** -```bash -# For Hoodi -make task_sender_restart ENV=hoodi - -# For Mainnet -make task_sender_restart ENV=mainnet -``` - ## Service Management -### Restart Services - -**Gateway:** -```bash -# For Hoodi -make gateway_restart ENV=hoodi HOST=gateway_primary -make gateway_restart ENV=hoodi HOST=gateway_secondary - -# For Mainnet -make gateway_restart ENV=mainnet HOST=gateway_primary -make gateway_restart ENV=mainnet HOST=gateway_secondary -``` - -**Poller:** -```bash -# For Hoodi -make poller_restart ENV=hoodi HOST=gateway_primary -make 
poller_restart ENV=hoodi HOST=gateway_secondary - -# For Mainnet -make poller_restart ENV=mainnet HOST=gateway_primary -make poller_restart ENV=mainnet HOST=gateway_secondary -``` - -**Task Sender:** -```bash -# For Hoodi -make task_sender_restart ENV=hoodi - -# For Mainnet -make task_sender_restart ENV=mainnet -``` - ### Check Service Status **PostgreSQL Cluster:** @@ -819,15 +777,6 @@ agg_mode_cli deposit \ --private-key ``` -Then restart the task sender: -```bash -# For Hoodi -make task_sender_restart ENV=hoodi - -# For Mainnet -make task_sender_restart ENV=mainnet -``` - ### General Debugging **Check Tailscale connectivity:** From 421e35ca2476f142356698b61e66fe22557cd0e4 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:18:20 -0300 Subject: [PATCH 14/16] redeploy section and remove verification section --- infra/aggregation_mode/ansible/README.md | 370 +++++++++-------------- 1 file changed, 149 insertions(+), 221 deletions(-) diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md index b478e8b26..45be3904a 100644 --- a/infra/aggregation_mode/ansible/README.md +++ b/infra/aggregation_mode/ansible/README.md @@ -9,8 +9,8 @@ This directory contains Ansible playbooks and configuration for automating the d - [Prerequisites](#prerequisites) - [Initial Setup](#initial-setup) - [Deployment](#deployment) +- [Redeployment](#redeployment) - [Service Management](#service-management) -- [Verification](#verification) - [Troubleshooting](#troubleshooting) - [Advanced Usage](#advanced-usage) @@ -292,6 +292,154 @@ ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' # Press Ctrl+B then D to detach without stopping ``` +## Redeployment + +### Idempotent Deployment + +Idempotent deployment skips building if the binary already exists. Use this when you only want to update configuration files. 
+ +```bash +# For Hoodi +make gateway_deploy ENV=hoodi + +# For Mainnet +make gateway_deploy ENV=mainnet +``` + +### Force Rebuild + +Force rebuild always rebuilds binaries from the latest code, even if they already exist. Use this when you want to deploy code changes. + +```bash +# For Hoodi +make gateway_deploy ENV=hoodi FORCE_REBUILD=true +make gateway_primary_deploy ENV=hoodi FORCE_REBUILD=true +make gateway_secondary_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_primary_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_secondary_deploy ENV=mainnet FORCE_REBUILD=true +``` + +This will: +1. Pull latest code from the configured branch (staging for hoodi, main for mainnet) +2. Delete existing binaries +3. Rebuild gateway and poller from source + +### Migrations + +To run database migrations: + +```bash +# For Hoodi +make postgres_migrations ENV=hoodi + +# For Mainnet +make postgres_migrations ENV=mainnet +``` + +### Task Sender + +To redeploy the task sender: + +```bash +# For Hoodi +make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet +``` + +### Metrics Stack + +To redeploy the metrics stack (Prometheus and Grafana): + +```bash +# For Hoodi +make metrics_deploy ENV=hoodi +make prometheus_deploy ENV=hoodi +make grafana_deploy ENV=hoodi + +# For Mainnet +make metrics_deploy ENV=mainnet +make prometheus_deploy ENV=mainnet +make grafana_deploy ENV=mainnet +``` + +### Manual Update + +If you prefer to update manually: + +**Gateway:** +```bash +# Hoodi +ssh app@agg-mode-hoodi-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked + +# Mainnet +ssh app@agg-mode-mainnet-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin main +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked +``` + +**Poller:** +```bash +# Hoodi +ssh
app@agg-mode-hoodi-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked + +# Mainnet +ssh app@agg-mode-mainnet-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin main +cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked +``` + +**Task Sender:** +```bash +# Hoodi +ssh app@agg-mode-hoodi-sender +cd ~/repos/sender/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/cli --bin agg_mode_cli --locked + +# Mainnet +ssh app@agg-mode-mainnet-sender +cd ~/repos/sender/aligned_layer +git pull origin main +cargo install --path aggregation_mode/cli --bin agg_mode_cli --locked +``` + +**Prometheus:** +```bash +# Hoodi +ssh admin@agg-mode-hoodi-metrics +# Update prometheus.yaml configuration manually +systemctl --user restart prometheus + +# Mainnet +ssh admin@agg-mode-mainnet-metrics +# Update prometheus.yaml configuration manually +systemctl --user restart prometheus +``` + +**Grafana:** +```bash +# Hoodi +ssh admin@agg-mode-hoodi-metrics +sudo systemctl restart grafana-server + +# Mainnet +ssh admin@agg-mode-mainnet-metrics +sudo systemctl restart grafana-server +``` + ## Service Management ### Check Service Status @@ -398,142 +546,6 @@ ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p' # Press Ctrl+B then D to detach ``` -## Verification - -### PostgreSQL Cluster Health - -1. **Check cluster state:** - ```bash - # For Hoodi - make postgres_status ENV=hoodi - - # For Mainnet - make postgres_status ENV=mainnet - ``` - -2. **Test password authentication:** - ```bash - # For Hoodi - ssh admin@agg-mode-hoodi-postgres-1 "PGPASSWORD='your_password' psql -U autoctl_node -h localhost -d agg_mode -c 'SELECT 1'" - # For Mainnet - ssh admin@agg-mode-mainnet-postgres-1 "PGPASSWORD='your_password' psql -U autoctl_node -h localhost -d agg_mode -c 'SELECT 1'" - ``` - -3.
**Verify replication:** - ```bash - # For Hoodi - ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres psql -d agg_mode -c 'SELECT * FROM pg_stat_replication'" - - # For Mainnet - ssh admin@agg-mode-mainnet-postgres-1 "sudo -u postgres psql -d agg_mode -c 'SELECT * FROM pg_stat_replication'" - ``` - -4. **Test failover (optional):** - ```bash - # For Hoodi - ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl stop pgautofailover" - # Wait 30 seconds, check status - make postgres_status ENV=hoodi - # Secondary should now be primary - ssh admin@agg-mode-hoodi-postgres-1 "sudo systemctl start pgautofailover" - - # For Mainnet - ssh admin@agg-mode-mainnet-postgres-1 "sudo systemctl stop pgautofailover" - # Wait 30 seconds, check status - make postgres_status ENV=mainnet - # Secondary should now be primary - ssh admin@agg-mode-mainnet-postgres-1 "sudo systemctl start pgautofailover" - ``` - -### Gateway Health - -1. **Check HTTP health endpoint:** - ```bash - # For Hoodi - curl -k https://agg-mode-hoodi-gateway-1/ - - # For Mainnet - curl -k https://agg-mode-mainnet-gateway-1/ - ``` - -2. **Check metrics:** - ```bash - # For Hoodi - curl http://agg-mode-hoodi-gateway-1:9094/metrics - - # For Mainnet - curl http://agg-mode-mainnet-gateway-1:9094/metrics - ``` - -3. **Verify database connectivity:** - ```bash - # For Hoodi - ssh app@agg-mode-hoodi-gateway-1 - PGPASSWORD='your_password' psql -U autoctl_node -h agg-mode-hoodi-postgres-1 -d agg_mode -c "SELECT 1" - - # For Mainnet - ssh app@agg-mode-mainnet-gateway-1 - PGPASSWORD='your_password' psql -U autoctl_node -h agg-mode-mainnet-postgres-1 -d agg_mode -c "SELECT 1" - ``` - -### Poller Health - -1. **Check last processed block:** - ```bash - # For Hoodi - ssh app@agg-mode-hoodi-gateway-1 "cat ~/config/proof-aggregator.last_block_fetched.json" - - # For Mainnet - ssh app@agg-mode-mainnet-gateway-1 "cat ~/config/proof-aggregator.last_block_fetched.json" - ``` - - The block number should increase over time. - -2. 
**Check metrics:** - ```bash - # For Hoodi - curl http://agg-mode-hoodi-gateway-1:9095/metrics - - # For Mainnet - curl http://agg-mode-mainnet-gateway-1:9095/metrics - ``` - -### Metrics Stack - -1. **Prometheus targets:** - - Navigate to `http://:9090/targets` - - All targets should show as "UP" - -2. **Grafana datasources:** - - Navigate to `http://:3000` - - Go to Configuration → Data Sources - - Verify Prometheus and PostgreSQL datasources are connected - -### Task Sender - -1. **Check tmux session is running:** - ```bash - # For Hoodi - make task_sender_status ENV=hoodi - - # For Mainnet - make task_sender_status ENV=mainnet - ``` - -2. **View recent logs:** - ```bash - # For Hoodi - ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' - - # For Mainnet - ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p' - ``` - -3. **Verify proof submissions:** - - Check logs for successful proof submissions - - Look for transaction hashes in the output - - Verify proofs are appearing on the network - ## Troubleshooting ### PostgreSQL Issues @@ -850,90 +862,6 @@ ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ -e "force_rebuild=true" ``` -### Updating Services - -**Update gateway and poller with latest code:** - -The easiest way to update services is using the `FORCE_REBUILD` parameter: - -```bash -# For Hoodi -make gateway_deploy ENV=hoodi FORCE_REBUILD=true -make gateway_primary_deploy ENV=hoodi FORCE_REBUILD=true -make gateway_secondary_deploy ENV=hoodi FORCE_REBUILD=true - -# For Mainnet -make gateway_deploy ENV=mainnet FORCE_REBUILD=true -make gateway_primary_deploy ENV=mainnet FORCE_REBUILD=true -make gateway_secondary_deploy ENV=mainnet FORCE_REBUILD=true -``` - -This will: -1. Pull latest code from the configured branch (staging for hoodi, main for mainnet) -2. Delete existing binaries -3. Rebuild gateway and poller from source -4. 
Restart the services - -**Manual update (alternative):** - -If you prefer to update manually: - -```bash -# Gateway (Hoodi) -ssh app@agg-mode-hoodi-gateway-1 -cd ~/repos/gateway/aligned_layer -git pull origin staging -cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked -sudo systemctl restart gateway - -# Gateway (Mainnet) -ssh app@agg-mode-mainnet-gateway-1 -cd ~/repos/gateway/aligned_layer -git pull origin staging -cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked -sudo systemctl restart gateway - -# Poller (Hoodi) -ssh app@agg-mode-hoodi-gateway-1 -cd ~/repos/poller/aligned_layer -git pull origin staging -cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked -systemctl --user restart poller - -# Poller (Mainnet) -ssh app@agg-mode-mainnet-gateway-1 -cd ~/repos/poller/aligned_layer -git pull origin staging -cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked -systemctl --user restart poller -``` - -### Redeploy with Latest Code - -**Idempotent deployment (skip if binary exists):** - -```bash -# For Hoodi -make gateway_deploy ENV=hoodi - -# For Mainnet -make gateway_deploy ENV=mainnet -``` - -This pulls the latest code but skips building if the binary already exists. Use this when you only want to update configuration files. - -**Force rebuild (always rebuild binaries):** - -```bash -# For Hoodi -make gateway_deploy ENV=hoodi FORCE_REBUILD=true - -# For Mainnet -make gateway_deploy ENV=mainnet FORCE_REBUILD=true -``` - -This always rebuilds binaries from the latest code, even if they already exist. Use this when you want to deploy code changes. - ### Changing Configuration 1. 
Update INI files in `playbooks/ini/` From 0010afe13270fa216ef7c09e8597e5f514f0a903 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:20:24 -0300 Subject: [PATCH 15/16] update makefile --- Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 619fbd6c4..a41e24008 100644 --- a/Makefile +++ b/Makefile @@ -1667,9 +1667,7 @@ AGG_MODE_ANSIBLE_DIR = infra/aggregation_mode/ansible AGG_MODE_PLAYBOOKS_DIR = $(AGG_MODE_ANSIBLE_DIR)/playbooks AGG_MODE_INI_DIR = $(AGG_MODE_PLAYBOOKS_DIR)/ini -# ------------------------------------------------------------------------------ -# Setup: Create INI configuration files -# ------------------------------------------------------------------------------ +# TODO: Check and add targets to install gateway, poller and cli binaries locally # ------------------------------------------------------------------------------ # PostgreSQL Cluster Deployment From 5ef90c7f993b67cc4ecdd3ada3df0ef855456d64 Mon Sep 17 00:00:00 2001 From: JuArce <52429267+JuArce@users.noreply.github.com> Date: Fri, 16 Jan 2026 20:05:31 -0300 Subject: [PATCH 16/16] improve prometheus deploy --- grafana/provisioning/datasources/datasource.yaml | 2 +- .../aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml | 3 +++ .../aggregation_mode/ansible/playbooks/ini/config-hoodi.ini | 4 ++++ .../ansible/playbooks/ini/config-mainnet.ini | 4 ++++ .../ansible/playbooks/prometheus_agg_mode.yaml | 4 ++-- .../ansible/playbooks/templates/grafana/grafana_env.j2 | 5 +++++ .../templates/prometheus/prometheus_agg_mode.yaml.j2 | 4 ++-- 7 files changed, 21 insertions(+), 5 deletions(-) diff --git a/grafana/provisioning/datasources/datasource.yaml b/grafana/provisioning/datasources/datasource.yaml index 4afd2c4a7..32137e4a3 100644 --- a/grafana/provisioning/datasources/datasource.yaml +++ b/grafana/provisioning/datasources/datasource.yaml @@ -63,7 +63,7 @@ datasources: database: 
"${MONITOR_DB_DB}" user: "${MONITOR_DB_USER}" secureJsonData: - password: "" + password: ${MONITOR_DB_PASSWORD} basicAuth: false isDefault: false editable: true diff --git a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml index a0174f3a4..019916511 100644 --- a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml +++ b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml @@ -22,6 +22,9 @@ grafana_postgres_db: "{{ lookup('ini', 'grafana_postgres_db', file=config_file) }}" grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" + grafana_monitor_host: "{{ lookup('ini', 'grafana_monitor_host', file=config_file) }}" + grafana_monitor_port: "{{ lookup('ini', 'grafana_monitor_port', file=config_file, default='5432') }}" + grafana_monitor_db: "{{ lookup('ini', 'grafana_monitor_db', file=config_file, default='pg_auto_failover') }}" no_log: true - name: Install required packages diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini index aad6296c9..7e76643ac 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -84,6 +84,10 @@ grafana_postgres_host=agg-mode-hoodi-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode grafana_postgres_user=grafana +# Monitor datasource (uses same user/password as postgres datasource) +grafana_monitor_host=agg-mode-hoodi-postgres-monitor +grafana_monitor_port=5432 +grafana_monitor_db=pg_auto_failover # ============================================ # Task Sender Configuration diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini index 
587bd187f..9d42109d0 100644 --- a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -84,6 +84,10 @@ grafana_postgres_host=agg-mode-mainnet-postgres-1 grafana_postgres_port=5432 grafana_postgres_db=agg_mode grafana_postgres_user=grafana +# Monitor datasource (uses same user/password as postgres datasource) +grafana_monitor_host=agg-mode-mainnet-postgres-monitor +grafana_monitor_port=5432 +grafana_monitor_db=pg_auto_failover # ============================================ # Task Sender Configuration diff --git a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml index 1375a1318..739f91b27 100644 --- a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml +++ b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml @@ -73,10 +73,10 @@ owner: "{{ ansible_user }}" group: "{{ ansible_user }}" - - name: Enable and start Prometheus service + - name: Enable and restart Prometheus service systemd_service: name: prometheus - state: started + state: restarted enabled: true scope: user daemon_reload: true diff --git a/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 index f1a809492..87944f109 100644 --- a/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 +++ b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 @@ -7,3 +7,8 @@ POSTGRES_PORT={{ grafana_postgres_port }} POSTGRES_DB={{ grafana_postgres_db }} POSTGRES_USER={{ grafana_postgres_user }} POSTGRES_PASSWORD={{ grafana_postgres_password }} +MONITOR_DB_HOST={{ grafana_monitor_host }} +MONITOR_DB_PORT={{ grafana_monitor_port }} +MONITOR_DB_DB={{ grafana_monitor_db }} +MONITOR_DB_USER={{ grafana_postgres_user }} +MONITOR_DB_PASSWORD={{ grafana_postgres_password }} diff --git 
a/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 index 2886c16fc..968cc3790 100644 --- a/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 +++ b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 @@ -7,7 +7,7 @@ scrape_configs: static_configs: - targets: ["{{ gateway_primary_hostname }}:8080"] labels: - service: "gateway" + service: "gateway-http" instance: "primary" - job_name: "gateway-secondary-http" @@ -15,7 +15,7 @@ scrape_configs: static_configs: - targets: ["{{ gateway_secondary_hostname }}:8080"] labels: - service: "gateway" + service: "gateway-http" instance: "secondary" - job_name: "gateway-primary"