diff --git a/.gitignore b/.gitignore index 041927db8..a156fb7b1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,7 @@ docs/dead_links_report.txt terraform.tfstate terraform.tfstate.backup + +# Aggregation Mode Ansible INI files (track config-*.ini templates, ignore others) +infra/aggregation_mode/ansible/playbooks/ini/*.ini +!infra/aggregation_mode/ansible/playbooks/ini/config-*.ini diff --git a/Makefile b/Makefile index 27dcb36e1..a41e24008 100644 --- a/Makefile +++ b/Makefile @@ -1658,3 +1658,221 @@ __NODE_EXPORTER_: install_node_exporter: @./scripts/install_node_exporter.sh + +# ============================================================================== +# Aggregation Mode Ansible Deployment +# ============================================================================== + +AGG_MODE_ANSIBLE_DIR = infra/aggregation_mode/ansible +AGG_MODE_PLAYBOOKS_DIR = $(AGG_MODE_ANSIBLE_DIR)/playbooks +AGG_MODE_INI_DIR = $(AGG_MODE_PLAYBOOKS_DIR)/ini + +# TODO: Check and add targets to install gateway, poller and cli binaries locally + +# ------------------------------------------------------------------------------ +# PostgreSQL Cluster Deployment +# ------------------------------------------------------------------------------ + +.PHONY: postgres_deploy +postgres_deploy: ## Deploy PostgreSQL Auto-Failover Cluster. Usage: make postgres_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/postgres_cluster.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "env=$(ENV)" + +.PHONY: postgres_monitor_deploy +postgres_monitor_deploy: ## Deploy PostgreSQL Monitor only.
Usage: make postgres_monitor_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_monitor.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=$(ENV)" + +.PHONY: postgres_nodes_deploy +postgres_nodes_deploy: ## Deploy PostgreSQL Primary & Secondary. Usage: make postgres_nodes_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_node.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_primary" \ + -e "env=$(ENV)" + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/pg_node.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_secondary" \ + -e "env=$(ENV)" + +.PHONY: postgres_migrations +postgres_migrations: ## Run database migrations. Usage: make postgres_migrations ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/postgres_migrations.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=postgres_primary" \ + -e "env=$(ENV)" + +.PHONY: postgres_status +postgres_status: ## Check PostgreSQL cluster status. Usage: make postgres_status ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible postgres_monitor -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -m shell -a "sudo -u postgres pg_autoctl show state --monitor postgres://autoctl_node@localhost:5432/pg_auto_failover" --become + +# ------------------------------------------------------------------------------ +# Gateway & Poller Deployment +# ------------------------------------------------------------------------------ +# FORCE_REBUILD is off when unset, empty, 0, false or no; any other value passes force_rebuild=true to the playbook + +.PHONY: gateway_deploy +gateway_deploy: ## Deploy Gateway & Poller on both servers.
Usage: make gateway_deploy ENV=hoodi [FORCE_REBUILD=true] + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @EXTRA_VARS=""; \ + case "$(FORCE_REBUILD)" in \ + ""|0|false|False|FALSE|no|No|NO) ;; *) EXTRA_VARS="-e force_rebuild=true" ;; \ + esac; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=$(ENV)" \ + $$EXTRA_VARS + @EXTRA_VARS=""; \ + case "$(FORCE_REBUILD)" in \ + ""|0|false|False|FALSE|no|No|NO) ;; *) EXTRA_VARS="-e force_rebuild=true" ;; \ + esac; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_secondary" \ + -e "env=$(ENV)" \ + $$EXTRA_VARS + +.PHONY: gateway_primary_deploy +gateway_primary_deploy: ## Deploy Gateway & Poller on primary only. Usage: make gateway_primary_deploy ENV=hoodi [FORCE_REBUILD=true] + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @EXTRA_VARS=""; \ + case "$(FORCE_REBUILD)" in \ + ""|0|false|False|FALSE|no|No|NO) ;; *) EXTRA_VARS="-e force_rebuild=true" ;; \ + esac; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=$(ENV)" \ + $$EXTRA_VARS + +.PHONY: gateway_secondary_deploy +gateway_secondary_deploy: ## Deploy Gateway & Poller on secondary only.
Usage: make gateway_secondary_deploy ENV=hoodi [FORCE_REBUILD=true] + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @EXTRA_VARS=""; \ + case "$(FORCE_REBUILD)" in \ + ""|0|false|False|FALSE|no|No|NO) ;; *) EXTRA_VARS="-e force_rebuild=true" ;; \ + esac; \ + ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/gateway_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=gateway_secondary" \ + -e "env=$(ENV)" \ + $$EXTRA_VARS + +# ------------------------------------------------------------------------------ +# Metrics Deployment +# ------------------------------------------------------------------------------ + +.PHONY: metrics_deploy +metrics_deploy: ## Deploy Prometheus & Grafana. Usage: make metrics_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/metrics_stack.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +.PHONY: prometheus_deploy +prometheus_deploy: ## Deploy Prometheus only. Usage: make prometheus_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/prometheus_agg_mode.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +.PHONY: grafana_deploy +grafana_deploy: ## Deploy Grafana only.
Usage: make grafana_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/grafana_agg_mode.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=metrics" \ + -e "env=$(ENV)" + +# ------------------------------------------------------------------------------ +# Task Sender Deployment +# ------------------------------------------------------------------------------ + +.PHONY: task_sender_deploy +task_sender_deploy: ## Deploy task sender. Usage: make task_sender_deploy ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/task_sender.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "host=task_sender" \ + -e "env=$(ENV)" + +.PHONY: task_sender_status +task_sender_status: ## Check task sender status. Usage: make task_sender_status ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @echo "Checking task sender tmux session..." + @ansible -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml task_sender \ + -m shell \ + -a "tmux has-session -t task_sender && echo 'Task sender is running' || echo 'Task sender is not running'" + +.PHONY: task_sender_logs +task_sender_logs: ## View task sender logs. Usage: make task_sender_logs ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @echo "Use: ssh app@agg-mode-$(ENV)-task-sender 'tmux attach -t task_sender'" + @echo "Or: ssh app@agg-mode-$(ENV)-task-sender 'tmux capture-pane -t task_sender -p'" + +# ------------------------------------------------------------------------------ +# Full Deployment +# ------------------------------------------------------------------------------ + +.PHONY: agg_mode_deploy_all +agg_mode_deploy_all: ## Deploy entire aggregation mode stack.
Usage: make agg_mode_deploy_all ENV=hoodi + @if [ -z "$(ENV)" ]; then \ + echo "Error: ENV must be set (hoodi or mainnet)"; \ + exit 1; \ + fi + @ansible-playbook $(AGG_MODE_PLAYBOOKS_DIR)/deploy_all.yaml \ + -i $(AGG_MODE_ANSIBLE_DIR)/$(ENV)-inventory.yaml \ + -e "env=$(ENV)" diff --git a/grafana/provisioning/datasources/datasource.yaml b/grafana/provisioning/datasources/datasource.yaml index 4afd2c4a7..32137e4a3 100644 --- a/grafana/provisioning/datasources/datasource.yaml +++ b/grafana/provisioning/datasources/datasource.yaml @@ -63,7 +63,7 @@ datasources: database: "${MONITOR_DB_DB}" user: "${MONITOR_DB_USER}" secureJsonData: - password: "" + password: "${MONITOR_DB_PASSWORD}" basicAuth: false isDefault: false editable: true diff --git a/infra/aggregation_mode/ansible/README.md b/infra/aggregation_mode/ansible/README.md new file mode 100644 index 000000000..45be3904a --- /dev/null +++ b/infra/aggregation_mode/ansible/README.md @@ -0,0 +1,961 @@ +# Aggregation Mode Ansible Automation + +This directory contains Ansible playbooks and configuration for automating the deployment and management of the Aligned Layer aggregation mode infrastructure. + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Initial Setup](#initial-setup) +- [Deployment](#deployment) +- [Redeployment](#redeployment) +- [Service Management](#service-management) +- [Troubleshooting](#troubleshooting) +- [Advanced Usage](#advanced-usage) + +## Overview + +The Ansible automation deploys a complete aggregation mode stack consisting of: + +1. **PostgreSQL Auto-Failover Cluster** (3 servers) + - 1 Monitor node (EC2) + - 2 Data nodes (Primary + Secondary) with automatic failover (Scaleway Elastic Metal) + - Password authentication with scram-sha-256 + +2. **Gateway Service** (2 servers) + - Rust-based gateway with TLS support + - Runs on port 8080 (non-TLS) and port 443 (TLS) + - Systemd service with automatic restart + +3.
**Poller Service** (2 servers, colocated with gateway) + - Payment poller service + - User-level systemd service + +4. **Metrics Stack** (1 server) + - Prometheus for metrics collection + - Grafana for visualization + - 90-day retention + +5. **Task Sender** (1 server) + - Automated proof submission service + - Runs continuously in tmux session + - Configurable interval and proof files + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Tailscale VPN │ +│ (100.64.0.0/10) │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ PG Monitor │ │ PG Primary │ │ PG Secondary │ │ +│ │ (EC2) │ │ (Scaleway) │ │ (Scaleway) │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ │ +│ └──────────────────┴──────────────────┘ │ +│ pg_auto_failover │ +│ │ +│ ┌────────────────────────┐ ┌────────────────────────┐ │ +│ │ Gateway Primary │ │ Gateway Secondary │ │ +│ │ ├─ Gateway (8080+443)│ │ ├─ Gateway (8080+443) │ │ +│ │ └─ Poller │ │ └─ Poller │ │ +│ └────────────────────────┘ └────────────────────────┘ │ +│ │ +│ ┌────────────────────────┐ │ +│ │ Metrics Server │ │ +│ │ ├─ Prometheus (9090) │ │ +│ │ └─ Grafana (3000) │ │ +│ └────────────────────────┘ │ +│ │ +│ ┌────────────────────────┐ │ +│ │ Task Sender │ │ +│ │ (tmux session) │ │ +│ └────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Prerequisites + +### Local Machine + +1. **Ansible** (version 2.9 or higher) + ```bash + pip install ansible + ``` + +2. **SSH access** to all servers via Tailscale + - Ensure you're connected to the Tailscale VPN + - SSH keys configured for `admin` user on all servers + +3. **TLS Certificates** for Gateway + - Valid TLS certificate and key files + - Can be Let's Encrypt, CA-issued, or self-signed + +### Remote Servers + +All servers are provisioned via Terraform and connected via Tailscale VPN. 
They should have: +- Ubuntu/Debian-based OS +- `admin` user with sudo privileges +- `app` user for application services (gateway servers) +- `postgres` user will be created automatically for PostgreSQL services +- Tailscale VPN configured + +## Initial Setup + +All configuration is consolidated into environment-specific files with predefined values. You only need to fill in sensitive values at the top of each config file. + +### Configure Environment + +Edit the config file for your environment: +- **Hoodi**: `playbooks/ini/config-hoodi.ini` +- **Mainnet**: `playbooks/ini/config-mainnet.ini` + +All non-sensitive values are already pre-filled. Fill in the required values at the top of the file: + +```ini +# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) +db_password=your_secure_password_here + +# Grafana read-only database user password +grafana_postgres_password=your_secure_password_here + +# TLS certificates (local paths to copy from) +tls_cert_source_path=/path/to/your/cert.pem +tls_key_source_path=/path/to/your/key.pem + +# Grafana admin password +grafana_admin_password=your_grafana_admin_password + +# Task sender private key (for sending proofs) +task_sender_private_key=0xYourPrivateKeyHere +``` + +## Deployment + +### Full Stack Deployment + +To deploy everything in one command: + +```bash +# For Hoodi +make agg_mode_deploy_all ENV=hoodi + +# For Mainnet +make agg_mode_deploy_all ENV=mainnet +``` + +This will: +1. Deploy PostgreSQL cluster (monitor, primary, secondary) +2. Run database migrations +3. Deploy gateway and poller on both servers +4. Deploy Prometheus and Grafana +5. Deploy task sender + +### Step-by-Step Deployment + +For more control, deploy each component separately: + +#### 1. 
Deploy PostgreSQL Cluster + +```bash +# For Hoodi +make postgres_deploy ENV=hoodi + +# For Mainnet +make postgres_deploy ENV=mainnet +``` + +This will: +- Deploy monitor with scram-sha-256 auth +- Set password for autoctl_node user +- Deploy primary and secondary nodes +- Configure replication with password auth +- Run database migrations + +**Verify cluster status:** +```bash +# For Hoodi +make postgres_status ENV=hoodi + +# For Mainnet +make postgres_status ENV=mainnet +``` + +Expected output: +``` + Name | Node | Host:Port | TLI: LSN | Connection | Reported State | Assigned State +----------+-------+--------------------+----------------+--------------+---------------------+-------------------- +monitor | 1 | 100.x.x.x:5432 | | | | +node_1 | 2 | 100.x.x.x:5432 | 1: 0/... | read-write | primary | primary +node_2 | 3 | 100.x.x.x:5432 | 1: 0/... | read-only | secondary | secondary +``` + +#### 2. Deploy Gateway & Poller + +```bash +# For Hoodi +make gateway_deploy ENV=hoodi +make gateway_primary_deploy ENV=hoodi +make gateway_secondary_deploy ENV=hoodi +make gateway_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet +make gateway_primary_deploy ENV=mainnet +make gateway_secondary_deploy ENV=mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true +``` + +**Note:** By default, the deployment is idempotent and skips building if the binary already exists. Use `FORCE_REBUILD=true` to always rebuild from the latest code in the repository. + +**Verify gateway is running:** +```bash +ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" +``` + +**Test endpoint:** +```bash +curl -k https://agg-mode-hoodi-gateway-1/ +``` + +#### 3. 
Deploy Metrics Stack + +```bash +# For Hoodi +make metrics_deploy ENV=hoodi +make prometheus_deploy ENV=hoodi +make grafana_deploy ENV=hoodi + +# For Mainnet +make metrics_deploy ENV=mainnet +make prometheus_deploy ENV=mainnet +make grafana_deploy ENV=mainnet +``` + +**Access dashboards:** +- Prometheus: `http://<metrics-server>:9090` +- Grafana: `http://<metrics-server>:3000` (default credentials: admin/admin) + +#### 4. Deploy Task Sender + +```bash +# For Hoodi +make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet +``` + +The task sender runs in a tmux session and continuously sends proofs to the network at the configured interval (default: 1 hour). + +**Automatic Deposit Check:** + +The deployment automatically: +1. Derives the wallet address from the configured private key +2. Checks if the address has an active subscription on the payment contract +3. If not subscribed or expired, automatically deposits 0.0035 ETH to the payment contract +4. Waits for transaction confirmation before starting the task sender + +**Requirements:** +- The account must have sufficient ETH for: + - Payment deposit: **0.0035 ETH** + - Gas fees: ~**0.001 ETH** (estimated) +- Foundry (cast) will be automatically installed if not present + +**Verify task sender is running:** +```bash +# For Hoodi +make task_sender_status ENV=hoodi + +# For Mainnet +make task_sender_status ENV=mainnet +``` + +**View task sender logs:** +```bash +# For Hoodi +make task_sender_logs ENV=hoodi +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' + +# For Mainnet +make task_sender_logs ENV=mainnet +ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' + +# Press Ctrl+B then D to detach without stopping +``` + +## Redeployment + +### Idempotent Deployment + +Idempotent deployment skips building if the binary already exists. Use this when you only want to update configuration files.
+ +```bash +# For Hoodi +make gateway_deploy ENV=hoodi + +# For Mainnet +make gateway_deploy ENV=mainnet +``` + +### Force Rebuild + +Force rebuild always rebuilds binaries from the latest code, even if they already exist. Use this when you want to deploy code changes. + +```bash +# For Hoodi +make gateway_deploy ENV=hoodi FORCE_REBUILD=true +make gateway_primary_deploy ENV=hoodi FORCE_REBUILD=true +make gateway_secondary_deploy ENV=hoodi FORCE_REBUILD=true + +# For Mainnet +make gateway_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_primary_deploy ENV=mainnet FORCE_REBUILD=true +make gateway_secondary_deploy ENV=mainnet FORCE_REBUILD=true +``` + +This will: +1. Pull latest code from the configured branch (staging for hoodi, main for mainnet) +2. Delete existing binaries +3. Rebuild gateway and poller from source + +### Migrations + +To run database migrations: + +```bash +# For Hoodi +make postgres_migrations ENV=hoodi + +# For Mainnet +make postgres_migrations ENV=mainnet +``` + +### Task Sender + +To redeploy the task sender: + +```bash +# For Hoodi +make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet +``` + +### Metrics Stack + +To redeploy the metrics stack (Prometheus and Grafana): + +```bash +# For Hoodi +make metrics_deploy ENV=hoodi +make prometheus_deploy ENV=hoodi +make grafana_deploy ENV=hoodi + +# For Mainnet +make metrics_deploy ENV=mainnet +make prometheus_deploy ENV=mainnet +make grafana_deploy ENV=mainnet +``` + +### Manual Update + +If you prefer to update manually: + +**Gateway:** +```bash +# Hoodi +ssh app@agg-mode-hoodi-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked + +# Mainnet +ssh app@agg-mode-mainnet-gateway-1 +cd ~/repos/gateway/aligned_layer +git pull origin main +cargo install --path aggregation_mode/gateway --bin gateway --features tls --locked +``` + +**Poller:** +```bash +# Hoodi +ssh
app@agg-mode-hoodi-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked + +# Mainnet +ssh app@agg-mode-mainnet-gateway-1 +cd ~/repos/poller/aligned_layer +git pull origin main +cargo install --path aggregation_mode/payments_poller --bin payments_poller --locked +``` + +**Task Sender:** +```bash +# Hoodi +ssh app@agg-mode-hoodi-sender +cd ~/repos/sender/aligned_layer +git pull origin staging +cargo install --path aggregation_mode/cli --bin agg_mode_cli --locked + +# Mainnet +ssh app@agg-mode-mainnet-sender +cd ~/repos/sender/aligned_layer +git pull origin main +cargo install --path aggregation_mode/cli --bin agg_mode_cli --locked +``` + +**Prometheus:** +```bash +# Hoodi +ssh admin@agg-mode-hoodi-metrics +# Update prometheus.yaml configuration manually +systemctl --user restart prometheus + +# Mainnet +ssh admin@agg-mode-mainnet-metrics +# Update prometheus.yaml configuration manually +systemctl --user restart prometheus +``` + +**Grafana:** +```bash +# Hoodi +ssh admin@agg-mode-hoodi-metrics +sudo systemctl restart grafana-server + +# Mainnet +ssh admin@agg-mode-mainnet-metrics +sudo systemctl restart grafana-server +``` + +## Service Management + +### Check Service Status + +**PostgreSQL Cluster:** +```bash +# For Hoodi +make postgres_status ENV=hoodi + +# For Mainnet +make postgres_status ENV=mainnet +``` + +**Gateway:** +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 50" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo systemctl status gateway" +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -n 50" +``` + +**Poller:** +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "systemctl --user status poller" +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 50" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1
"systemctl --user status poller" +ssh app@agg-mode-mainnet-gateway-1 "journalctl --user -u poller -n 50" +``` + +**Prometheus:** +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-metrics "systemctl --user status prometheus" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "systemctl --user status prometheus" +``` + +**Grafana:** +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-metrics "sudo systemctl status grafana-server" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "sudo systemctl status grafana-server" +``` + +**Task Sender:** +```bash +# For Hoodi +make task_sender_status ENV=hoodi +ssh app@agg-mode-hoodi-sender "tmux has-session -t task_sender && echo 'Running' || echo 'Not running'" + +# For Mainnet +make task_sender_status ENV=mainnet +ssh app@agg-mode-mainnet-sender "tmux has-session -t task_sender && echo 'Running' || echo 'Not running'" +``` + +### View Logs + +**Gateway:** +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -f" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -f" +``` + +**Poller:** +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -f" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "journalctl --user -u poller -f" +``` + +**PostgreSQL:** +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -f" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo journalctl -u pgautofailover -f" +``` + +**Task Sender:** +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' +ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t task_sender -p' + +# For Mainnet +ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' +ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p' + +# Press Ctrl+B then D to detach +``` + +## Troubleshooting + +### PostgreSQL Issues + +**Problem: Node fails to join cluster** + +Check monitor logs: +```bash +# For Hoodi +ssh 
admin@agg-mode-hoodi-postgres-monitor "sudo journalctl -u pgautofailover -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-monitor "sudo journalctl -u pgautofailover -n 100" +``` + +Check node logs: +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-postgres-1 "sudo journalctl -u pgautofailover -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo journalctl -u pgautofailover -n 100" +``` + +**Problem: Password authentication fails** + +Verify `db_password` is set correctly in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`). + +Check pg_hba.conf: +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-postgres-1 "sudo -u postgres cat /var/lib/postgresql/node/pg_hba.conf" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-1 "sudo -u postgres cat /var/lib/postgresql/node/pg_hba.conf" +``` + +Should contain: +``` +host all all 100.64.0.0/10 scram-sha-256 +``` + +### Gateway Issues + +**Problem: Gateway won't start** + +Check logs for errors: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "sudo journalctl -u gateway -n 100" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "sudo journalctl -u gateway -n 100" +``` + +Common issues: +- Missing TLS certificates → Check paths in `config-{{ env }}.ini` (tls_cert_source_path, tls_key_source_path) +- Database connection failed → Verify `db_password` in `config-{{ env }}.ini` +- Port 443 already in use → Check with `sudo lsof -i :443` + +**Problem: TLS certificate errors** + +Verify certificates exist: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "ls -la ~/.ssl/" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "ls -la ~/.ssl/" +``` + +Check certificate validity: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "openssl x509 -in ~/.ssl/cert.pem -text -noout" +``` + +### Poller Issues + +**Problem: Poller not syncing blocks** + +Check logs: +```bash 
+# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "journalctl --user -u poller -n 100" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "journalctl --user -u poller -n 100" +``` + +Verify RPC connectivity: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-gateway-1 "curl -X POST -H 'Content-Type: application/json' --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' https://aligned-hoodi-rpc-geth.tail665ae.ts.net" + +# For Mainnet +ssh app@agg-mode-mainnet-gateway-1 "curl -X POST -H 'Content-Type: application/json' --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' https://aligned-mainnet-rpc-1.tail665ae.ts.net" +``` + +### Metrics Issues + +**Problem: Prometheus not scraping targets** + +Check Prometheus logs: +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-metrics "journalctl --user -u prometheus -n 100" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "journalctl --user -u prometheus -n 100" +``` + +Verify targets are reachable from metrics server: +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-metrics "curl http://agg-mode-hoodi-gateway-1:9094/metrics" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "curl http://agg-mode-mainnet-gateway-1:9094/metrics" +``` + +Check Prometheus config: +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-metrics "cat ~/config/prometheus.yaml" + +# For Mainnet +ssh admin@agg-mode-mainnet-metrics "cat ~/config/prometheus.yaml" +``` + +### Task Sender Issues + +**Problem: Task sender not running** + +Check if tmux session exists: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender "tmux list-sessions" + +# For Mainnet +ssh app@agg-mode-mainnet-sender "tmux list-sessions" +``` + +If missing, redeploy: +```bash +# For Hoodi +make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet +``` + +**Problem: Task sender crashes or exits** + +Check logs for errors: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender 'tmux capture-pane -t 
task_sender -p -S -100' + +# For Mainnet +ssh app@agg-mode-mainnet-sender 'tmux capture-pane -t task_sender -p -S -100' +``` + +Common issues: +- Invalid private key → Check `task_sender_private_key` in `config-{{ env }}.ini` +- Missing proof/vk files → Verify files exist: `task_sender_proof_path`, `task_sender_vk_path` +- Network connectivity → Test RPC: `curl https://aligned-hoodi-rpc-geth.tail665ae.ts.net` (Hoodi) or `curl https://aligned-mainnet-rpc-1.tail665ae.ts.net` (Mainnet) +- Insufficient balance → Check account has ETH for gas fees + +**Problem: Proofs not being submitted** + +Check interval configuration: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender "cat ~/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env" + +# For Mainnet +ssh app@agg-mode-mainnet-sender "cat ~/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env" +``` + +Verify `INTERVAL_HOURS` is set correctly (default: 1 hour). Attach to session to see live activity: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender 'tmux attach -t task_sender' + +# For Mainnet +ssh app@agg-mode-mainnet-sender 'tmux attach -t task_sender' +``` + +**Problem: Deployment fails with insufficient balance** + +The automatic deposit check requires the account to have at least **0.0045 ETH** (0.0035 for deposit + ~0.001 for gas). + +Check account balance: +```bash +# For Hoodi +ssh app@agg-mode-hoodi-sender +export PATH=$HOME/.foundry/bin:$PATH +cast balance <WALLET_ADDRESS> --rpc-url https://aligned-hoodi-rpc-geth.tail665ae.ts.net + +# For Mainnet +ssh app@agg-mode-mainnet-sender +export PATH=$HOME/.foundry/bin:$PATH +cast balance <WALLET_ADDRESS> --rpc-url https://aligned-mainnet-rpc-1.tail665ae.ts.net +``` + +If balance is insufficient, send ETH to the account and redeploy: +```bash +# For Hoodi +make task_sender_deploy ENV=hoodi + +# For Mainnet +make task_sender_deploy ENV=mainnet +``` + +**Problem: Automatic deposit fails** + +If the automatic deposit fails during deployment, check the Ansible output for error messages.
Common issues: +- Insufficient ETH balance in the account +- RPC connection issues +- Gas price too high + +To manually deposit after fixing the issue: +```bash +ssh app@agg-mode-hoodi-sender +export PATH=$HOME/.cargo/bin:$PATH + +# For Hoodi +agg_mode_cli deposit \ + --network hoodi \ + --rpc-url https://aligned-hoodi-rpc-geth.tail665ae.ts.net \ + --private-key <PRIVATE_KEY> + +# For Mainnet +agg_mode_cli deposit \ + --network mainnet \ + --rpc-url https://aligned-mainnet-rpc-1.tail665ae.ts.net \ + --private-key <PRIVATE_KEY> +``` + +### General Debugging + +**Check Tailscale connectivity:** +```bash +tailscale status +``` + +**Test SSH access to servers:** +```bash +# For Hoodi +ssh admin@agg-mode-hoodi-postgres-monitor "echo 'Connection successful'" +ssh app@agg-mode-hoodi-gateway-1 "echo 'Connection successful'" + +# For Mainnet +ssh admin@agg-mode-mainnet-postgres-monitor "echo 'Connection successful'" +ssh app@agg-mode-mainnet-gateway-1 "echo 'Connection successful'" +``` + +**Verify Ansible inventory:** +```bash +# For Hoodi +ansible-inventory -i infra/aggregation_mode/ansible/hoodi-inventory.yaml --list + +# For Mainnet +ansible-inventory -i infra/aggregation_mode/ansible/mainnet-inventory.yaml --list +``` + +## Advanced Usage + +### Running Individual Playbooks + +You can run any playbook directly with ansible-playbook: + +```bash +# Deploy only postgres monitor (Hoodi) +ansible-playbook infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=hoodi" + +# Deploy only postgres monitor (Mainnet) +ansible-playbook infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=postgres_monitor" \ + -e "env=mainnet" + +# Deploy only gateway (no poller) - Hoodi +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=gateway_primary" \ +
-e "env=hoodi" + +# Deploy only gateway (no poller) - Mainnet +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=mainnet" + +# Deploy gateway with forced rebuild (Hoodi) +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/hoodi-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=hoodi" \ + -e "force_rebuild=true" + +# Deploy gateway with forced rebuild (Mainnet) +ansible-playbook infra/aggregation_mode/ansible/playbooks/gateway.yaml \ + -i infra/aggregation_mode/ansible/mainnet-inventory.yaml \ + -e "host=gateway_primary" \ + -e "env=mainnet" \ + -e "force_rebuild=true" +``` + +### Changing Configuration + +1. Update INI files in `playbooks/ini/` +2. Redeploy the affected service: + ```bash + # For Hoodi + make gateway_deploy ENV=hoodi + make postgres_deploy ENV=hoodi + + # For Mainnet + make gateway_deploy ENV=mainnet + make postgres_deploy ENV=mainnet + ``` + +### Rotating Passwords + +1. Update password fields in your environment config file (`config-hoodi.ini` or `config-mainnet.ini`): + - `db_password` (used by postgres, gateway, and poller) + - `grafana_postgres_password` (separate read-only user) +2. Run password update on PostgreSQL: + ```bash + # For Hoodi + ssh admin@agg-mode-hoodi-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" + # For Mainnet + ssh admin@agg-mode-mainnet-postgres-monitor "sudo -u postgres psql -d pg_auto_failover -c \"ALTER USER autoctl_node PASSWORD 'new_password'\"" + ``` +3. 
Redeploy gateway and metrics: + ```bash + # For Hoodi + make gateway_deploy ENV=hoodi + make metrics_deploy ENV=hoodi + + # For Mainnet + make gateway_deploy ENV=mainnet + make metrics_deploy ENV=mainnet + ``` + +## File Structure + +``` +infra/aggregation_mode/ansible/ +├── README.md # This file +├── hoodi-inventory.yaml # Hoodi environment inventory +├── mainnet-inventory.yaml # Mainnet environment inventory +└── playbooks/ + ├── ini/ # Configuration files + │ ├── config-hoodi.ini # Hoodi config (tracked, fill in passwords) + │ └── config-mainnet.ini # Mainnet config (tracked, fill in passwords) + ├── templates/ # Jinja2 templates + │ ├── config-files/ # Service config templates + │ ├── services/ # Systemd service templates + │ ├── sudoers/ # Sudoers templates + │ ├── prometheus/ # Prometheus config templates + │ └── grafana/ # Grafana config templates + ├── rust.yaml # Rust installation + ├── pg_autofailover_common.yaml # PostgreSQL + pg_auto_failover setup + ├── pg_monitor.yaml # PostgreSQL monitor deployment + ├── pg_node.yaml # PostgreSQL node deployment + ├── postgres_migrations.yaml # Database migrations + ├── gateway.yaml # Gateway deployment + ├── poller.yaml # Poller deployment + ├── prometheus_agg_mode.yaml # Prometheus deployment + ├── grafana_agg_mode.yaml # Grafana deployment + ├── task_sender.yaml # Task sender deployment + ├── postgres_cluster.yaml # Postgres orchestration + ├── gateway_stack.yaml # Gateway + poller orchestration + ├── metrics_stack.yaml # Metrics orchestration + └── deploy_all.yaml # Full stack orchestration +``` + +## Security Notes + +1. **Passwords**: Config files are tracked in git with empty password fields. Fill in passwords locally. After filling passwords, run `git update-index --assume-unchanged infra/aggregation_mode/ansible/playbooks/ini/config-*.ini` from the repository root to prevent accidentally committing them. + +2. **Private Keys**: The `task_sender_private_key` field must be filled with a valid Ethereum private key. Never commit this value to git.
The playbook sets appropriate permissions (0600) on the environment file. + +3. **TLS Certificates**: Keep private keys secure. The playbooks set appropriate permissions (0600). + +4. **SSH Access**: All servers are only accessible via Tailscale VPN (100.64.0.0/10). + +5. **PostgreSQL**: Uses scram-sha-256 password authentication, not trust mode. + +6. **Firewall**: UFW is configured on all servers with deny-by-default policy. + +## Support + +For issues or questions: +- Check the [Troubleshooting](#troubleshooting) section +- Review logs on the affected server +- Contact the infrastructure team + +## References + +- [PostgreSQL Auto-Failover Documentation](https://pg-auto-failover.readthedocs.io/) +- [Ansible Documentation](https://docs.ansible.com/) +- [Prometheus Documentation](https://prometheus.io/docs/) +- [Grafana Documentation](https://grafana.com/docs/) diff --git a/infra/aggregation_mode/ansible/hoodi-inventory.yaml b/infra/aggregation_mode/ansible/hoodi-inventory.yaml new file mode 100644 index 000000000..f08c3dd5d --- /dev/null +++ b/infra/aggregation_mode/ansible/hoodi-inventory.yaml @@ -0,0 +1,83 @@ +# PostgreSQL Monitor +postgres_monitor: + hosts: + agg-mode-hoodi-postgres-monitor: + ansible_host: agg-mode-hoodi-postgres-monitor + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Primary +postgres_primary: + hosts: + agg-mode-hoodi-postgres-1: + ansible_host: agg-mode-hoodi-postgres-1 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Secondary +postgres_secondary: + hosts: + agg-mode-hoodi-postgres-2: + ansible_host: agg-mode-hoodi-postgres-2 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Cluster (all postgres nodes) +postgres_cluster: + children: + postgres_monitor: + postgres_primary: + postgres_secondary: + +# Gateway Primary +gateway_primary: + hosts: + agg-mode-hoodi-gateway-1: + 
ansible_host: agg-mode-hoodi-gateway-1 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Secondary +gateway_secondary: + hosts: + agg-mode-hoodi-gateway-2: + ansible_host: agg-mode-hoodi-gateway-2 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Cluster (all gateway nodes) +gateway_cluster: + children: + gateway_primary: + gateway_secondary: + +# Metrics Server +metrics: + hosts: + agg-mode-hoodi-metrics: + ansible_host: agg-mode-hoodi-metrics + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# Task Sender +task_sender: + hosts: + agg-mode-hoodi-sender: + ansible_host: agg-mode-hoodi-sender + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# All aggregation mode servers +aggregation_mode: + children: + postgres_cluster: + gateway_cluster: + metrics: + task_sender: diff --git a/infra/aggregation_mode/ansible/mainnet-inventory.yaml b/infra/aggregation_mode/ansible/mainnet-inventory.yaml new file mode 100644 index 000000000..48ca062c8 --- /dev/null +++ b/infra/aggregation_mode/ansible/mainnet-inventory.yaml @@ -0,0 +1,83 @@ +# PostgreSQL Monitor +postgres_monitor: + hosts: + agg-mode-mainnet-postgres-monitor: + ansible_host: agg-mode-mainnet-postgres-monitor + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Primary +postgres_primary: + hosts: + agg-mode-mainnet-postgres-1: + ansible_host: agg-mode-mainnet-postgres-1 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Secondary +postgres_secondary: + hosts: + agg-mode-mainnet-postgres-2: + ansible_host: agg-mode-mainnet-postgres-2 + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# PostgreSQL Cluster (all postgres nodes) +postgres_cluster: + children: + postgres_monitor: + postgres_primary: + 
postgres_secondary: + +# Gateway Primary +gateway_primary: + hosts: + agg-mode-mainnet-gateway-1: + ansible_host: agg-mode-mainnet-gateway-1 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Secondary +gateway_secondary: + hosts: + agg-mode-mainnet-gateway-2: + ansible_host: agg-mode-mainnet-gateway-2 + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# Gateway Cluster (all gateway nodes) +gateway_cluster: + children: + gateway_primary: + gateway_secondary: + +# Metrics Server +metrics: + hosts: + agg-mode-mainnet-metrics: + ansible_host: agg-mode-mainnet-metrics + admin_user: admin + ansible_user: admin + ansible_python_interpreter: /usr/bin/python3 + +# Task Sender +task_sender: + hosts: + agg-mode-mainnet-sender: + ansible_host: agg-mode-mainnet-sender + admin_user: admin + ansible_user: app + ansible_python_interpreter: /usr/bin/python3 + +# All aggregation mode servers +aggregation_mode: + children: + postgres_cluster: + gateway_cluster: + metrics: + task_sender: diff --git a/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml new file mode 100644 index 000000000..51e23a8a9 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/deploy_all.yaml @@ -0,0 +1,28 @@ +- name: Deploy PostgreSQL Cluster + ansible.builtin.import_playbook: postgres_cluster.yaml + vars: + env: "{{ env }}" + +- name: Deploy Gateway and Poller on Primary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_primary + env: "{{ env }}" + +- name: Deploy Gateway and Poller on Secondary + ansible.builtin.import_playbook: gateway_stack.yaml + vars: + host: gateway_secondary + env: "{{ env }}" + +- name: Deploy Metrics Stack + ansible.builtin.import_playbook: metrics_stack.yaml + vars: + host: metrics + env: "{{ env }}" + +- name: Deploy Task Sender + ansible.builtin.import_playbook: task_sender.yaml + vars: + host: 
task_sender + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/gateway.yaml b/infra/aggregation_mode/ansible/playbooks/gateway.yaml new file mode 100644 index 000000000..98f73e418 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/gateway.yaml @@ -0,0 +1,153 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Gateway Setup + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + gateway_port: "{{ lookup('ini', 'gateway_port', file=config_file, default='8080') }}" + gateway_tls_enabled: "{{ lookup('ini', 'gateway_tls_enabled', file=config_file, default='true') }}" + gateway_tls_port: "{{ lookup('ini', 'gateway_tls_port', file=config_file, default='443') }}" + gateway_tls_cert_path: "{{ lookup('ini', 'gateway_tls_cert_path', file=config_file) }}" + gateway_tls_key_path: "{{ lookup('ini', 'gateway_tls_key_path', file=config_file) }}" + gateway_db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + gateway_db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + gateway_db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + gateway_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" + gateway_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" + gateway_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" + gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + 
gateway_network: "{{ lookup('ini', 'gateway_network', file=config_file) }}" + gateway_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file=config_file, default='100') }}" + gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file=config_file, default='9094') }}" + tls_cert_source_path: "{{ lookup('ini', 'tls_cert_source_path', file=config_file) }}" + tls_key_source_path: "{{ lookup('ini', 'tls_key_source_path', file=config_file) }}" + no_log: true + + - name: Install required system packages + become: true + apt: + pkg: + - pkg-config + - libssl-dev + - build-essential + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create SSL directory + file: + path: /home/{{ ansible_user }}/.ssl + state: directory + mode: '0700' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Copy TLS certificate + copy: + src: "{{ tls_cert_source_path }}" + dest: "{{ gateway_tls_cert_path }}" + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Copy TLS key + copy: + src: "{{ tls_key_source_path }}" + dest: "{{ gateway_tls_key_path }}" + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + no_log: true + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/gateway/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Remove existing gateway binary (if force rebuild) + file: + path: /home/{{ ansible_user }}/.cargo/bin/gateway + state: absent + when: force_rebuild | default(false) | bool + + - name: Build gateway with TLS + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo install --path /home/{{ ansible_user }}/repos/gateway/aligned_layer/aggregation_mode/gateway --bin gateway --features tls --locked + args: + creates: /home/{{ ansible_user }}/.cargo/bin/gateway + + - name: Create config directory + file: + path: /home/{{ 
ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template gateway config file + template: + src: config-files/config-agg-mode-gateway.yaml.j2 + dest: /home/{{ ansible_user }}/config/config-agg-mode-gateway.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template gateway systemd service + become: true + template: + src: services/gateway.service.j2 + dest: /etc/systemd/system/gateway.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Allow gateway TLS port through UFW + become: true + ufw: + rule: allow + port: "{{ gateway_tls_port }}" # read from the INI by this play's set_fact task (default 443), so firewall and config cannot drift + proto: tcp + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create sudoers file for gateway service management + become: true + template: + src: sudoers/gateway-service.j2 + dest: /etc/sudoers.d/gateway-service + mode: '0440' + validate: 'visudo -cf %s' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start gateway service + become: true + systemd_service: + name: gateway + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml new file mode 100644 index 000000000..10ab7cc5b --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/gateway_stack.yaml @@ -0,0 +1,11 @@ +- name: Deploy Gateway + ansible.builtin.import_playbook: gateway.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Deploy Poller + ansible.builtin.import_playbook: poller.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml new file mode 100644 index 000000000..019916511 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/grafana_agg_mode.yaml @@ -0,0 +1,141 @@ +-
name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Grafana Setup for Aggregation Mode + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + grafana_admin_password: "{{ lookup('ini', 'grafana_admin_password', file=config_file) }}" + grafana_prometheus_url: "{{ lookup('ini', 'grafana_prometheus_url', file=config_file) }}" + grafana_rpc_url: "{{ lookup('ini', 'grafana_rpc_url', file=config_file) }}" + grafana_postgres_host: "{{ lookup('ini', 'grafana_postgres_host', file=config_file) }}" + grafana_postgres_port: "{{ lookup('ini', 'grafana_postgres_port', file=config_file, default='5432') }}" + grafana_postgres_db: "{{ lookup('ini', 'grafana_postgres_db', file=config_file) }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" + grafana_monitor_host: "{{ lookup('ini', 'grafana_monitor_host', file=config_file) }}" + grafana_monitor_port: "{{ lookup('ini', 'grafana_monitor_port', file=config_file, default='5432') }}" + grafana_monitor_db: "{{ lookup('ini', 'grafana_monitor_db', file=config_file, default='pg_auto_failover') }}" + no_log: true + + - name: Install required packages + become: true + apt: + pkg: + - apt-transport-https + - software-properties-common + - wget + - gnupg + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create apt keyrings directory + become: true + file: + path: /etc/apt/keyrings + state: directory + mode: '0755' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Download Grafana GPG key + become: true + get_url: + url: https://apt.grafana.com/gpg.key + dest: /tmp/grafana.key + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add 
Grafana GPG key + become: true + shell: | + gpg --dearmor < /tmp/grafana.key > /etc/apt/keyrings/grafana.gpg + args: + creates: /etc/apt/keyrings/grafana.gpg + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add Grafana APT repository + become: true + lineinfile: + path: /etc/apt/sources.list.d/grafana.list + line: "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Update apt cache + become: true + apt: + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Install Grafana + become: true + apt: + name: grafana + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Template Grafana environment configuration + become: true + template: + src: grafana/grafana_env.j2 + dest: /etc/default/grafana-server-custom + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Append custom environment to grafana-server defaults # marker-delimited: a re-run replaces the previous block instead of appending a duplicate + become: true + shell: | + sed -i '/^# BEGIN agg-mode custom env$/,/^# END agg-mode custom env$/d' /etc/default/grafana-server && { echo '# BEGIN agg-mode custom env'; cat /etc/default/grafana-server-custom; echo '# END agg-mode custom env'; } >> /etc/default/grafana-server + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Set Grafana admin password + become: true + lineinfile: + path: /etc/grafana/grafana.ini + regexp: '^;?admin_password\s*=' + line: 'admin_password = {{ grafana_admin_password }}' + insertafter: '^\[security\]' + vars: + ansible_ssh_user: "{{ admin_user }}" + no_log: true + + - name: Copy provisioning directory + become: true + copy: + src: ../../../../grafana/provisioning/ + dest: /etc/grafana/provisioning/ + owner: grafana + group: grafana + mode: '0644' + directory_mode: '0755' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start Grafana service + become: true + systemd_service: + name: grafana-server + state: restarted + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini
b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini new file mode 100644 index 000000000..7e76643ac --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-hoodi.ini @@ -0,0 +1,98 @@ +[global] +# ============================================ +# Hoodi Environment Configuration +# ============================================ +# This file contains all configuration for the Hoodi environment. + +# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) +db_password= +# Grafana read-only database user password +grafana_postgres_password= +# TLS certificates (local paths to copy from) +tls_cert_source_path= +tls_key_source_path= +# Grafana admin password +grafana_admin_password= +# Task sender private key (for sending proofs) +task_sender_private_key= + +# ============================================ +# Environment +# ============================================ +environment=hoodi +git_branch=staging + +# ============================================ +# PostgreSQL Configuration +# ============================================ +postgres_monitor_hostname=agg-mode-hoodi-postgres-monitor +postgres_primary_hostname=agg-mode-hoodi-postgres-1 +postgres_secondary_hostname=agg-mode-hoodi-postgres-2 +db_name=agg_mode +db_user=autoctl_node + +# PostgreSQL Monitor +monitor_pgdata=/var/lib/postgresql/monitor +monitor_port=5432 + +# PostgreSQL Nodes +node_pgdata=/var/lib/postgresql/node +node_port=5432 +backup_dir=/var/lib/backup + +# ============================================ +# Gateway & Poller Configuration +# ============================================ +gateway_network=hoodi +gateway_max_daily_proofs=100 +gateway_payment_service_address=0x7222E0183cE1A96619d0c883e9bfc6b76D4e780e +gateway_eth_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net + +gateway_postgres_primary=agg-mode-hoodi-postgres-1 
+gateway_postgres_secondary=agg-mode-hoodi-postgres-2 +gateway_postgres_port=5432 + +# Metrics ports +gateway_metrics_port=9094 +poller_metrics_port=9095 + +# Gateway Service Settings (same for all gateways) +gateway_port=8080 +gateway_tls_enabled=true +gateway_tls_port=443 +gateway_tls_cert_path=/home/app/.ssl/cert.pem +gateway_tls_key_path=/home/app/.ssl/key.pem + +# Poller Service Settings (same for all pollers) +poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json +last_block_fetched_initial_value=0 + +# ============================================ +# Metrics Configuration +# ============================================ +prometheus_version=3.6.0 +gateway_primary_hostname=agg-mode-hoodi-gateway-1 +gateway_secondary_hostname=agg-mode-hoodi-gateway-2 + +# Grafana Configuration +grafana_prometheus_url=http://localhost:9090 +grafana_rpc_url=https://aligned-hoodi-rpc-geth.tail665ae.ts.net +grafana_postgres_host=agg-mode-hoodi-postgres-1 +grafana_postgres_port=5432 +grafana_postgres_db=agg_mode +grafana_postgres_user=grafana +# Monitor datasource (uses same user/password as postgres datasource) +grafana_monitor_host=agg-mode-hoodi-postgres-monitor +grafana_monitor_port=5432 +grafana_monitor_db=pg_auto_failover + +# ============================================ +# Task Sender Configuration +# ============================================ +task_sender_interval_hours=1 +task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof +task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin +task_sender_network=hoodi diff --git a/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini new file mode 100644 index 000000000..9d42109d0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/ini/config-mainnet.ini @@ -0,0 +1,98 @@ +[global] +# ============================================ +# Mainnet Environment Configuration +# 
============================================ +# This file contains all configuration for the Mainnet environment. + +# ============================================ +# REQUIRED: Sensitive Values (fill these in) +# ============================================ +# Database password (used by postgres, gateway, and poller) +db_password= +# Grafana read-only database user password +grafana_postgres_password= +# TLS certificates (local paths to copy from) +tls_cert_source_path= +tls_key_source_path= +# Grafana admin password +grafana_admin_password= +# Task sender private key (for sending proofs) +task_sender_private_key= + +# ============================================ +# Environment +# ============================================ +environment=mainnet +git_branch=staging + +# ============================================ +# PostgreSQL Configuration +# ============================================ +postgres_monitor_hostname=agg-mode-mainnet-postgres-monitor +postgres_primary_hostname=agg-mode-mainnet-postgres-1 +postgres_secondary_hostname=agg-mode-mainnet-postgres-2 +db_name=agg_mode +db_user=autoctl_node + +# PostgreSQL Monitor +monitor_pgdata=/var/lib/postgresql/monitor +monitor_port=5432 + +# PostgreSQL Nodes +node_pgdata=/var/lib/postgresql/node +node_port=5432 +backup_dir=/var/lib/backup + +# ============================================ +# Gateway & Poller Configuration +# ============================================ +gateway_network=mainnet +gateway_max_daily_proofs=100 +gateway_payment_service_address=0xc8631Bc1E60c20db40e474F791126212fA8255F4 +gateway_eth_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net + +gateway_postgres_primary=agg-mode-mainnet-postgres-1 +gateway_postgres_secondary=agg-mode-mainnet-postgres-2 +gateway_postgres_port=5432 + +# Metrics ports +gateway_metrics_port=9094 +poller_metrics_port=9095 + +# Gateway Service Settings (same for all gateways) +gateway_port=8080 +gateway_tls_enabled=true +gateway_tls_port=443 
+gateway_tls_cert_path=/home/app/.ssl/cert.pem +gateway_tls_key_path=/home/app/.ssl/key.pem + +# Poller Service Settings (same for all pollers) +poller_last_block_fetched_filepath=/home/app/config/proof-aggregator.last_block_fetched.json +last_block_fetched_initial_value=24235289 + +# ============================================ +# Metrics Configuration +# ============================================ +prometheus_version=3.6.0 +gateway_primary_hostname=agg-mode-mainnet-gateway-1 +gateway_secondary_hostname=agg-mode-mainnet-gateway-2 + +# Grafana Configuration +grafana_prometheus_url=http://localhost:9090 +grafana_rpc_url=https://aligned-mainnet-rpc-1.tail665ae.ts.net +grafana_postgres_host=agg-mode-mainnet-postgres-1 +grafana_postgres_port=5432 +grafana_postgres_db=agg_mode +grafana_postgres_user=grafana +# Monitor datasource (uses same user/password as postgres datasource) +grafana_monitor_host=agg-mode-mainnet-postgres-monitor +grafana_monitor_port=5432 +grafana_monitor_db=pg_auto_failover + +# ============================================ +# Task Sender Configuration +# ============================================ +task_sender_interval_hours=1 +task_sender_proof_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0.proof +task_sender_vk_path=scripts/test_files/sp1/sp1_fibonacci_5_0_0_vk.bin +task_sender_network=mainnet diff --git a/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml new file mode 100644 index 000000000..242362595 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/metrics_stack.yaml @@ -0,0 +1,11 @@ +- name: Deploy Prometheus + ansible.builtin.import_playbook: prometheus_agg_mode.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Deploy Grafana + ansible.builtin.import_playbook: grafana_agg_mode.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml 
b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml new file mode 100644 index 000000000..5d3382613 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_autofailover_common.yaml @@ -0,0 +1,101 @@ +- name: PostgreSQL Auto-Failover Common Setup + hosts: "{{ host }}" + + tasks: + - name: Install postgresql-common and ca-certificates + become: true + apt: + pkg: + - postgresql-common + - ca-certificates + - curl + - acl + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Download PostgreSQL APT repository setup script + become: true + get_url: + url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + dest: /tmp/postgresql.asc + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add PostgreSQL APT key + become: true + shell: | + cat /tmp/postgresql.asc | gpg --dearmor | tee /etc/apt/keyrings/postgresql.gpg > /dev/null + args: + creates: /etc/apt/keyrings/postgresql.gpg + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Add PostgreSQL APT repository + become: true + lineinfile: + path: /etc/apt/sources.list.d/pgdg.list + line: "deb [signed-by=/etc/apt/keyrings/postgresql.gpg] https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Set create_main_cluster to false + become: true + lineinfile: + path: /etc/postgresql-common/createcluster.conf + line: "create_main_cluster = false" + create: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Update apt cache + become: true + apt: + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Install postgresql-16 and pg-auto-failover-cli + become: true + apt: + pkg: + - postgresql-16 + - pg-auto-failover-cli + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Stop and disable default PostgreSQL service + become: true + systemd_service: + name: "{{ item }}" + state: stopped + enabled: false + 
with_items: + - postgresql + - postgresql@16-main + ignore_errors: yes + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Remove default PostgreSQL data directory + become: true + file: + path: /var/lib/postgresql/16/main # only the stock Debian cluster; removing all of /var/lib/postgresql would also wipe the pg_autoctl monitor/node pgdata on every re-run + state: absent + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Create PostgreSQL data directory + become: true + file: + path: /var/lib/postgresql + state: directory + owner: postgres + group: postgres + mode: '0700' + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml new file mode 100644 index 000000000..7fc9af380 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_monitor.yaml @@ -0,0 +1,136 @@ +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + +- name: PostgreSQL Monitor Setup + hosts: "{{ host }}" + vars: + ansible_common_remote_group: postgres + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Debug config file + debug: + msg: "Using config file: {{ config_file }}" + + - name: Set config vars from INI file + set_fact: + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + monitor_pgdata: "{{ lookup('ini', 'monitor_pgdata', file=config_file, default='/var/lib/postgresql/monitor') }}" + monitor_port: "{{ lookup('ini', 'monitor_port', file=config_file, default='5432') }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini', 'grafana_postgres_password', file=config_file) }}" + no_log: true + + - name: Debug vars + debug: + msg: "env={{ env }}, monitor_pgdata={{ monitor_pgdata }}" + + - name: Install postgresql-16-auto-failover + become: true + apt: + pkg: + - postgresql-16-auto-failover + state: latest + vars: + ansible_ssh_user: "{{ admin_user }}" + + -
name: Check if monitor is already initialized + stat: + path: "{{ monitor_pgdata }}/PG_VERSION" + register: monitor_initialized + become: true + become_user: postgres + + - name: Get Tailscale IP + shell: tailscale ip --4 + register: tailscale_ip + when: not monitor_initialized.stat.exists + + - name: Create pg_autoctl monitor + become: true + become_user: postgres + shell: | + pg_autoctl create monitor \ + --pgdata {{ monitor_pgdata }} \ + --pgctl /usr/lib/postgresql/16/bin/pg_ctl \ + --auth scram-sha-256 \ + --ssl-self-signed \ + --pgport {{ monitor_port }} \ + --hostname {{ tailscale_ip.stdout }} + when: not monitor_initialized.stat.exists + args: + creates: "{{ monitor_pgdata }}/PG_VERSION" + + - name: Update pg_hba.conf for Tailscale network + become: true + become_user: postgres + blockinfile: + path: "{{ monitor_pgdata }}/pg_hba.conf" + marker: "# {mark} ANSIBLE MANAGED BLOCK - Tailscale" + block: | + # Tailscale network connections + host all all 100.64.0.0/10 scram-sha-256 + host replication all 100.64.0.0/10 scram-sha-256 + + - name: Generate systemd service file + become: true + become_user: postgres + shell: | + pg_autoctl show systemd --pgdata {{ monitor_pgdata }} + register: systemd_service_content + + - name: Create pgautofailover systemd service + become: true + copy: + content: "{{ systemd_service_content.stdout }}" + dest: /etc/systemd/system/pgautofailover.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start pgautofailover service + become: true + systemd_service: + name: pgautofailover + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Wait for monitor to be ready + wait_for: + port: "{{ monitor_port }}" + delay: 5 + timeout: 60 + + - name: Set password for autoctl_node user + become: true + become_user: postgres + shell: | + psql -d pg_auto_failover -c "ALTER USER autoctl_node PASSWORD '{{ db_password }}';" + no_log: true + + - name: 
Create Grafana read-only user on monitor + become: true + become_user: postgres + shell: | + psql -d pg_auto_failover << 'EOF' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_user WHERE usename = '{{ grafana_postgres_user }}') THEN + CREATE USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + ELSE + ALTER USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + END IF; + END + $$; + GRANT CONNECT ON DATABASE pg_auto_failover TO {{ grafana_postgres_user }}; + GRANT pg_read_all_data TO {{ grafana_postgres_user }}; + EOF + no_log: true diff --git a/infra/aggregation_mode/ansible/playbooks/pg_node.yaml b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml new file mode 100644 index 000000000..75d85c9b0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/pg_node.yaml @@ -0,0 +1,169 @@ +- name: Import pg_autofailover_common playbook + ansible.builtin.import_playbook: pg_autofailover_common.yaml + vars: + host: "{{ host }}" + +- name: PostgreSQL Node Setup + hosts: "{{ host }}" + vars: + ansible_common_remote_group: postgres + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + postgres_monitor_hostname: "{{ lookup('ini', 'postgres_monitor_hostname', file=config_file) }}" + # Port the monitor listens on; same INI key and default that pg_monitor.yaml passes to --pgport + monitor_port: "{{ lookup('ini', 'monitor_port', file=config_file, default='5432') }}" + node_pgdata: "{{ lookup('ini', 'node_pgdata', file=config_file, default='/var/lib/postgresql/node') }}" + node_port: "{{ lookup('ini', 'node_port', file=config_file, default='5432') }}" + backup_dir: "{{ lookup('ini', 'backup_dir', file=config_file, default='/var/lib/backup') }}" + grafana_postgres_user: "{{ lookup('ini', 'grafana_postgres_user', file=config_file) }}" + grafana_postgres_password: "{{ lookup('ini',
'grafana_postgres_password', file=config_file) }}" + no_log: true + + - name: Create backup directory + become: true + file: + path: "{{ backup_dir }}" + state: directory + owner: postgres + group: postgres + mode: '0700' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Check if node is already initialized + stat: + path: "{{ node_pgdata }}/PG_VERSION" + register: node_initialized + become: true + become_user: postgres + + - name: Debug node_initialized + debug: + msg: "Node initialized: {{ node_initialized.stat.exists }}, path: {{ node_pgdata }}/PG_VERSION" + + - name: Get Tailscale IP + shell: tailscale ip --4 + register: tailscale_ip + when: not node_initialized.stat.exists + + - name: Create pg_autoctl postgres node + become: true + become_user: postgres + shell: | + PGPASSWORD='{{ db_password }}' pg_autoctl create postgres \ + --pgdata {{ node_pgdata }} \ + --pgctl /usr/lib/postgresql/16/bin/pg_ctl \ + --auth scram-sha-256 \ + --ssl-self-signed \ + --username {{ db_user }} \ + --dbname {{ db_name }} \ + --pgport {{ node_port }} \ + --hostname {{ tailscale_ip.stdout }} \ + --monitor 'postgres://{{ db_user }}:{{ db_password }}@{{ postgres_monitor_hostname }}:{{ monitor_port }}/pg_auto_failover?sslmode=require' + when: not node_initialized.stat.exists + args: + creates: "{{ node_pgdata }}/PG_VERSION" + # The command line embeds db_password (PGPASSWORD and the monitor URI); keep it out of Ansible output + no_log: true + + - name: Set replication password + become: true + become_user: postgres + shell: | + pg_autoctl config set replication.password '{{ db_password }}' --pgdata {{ node_pgdata }} + no_log: true + + - name: Update pg_hba.conf for Tailscale network + become: true + become_user: postgres + blockinfile: + path: "{{ node_pgdata }}/pg_hba.conf" + marker: "# {mark} ANSIBLE MANAGED BLOCK - Tailscale" + block: | + # Tailscale network connections + host all all 100.64.0.0/10 scram-sha-256 + + - name: Generate systemd service file + become: true + become_user: postgres + shell: | + pg_autoctl show systemd --pgdata {{ node_pgdata }} + register: systemd_service_content + + - name: 
Create pgautofailover systemd service + become: true + copy: + content: "{{ systemd_service_content.stdout }}" + dest: /etc/systemd/system/pgautofailover.service + mode: '0644' + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Enable and start pgautofailover service + become: true + systemd_service: + name: pgautofailover + state: started + enabled: true + daemon_reload: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Wait for node to join cluster + wait_for: + port: "{{ node_port }}" + delay: 5 + timeout: 60 + + - name: Check if node is writable (primary) + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -tAc "SELECT NOT pg_is_in_recovery();" + register: is_writable + changed_when: false + + - name: Set password for autoctl_node user in agg_mode database + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -c "ALTER USER {{ db_user }} PASSWORD '{{ db_password }}';" + when: is_writable.stdout == 't' + no_log: true + + - name: Set password for pgautofailover_replicator user + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} -c "ALTER USER pgautofailover_replicator PASSWORD '{{ db_password }}';" + when: is_writable.stdout == 't' + no_log: true + + - name: Create Grafana read-only user + become: true + become_user: postgres + shell: | + psql -d {{ db_name }} << 'EOF' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_user WHERE usename = '{{ grafana_postgres_user }}') THEN + CREATE USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + ELSE + ALTER USER {{ grafana_postgres_user }} WITH PASSWORD '{{ grafana_postgres_password }}'; + END IF; + END + $$; + GRANT CONNECT ON DATABASE {{ db_name }} TO {{ grafana_postgres_user }}; + GRANT pg_read_all_data TO {{ grafana_postgres_user }}; + EOF + when: is_writable.stdout == 't' + no_log: true diff --git a/infra/aggregation_mode/ansible/playbooks/poller.yaml 
b/infra/aggregation_mode/ansible/playbooks/poller.yaml new file mode 100644 index 000000000..ee0e05953 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/poller.yaml @@ -0,0 +1,123 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Poller Setup + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + poller_db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + poller_db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + poller_db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + poller_postgres_primary: "{{ lookup('ini', 'gateway_postgres_primary', file=config_file) }}" + poller_postgres_secondary: "{{ lookup('ini', 'gateway_postgres_secondary', file=config_file) }}" + poller_postgres_port: "{{ lookup('ini', 'gateway_postgres_port', file=config_file, default='5432') }}" + poller_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + poller_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + poller_network: "{{ lookup('ini', 'gateway_network', file=config_file) }}" + poller_max_daily_proofs: "{{ lookup('ini', 'gateway_max_daily_proofs', file=config_file, default='100') }}" + poller_last_block_fetched_filepath: "{{ lookup('ini', 'poller_last_block_fetched_filepath', file=config_file) }}" + last_block_fetched_initial_value: "{{ lookup('ini', 'last_block_fetched_initial_value', file=config_file, default='0') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file=config_file, default='9095') }}" + no_log: true + + - name: Install required system 
packages + become: true + apt: + pkg: + - pkg-config + - libssl-dev + - build-essential + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/poller/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Remove existing poller binary (if force rebuild) + file: + path: /home/{{ ansible_user }}/.cargo/bin/payments_poller + state: absent + when: force_rebuild | default(false) | bool + + - name: Build poller + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo install --path /home/{{ ansible_user }}/repos/poller/aligned_layer/aggregation_mode/payments_poller --bin payments_poller --locked + args: + creates: /home/{{ ansible_user }}/.cargo/bin/payments_poller + + - name: Create config directory + file: + path: /home/{{ ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Check if last_block_fetched file exists and get size + stat: + path: "{{ poller_last_block_fetched_filepath }}" + register: last_block_file + + - name: Create or fix last_block_fetched file if empty or missing + copy: + content: '{"last_block_fetched": {{ last_block_fetched_initial_value }}}' + dest: "{{ poller_last_block_fetched_filepath }}" + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + when: not last_block_file.stat.exists or last_block_file.stat.size == 0 + + - name: Template poller config file + template: + src: config-files/config-agg-mode-poller.yaml.j2 + dest: /home/{{ ansible_user }}/config/config-agg-mode-poller.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create systemd user directory + file: + path: /home/{{ ansible_user }}/.config/systemd/user + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: 
Template poller systemd service + template: + src: services/poller.service.j2 + dest: /home/{{ ansible_user }}/.config/systemd/user/poller.service + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Enable and start poller service + systemd_service: + name: poller + state: started + enabled: true + scope: user + daemon_reload: true diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml new file mode 100644 index 000000000..9cc845212 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/postgres_cluster.yaml @@ -0,0 +1,23 @@ +- name: Deploy PostgreSQL Monitor + ansible.builtin.import_playbook: pg_monitor.yaml + vars: + host: postgres_monitor + env: "{{ env }}" + +- name: Deploy PostgreSQL Primary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_primary + env: "{{ env }}" + +- name: Deploy PostgreSQL Secondary Node + ansible.builtin.import_playbook: pg_node.yaml + vars: + host: postgres_secondary + env: "{{ env }}" + +- name: Run Database Migrations + ansible.builtin.import_playbook: postgres_migrations.yaml + vars: + host: postgres_primary + env: "{{ env }}" diff --git a/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml new file mode 100644 index 000000000..add485060 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/postgres_migrations.yaml @@ -0,0 +1,46 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: PostgreSQL Migrations Setup + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + 
db_name: "{{ lookup('ini', 'db_name', file=config_file) }}" + db_user: "{{ lookup('ini', 'db_user', file=config_file) }}" + db_password: "{{ lookup('ini', 'db_password', file=config_file) }}" + postgres_primary_hostname: "{{ lookup('ini', 'postgres_primary_hostname', file=config_file) }}" + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + no_log: true + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/migrations/aligned_layer + version: "{{ git_branch }}" + update: yes + + - name: Run database migrations + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cargo run --manifest-path /home/{{ ansible_user }}/repos/migrations/aligned_layer/aggregation_mode/Cargo.toml --release --bin migrate -- "postgres://{{ db_user }}:{{ db_password }}@{{ postgres_primary_hostname }}:5432/{{ db_name }}" + register: migration_result + no_log: true + + - name: Display migration result + debug: + msg: "{{ migration_result.stdout_lines }}" diff --git a/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml new file mode 100644 index 000000000..739f91b27 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/prometheus_agg_mode.yaml @@ -0,0 +1,88 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Prometheus Setup for Aggregation Mode + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + prometheus_version: "{{ lookup('ini', 'prometheus_version', file=config_file, default='3.6.0') }}" + gateway_primary_hostname: "{{ lookup('ini', 'gateway_primary_hostname', file=config_file) }}" + gateway_secondary_hostname: "{{ lookup('ini', 'gateway_secondary_hostname', file=config_file) }}" + 
gateway_metrics_port: "{{ lookup('ini', 'gateway_metrics_port', file=config_file, default='9094') }}" + poller_metrics_port: "{{ lookup('ini', 'poller_metrics_port', file=config_file, default='9095') }}" + + - name: Check if Prometheus is installed + stat: + path: /home/{{ ansible_user }}/prometheus-{{ prometheus_version }}.linux-amd64/prometheus + register: prometheus_exists + + - name: Download Prometheus + when: not prometheus_exists.stat.exists + get_url: + url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz" + dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + mode: '0644' + + - name: Extract Prometheus + when: not prometheus_exists.stat.exists + unarchive: + src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + dest: /home/{{ ansible_user }}/ + remote_src: yes + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create config directory + file: + path: /home/{{ ansible_user }}/config + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template Prometheus config file + template: + src: prometheus/prometheus_agg_mode.yaml.j2 + dest: /home/{{ ansible_user }}/config/prometheus.yaml + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Create systemd user directory + file: + path: /home/{{ ansible_user }}/.config/systemd/user + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template Prometheus systemd service + template: + src: services/prometheus_agg_mode.service.j2 + dest: /home/{{ ansible_user }}/.config/systemd/user/prometheus.service + mode: '0644' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Enable and restart Prometheus service + systemd_service: + name: prometheus + state: restarted + enabled: true + scope: user + daemon_reload: true + + - name: Clean up Prometheus tar 
+ when: not prometheus_exists.stat.exists + file: + path: "/tmp/prometheus-{{ prometheus_version }}.tar.gz" + state: absent diff --git a/infra/aggregation_mode/ansible/playbooks/rust.yaml b/infra/aggregation_mode/ansible/playbooks/rust.yaml new file mode 100644 index 000000000..b74c764b4 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/rust.yaml @@ -0,0 +1,39 @@ +- name: Rust Setup + hosts: "{{ host }}" + vars: + rust_version: 1.92.0 + + tasks: + - name: Check if cargo is installed + stat: + path: /home/{{ ansible_user }}/.cargo/bin/cargo + register: cargo_exists + + - name: Download Rust installer + when: not cargo_exists.stat.exists + get_url: + url: https://sh.rustup.rs + dest: /tmp/rustup-init.sh + mode: '0755' + + - name: Install Rust + when: not cargo_exists.stat.exists + shell: | + /tmp/rustup-init.sh -y --default-toolchain {{ rust_version }} + args: + creates: /home/{{ ansible_user }}/.cargo/bin/cargo + + - name: Update user PATH + when: not cargo_exists.stat.exists + lineinfile: + path: "/home/{{ ansible_user }}/.bashrc" + line: "{{ item }}" + state: present + with_items: + - 'export PATH=$HOME/.cargo/bin:$PATH' + + - name: Clean up Rust installer + when: not cargo_exists.stat.exists + file: + path: /tmp/rustup-init.sh + state: absent diff --git a/infra/aggregation_mode/ansible/playbooks/setup.yaml b/infra/aggregation_mode/ansible/playbooks/setup.yaml new file mode 100644 index 000000000..a521324d3 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/setup.yaml @@ -0,0 +1,40 @@ +- name: Server setup + hosts: "{{ host }}" + + tasks: + # Install required packages + - name: Update apt and install required system packages + become: true + apt: + pkg: + - curl + - vim + - git + - make + - gcc + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + # Create basic directories + - name: Create basic directories if do not exist + ansible.builtin.file: + path: /home/{{ ansible_user }}/{{ item }} + state: directory 
+ mode: '0755' + owner: '{{ ansible_user }}' + group: '{{ ansible_user }}' + loop: + - repos + - config + - services + - .keystores + + - name: Enable linger for {{ ansible_user }} + become: true + command: sudo loginctl enable-linger {{ ansible_user }} + args: + creates: /var/lib/systemd/linger/{{ ansible_user }} + vars: + ansible_ssh_user: "{{ admin_user }}" diff --git a/infra/aggregation_mode/ansible/playbooks/task_sender.yaml b/infra/aggregation_mode/ansible/playbooks/task_sender.yaml new file mode 100644 index 000000000..f7fa523d0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/task_sender.yaml @@ -0,0 +1,211 @@ +- name: Import setup playbook + ansible.builtin.import_playbook: setup.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Import rust playbook + ansible.builtin.import_playbook: rust.yaml + vars: + host: "{{ host }}" + env: "{{ env }}" + +- name: Task Sender Setup + hosts: "{{ host }}" + + tasks: + - name: Set config file path + set_fact: + config_file: "ini/config-{{ env }}.ini" + + - name: Set config vars from INI file + set_fact: + git_branch: "{{ lookup('ini', 'git_branch', file=config_file) }}" + task_sender_interval_hours: "{{ lookup('ini', 'task_sender_interval_hours', file=config_file, default='1') }}" + task_sender_proof_path: "{{ lookup('ini', 'task_sender_proof_path', file=config_file) }}" + task_sender_vk_path: "{{ lookup('ini', 'task_sender_vk_path', file=config_file) }}" + task_sender_private_key: "{{ lookup('ini', 'task_sender_private_key', file=config_file) }}" + task_sender_network: "{{ lookup('ini', 'task_sender_network', file=config_file) }}" + gateway_payment_service_address: "{{ lookup('ini', 'gateway_payment_service_address', file=config_file) }}" + gateway_eth_rpc_url: "{{ lookup('ini', 'gateway_eth_rpc_url', file=config_file) }}" + no_log: true + + - name: Install tmux + become: true + apt: + pkg: + - tmux + state: latest + update_cache: true + vars: + ansible_ssh_user: "{{ admin_user }}" + + - name: 
Create sender directory + file: + path: /home/{{ ansible_user }}/repos/sender + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Clone aligned_layer repository + git: + repo: https://github.com/yetanotherco/aligned_layer.git + dest: /home/{{ ansible_user }}/repos/sender/aligned_layer + version: "{{ git_branch }}" + update: yes + force: yes + + - name: Build CLI + shell: | + export PATH=$HOME/.cargo/bin:$PATH + cd /home/{{ ansible_user }}/repos/sender/aligned_layer + make agg_mode_install_cli + args: + creates: /home/{{ ansible_user }}/.cargo/bin/agg_mode_cli + + - name: Check if foundry (cast) is installed + shell: command -v cast + register: cast_installed + failed_when: false + changed_when: false + + - name: Install foundry (for cast tool) + shell: | + curl -L https://foundry.paradigm.xyz | bash + export PATH=$HOME/.foundry/bin:$PATH + foundryup + when: cast_installed.rc != 0 + + - name: Derive wallet address from private key + shell: | + export PATH=$HOME/.foundry/bin:$PATH + cast wallet address {{ task_sender_private_key }} + register: wallet_address_output + no_log: true + changed_when: false + + - name: Set wallet address variable + set_fact: + wallet_address: "{{ wallet_address_output.stdout | trim }}" + no_log: true + + - name: Check subscription status on payment contract + shell: | + export PATH=$HOME/.foundry/bin:$PATH + cast call {{ gateway_payment_service_address }} \ + "subscribedAddresses(address)(uint256)" \ + {{ wallet_address }} \ + --rpc-url {{ gateway_eth_rpc_url }} + register: subscription_expiration_output + changed_when: false + + - name: Get current timestamp + shell: date +%s + register: current_timestamp + changed_when: false + + - name: Set subscription status variables + set_fact: + subscription_expiration: "{{ subscription_expiration_output.stdout | regex_replace('\\s*\\[.*\\]', '') | trim }}" + current_time: "{{ current_timestamp.stdout | trim }}" + + - name: Display subscription 
status + debug: + msg: | + Wallet address: {{ wallet_address }} + Subscription expiration timestamp: {{ subscription_expiration }} + Current timestamp: {{ current_time }} + Is subscribed: {{ (subscription_expiration | int) > (current_time | int) }} + + - name: Deposit to payment contract if not subscribed + shell: | + export PATH=$HOME/.cargo/bin:$PATH + agg_mode_cli deposit \ + --network {{ task_sender_network }} \ + --rpc-url {{ gateway_eth_rpc_url }} \ + --private-key {{ task_sender_private_key }} + when: (subscription_expiration | int) <= (current_time | int) + no_log: true + register: deposit_result + + - name: Display deposit result + debug: + msg: "Deposit successful. Transaction receipt: {{ deposit_result.stdout }}" + when: (subscription_expiration | int) <= (current_time | int) and deposit_result is defined + + - name: Wait for deposit transaction to confirm + pause: + seconds: 10 + when: (subscription_expiration | int) <= (current_time | int) + + - name: Create scripts directory + file: + path: /home/{{ ansible_user }}/repos/sender/aligned_layer/scripts + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template task sender env file + template: + src: config-files/agg_mode.task_sender.env.j2 + dest: /home/{{ ansible_user }}/repos/sender/aligned_layer/scripts/.agg_mode.task_sender.env + mode: '0600' + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + no_log: true + + - name: Check if tmux session exists + shell: tmux has-session -t task_sender 2>/dev/null + register: tmux_session_exists + failed_when: false + changed_when: false + + - name: Kill existing tmux session + shell: tmux kill-session -t task_sender + when: tmux_session_exists.rc == 0 + + - name: Start task sender in tmux session + shell: | + cd /home/{{ ansible_user }}/repos/sender/aligned_layer + tmux new-session -d -s task_sender 'bash -c "export PATH=$HOME/.cargo/bin:$PATH && make agg_mode_task_sender_start 2>&1 | tee 
/tmp/task_sender.log; exec bash"' + register: tmux_start + changed_when: true + + - name: Wait for task sender to initialize + pause: + seconds: 5 + + - name: Check if tmux session is still running + shell: tmux has-session -t task_sender 2>/dev/null + register: verify_tmux + failed_when: false + changed_when: false + + - name: Capture tmux pane content if session exists + shell: tmux capture-pane -t task_sender -p + register: tmux_output + when: verify_tmux.rc == 0 + failed_when: false + + - name: Display tmux output + debug: + msg: "Tmux session output: {{ tmux_output.stdout }}" + when: verify_tmux.rc == 0 and tmux_output is defined + + - name: Check error log if session failed + shell: cat /tmp/task_sender.log 2>/dev/null || echo "No log file found" + register: error_log + when: verify_tmux.rc != 0 + failed_when: false + + - name: Display error log if session failed + debug: + msg: "Task sender failed. Log: {{ error_log.stdout }}" + when: verify_tmux.rc != 0 + + - name: Fail if tmux session is not running + fail: + msg: "Task sender tmux session failed to start. Check the output above for details." 
+ when: verify_tmux.rc != 0 diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 new file mode 100644 index 000000000..2ceab4fe0 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/agg_mode.task_sender.env.j2 @@ -0,0 +1,5 @@ +INTERVAL_HOURS={{ task_sender_interval_hours }} +PROOF_PATH={{ task_sender_proof_path }} +VK_PATH={{ task_sender_vk_path }} +PRIVATE_KEY={{ task_sender_private_key }} +NETWORK={{ task_sender_network }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 new file mode 100644 index 000000000..fab21b85b --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-gateway.yaml.j2 @@ -0,0 +1,17 @@ +ip: "0.0.0.0" +port: {{ gateway_port }} +{% if gateway_tls_enabled == 'true' %} +tls_cert_path: "{{ gateway_tls_cert_path }}" +tls_key_path: "{{ gateway_tls_key_path }}" +tls_port: {{ gateway_tls_port }} +{% endif %} +db_connection_urls: + - "postgres://{{ gateway_db_user }}:{{ gateway_db_password }}@{{ gateway_postgres_primary }}:{{ gateway_postgres_port }}/{{ gateway_db_name }}" + - "postgres://{{ gateway_db_user }}:{{ gateway_db_password }}@{{ gateway_postgres_secondary }}:{{ gateway_postgres_port }}/{{ gateway_db_name }}" +eth_rpc_url: "{{ gateway_eth_rpc_url }}" +payment_service_address: "{{ gateway_payment_service_address }}" +network: "{{ gateway_network }}" +max_daily_proofs_per_user: {{ gateway_max_daily_proofs }} + +# Metrics +gateway_metrics_port: {{ gateway_metrics_port }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 new 
file mode 100644 index 000000000..8f8114287 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/config-files/config-agg-mode-poller.yaml.j2 @@ -0,0 +1,11 @@ +db_connection_urls: + - "postgres://{{ poller_db_user }}:{{ poller_db_password }}@{{ poller_postgres_primary }}:{{ poller_postgres_port }}/{{ poller_db_name }}" + - "postgres://{{ poller_db_user }}:{{ poller_db_password }}@{{ poller_postgres_secondary }}:{{ poller_postgres_port }}/{{ poller_db_name }}" +eth_rpc_url: "{{ poller_eth_rpc_url }}" +payment_service_address: "{{ poller_payment_service_address }}" +network: "{{ poller_network }}" +max_daily_proofs_per_user: {{ poller_max_daily_proofs }} +last_block_fetched_filepath: "{{ poller_last_block_fetched_filepath }}" + +# Metrics +poller_metrics_port: {{ poller_metrics_port }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 new file mode 100644 index 000000000..87944f109 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/grafana/grafana_env.j2 @@ -0,0 +1,14 @@ +GF_USERS_ALLOW_SIGN_UP=false +GF_INSTALL_PLUGINS=yesoreyeram-infinity-datasource +PROMETHEUS_URL={{ grafana_prometheus_url }} +RPC_URL={{ grafana_rpc_url }} +POSTGRES_HOST={{ grafana_postgres_host }} +POSTGRES_PORT={{ grafana_postgres_port }} +POSTGRES_DB={{ grafana_postgres_db }} +POSTGRES_USER={{ grafana_postgres_user }} +POSTGRES_PASSWORD={{ grafana_postgres_password }} +MONITOR_DB_HOST={{ grafana_monitor_host }} +MONITOR_DB_PORT={{ grafana_monitor_port }} +MONITOR_DB_DB={{ grafana_monitor_db }} +MONITOR_DB_USER={{ grafana_postgres_user }} +MONITOR_DB_PASSWORD={{ grafana_postgres_password }} diff --git a/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 new file mode 100644 index 000000000..968cc3790 --- /dev/null +++ 
b/infra/aggregation_mode/ansible/playbooks/templates/prometheus/prometheus_agg_mode.yaml.j2 @@ -0,0 +1,51 @@ +global: + scrape_interval: 15s + +scrape_configs: +- job_name: "gateway-primary-http" + scrape_interval: 60s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:8080"] + labels: + service: "gateway-http" + instance: "primary" + +- job_name: "gateway-secondary-http" + scrape_interval: 60s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:8080"] + labels: + service: "gateway-http" + instance: "secondary" + +- job_name: "gateway-primary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:{{ gateway_metrics_port }}"] + labels: + service: "gateway" + instance: "primary" + +- job_name: "gateway-secondary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:{{ gateway_metrics_port }}"] + labels: + service: "gateway" + instance: "secondary" + +- job_name: "poller-primary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_primary_hostname }}:{{ poller_metrics_port }}"] + labels: + service: "poller" + instance: "primary" + +- job_name: "poller-secondary" + scrape_interval: 15s + static_configs: + - targets: ["{{ gateway_secondary_hostname }}:{{ poller_metrics_port }}"] + labels: + service: "poller" + instance: "secondary" diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 new file mode 100644 index 000000000..4d9724047 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/gateway.service.j2 @@ -0,0 +1,18 @@ +[Unit] +Description=Aggregation Mode Gateway +After=network.target + +[Service] +Type=simple +User={{ ansible_user }} +Group={{ ansible_user }} +WorkingDirectory=/home/{{ ansible_user }}/repos/gateway/aligned_layer/aggregation_mode +ExecStart=/home/{{ ansible_user }}/.cargo/bin/gateway /home/{{ 
ansible_user }}/config/config-agg-mode-gateway.yaml +Restart=always +RestartSec=1 +StartLimitBurst=100 +LimitNOFILE=100000 +AmbientCapabilities=CAP_NET_BIND_SERVICE + +[Install] +WantedBy=multi-user.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 new file mode 100644 index 000000000..a1baeffac --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/poller.service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=Aggregation Mode Payments Poller +After=network.target + +[Service] +Type=simple +WorkingDirectory=/home/{{ ansible_user }}/repos/poller/aligned_layer/aggregation_mode +ExecStart=/home/{{ ansible_user }}/.cargo/bin/payments_poller /home/{{ ansible_user }}/config/config-agg-mode-poller.yaml +Restart=always +RestartSec=1 +StartLimitBurst=100 +LimitNOFILE=100000 + +[Install] +# Installed as a user unit (~/.config/systemd/user, scope: user): the user manager has no multi-user.target, so enable under default.target +WantedBy=default.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 new file mode 100644 index 000000000..cf1563018 --- /dev/null +++ b/infra/aggregation_mode/ansible/playbooks/templates/services/prometheus_agg_mode.service.j2 @@ -0,0 +1,17 @@ +[Unit] +Description=Prometheus +After=network.target + +[Service] +Type=simple +WorkingDirectory=/home/{{ ansible_user }} +ExecStart=/home/{{ ansible_user }}/prometheus-{{ prometheus_version }}.linux-amd64/prometheus \ + --config.file=/home/{{ ansible_user }}/config/prometheus.yaml \ + --storage.tsdb.retention.time=90d +Restart=always +RestartSec=1 +StartLimitBurst=100 + +[Install] +# Installed as a user unit (~/.config/systemd/user, scope: user): the user manager has no multi-user.target, so enable under default.target +WantedBy=default.target diff --git a/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 b/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 new file mode 100644 index 000000000..192f06f07 --- /dev/null +++ 
b/infra/aggregation_mode/ansible/playbooks/templates/sudoers/gateway-service.j2 @@ -0,0 +1,5 @@ +# NOEXEC keeps the pager that systemctl status / journalctl may spawn from being used to start a root shell. +# Expect unpaged output under sudo; pipe it into a pager as the invoking user if needed. +{{ ansible_user }} ALL=(ALL) NOPASSWD: /bin/systemctl start gateway, /bin/systemctl stop gateway, /bin/systemctl restart gateway, NOEXEC: /bin/systemctl status gateway +{{ ansible_user }} ALL=(ALL) NOPASSWD: NOEXEC: /bin/journalctl -u gateway* +{{ ansible_user }} ALL=(ALL) NOPASSWD: NOEXEC: /bin/journalctl --user-unit=gateway*