From 0212f171b4b712d7726bc8c43ecedaba34594bff Mon Sep 17 00:00:00 2001 From: supahcraig Date: Wed, 20 May 2026 16:47:52 -0500 Subject: [PATCH] feat: add GCP Terraform deployment support --- driver-redpanda/deploy/gcp/README.md | 84 +++++ .../deploy/gcp/provision-redpanda-gcp.tf | 315 +++++++++++------- .../deploy/gcp/terraform.tfvars.example | 26 ++ 3 files changed, 306 insertions(+), 119 deletions(-) create mode 100644 driver-redpanda/deploy/gcp/README.md create mode 100644 driver-redpanda/deploy/gcp/terraform.tfvars.example diff --git a/driver-redpanda/deploy/gcp/README.md b/driver-redpanda/deploy/gcp/README.md new file mode 100644 index 00000000..fc56cf92 --- /dev/null +++ b/driver-redpanda/deploy/gcp/README.md @@ -0,0 +1,84 @@ +# Redpanda Benchmark — GCP Deployment + +Deploy Redpanda brokers and OMB benchmark clients on GCP using Terraform + Ansible. + +## Prerequisites + +1. [Terraform](https://developer.hashicorp.com/terraform/install) >= 1.0 +2. [gcloud CLI](https://cloud.google.com/sdk/docs/install) authenticated: + ```bash + gcloud auth application-default login + ``` +3. Compute API enabled in your project: + ```bash + gcloud services enable compute.googleapis.com --project=YOUR_PROJECT_ID + ``` +4. An SSH key pair: + ```bash + ssh-keygen -t rsa -f ~/.ssh/redpanda_gcp + ``` + +## Usage + +```bash +cd driver-redpanda/deploy/gcp + +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars — set project_name, ssh_user, public_key_path at minimum + +terraform init +terraform apply +``` + +Run Ansible from the `deploy/` directory (one level up — `hosts.ini` is written there by Terraform). +`ansible.cfg` hardcodes `~/.ssh/redpanda_aws`, so pass your GCP key explicitly with `--private-key`. + +**Standalone Redpanda cluster:** +```bash +cd .. +ansible-playbook deploy.yaml -i hosts.ini --private-key ~/.ssh/redpanda_gcp +``` + +**Against an existing BYOC/Dedicated cluster** (set `num_instances.redpanda = 0` in tfvars): +```bash +cd .. 
+ansible-playbook deploy.yaml -i hosts.ini \ + --private-key ~/.ssh/redpanda_gcp \ + --ask-become-pass \ + -e "tls_enabled=true sasl_enabled=true sasl_username=SASL_USERNAME sasl_password=SASL_PASSWORD" \ + -e "bootstrapServers=BOOTSTRAP_HOST:9092" +``` + +> `--ask-become-pass` is needed when running Ansible from a local Mac (the playbook has tasks requiring sudo). Omit it when running from a cloud VM as root. + +## VPC Peering (optional) + +Set `byoc_vpc_name` in `terraform.tfvars` to peer the benchmark network with an existing +BYOC network. `terraform apply` creates peering in both directions automatically. + +### IAM requirement + +Your credentials need `compute.networks.addPeering` on both the benchmark project and the +BYOC project. Grant it with: + +```bash +gcloud projects add-iam-policy-binding YOUR_PROJECT_ID \ + --member="user:YOUR_EMAIL" \ + --role="roles/compute.networkAdmin" +``` + +### Fallback: manual peering + +If you cannot obtain IAM access to the BYOC project (e.g. it is managed by Redpanda), +create the return peering manually after `terraform apply`: + +```bash +BENCHMARK_NETWORK=$(terraform output -raw benchmark_network_name) +BENCHMARK_PROJECT=BENCHMARK_PROJECT_ID + +gcloud compute networks peerings create rp-byoc-to-benchmark \ + --network=BYOC_VPC_NAME \ + --peer-project=$BENCHMARK_PROJECT \ + --peer-network=$BENCHMARK_NETWORK \ + --project=BYOC_PROJECT_ID +``` diff --git a/driver-redpanda/deploy/gcp/provision-redpanda-gcp.tf b/driver-redpanda/deploy/gcp/provision-redpanda-gcp.tf index 7e7d911f..9f8ec2be 100644 --- a/driver-redpanda/deploy/gcp/provision-redpanda-gcp.tf +++ b/driver-redpanda/deploy/gcp/provision-redpanda-gcp.tf @@ -1,42 +1,129 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + random = { + version = "~> 3.4.3" + } + http = { + version = "~> 3.0" + } + local = { + version = "~> 2.0" + } + } +} + provider "google" { - project = var.project_name - region = var.region + project = var.project_name + region = var.region +} + +data "http" "myip" { + url = "https://ipv4.icanhazip.com" } 
resource "random_uuid" "cluster" {} locals { - uuid = random_uuid.cluster.result - deployment_id = random_uuid.cluster.result + deployment_id = random_uuid.cluster.result + ssh_key_metadata = "${var.ssh_user}:${file(pathexpand(var.public_key_path))}" +} + +resource "google_compute_network" "benchmark" { + name = "rp-benchmark-${local.deployment_id}" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "benchmark" { + name = "rp-benchmark-subnet-${local.deployment_id}" + ip_cidr_range = var.benchmark_subnet_cidr + region = var.region + network = google_compute_network.benchmark.self_link +} + +resource "google_compute_firewall" "internal_icmp" { + name = "rp-benchmark-internal-icmp-${local.deployment_id}" + network = google_compute_network.benchmark.name + allow { + protocol = "icmp" + } + source_ranges = [var.benchmark_subnet_cidr] + target_tags = ["rp-cluster"] } -resource "google_compute_resource_policy" "redpanda-rp" { - name = "redpanda-rp" +resource "google_compute_firewall" "internal_tcp" { + name = "rp-benchmark-internal-tcp-${local.deployment_id}" + network = google_compute_network.benchmark.name + allow { + protocol = "tcp" + ports = ["0-65535"] + } + source_ranges = [var.benchmark_subnet_cidr] + target_tags = ["rp-cluster"] +} + +resource "google_compute_firewall" "deployer_access" { + name = "rp-benchmark-deployer-${local.deployment_id}" + network = google_compute_network.benchmark.name + allow { + protocol = "tcp" + ports = ["0-65535"] + } + source_ranges = ["${chomp(data.http.myip.response_body)}/32"] + target_tags = ["rp-cluster"] +} + +resource "google_compute_firewall" "monitoring" { + name = "rp-benchmark-monitoring-${local.deployment_id}" + network = google_compute_network.benchmark.name + allow { + protocol = "tcp" + ports = ["9090", "3000"] + } + source_ranges = ["0.0.0.0/0"] + target_tags = ["rp-cluster"] +} + +resource "google_compute_network_peering" "benchmark_to_byoc" { + count = var.byoc_vpc_name != null ? 
1 : 0 + name = "rp-benchmark-to-byoc-${local.deployment_id}" + network = google_compute_network.benchmark.self_link + peer_network = "https://www.googleapis.com/compute/v1/projects/${coalesce(var.gcp_project, var.project_name)}/global/networks/${var.byoc_vpc_name}" +} + +resource "google_compute_network_peering" "byoc_to_benchmark" { + count = var.byoc_vpc_name != null ? 1 : 0 + name = "rp-byoc-to-benchmark-${local.deployment_id}" + network = "https://www.googleapis.com/compute/v1/projects/${coalesce(var.gcp_project, var.project_name)}/global/networks/${var.byoc_vpc_name}" + peer_network = google_compute_network.benchmark.self_link + depends_on = [google_compute_network_peering.benchmark_to_byoc] +} + +# GCP does not give visibility or control over which failure domain a resource is placed into +# (https://issuetracker.google.com/issues/256993209). Separate failure domains are used to +# guarantee separate racks. GCP caps availability_domain_count at 8. +resource "google_compute_resource_policy" "redpanda" { + name = "rp-placement-${local.deployment_id}" region = var.region group_placement_policy { - availability_domain_count = var.ha ? max(3, var.nodes) : 1 + availability_domain_count = var.ha ? min(8, max(3, var.num_instances["redpanda"])) : 1 } count = var.ha ? 1 : 0 } resource "google_compute_instance" "redpanda" { - count = var.nodes + count = var.num_instances["redpanda"] name = "rp-node-${count.index}-${local.deployment_id}" tags = ["rp-cluster", "tf-deployment-${local.deployment_id}"] - zone = "${var.region}-${var.availability_zone[count.index % length(var.availability_zone)]}" - machine_type = var.machine_type - // GCP does not give you visibility nor control over which failure domain a resource has been placed into - // (https://issuetracker.google.com/issues/256993209?pli=1). 
So the only way that we can guarantee that - // specific nodes are in separate racks is to put them into entirely separate failure domains - basically one - // broker per failure domain, and we are limited by the number of failure domains (at the moment 8). - resource_policies = (var.ha && var.nodes <= 8) ? [ - google_compute_resource_policy.redpanda-rp[0].id - ] : null + zone = var.availability_zone[count.index % length(var.availability_zone)] + machine_type = var.instance_types["redpanda"] + resource_policies = (var.ha && var.num_instances["redpanda"] <= 8) ? [google_compute_resource_policy.redpanda[0].id] : null metadata = { - ssh-keys = <