diff --git a/README.md b/README.md index 1c66db1..f204c42 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ The folder `examples` contains the following Terraform implementation examples : | Azure | ~~adb-external-hive-metastore~~ **REMOVED** | This example was removed in February 2026. External Hive metastore has been superseded by [Unity Catalog](https://docs.databricks.com/en/data-governance/unity-catalog/index.html). Use [adb-unity-catalog-basic-demo](examples/adb-unity-catalog-basic-demo/) instead. | | Azure | [adb-kafka](examples/adb-kafka/) | ADB - single node kafka template | | Azure | [adb-private-links](examples/adb-private-links/) | Azure Databricks Private Links | +| Azure | [adb-serverless-appgw-tls-transit](examples/adb-serverless-appgw-tls-transit/) | Serverless → external TLS service (Kafka, etc.) via an Application Gateway v2 TCP/TLS proxy transit + NCC private endpoint | | Azure | [adb-squid-proxy](examples/adb-squid-proxy/) | ADB clusters with HTTP proxy | | Azure | [adb-teradata](examples/adb-teradata/) | ADB with single VM Teradata integration | | Azure | [adb-uc](examples/adb-uc/) | ADB Unity Catalog Process | @@ -77,6 +78,7 @@ The folder `modules` contains the following Terraform modules : | Azure | [adb-with-private-link-standard](modules/adb-with-private-link-standard/) | Provisioning Databricks on Azure with Private Link - Standard deployment | | Azure | [adb-exfiltration-protection](modules/adb-exfiltration-protection/) | A sample implementation of [Data Exfiltration Protection](https://www.databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html) | | Azure | [adb-with-private-links-exfiltration-protection](modules/adb-with-private-links-exfiltration-protection/) | Provisioning Databricks on Azure with Private Link and [Data Exfiltration Protection](https://www.databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html) | +| Azure | 
[adb-serverless-appgw-tls-transit](modules/adb-serverless-appgw-tls-transit/) | Serverless → external TLS service (Kafka, etc.) via an Application Gateway v2 TCP/TLS proxy transit + NCC private endpoint | | Azure | [adb-overwatch-regional-config](modules/adb-overwatch-regional-config/) | Overwatch regional configuration on Azure | | Azure | [adb-overwatch-mws-config](modules/adb-overwatch-mws-config/) | Overwatch multi-workspace deployment on Azure | | Azure | [adb-overwatch-main-ws](modules/adb-overwatch-main-ws/) | Main Overwatch workspace deployment | diff --git a/examples/adb-serverless-appgw-tls-transit/Makefile b/examples/adb-serverless-appgw-tls-transit/Makefile new file mode 100644 index 0000000..653039d --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/Makefile @@ -0,0 +1,7 @@ +.PHONY: docs test_docs + +docs: + terraform-docs -c ../../.terraform-docs.yml . + +test_docs: + terraform-docs -c ../../.terraform-docs.yml --output-check . diff --git a/examples/adb-serverless-appgw-tls-transit/README.md b/examples/adb-serverless-appgw-tls-transit/README.md new file mode 100644 index 0000000..bc42715 --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/README.md @@ -0,0 +1,74 @@ +# Example — Serverless → TLS service via App Gateway v2 TCP/TLS transit + +Deploys the [`adb-serverless-appgw-tls-transit`](../../modules/adb-serverless-appgw-tls-transit) +module: a customer-tenant Application Gateway v2 TCP/TLS proxy that lets +Databricks Serverless reach an external TLS service (Kafka or any TLS-over-TCP +workload) over Azure Private Link, wired to an NCC private endpoint rule. + +## Prerequisites + +* Premium-tier Databricks account; you must be an **account admin**. +* The **`az` CLI authenticated** as that account admin (used for the documented + REST NCC rule + private endpoint approval — see the module README). 
+* A target TLS service reachable from the transit VNet (set `backend_addresses`), + and the FQDNs serverless clients dial (set `serverless_domain_names`). + +## How to use + +1. Copy `terraform.tfvars` and fill in your values. +2. `terraform init` +3. `terraform apply` +4. The module auto-approves the App Gateway private endpoint connection. Confirm + the NCC rule reaches `ESTABLISHED` in the account console, then restart + serverless compute and test connectivity to your service. + + +## Requirements + +| Name | Version | +| ---- | ------- | +| [azapi](#requirement\_azapi) | 2.0.1 | +| [azurerm](#requirement\_azurerm) | >=4.31.0 | +| [databricks](#requirement\_databricks) | >=1.81.1 | +| [null](#requirement\_null) | >=3.2.0 | +| [time](#requirement\_time) | >=0.9.0 | + +## Providers + +No providers. + +## Modules + +| Name | Source | Version | +| ---- | ------ | ------- | +| [adb-serverless-appgw-tls-transit](#module\_adb-serverless-appgw-tls-transit) | ../../modules/adb-serverless-appgw-tls-transit | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +| ---- | ----------- | ---- | ------- | :------: | +| [azure\_region](#input\_azure\_region) | Azure region short name (e.g. australiaeast). Must match your workspace/NCC region. | `string` | n/a | yes | +| [azure\_subscription\_id](#input\_azure\_subscription\_id) | Azure subscription ID to deploy into. | `string` | n/a | yes | +| [backend\_addresses](#input\_backend\_addresses) | IPs (or FQDNs) of the target TLS service, reachable from the transit VNet. | `list(string)` | n/a | yes | +| [databricks\_account\_id](#input\_databricks\_account\_id) | Databricks account ID (UUID). | `string` | n/a | yes | +| [databricks\_workspace\_id](#input\_databricks\_workspace\_id) | Databricks workspace ID to bind the NCC to. | `string` | n/a | yes | +| [serverless\_domain\_names](#input\_serverless\_domain\_names) | FQDNs serverless clients dial (e.g. 
Kafka bootstrap + wildcard). Max 10. | `list(string)` | n/a | yes | +| [databricks\_host](#input\_databricks\_host) | Databricks account console host. | `string` | `"https://accounts.azuredatabricks.net"` | no | +| [listener\_port](#input\_listener\_port) | TCP/TLS port (e.g. 9092/9094 for Kafka). | `number` | `9092` | no | +| [rg\_name](#input\_rg\_name) | Name of the resource group to create for the transit. | `string` | `"rg-appgw-tls-transit"` | no | +| [tags](#input\_tags) | Tags applied to created resources. | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +| ---- | ----------- | +| [appgw\_frontend\_config\_name](#output\_appgw\_frontend\_config\_name) | Frontend config name = the NCC rule group\_id. | +| [appgw\_id](#output\_appgw\_id) | Resource ID of the Application Gateway. | +| [ncc\_id](#output\_ncc\_id) | Databricks NCC ID. | +| [serverless\_domain\_names](#output\_serverless\_domain\_names) | FQDNs serverless clients dial (registered in the NCC rule). | +| [transit\_vnet\_id](#output\_transit\_vnet\_id) | Transit VNet ID — peer your target service network here or place a private endpoint. | + diff --git a/examples/adb-serverless-appgw-tls-transit/main.tf b/examples/adb-serverless-appgw-tls-transit/main.tf new file mode 100644 index 0000000..f2b1403 --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/main.tf @@ -0,0 +1,18 @@ +module "adb-serverless-appgw-tls-transit" { + source = "../../modules/adb-serverless-appgw-tls-transit" + + azure_subscription_id = var.azure_subscription_id + azure_region = var.azure_region + rg_name = var.rg_name + databricks_host = var.databricks_host + databricks_account_id = var.databricks_account_id + databricks_workspace_id = var.databricks_workspace_id + + # Target TLS service (e.g. Kafka brokers) reachable from the transit VNet, + # and the FQDNs serverless clients dial. 
+ backend_addresses = var.backend_addresses + serverless_domain_names = var.serverless_domain_names + listener_port = var.listener_port + + tags = var.tags +} diff --git a/examples/adb-serverless-appgw-tls-transit/outputs.tf b/examples/adb-serverless-appgw-tls-transit/outputs.tf new file mode 100644 index 0000000..e17c238 --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/outputs.tf @@ -0,0 +1,24 @@ +output "appgw_id" { + description = "Resource ID of the Application Gateway." + value = module.adb-serverless-appgw-tls-transit.appgw_id +} + +output "appgw_frontend_config_name" { + description = "Frontend config name = the NCC rule group_id." + value = module.adb-serverless-appgw-tls-transit.appgw_frontend_config_name +} + +output "ncc_id" { + description = "Databricks NCC ID." + value = module.adb-serverless-appgw-tls-transit.ncc_id +} + +output "serverless_domain_names" { + description = "FQDNs serverless clients dial (registered in the NCC rule)." + value = module.adb-serverless-appgw-tls-transit.serverless_domain_names +} + +output "transit_vnet_id" { + description = "Transit VNet ID — peer your target service network here or place a private endpoint." 
+ value = module.adb-serverless-appgw-tls-transit.transit_vnet_id +} diff --git a/examples/adb-serverless-appgw-tls-transit/providers.tf b/examples/adb-serverless-appgw-tls-transit/providers.tf new file mode 100644 index 0000000..7ea752f --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/providers.tf @@ -0,0 +1,39 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">=4.31.0" + } + databricks = { + source = "databricks/databricks" + version = ">=1.81.1" + } + azapi = { + source = "Azure/azapi" + version = "2.0.1" + } + null = { + source = "hashicorp/null" + version = ">=3.2.0" + } + time = { + source = "hashicorp/time" + version = ">=0.9.0" + } + } +} + +provider "azurerm" { + subscription_id = var.azure_subscription_id + features {} +} + +provider "databricks" { + alias = "accounts" + host = var.databricks_host + account_id = var.databricks_account_id +} + +provider "azapi" { + subscription_id = var.azure_subscription_id +} diff --git a/examples/adb-serverless-appgw-tls-transit/terraform.tfvars b/examples/adb-serverless-appgw-tls-transit/terraform.tfvars new file mode 100644 index 0000000..3f77e6c --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/terraform.tfvars @@ -0,0 +1,24 @@ +azure_subscription_id = "00000000-0000-0000-0000-000000000000" +azure_region = "australiaeast" +rg_name = "rg-appgw-tls-transit" + +databricks_host = "https://accounts.azuredatabricks.net" +databricks_account_id = "00000000-0000-0000-0000-000000000000" +databricks_workspace_id = "1234567890123456" + +# Target TLS service (e.g. Kafka brokers) reachable from the transit VNet. +backend_addresses = ["10.230.3.10"] + +# FQDNs serverless clients dial. For Confluent Cloud this is the cluster +# bootstrap FQDN + a wildcard for per-broker re-resolution. Max 10. 
+serverless_domain_names = [ + "lkc-xxxxx.domxxxxx.australiaeast.azure.confluent.cloud", + "*.domxxxxx.australiaeast.azure.confluent.cloud", +] + +listener_port = 9092 + +tags = { + Environment = "dev" + Workload = "serverless-kafka-privatelink" +} diff --git a/examples/adb-serverless-appgw-tls-transit/variables.tf b/examples/adb-serverless-appgw-tls-transit/variables.tf new file mode 100644 index 0000000..f3c9b8e --- /dev/null +++ b/examples/adb-serverless-appgw-tls-transit/variables.tf @@ -0,0 +1,53 @@ +variable "azure_subscription_id" { + type = string + description = "Azure subscription ID to deploy into." +} + +variable "azure_region" { + type = string + description = "Azure region short name (e.g. australiaeast). Must match your workspace/NCC region." +} + +variable "rg_name" { + type = string + description = "Name of the resource group to create for the transit." + default = "rg-appgw-tls-transit" +} + +variable "databricks_host" { + type = string + description = "Databricks account console host." + default = "https://accounts.azuredatabricks.net" +} + +variable "databricks_account_id" { + type = string + description = "Databricks account ID (UUID)." +} + +variable "databricks_workspace_id" { + type = string + description = "Databricks workspace ID to bind the NCC to." +} + +variable "backend_addresses" { + type = list(string) + description = "IPs (or FQDNs) of the target TLS service, reachable from the transit VNet." +} + +variable "serverless_domain_names" { + type = list(string) + description = "FQDNs serverless clients dial (e.g. Kafka bootstrap + wildcard). Max 10." +} + +variable "listener_port" { + type = number + description = "TCP/TLS port (e.g. 9092/9094 for Kafka)." + default = 9092 +} + +variable "tags" { + type = map(string) + description = "Tags applied to created resources." 
+ default = {} +} diff --git a/modules/adb-serverless-appgw-tls-transit/Makefile b/modules/adb-serverless-appgw-tls-transit/Makefile new file mode 100644 index 0000000..653039d --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/Makefile @@ -0,0 +1,7 @@ +.PHONY: docs test_docs + +docs: + terraform-docs -c ../../.terraform-docs.yml . + +test_docs: + terraform-docs -c ../../.terraform-docs.yml --output-check . diff --git a/modules/adb-serverless-appgw-tls-transit/README.md b/modules/adb-serverless-appgw-tls-transit/README.md new file mode 100644 index 0000000..62c0ba5 --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/README.md @@ -0,0 +1,156 @@ +# Serverless → TLS service via Application Gateway v2 TCP/TLS transit + +This module provides private connectivity from **Databricks Serverless compute** +to an external **TLS-over-TCP service behind Azure Private Link** — generic for +**Apache Kafka** (Confluent Cloud, self-hosted, Aiven, MSK-on-Azure-peer) and +any other TLS workload — using an **Azure Application Gateway v2 TCP/TLS proxy** +as a customer-tenant transit. + +``` +Databricks Serverless ──NCC PE rule──▶ App Gateway v2 (TCP/TLS listener, Private Link) + │ passes TLS through (no termination) + ▼ + backend = your TLS service (Kafka, etc.) +``` + +The TCP/TLS listener **passes TLS through end-to-end** — the App Gateway never +terminates TLS or sees plaintext, so client↔broker (m)TLS is preserved. + +## Why a transit is required + +Databricks Serverless reaches external services only via **NCC private endpoint +rules**, which target an Azure **resource ID** (not a Private Link alias). +SaaS Kafka (e.g. Confluent Cloud) publishes a cross-tenant PLS *alias*, and an +Azure Standard Load Balancer can't use private-endpoint IPs as backends — so a +customer-tenant L4 proxy is required in between. App Gateway v2's TCP/TLS proxy +(GA 2025-11-26) is the managed-PaaS implementation of that proxy. 
+ +## Why the NCC rule uses the REST API (not the Terraform resource) + +Per [Microsoft Learn](https://learn.microsoft.com/en-us/azure/databricks/security/network/serverless-network-security/serverless-private-link#configure-private-link-to-azure-app-gateway-v2), +an Application Gateway target **requires `resource_id` + `group_id` + `domain_names` +together**, and **must be configured via the Network Connectivity Configurations +REST API** (the account-console UI doesn't support App Gateway). The Terraform +`databricks_mws_ncc_private_endpoint_rule` resource forbids `group_id` alongside +`domain_names`, so it cannot express this case. This module therefore creates the +rule via `az` + `curl` (a `null_resource`) — this is the **documented method**, +not a workaround. The `group_id` is the App Gateway **frontend IP configuration +name** that carries the Private Link configuration (`frontend-public` here). + +## Module content + +* Resource group, transit VNet, App Gateway subnet, and App Gateway Private Link subnet. +* Public IP (required by the Standard_v2 SKU). +* Application Gateway v2 (via `azapi`) with a **TCP listener**, a backend pool of + your target addresses, TCP backend settings, and a Private Link configuration. +* Databricks NCC + workspace binding. +* The NCC **private endpoint rule** for the App Gateway, via the documented REST API. +* (Optional) auto-approval of the inbound private endpoint connection on the App Gateway. + +## Prerequisites + +* Premium-tier Databricks account; you must be an **account admin**. +* The **`az` CLI authenticated** as that account admin on the machine running + terraform (used for the REST NCC rule + PE approval). +* Connectivity from the App Gateway VNet to your `backend_addresses` (in-VNet, + VNet peering, or a private endpoint to the provider's PLS — your responsibility). +* For Kafka: the broker `advertised.listeners` must return FQDNs that are in + `serverless_domain_names`, or the second-hop connection fails. 
+ +## Known limitations + +* The REST-created NCC rule is **create-only** — `terraform destroy` does not + remove it, and changing `serverless_domain_names` requires manual cleanup (use + the PATCH/DELETE operations in the [NCC API](https://docs.databricks.com/api/azure/account/networkconnectivity)). +* Not exercised by `terraform validate` (the rule + approval are `local-exec`). + +## How to use + +> **Note** +> A deployment example using this module can be found in +> [examples/adb-serverless-appgw-tls-transit](../../examples/adb-serverless-appgw-tls-transit) + +1. Reference this module using one of the different [module source types](https://developer.hashicorp.com/terraform/language/modules/sources) +2. Add a `variables.tf` with the same content as [variables.tf](variables.tf) +3. Add a `terraform.tfvars` file and provide values to each defined variable +4. Add an `outputs.tf` file +5. Run `terraform init` +6. Run `terraform apply` + + +## Requirements + +| Name | Version | +| ---- | ------- | +| [azapi](#requirement\_azapi) | 2.0.1 | +| [azurerm](#requirement\_azurerm) | >=4.31.0 | +| [databricks](#requirement\_databricks) | >=1.81.1 | +| [null](#requirement\_null) | >=3.2.0 | +| [time](#requirement\_time) | >=0.9.0 | + +## Providers + +| Name | Version | +| ---- | ------- | +| [azapi](#provider\_azapi) | 2.0.1 | +| [azurerm](#provider\_azurerm) | >=4.31.0 | +| [databricks.accounts](#provider\_databricks.accounts) | >=1.81.1 | +| [null](#provider\_null) | >=3.2.0 | +| [time](#provider\_time) | >=0.9.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +| ---- | ---- | +| [azapi_resource.appgw](https://registry.terraform.io/providers/Azure/azapi/2.0.1/docs/resources/resource) | resource | +| [azurerm_public_ip.appgw](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/public_ip) | resource | +| [azurerm_resource_group.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/resource_group) | resource | +| [azurerm_subnet.appgw](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet) | resource | +| [azurerm_subnet.appgw_pls](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet) | resource | +| [azurerm_virtual_network.transit](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/virtual_network) | resource | +| [databricks_mws_ncc_binding.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_ncc_binding) | resource | +| [databricks_mws_network_connectivity_config.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_network_connectivity_config) | resource | +| [null_resource.approve_pe_on_appgw](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [null_resource.ncc_pe_rule_appgw](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [time_sleep.wait_for_pe](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +| ---- | ----------- | ---- | ------- | :------: | +| [azure\_region](#input\_azure\_region) | Azure region short name (e.g. australiaeast). Must match your Databricks workspace/NCC region. | `string` | n/a | yes | +| [azure\_subscription\_id](#input\_azure\_subscription\_id) | Azure subscription ID to deploy the transit into. 
| `string` | n/a | yes | +| [backend\_addresses](#input\_backend\_addresses) | Backend target addresses reachable from the App Gateway VNet — the IPs (or FQDNs) of the TLS service (e.g. Kafka brokers, an internal load balancer, or a private endpoint to a provider PLS). You are responsible for connectivity from the App Gateway VNet to these addresses (in-VNet, VNet peering, or a private endpoint). | `list(string)` | n/a | yes | +| [databricks\_account\_id](#input\_databricks\_account\_id) | Databricks account ID (UUID). | `string` | n/a | yes | +| [databricks\_workspace\_id](#input\_databricks\_workspace\_id) | Databricks workspace ID to bind the NCC to. | `string` | n/a | yes | +| [rg\_name](#input\_rg\_name) | Name of the resource group to create for the transit (VNet, App Gateway, public IP). | `string` | n/a | yes | +| [serverless\_domain\_names](#input\_serverless\_domain\_names) | FQDNs that Databricks Serverless clients will dial (e.g. Kafka bootstrap + per-broker/wildcard FQDNs). NCC injects DNS so these resolve to the Databricks-managed private endpoint. Max 10 per rule. | `list(string)` | n/a | yes | +| [appgw\_capacity](#input\_appgw\_capacity) | Fixed instance capacity for the Application Gateway v2 (Standard\_v2). | `number` | `2` | no | +| [appgw\_frontend\_private\_ip](#input\_appgw\_frontend\_private\_ip) | Static private IP for the App Gateway private frontend (must be inside appgw\_subnet\_prefix). | `string` | `"10.230.1.100"` | no | +| [appgw\_name](#input\_appgw\_name) | Name of the Application Gateway. | `string` | `"appgw-serverless-transit"` | no | +| [appgw\_pls\_subnet\_prefix](#input\_appgw\_pls\_subnet\_prefix) | Address prefix for the Application Gateway Private Link subnet (hosts the PL config IP configuration). | `string` | `"10.230.2.0/24"` | no | +| [appgw\_subnet\_prefix](#input\_appgw\_subnet\_prefix) | Address prefix for the Application Gateway subnet. 
| `string` | `"10.230.1.0/24"` | no | +| [auto\_approve\_private\_endpoint](#input\_auto\_approve\_private\_endpoint) | Automatically approve the Databricks private endpoint connection on the App Gateway (via az CLI). Set false to approve manually in the Azure portal (NCC docs Step 4). | `bool` | `true` | no | +| [backend\_port](#input\_backend\_port) | Backend port to forward to. Defaults to listener\_port when null. | `number` | `null` | no | +| [databricks\_host](#input\_databricks\_host) | Databricks account console host. The NCC resources require an account-level provider. | `string` | `"https://accounts.azuredatabricks.net"` | no | +| [listener\_port](#input\_listener\_port) | TCP port the TLS service listens on and that clients connect to (e.g. 9092/9094 for Kafka). | `number` | `9092` | no | +| [ncc\_name](#input\_ncc\_name) | Name for the Network Connectivity Configuration. | `string` | `"ncc-appgw-transit"` | no | +| [tags](#input\_tags) | Tags applied to created resources. | `map(string)` | `{}` | no | +| [vnet\_address\_space](#input\_vnet\_address\_space) | Address space for the transit VNet. | `list(string)` | <pre>[<br/>  "10.230.0.0/16"<br/>]</pre> | no | + +## Outputs + +| Name | Description | +| ---- | ----------- | +| [appgw\_frontend\_config\_name](#output\_appgw\_frontend\_config\_name) | Frontend IP configuration name that carries the Private Link config — this is the group\_id used by the NCC private endpoint rule. | +| [appgw\_id](#output\_appgw\_id) | Resource ID of the Application Gateway. | +| [appgw\_name](#output\_appgw\_name) | Name of the Application Gateway. | +| [ncc\_id](#output\_ncc\_id) | Databricks Network Connectivity Configuration ID. | +| [public\_ip\_address](#output\_public\_ip\_address) | Public IP of the Application Gateway (required by the Standard\_v2 SKU). | +| [serverless\_domain\_names](#output\_serverless\_domain\_names) | FQDNs registered in the NCC rule. Serverless clients dial these; NCC injects DNS to the Databricks-managed private endpoint. | +| [transit\_vnet\_id](#output\_transit\_vnet\_id) | Resource ID of the transit VNet (peer your target service's network to this, or place a private endpoint here). | + diff --git a/modules/adb-serverless-appgw-tls-transit/appgw.tf b/modules/adb-serverless-appgw-tls-transit/appgw.tf new file mode 100644 index 0000000..418ceec --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/appgw.tf @@ -0,0 +1,129 @@ +# ============================================================================= +# Application Gateway v2 — TCP/TLS proxy listener + native Private Link +# +# The azapi provider is used because azurerm does not expose TCP listeners. +# The TCP/TLS listener passes TLS through end-to-end (it does NOT terminate +# TLS) — the App Gateway sees only encrypted bytes. Works for Kafka and any +# other TLS-over-TCP workload. +# +# App GW v2 (Standard_v2) requires a public IP frontend unless the subscription +# has the EnableApplicationGatewayNetworkIsolation feature registered. The +# public IP is created to satisfy the SKU; the Private Link surface (used by the +# Databricks private endpoint) is attached to that frontend. 
+# ============================================================================= + +locals { + appgw_base_id = "${azurerm_resource_group.this.id}/providers/Microsoft.Network/applicationGateways/${var.appgw_name}" + frontend_port = "port-${var.listener_port}" +} + +resource "azurerm_public_ip" "appgw" { + name = "pip-${var.appgw_name}" + location = var.azure_region + resource_group_name = azurerm_resource_group.this.name + allocation_method = "Static" + sku = "Standard" + tags = local.tags +} + +resource "azapi_resource" "appgw" { + type = "Microsoft.Network/applicationGateways@2024-05-01" + name = var.appgw_name + location = var.azure_region + parent_id = azurerm_resource_group.this.id + tags = local.tags + + body = { + properties = { + sku = { + name = "Standard_v2" + tier = "Standard_v2" + capacity = var.appgw_capacity + } + + gatewayIPConfigurations = [{ + name = "appgw-ip-config" + properties = { subnet = { id = azurerm_subnet.appgw.id } } + }] + + frontendIPConfigurations = [ + { + name = local.frontend_pl_name + properties = { + publicIPAddress = { id = azurerm_public_ip.appgw.id } + privateLinkConfiguration = { id = "${local.appgw_base_id}/privateLinkConfigurations/${local.pl_config_name}" } + } + }, + { + name = "frontend-private" + properties = { + privateIPAllocationMethod = "Static" + privateIPAddress = var.appgw_frontend_private_ip + subnet = { id = azurerm_subnet.appgw.id } + } + } + ] + + frontendPorts = [{ + name = local.frontend_port + properties = { port = var.listener_port } + }] + + backendAddressPools = [{ + name = "backend-pool" + properties = { + backendAddresses = [for addr in var.backend_addresses : { (can(cidrhost("${addr}/32", 0)) ? "ipAddress" : "fqdn") = addr }] # IP literal -> ipAddress, hostname -> fqdn + } + }] + + backendSettingsCollection = [{ + name = "backend-settings-tcp" + properties = { + port = local.backend_port + protocol = "Tcp" + timeout = 60 + } + }] + + listeners = [{ + name = local.listener_name + properties = { + frontendIPConfiguration = { id = 
"${local.appgw_base_id}/frontendIPConfigurations/${local.frontend_pl_name}" } + frontendPort = { id = "${local.appgw_base_id}/frontendPorts/${local.frontend_port}" } + protocol = "Tcp" + } + }] + + routingRules = [{ + name = "rule-tcp" + properties = { + ruleType = "Basic" + priority = 100 + listener = { id = "${local.appgw_base_id}/listeners/${local.listener_name}" } + backendAddressPool = { id = "${local.appgw_base_id}/backendAddressPools/backend-pool" } + backendSettings = { id = "${local.appgw_base_id}/backendSettingsCollection/backend-settings-tcp" } + } + }] + + privateLinkConfigurations = [{ + name = local.pl_config_name + properties = { + ipConfigurations = [{ + name = "pl-ipconfig" + properties = { + privateIPAllocationMethod = "Dynamic" + primary = true + subnet = { id = azurerm_subnet.appgw_pls.id } + } + }] + } + }] + } + } + + depends_on = [ + azurerm_subnet.appgw, + azurerm_subnet.appgw_pls, + azurerm_public_ip.appgw, + ] +} diff --git a/modules/adb-serverless-appgw-tls-transit/ncc.tf b/modules/adb-serverless-appgw-tls-transit/ncc.tf new file mode 100644 index 0000000..397be98 --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/ncc.tf @@ -0,0 +1,118 @@ +# ============================================================================= +# Databricks NCC + workspace binding + private endpoint rule +# +# IMPORTANT: For an Application Gateway target, the NCC private endpoint rule +# must be created via the Network Connectivity Configurations REST API — this is +# the method documented by Microsoft Learn, not a workaround. App Gateway v2 +# requires resource_id + group_id + domain_names together, and the Terraform +# databricks_mws_ncc_private_endpoint_rule resource forbids group_id alongside +# domain_names. So we POST the rule via az + curl. group_id is the frontend IP +# configuration name that carries the Private Link configuration. 
+# +# Ref: https://learn.microsoft.com/en-us/azure/databricks/security/network/serverless-network-security/serverless-private-link#configure-private-link-to-azure-app-gateway-v2 +# +# Requires the az CLI to be authenticated as a Databricks account admin on the +# machine running terraform. +# ============================================================================= + +resource "databricks_mws_network_connectivity_config" "this" { + provider = databricks.accounts + name = var.ncc_name + region = var.azure_region +} + +resource "databricks_mws_ncc_binding" "this" { + provider = databricks.accounts + network_connectivity_config_id = databricks_mws_network_connectivity_config.this.network_connectivity_config_id + workspace_id = var.databricks_workspace_id +} + +# Create the App Gateway private endpoint rule via the documented REST API. +resource "null_resource" "ncc_pe_rule_appgw" { + triggers = { + ncc_id = databricks_mws_network_connectivity_config.this.network_connectivity_config_id + appgw_id = azapi_resource.appgw.id + group_id = local.frontend_pl_name + domain_names = join(",", var.serverless_domain_names) + account_id = var.databricks_account_id + host = var.databricks_host + } + + provisioner "local-exec" { + interpreter = ["bash", "-c"] + command = <<-EOT + set -e + TOKEN=$(az account get-access-token --resource "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d" --query accessToken -o tsv) + [ -n "$TOKEN" ] || { echo "ERROR: could not get a Databricks access token via az"; exit 1; } + NCC_ID="${databricks_mws_network_connectivity_config.this.network_connectivity_config_id}" + RESP=$(curl -sw "\n%%{http_code}" -X POST \ + "${var.databricks_host}/api/2.0/accounts/${var.databricks_account_id}/network-connectivity-configs/$NCC_ID/private-endpoint-rules" \ + -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \ + -d 
'{"resource_id":"${azapi_resource.appgw.id}","group_id":"${local.frontend_pl_name}","domain_names":${jsonencode(var.serverless_domain_names)}}') + CODE=$(echo "$RESP" | tail -1) + BODY=$(echo "$RESP" | sed '$d') + if [ "$CODE" -ge 200 ] && [ "$CODE" -lt 300 ]; then + echo "NCC private endpoint rule created:" + echo "$BODY" | python3 -m json.tool 2>/dev/null || echo "$BODY" + else + echo "ERROR: HTTP $CODE"; echo "$BODY"; exit 1 + fi + EOT + } + + depends_on = [ + databricks_mws_ncc_binding.this, + azapi_resource.appgw, + ] +} + +# Give Databricks time to provision its private endpoint before approving. +resource "time_sleep" "wait_for_pe" { + depends_on = [null_resource.ncc_pe_rule_appgw] + create_duration = "90s" +} + +# (Optional) Approve the inbound private endpoint connection on the App Gateway. +# Set auto_approve_private_endpoint = false to approve manually in the portal. +resource "null_resource" "approve_pe_on_appgw" { + count = var.auto_approve_private_endpoint ? 1 : 0 + depends_on = [time_sleep.wait_for_pe] + + triggers = { + appgw_name = var.appgw_name + rg_name = azurerm_resource_group.this.name + } + + provisioner "local-exec" { + interpreter = ["bash", "-c"] + command = <<-EOT + set -e + echo "Looking for pending PE connections on App Gateway ${var.appgw_name}..." 
+ for i in 1 2 3 4 5 6; do + PENDING=$(az network application-gateway private-link list \ + --gateway-name "${var.appgw_name}" \ + --resource-group "${azurerm_resource_group.this.name}" \ + --query "[0].privateEndpointConnections[?privateLinkServiceConnectionState.status=='Pending']" \ + -o json 2>/dev/null || echo "[]") + COUNT=$(echo "$PENDING" | python3 -c "import sys, json; raw = sys.stdin.read().strip(); print(len(json.loads(raw) or []) if raw else 0)") # empty/null az output counts as 0 instead of aborting under set -e + if [ "$COUNT" -gt 0 ]; then + echo "$PENDING" | python3 -c " +import sys, json, subprocess +for c in json.load(sys.stdin): + name = c['name'] + print(f'Approving {name}') + subprocess.run(['az','network','application-gateway','private-link','connection','update', + '--gateway-name','${var.appgw_name}', + '--resource-group','${azurerm_resource_group.this.name}', + '--name', name, '--connection-status','Approved'], check=True) +" + exit 0 + fi + echo " no pending connection yet, sleeping 30s ($i/6)..." + sleep 30 + done + echo "WARN: no pending PE connection appeared after ~3 min. Approve manually:" + echo " az network application-gateway private-link list --gateway-name ${var.appgw_name} --resource-group ${azurerm_resource_group.this.name} -o jsonc" + EOT + } +} diff --git a/modules/adb-serverless-appgw-tls-transit/network.tf b/modules/adb-serverless-appgw-tls-transit/network.tf new file mode 100644 index 0000000..ad9fc9e --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/network.tf @@ -0,0 +1,47 @@ +locals { + # The frontend that carries the Private Link configuration. This name is also + # the NCC private-endpoint-rule group_id (see ncc.tf) — they must match. 
+ frontend_pl_name = "frontend-public" + pl_config_name = "pl-config" + listener_name = "listener-tcp" + backend_port = coalesce(var.backend_port, var.listener_port) + + default_tags = { + ManagedBy = "terraform" + Module = "adb-serverless-appgw-tls-transit" + } + tags = merge(local.default_tags, var.tags) +} + +resource "azurerm_resource_group" "this" { + name = var.rg_name + location = var.azure_region + tags = local.tags +} + +resource "azurerm_virtual_network" "transit" { + name = "vnet-${var.appgw_name}" + location = var.azure_region + resource_group_name = azurerm_resource_group.this.name + address_space = var.vnet_address_space + tags = local.tags +} + +# Dedicated subnet for the Application Gateway. +resource "azurerm_subnet" "appgw" { + name = "snet-appgw" + resource_group_name = azurerm_resource_group.this.name + virtual_network_name = azurerm_virtual_network.transit.name + address_prefixes = [var.appgw_subnet_prefix] +} + +# Subnet hosting the App Gateway Private Link configuration IP. PL network +# policies must be disabled here. +resource "azurerm_subnet" "appgw_pls" { + name = "snet-appgw-pls" + resource_group_name = azurerm_resource_group.this.name + virtual_network_name = azurerm_virtual_network.transit.name + address_prefixes = [var.appgw_pls_subnet_prefix] + + private_link_service_network_policies_enabled = false +} diff --git a/modules/adb-serverless-appgw-tls-transit/outputs.tf b/modules/adb-serverless-appgw-tls-transit/outputs.tf new file mode 100644 index 0000000..2a54d03 --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/outputs.tf @@ -0,0 +1,34 @@ +output "appgw_id" { + description = "Resource ID of the Application Gateway." + value = azapi_resource.appgw.id +} + +output "appgw_name" { + description = "Name of the Application Gateway." 
+ value = var.appgw_name +} + +output "appgw_frontend_config_name" { + description = "Frontend IP configuration name that carries the Private Link config — this is the group_id used by the NCC private endpoint rule." + value = local.frontend_pl_name +} + +output "public_ip_address" { + description = "Public IP of the Application Gateway (required by the Standard_v2 SKU)." + value = azurerm_public_ip.appgw.ip_address +} + +output "ncc_id" { + description = "Databricks Network Connectivity Configuration ID." + value = databricks_mws_network_connectivity_config.this.network_connectivity_config_id +} + +output "serverless_domain_names" { + description = "FQDNs registered in the NCC rule. Serverless clients dial these; NCC injects DNS to the Databricks-managed private endpoint." + value = var.serverless_domain_names +} + +output "transit_vnet_id" { + description = "Resource ID of the transit VNet (peer your target service's network to this, or place a private endpoint here)." + value = azurerm_virtual_network.transit.id +} diff --git a/modules/adb-serverless-appgw-tls-transit/providers.tf b/modules/adb-serverless-appgw-tls-transit/providers.tf new file mode 100644 index 0000000..bad1edc --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/providers.tf @@ -0,0 +1,48 @@ +# We strongly recommend using the required_providers block to set the +# provider sources and versions being used +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">=4.31.0" + } + databricks = { + source = "databricks/databricks" + version = ">=1.81.1" + } + # azapi is required because the azurerm provider does not expose the + # Application Gateway v2 TCP/TLS listener configuration. + azapi = { + source = "Azure/azapi" + version = "2.0.1" + } + # null + time drive the REST-based NCC private endpoint rule (the documented + # Application Gateway method — see README) and its propagation wait. 
+ null = { + source = "hashicorp/null" + version = ">=3.2.0" + } + time = { + source = "hashicorp/time" + version = ">=0.9.0" + } + } +} + +# Configure the Microsoft Azure Provider +provider "azurerm" { + subscription_id = var.azure_subscription_id + features {} +} + +# Account-level Databricks provider (required for NCC resources). +provider "databricks" { + alias = "accounts" + host = var.databricks_host + account_id = var.databricks_account_id +} + +# AzAPI provider for the Application Gateway v2 TCP listener. +provider "azapi" { + subscription_id = var.azure_subscription_id +} diff --git a/modules/adb-serverless-appgw-tls-transit/variables.tf b/modules/adb-serverless-appgw-tls-transit/variables.tf new file mode 100644 index 0000000..563c2e8 --- /dev/null +++ b/modules/adb-serverless-appgw-tls-transit/variables.tf @@ -0,0 +1,136 @@ +# ============================================================================= +# Account / subscription +# ============================================================================= + +variable "azure_subscription_id" { + type = string + description = "Azure subscription ID to deploy the transit into." +} + +variable "azure_region" { + type = string + description = "Azure region short name (e.g. australiaeast). Must match your Databricks workspace/NCC region." +} + +variable "rg_name" { + type = string + description = "Name of the resource group to create for the transit (VNet, App Gateway, public IP)." +} + +variable "databricks_host" { + type = string + description = "Databricks account console host. The NCC resources require an account-level provider." + default = "https://accounts.azuredatabricks.net" +} + +variable "databricks_account_id" { + type = string + description = "Databricks account ID (UUID)." +} + +variable "databricks_workspace_id" { + type = string + description = "Databricks workspace ID to bind the NCC to." 
+} + +# ============================================================================= +# Target service (generic — Kafka brokers or any TLS workload) +# ============================================================================= + +variable "backend_addresses" { + type = list(string) + description = "Backend target addresses reachable from the App Gateway VNet — the IPs (or FQDNs) of the TLS service (e.g. Kafka brokers, an internal load balancer, or a private endpoint to a provider PLS). You are responsible for connectivity from the App Gateway VNet to these addresses (in-VNet, VNet peering, or a private endpoint)." + + validation { + condition = length(var.backend_addresses) > 0 + error_message = "Provide at least one backend address." + } +} + +variable "serverless_domain_names" { + type = list(string) + description = "FQDNs that Databricks Serverless clients will dial (e.g. Kafka bootstrap + per-broker/wildcard FQDNs). NCC injects DNS so these resolve to the Databricks-managed private endpoint. Max 10 per rule." + + validation { + condition = length(var.serverless_domain_names) > 0 && length(var.serverless_domain_names) <= 10 + error_message = "Provide between 1 and 10 domain names (Azure NCC limit is 10 per rule)." + } +} + +variable "listener_port" { + type = number + description = "TCP port the TLS service listens on and that clients connect to (e.g. 9092/9094 for Kafka)." + default = 9092 +} + +variable "backend_port" { + type = number + description = "Backend port to forward to. Defaults to listener_port when null." + default = null +} + +# ============================================================================= +# Networking +# ============================================================================= + +variable "vnet_address_space" { + type = list(string) + description = "Address space for the transit VNet." 
+ default = ["10.230.0.0/16"] +} + +variable "appgw_subnet_prefix" { + type = string + description = "Address prefix for the Application Gateway subnet." + default = "10.230.1.0/24" +} + +variable "appgw_pls_subnet_prefix" { + type = string + description = "Address prefix for the Application Gateway Private Link subnet (hosts the PL config IP configuration)." + default = "10.230.2.0/24" +} + +variable "appgw_frontend_private_ip" { + type = string + description = "Static private IP for the App Gateway private frontend (must be inside appgw_subnet_prefix)." + default = "10.230.1.100" +} + +# ============================================================================= +# App Gateway +# ============================================================================= + +variable "appgw_name" { + type = string + description = "Name of the Application Gateway." + default = "appgw-serverless-transit" +} + +variable "appgw_capacity" { + type = number + description = "Fixed instance capacity for the Application Gateway v2 (Standard_v2)." + default = 2 +} + +# ============================================================================= +# NCC +# ============================================================================= + +variable "ncc_name" { + type = string + description = "Name for the Network Connectivity Configuration." + default = "ncc-appgw-transit" +} + +variable "auto_approve_private_endpoint" { + type = bool + description = "Automatically approve the Databricks private endpoint connection on the App Gateway (via az CLI). Set false to approve manually in the Azure portal (NCC docs Step 4)." + default = true +} + +variable "tags" { + type = map(string) + description = "Tags applied to created resources." + default = {} +}