diff --git a/README.md b/README.md index 1c66db1..4d110c3 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ The folder `examples` contains the following Terraform implementation examples : | Azure | ~~adb-external-hive-metastore~~ **REMOVED** | This example was removed in February 2026. External Hive metastore has been superseded by [Unity Catalog](https://docs.databricks.com/en/data-governance/unity-catalog/index.html). Use [adb-unity-catalog-basic-demo](examples/adb-unity-catalog-basic-demo/) instead. | | Azure | [adb-kafka](examples/adb-kafka/) | ADB - single node kafka template | | Azure | [adb-private-links](examples/adb-private-links/) | Azure Databricks Private Links | +| Azure | [adb-service-direct-private-endpoint](examples/adb-service-direct-private-endpoint/) | Inbound "service-direct" Private Link to performance-intensive services (Zerobus Ingest, Lakebase Autoscaling) — Public Preview | | Azure | [adb-squid-proxy](examples/adb-squid-proxy/) | ADB clusters with HTTP proxy | | Azure | [adb-teradata](examples/adb-teradata/) | ADB with single VM Teradata integration | | Azure | [adb-uc](examples/adb-uc/) | ADB Unity Catalog Process | @@ -77,6 +78,7 @@ The folder `modules` contains the following Terraform modules : | Azure | [adb-with-private-link-standard](modules/adb-with-private-link-standard/) | Provisioning Databricks on Azure with Private Link - Standard deployment | | Azure | [adb-exfiltration-protection](modules/adb-exfiltration-protection/) | A sample implementation of [Data Exfiltration Protection](https://www.databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html) | | Azure | [adb-with-private-links-exfiltration-protection](modules/adb-with-private-links-exfiltration-protection/) | Provisioning Databricks on Azure with Private Link and [Data Exfiltration Protection](https://www.databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html) | +| Azure | 
[adb-service-direct-private-endpoint](modules/adb-service-direct-private-endpoint/) | Inbound "service-direct" Private Link to performance-intensive services (Zerobus Ingest, Lakebase Autoscaling) — Public Preview | | Azure | [adb-overwatch-regional-config](modules/adb-overwatch-regional-config/) | Overwatch regional configuration on Azure | | Azure | [adb-overwatch-mws-config](modules/adb-overwatch-mws-config/) | Overwatch multi-workspace deployment on Azure | | Azure | [adb-overwatch-main-ws](modules/adb-overwatch-main-ws/) | Main Overwatch workspace deployment | diff --git a/examples/adb-service-direct-private-endpoint/Makefile b/examples/adb-service-direct-private-endpoint/Makefile new file mode 100644 index 0000000..653039d --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/Makefile @@ -0,0 +1,7 @@ +.PHONY: docs test_docs + +docs: + terraform-docs -c ../../.terraform-docs.yml . + +test_docs: + terraform-docs -c ../../.terraform-docs.yml --output-check . diff --git a/examples/adb-service-direct-private-endpoint/README.md b/examples/adb-service-direct-private-endpoint/README.md new file mode 100644 index 0000000..6807901 --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/README.md @@ -0,0 +1,97 @@ +# Example — inbound "service-direct" Private Link (performance-intensive services) + +Deploys the [`adb-service-direct-private-endpoint`](../../modules/adb-service-direct-private-endpoint) +module: an Azure private endpoint to the Databricks per-region Private Link +Service for performance-intensive services (Zerobus Ingest, Lakebase +Autoscaling), the `privatelink.azuredatabricks.net` DNS A record +(`.service-direct`), and the account-side `databricks_endpoint` +registration that drives it from `PENDING` to `APPROVED`. + +> **Note** +> This feature and the `databricks_endpoint` resource are both in **Public +> Preview**. Run `terraform plan` and inspect carefully before applying. 
+ +## Prerequisites + +* A **Premium-tier** Databricks account with the **"Private connectivity for + performance-intensive services"** Public Preview feature enabled in the + account console. +* An existing VNet + a dedicated subnet for the private endpoint (PE network + policies disabled — the Azure default). +* The per-region PLS resource ID from the + [Microsoft Learn region table](https://learn.microsoft.com/en-us/azure/databricks/resources/ip-domain-region#service-direct-resource-ids). + +## How to use + +1. Copy `terraform.tfvars` and fill in your values. +2. `terraform init` +3. `terraform plan` +4. `terraform apply` +5. Confirm `endpoint_state` is `APPROVED` and `endpoint_use_case` is + `SERVICE_DIRECT` in the outputs. + +> **Authentication — account provider** +> The `databricks_endpoint` registration uses an **account-level** provider +> (`host` + `account_id`). All Azure Databricks accounts share the host +> `accounts.azuredatabricks.net`, so if you have more than one account profile +> in `~/.databrickscfg`, the CLI auth resolver cannot pick one and `apply` +> fails with `... match https://accounts.azuredatabricks.net ... Use --profile`. +> Disambiguate by exporting `DATABRICKS_CONFIG_PROFILE=<profile-name>` +> (or add `profile = "<profile-name>"` to the `databricks.accounts` provider block). + +> **Approval is asynchronous** +> After `apply`, `endpoint_state` is typically `PENDING` — Databricks approves +> the cross-tenant connection out-of-band, usually within a few minutes. This +> is expected, not a failure. Run `terraform refresh` (or re-`plan`) after a +> few minutes to see `APPROVED`; the Azure private endpoint connection flips +> from `Pending` to `Approved` at the same time. + + +## Requirements + +| Name | Version | +| ---- | ------- | +| [azapi](#requirement\_azapi) | 2.0.1 | +| [azurerm](#requirement\_azurerm) | >=4.31.0 | +| [databricks](#requirement\_databricks) | >=1.107.0 | +| [time](#requirement\_time) | >=0.9.0 | + +## Providers + +No providers. 
+ +## Modules + +| Name | Source | Version | +| ---- | ------ | ------- | +| [adb-service-direct-private-endpoint](#module\_adb-service-direct-private-endpoint) | ../../modules/adb-service-direct-private-endpoint | n/a | + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +| ---- | ----------- | ---- | ------- | :------: | +| [azure\_region](#input\_azure\_region) | Azure region short name (e.g. australiaeast). Must match your workspace region. | `string` | n/a | yes | +| [azure\_subscription\_id](#input\_azure\_subscription\_id) | Azure subscription ID to deploy into. | `string` | n/a | yes | +| [databricks\_account\_id](#input\_databricks\_account\_id) | Databricks account ID (UUID). | `string` | n/a | yes | +| [databricks\_pls\_resource\_id](#input\_databricks\_pls\_resource\_id) | Databricks per-region PLS resource ID for performance-intensive services (from the MS Learn region table). | `string` | n/a | yes | +| [private\_endpoint\_subnet\_id](#input\_private\_endpoint\_subnet\_id) | Resource ID of an existing subnet to host the private endpoint (PE network policies disabled). | `string` | n/a | yes | +| [create\_private\_dns\_zone](#input\_create\_private\_dns\_zone) | Create privatelink.azuredatabricks.net here, or reuse an existing zone. | `bool` | `true` | no | +| [databricks\_host](#input\_databricks\_host) | Databricks account console host. | `string` | `"https://accounts.azuredatabricks.net"` | no | +| [rg\_name](#input\_rg\_name) | Name of the resource group to create for the private endpoint and DNS zone. | `string` | `"rg-service-direct-pe"` | no | +| [tags](#input\_tags) | Tags applied to created resources. | `map(string)` | `{}` | no | +| [vnet\_ids\_to\_link](#input\_vnet\_ids\_to\_link) | VNet IDs to link to the DNS zone (used only when create\_private\_dns\_zone = true). 
| `list(string)` | `[]` | no | + +## Outputs + +| Name | Description | +| ---- | ----------- | +| [dns\_fqdn](#output\_dns\_fqdn) | Resolvable FQDN for service-direct (.service-direct.privatelink.azuredatabricks.net). | +| [endpoint\_state](#output\_endpoint\_state) | Account-side endpoint state. Must be APPROVED to be usable. | +| [endpoint\_use\_case](#output\_endpoint\_use\_case) | Endpoint use\_case — expected SERVICE\_DIRECT. | +| [private\_endpoint\_name](#output\_private\_endpoint\_name) | Name of the Azure private endpoint. | +| [private\_ip\_address](#output\_private\_ip\_address) | Private IP assigned to the private endpoint. | + diff --git a/examples/adb-service-direct-private-endpoint/main.tf b/examples/adb-service-direct-private-endpoint/main.tf new file mode 100644 index 0000000..6617449 --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/main.tf @@ -0,0 +1,16 @@ +module "adb-service-direct-private-endpoint" { + source = "../../modules/adb-service-direct-private-endpoint" + + azure_subscription_id = var.azure_subscription_id + azure_region = var.azure_region + rg_name = var.rg_name + databricks_host = var.databricks_host + databricks_account_id = var.databricks_account_id + private_endpoint_subnet_id = var.private_endpoint_subnet_id + databricks_pls_resource_id = var.databricks_pls_resource_id + + create_private_dns_zone = var.create_private_dns_zone + vnet_ids_to_link = var.vnet_ids_to_link + + tags = var.tags +} diff --git a/examples/adb-service-direct-private-endpoint/outputs.tf b/examples/adb-service-direct-private-endpoint/outputs.tf new file mode 100644 index 0000000..19b4152 --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/outputs.tf @@ -0,0 +1,24 @@ +output "private_endpoint_name" { + description = "Name of the Azure private endpoint." + value = module.adb-service-direct-private-endpoint.private_endpoint_name +} + +output "private_ip_address" { + description = "Private IP assigned to the private endpoint." 
+ value = module.adb-service-direct-private-endpoint.private_ip_address +} + +output "dns_fqdn" { + description = "Resolvable FQDN for service-direct (.service-direct.privatelink.azuredatabricks.net)." + value = module.adb-service-direct-private-endpoint.dns_fqdn +} + +output "endpoint_state" { + description = "Account-side endpoint state. Must be APPROVED to be usable." + value = module.adb-service-direct-private-endpoint.endpoint_state +} + +output "endpoint_use_case" { + description = "Endpoint use_case — expected SERVICE_DIRECT." + value = module.adb-service-direct-private-endpoint.endpoint_use_case +} diff --git a/examples/adb-service-direct-private-endpoint/providers.tf b/examples/adb-service-direct-private-endpoint/providers.tf new file mode 100644 index 0000000..1b6eb5a --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/providers.tf @@ -0,0 +1,38 @@ +# We strongly recommend using the required_providers block to set the +# provider sources and versions being used +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">=4.31.0" + } + databricks = { + source = "databricks/databricks" + version = ">=1.107.0" + } + azapi = { + source = "Azure/azapi" + version = "2.0.1" + } + time = { + source = "hashicorp/time" + version = ">=0.9.0" + } + } +} + +provider "azurerm" { + subscription_id = var.azure_subscription_id + features {} +} + +# Account-level provider — required by databricks_endpoint. 
+provider "databricks" { + alias = "accounts" + host = var.databricks_host + account_id = var.databricks_account_id +} + +provider "azapi" { + subscription_id = var.azure_subscription_id +} diff --git a/examples/adb-service-direct-private-endpoint/terraform.tfvars b/examples/adb-service-direct-private-endpoint/terraform.tfvars new file mode 100644 index 0000000..f6ded35 --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/terraform.tfvars @@ -0,0 +1,27 @@ +azure_subscription_id = "00000000-0000-0000-0000-000000000000" +azure_region = "australiaeast" +rg_name = "rg-service-direct-pe" + +databricks_host = "https://accounts.azuredatabricks.net" +databricks_account_id = "00000000-0000-0000-0000-000000000000" + +# Existing subnet for the private endpoint (PE network policies disabled). +private_endpoint_subnet_id = "/subscriptions//resourceGroups//providers/Microsoft.Network/virtualNetworks//subnets/" + +# Databricks per-region PLS resource ID for performance-intensive services. +# Pull the current value from the MS Learn region table: +# https://learn.microsoft.com/en-us/azure/databricks/resources/ip-domain-region#service-direct-resource-ids +# NOTE: this is a Microsoft.Databricks/workspaces resource ID (the Databricks-owned +# per-region ingress workspace), NOT a Microsoft.Network/privateLinkServices path. +# The PE connects to it by resource ID with subresource service_direct. +databricks_pls_resource_id = "/subscriptions//resourceGroups/regional_ingress__resource_group/providers/Microsoft.Databricks/workspaces/regional_ingress__workspace" + +# Create privatelink.azuredatabricks.net here and link the VNet hosting the PE. +# Set false (and pre-create the zone) if the workspace already uses inbound PL. 
+create_private_dns_zone = true +vnet_ids_to_link = ["/subscriptions//resourceGroups//providers/Microsoft.Network/virtualNetworks/"] + +tags = { + Environment = "dev" + Workload = "service-direct-privatelink" +} diff --git a/examples/adb-service-direct-private-endpoint/variables.tf b/examples/adb-service-direct-private-endpoint/variables.tf new file mode 100644 index 0000000..c74b69f --- /dev/null +++ b/examples/adb-service-direct-private-endpoint/variables.tf @@ -0,0 +1,54 @@ +variable "azure_subscription_id" { + type = string + description = "Azure subscription ID to deploy into." +} + +variable "azure_region" { + type = string + description = "Azure region short name (e.g. australiaeast). Must match your workspace region." +} + +variable "rg_name" { + type = string + description = "Name of the resource group to create for the private endpoint and DNS zone." + default = "rg-service-direct-pe" +} + +variable "databricks_host" { + type = string + description = "Databricks account console host." + default = "https://accounts.azuredatabricks.net" +} + +variable "databricks_account_id" { + type = string + description = "Databricks account ID (UUID)." +} + +variable "private_endpoint_subnet_id" { + type = string + description = "Resource ID of an existing subnet to host the private endpoint (PE network policies disabled)." +} + +variable "databricks_pls_resource_id" { + type = string + description = "Databricks per-region PLS resource ID for performance-intensive services (from the MS Learn region table)." +} + +variable "create_private_dns_zone" { + type = bool + description = "Create privatelink.azuredatabricks.net here, or reuse an existing zone." + default = true +} + +variable "vnet_ids_to_link" { + type = list(string) + description = "VNet IDs to link to the DNS zone (used only when create_private_dns_zone = true)." + default = [] +} + +variable "tags" { + type = map(string) + description = "Tags applied to created resources." 
+ default = {} +} diff --git a/modules/adb-service-direct-private-endpoint/Makefile b/modules/adb-service-direct-private-endpoint/Makefile new file mode 100644 index 0000000..653039d --- /dev/null +++ b/modules/adb-service-direct-private-endpoint/Makefile @@ -0,0 +1,7 @@ +.PHONY: docs test_docs + +docs: + terraform-docs -c ../../.terraform-docs.yml . + +test_docs: + terraform-docs -c ../../.terraform-docs.yml --output-check . diff --git a/modules/adb-service-direct-private-endpoint/README.md b/modules/adb-service-direct-private-endpoint/README.md new file mode 100644 index 0000000..7e5a9eb --- /dev/null +++ b/modules/adb-service-direct-private-endpoint/README.md @@ -0,0 +1,141 @@ +# Inbound "service-direct" Private Link for performance-intensive services + +This module configures inbound (front-end) **"service-direct" Private Link** to +Databricks **performance-intensive services** — currently **Zerobus Ingest** and +**Lakebase Autoscaling** — on Azure. + +It is distinct from classic workspace front-end Private Link (the +`databricks_ui_api` sub-resource): service-direct targets a Databricks-published +**per-region Private Link Service** with the target sub-resource `service_direct`, +is registered at the **account level**, and reuses the +`privatelink.azuredatabricks.net` DNS zone. + +> **Note** +> This feature and the `databricks_endpoint` resource (provider `>=1.107.0`) are +> both in **Public Preview**. Validate with `terraform plan/apply` before +> production use. + +## Module content + +This module deploys: + +* A resource group (`rg_name`) to hold the private endpoint and DNS resources. +* An **Azure private endpoint** to the Databricks per-region performance-intensive + services Private Link Service, target sub-resource `service_direct` + (`is_manual_connection = true`). 
+* The **`privatelink.azuredatabricks.net` private DNS zone** (optional — reuse an + existing one), VNet links, and an **A record `<region>.service-direct`** pointing + at the private endpoint's IP. +* A **`databricks_endpoint`** registration on the account side, which drives the + private endpoint from `PENDING` to `APPROVED` (`use_case = SERVICE_DIRECT`). + +The PE's `properties.resourceGuid` (required by `databricks_endpoint`) is read via +the `azapi` provider, because `azurerm` does not export it. + +## Prerequisites + +* A **Premium-tier** Databricks account. +* The **"Private connectivity for performance-intensive services"** Public Preview + feature enabled on the account (self-enroll in the account console) — otherwise + the registration surface does not appear. +* An existing subnet for the private endpoint (private-endpoint network policies + disabled — the Azure default). +* The per-region PLS resource ID from the + [Microsoft Learn region table](https://learn.microsoft.com/en-us/azure/databricks/resources/ip-domain-region#service-direct-resource-ids). + +## Important to know + +* **Account-level + regional blast radius.** Registering this endpoint affects + **all Premium workspaces in the region** — it is not workspace-scoped. Limits: + 5 per region, 100 per account. +* **PLS + sub-resource shape (Preview).** Uses `private_connection_resource_id` + + `subresource_names = ["service_direct"]` per Microsoft Learn. If a future change + treats the target as a pure Private Link Service, switch to + `private_connection_resource_alias` and drop `subresource_names`. + +## How to use + +> **Note** +> You can customize this module by adding, deleting or updating the resources to +> adapt it to your requirements. +> A deployment example using this module can be found in +> [examples/adb-service-direct-private-endpoint](../../examples/adb-service-direct-private-endpoint) + +1. 
Reference this module using one of the different [module source types](https://developer.hashicorp.com/terraform/language/modules/sources) +2. Add a `variables.tf` with the same content as [variables.tf](variables.tf) +3. Add a `terraform.tfvars` file and provide values to each defined variable +4. Add an `outputs.tf` file +5. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/azurerm) +6. Run `terraform init` to initialize terraform and get the providers ready +7. Run `terraform apply` to create the resources + + +## Requirements + +| Name | Version | +| ---- | ------- | +| [azapi](#requirement\_azapi) | 2.0.1 | +| [azurerm](#requirement\_azurerm) | >=4.31.0 | +| [databricks](#requirement\_databricks) | >=1.107.0 | +| [time](#requirement\_time) | >=0.9.0 | + +## Providers + +| Name | Version | +| ---- | ------- | +| [azapi](#provider\_azapi) | 2.0.1 | +| [azurerm](#provider\_azurerm) | >=4.31.0 | +| [databricks.accounts](#provider\_databricks.accounts) | >=1.107.0 | +| [time](#provider\_time) | >=0.9.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +| ---- | ---- | +| [azurerm_private_dns_a_record.service_direct](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_a_record) | resource | +| [azurerm_private_dns_zone.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_zone) | resource | +| [azurerm_private_dns_zone_virtual_network_link.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_zone_virtual_network_link) | resource | +| [azurerm_private_endpoint.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_endpoint) | resource | +| [azurerm_resource_group.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/resource_group) | resource | +| [databricks_endpoint.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/endpoint) | resource | +| [time_sleep.wait_for_pe](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | +| [azapi_resource.pe](https://registry.terraform.io/providers/Azure/azapi/2.0.1/docs/data-sources/resource) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +| ---- | ----------- | ---- | ------- | :------: | +| [azure\_region](#input\_azure\_region) | Azure region short name (e.g. australiaeast, westus2). Used for the resource group/PE location, the .service-direct DNS A record, and the databricks\_endpoint region. Must match your workspace region. | `string` | n/a | yes | +| [azure\_subscription\_id](#input\_azure\_subscription\_id) | Azure subscription ID to deploy the private endpoint and DNS into. | `string` | n/a | yes | +| [databricks\_account\_id](#input\_databricks\_account\_id) | Databricks account ID (UUID). 
| `string` | n/a | yes | +| [databricks\_pls\_resource\_id](#input\_databricks\_pls\_resource\_id) | Databricks-published Private Link Service resource ID for performance-intensive services in your region. These are per-region and managed by Databricks — pull the current value from the Microsoft Learn region table (Service-direct resource IDs): https://learn.microsoft.com/en-us/azure/databricks/resources/ip-domain-region#service-direct-resource-ids | `string` | n/a | yes | +| [private\_endpoint\_subnet\_id](#input\_private\_endpoint\_subnet\_id) | Resource ID of an existing subnet to host the private endpoint. Private endpoint network policies must be disabled (the Azure default); use a subnet separate from the workspace's own subnets if you reuse the workspace VNet. | `string` | n/a | yes | +| [rg\_name](#input\_rg\_name) | Name of the resource group to create for the private endpoint (and the private DNS zone, when this module creates it). | `string` | n/a | yes | +| [create\_private\_dns\_zone](#input\_create\_private\_dns\_zone) | Whether to create the privatelink.azuredatabricks.net private DNS zone. Set false to reuse an existing zone (common when the workspace already uses inbound Private Link); the A record is added to the existing zone. | `bool` | `true` | no | +| [databricks\_host](#input\_databricks\_host) | Databricks account console host. databricks\_endpoint requires an account-level provider. | `string` | `"https://accounts.azuredatabricks.net"` | no | +| [dns\_a\_record\_ttl](#input\_dns\_a\_record\_ttl) | TTL (seconds) for the .service-direct A record. | `number` | `3600` | no | +| [endpoint\_display\_name](#input\_endpoint\_display\_name) | Display name for the databricks\_endpoint registration. Must be RFC-1034 compliant (letters, numbers, hyphens; starts with a letter; <= 63 chars). | `string` | `"service-direct-pe"` | no | +| [private\_dns\_zone\_name](#input\_private\_dns\_zone\_name) | Name of the private DNS zone. 
service-direct shares the workspace front-end Private Link zone. | `string` | `"privatelink.azuredatabricks.net"` | no | +| [private\_endpoint\_name](#input\_private\_endpoint\_name) | Name of the Azure private endpoint. | `string` | `"pe-service-direct"` | no | +| [request\_message](#input\_request\_message) | Request message attached to the manual private-endpoint connection. | `string` | `"Databricks service-direct private endpoint (performance-intensive services)"` | no | +| [subresource\_name](#input\_subresource\_name) | Target sub-resource (group ID) for the private endpoint connection. Per Microsoft Learn this is service\_direct (underscore). Exposed only so it can be overridden if Databricks changes the published group ID during Public Preview. | `string` | `"service_direct"` | no | +| [tags](#input\_tags) | Tags applied to all created resources. | `map(string)` | `{}` | no | +| [vnet\_ids\_to\_link](#input\_vnet\_ids\_to\_link) | VNet IDs to link to the private DNS zone (only used when create\_private\_dns\_zone = true). When reusing an existing zone, manage links separately. | `list(string)` | `[]` | no | + +## Outputs + +| Name | Description | +| ---- | ----------- | +| [dns\_fqdn](#output\_dns\_fqdn) | Resolvable FQDN clients use for service-direct (.service-direct.privatelink.azuredatabricks.net). | +| [endpoint\_id](#output\_endpoint\_id) | Databricks endpoint\_id of the registration. | +| [endpoint\_state](#output\_endpoint\_state) | State of the registered endpoint. Must be APPROVED to be usable. | +| [endpoint\_use\_case](#output\_endpoint\_use\_case) | use\_case of the registered endpoint — expected SERVICE\_DIRECT. | +| [private\_endpoint\_id](#output\_private\_endpoint\_id) | Resource ID of the Azure private endpoint. | +| [private\_endpoint\_name](#output\_private\_endpoint\_name) | Name of the Azure private endpoint. 
| +| [private\_endpoint\_resource\_guid](#output\_private\_endpoint\_resource\_guid) | properties.resourceGuid of the private endpoint (read via azapi; consumed by the account-side registration). | +| [private\_ip\_address](#output\_private\_ip\_address) | Private IP assigned to the private endpoint. | + diff --git a/modules/adb-service-direct-private-endpoint/outputs.tf b/modules/adb-service-direct-private-endpoint/outputs.tf new file mode 100644 index 0000000..b3e73b0 --- /dev/null +++ b/modules/adb-service-direct-private-endpoint/outputs.tf @@ -0,0 +1,39 @@ +output "private_endpoint_id" { + description = "Resource ID of the Azure private endpoint." + value = azurerm_private_endpoint.this.id +} + +output "private_endpoint_name" { + description = "Name of the Azure private endpoint." + value = azurerm_private_endpoint.this.name +} + +output "private_endpoint_resource_guid" { + description = "properties.resourceGuid of the private endpoint (read via azapi; consumed by the account-side registration)." + value = local.pe_resource_guid +} + +output "private_ip_address" { + description = "Private IP assigned to the private endpoint." + value = local.pe_private_ip +} + +output "dns_fqdn" { + description = "Resolvable FQDN clients use for service-direct (.service-direct.privatelink.azuredatabricks.net)." + value = "${local.a_record_name}.${var.private_dns_zone_name}" +} + +output "endpoint_id" { + description = "Databricks endpoint_id of the registration." + value = databricks_endpoint.this.endpoint_id +} + +output "endpoint_state" { + description = "State of the registered endpoint. Must be APPROVED to be usable." + value = databricks_endpoint.this.state +} + +output "endpoint_use_case" { + description = "use_case of the registered endpoint — expected SERVICE_DIRECT." 
+ value = databricks_endpoint.this.use_case +} diff --git a/modules/adb-service-direct-private-endpoint/providers.tf b/modules/adb-service-direct-private-endpoint/providers.tf new file mode 100644 index 0000000..4087553 --- /dev/null +++ b/modules/adb-service-direct-private-endpoint/providers.tf @@ -0,0 +1,61 @@ +# We strongly recommend using the required_providers block to set the +# provider sources and versions being used +terraform { + required_providers { + # Specify the Azure Provider and its source + azurerm = { + source = "hashicorp/azurerm" + version = ">=4.31.0" + } + + # Specify the Databricks Provider and its source. + # NOTE: databricks_endpoint (Public Preview) was added in v1.107.0 and is + # required by this module — do not lower this floor. + databricks = { + source = "databricks/databricks" + version = ">=1.107.0" + } + + # Specify the AzAPI Provider and its source. Used to read the private + # endpoint's properties.resourceGuid, which azurerm does not export but + # databricks_endpoint requires. + azapi = { + source = "Azure/azapi" + version = "2.0.1" + } + + # Used for a short settle delay between PE creation and account-side + # registration. + time = { + source = "hashicorp/time" + version = ">=0.9.0" + } + } +} + +# Configure the Microsoft Azure Provider +provider "azurerm" { + # Subscription ID for Azure authentication + subscription_id = var.azure_subscription_id + # Enable features for the Azure Provider + features {} +} + +# Configure the Databricks Provider for account-level operations. +# databricks_endpoint can only be used with an account-level provider. 
+provider "databricks" {
+  # Create an alias to differentiate this instance from any workspace provider
+  alias = "accounts"
+
+  # Account console host (Azure: https://accounts.azuredatabricks.net)
+  host = var.databricks_host
+
+  # Databricks account ID for authentication
+  account_id = var.databricks_account_id
+}
+
+# Configure the AzAPI Provider for Azure resources
+provider "azapi" {
+  # Subscription ID for Azure authentication
+  subscription_id = var.azure_subscription_id
+}
diff --git a/modules/adb-service-direct-private-endpoint/service-direct-pe.tf b/modules/adb-service-direct-private-endpoint/service-direct-pe.tf
new file mode 100644
index 0000000..4d6b5b5
--- /dev/null
+++ b/modules/adb-service-direct-private-endpoint/service-direct-pe.tf
@@ -0,0 +1,154 @@
+# =============================================================================
+# Inbound "service-direct" Private Link to Databricks performance-intensive
+# services (Zerobus Ingest, Lakebase Autoscaling).
+#
+# Three parts (matching the numbered sections below):
+#   1. Azure   — private endpoint to the Databricks per-region PLS.
+#   2. DNS     — privatelink.azuredatabricks.net zone + .service-direct A record.
+#   3. Account — databricks_endpoint registers the PE so it transitions from
+#                PENDING to APPROVED (use_case = SERVICE_DIRECT).
+#
+# STATUS: this feature and the databricks_endpoint resource are both Public
+# Preview. Validate against terraform plan/apply before production use.
+# =============================================================================
+
+locals {
+  a_record_name = "${var.azure_region}.service-direct"
+
+  default_tags = {
+    ManagedBy = "terraform"
+    Module    = "adb-service-direct-private-endpoint"
+  }
+  tags = merge(local.default_tags, var.tags)
+
+  # azurerm does not export the PE's resourceGuid (properties.resourceGuid),
+  # which databricks_endpoint requires; read it from raw ARM via azapi.
+  pe_resource_guid = data.azapi_resource.pe.output.properties.resourceGuid
+
+  # MS Learn records the PE private IP from properties.customDnsConfigs[0].
+  # ipAddresses[0]; it is allocated at PE creation (before account-side
+  # approval), so it is safe to use for the DNS A record.
+  pe_private_ip = data.azapi_resource.pe.output.properties.customDnsConfigs[0].ipAddresses[0]
+
+  # Resource group that holds the private DNS zone. A zone created by this
+  # module lives in the module's resource group; a reused zone
+  # (create_private_dns_zone = false) usually lives elsewhere, and the A
+  # record must be created in the zone's own resource group.
+  dns_zone_resource_group_name = (
+    var.create_private_dns_zone
+    ? azurerm_resource_group.this.name
+    : coalesce(var.private_dns_zone_resource_group_name, azurerm_resource_group.this.name)
+  )
+}
+
+# Resource group for the private endpoint (and DNS zone, if created here).
+resource "azurerm_resource_group" "this" {
+  name     = var.rg_name
+  location = var.azure_region
+  tags     = local.tags
+}
+
+# -----------------------------------------------------------------------------
+# 1. Azure private endpoint -> Databricks performance-intensive services PLS
+#
+# Per MS Learn: connect by the PLS *resource ID* with target sub-resource
+# service_direct. The connection is manual — it stays PENDING until the
+# account-side databricks_endpoint registration approves it.
+#
+# NOTE (Preview): if a future provider/platform change treats the target as a
+# pure Private Link Service, azurerm may reject subresource_names — in that case
+# switch to private_connection_resource_alias and drop subresource_names.
+# -----------------------------------------------------------------------------
+resource "azurerm_private_endpoint" "this" {
+  name                = var.private_endpoint_name
+  location            = var.azure_region
+  resource_group_name = azurerm_resource_group.this.name
+  subnet_id           = var.private_endpoint_subnet_id
+  tags                = local.tags
+
+  private_service_connection {
+    name                           = "${var.private_endpoint_name}-psc"
+    private_connection_resource_id = var.databricks_pls_resource_id
+    subresource_names              = [var.subresource_name]
+    is_manual_connection           = true
+    request_message                = var.request_message
+  }
+
+  lifecycle {
+    # Manual PLS connections churn this field on refresh; ignore to keep plans clean.
+    ignore_changes = [private_service_connection[0].private_connection_resource_id]
+  }
+}
+
+# Read resourceGuid + private IP from raw ARM (azurerm exports neither cleanly).
+data "azapi_resource" "pe" {
+  type                   = "Microsoft.Network/privateEndpoints@2024-05-01"
+  resource_id            = azurerm_private_endpoint.this.id
+  response_export_values = ["properties.resourceGuid", "properties.customDnsConfigs"]
+}
+
+# Let the PE settle before the account-side registration reads it.
+resource "time_sleep" "wait_for_pe" {
+  depends_on      = [azurerm_private_endpoint.this]
+  create_duration = "30s"
+}
+
+# -----------------------------------------------------------------------------
+# 2. Private DNS — privatelink.azuredatabricks.net + .service-direct
+#
+# service-direct shares the workspace front-end Private Link zone. Reuse the
+# existing zone (create_private_dns_zone = false) when the workspace already
+# uses inbound Private Link, and set private_dns_zone_resource_group_name to
+# the resource group that holds that zone.
+# -----------------------------------------------------------------------------
+resource "azurerm_private_dns_zone" "this" {
+  count               = var.create_private_dns_zone ? 1 : 0
+  name                = var.private_dns_zone_name
+  resource_group_name = azurerm_resource_group.this.name
+  tags                = local.tags
+}
+
+resource "azurerm_private_dns_zone_virtual_network_link" "this" {
+  count = var.create_private_dns_zone ? length(var.vnet_ids_to_link) : 0
+
+  name                  = "link-${count.index}"
+  resource_group_name   = azurerm_resource_group.this.name
+  private_dns_zone_name = azurerm_private_dns_zone.this[0].name
+  virtual_network_id    = var.vnet_ids_to_link[count.index]
+  registration_enabled  = false
+  tags                  = local.tags
+}
+
+resource "azurerm_private_dns_a_record" "service_direct" {
+  name                = local.a_record_name
+  zone_name           = var.private_dns_zone_name
+  resource_group_name = local.dns_zone_resource_group_name
+  ttl                 = var.dns_a_record_ttl
+  records             = [local.pe_private_ip]
+  tags                = local.tags
+
+  depends_on = [azurerm_private_dns_zone.this]
+}
+
+# -----------------------------------------------------------------------------
+# 3. Account-side registration — PENDING -> APPROVED
+#
+# Requires the account-level databricks provider (databricks.accounts).
+# use_case resolves to SERVICE_DIRECT; state must reach APPROVED to be usable.
+# databricks_endpoint is a plugin-framework resource: nested objects are
+# attributes assigned with `=`, not HCL blocks.
+# -----------------------------------------------------------------------------
+resource "databricks_endpoint" "this" {
+  provider = databricks.accounts
+
+  parent       = "accounts/${var.databricks_account_id}"
+  display_name = var.endpoint_display_name
+  region       = var.azure_region
+
+  azure_private_endpoint_info = {
+    private_endpoint_name          = azurerm_private_endpoint.this.name
+    private_endpoint_resource_guid = local.pe_resource_guid
+  }
+
+  depends_on = [time_sleep.wait_for_pe]
+}
diff --git a/modules/adb-service-direct-private-endpoint/variables.tf b/modules/adb-service-direct-private-endpoint/variables.tf
new file mode 100644
index 0000000..c0b23a7
--- /dev/null
+++ b/modules/adb-service-direct-private-endpoint/variables.tf
@@ -0,0 +1,100 @@
+variable "azure_subscription_id" {
+  type        = string
+  description = "Azure subscription ID to deploy the private endpoint and DNS into."
+}
+
+variable "azure_region" {
+  type        = string
+  description = "Azure region short name (e.g. australiaeast, westus2). Used for the resource group/PE location, the .service-direct DNS A record, and the databricks_endpoint region. Must match your workspace region."
+}
+
+variable "rg_name" {
+  type        = string
+  description = "Name of the resource group to create for the private endpoint (and the private DNS zone, when this module creates it)."
+}
+
+variable "databricks_host" {
+  type        = string
+  description = "Databricks account console host. databricks_endpoint requires an account-level provider."
+  default     = "https://accounts.azuredatabricks.net"
+}
+
+variable "databricks_account_id" {
+  type        = string
+  description = "Databricks account ID (UUID)."
+}
+
+variable "private_endpoint_subnet_id" {
+  type        = string
+  description = "Resource ID of an existing subnet to host the private endpoint. Private endpoint network policies must be disabled (the Azure default); use a subnet separate from the workspace's own subnets if you reuse the workspace VNet."
+}
+
+variable "databricks_pls_resource_id" {
+  type        = string
+  description = "Databricks-published Private Link Service resource ID for performance-intensive services in your region. These are per-region and managed by Databricks — pull the current value from the Microsoft Learn region table (Service-direct resource IDs): https://learn.microsoft.com/en-us/azure/databricks/resources/ip-domain-region#service-direct-resource-ids"
+}
+
+variable "endpoint_display_name" {
+  type        = string
+  description = "Display name for the databricks_endpoint registration. Must be RFC-1034 compliant (letters, numbers, hyphens; starts with a letter; <= 63 chars)."
+  default     = "service-direct-pe"
+
+  validation {
+    condition     = can(regex("^[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$", var.endpoint_display_name))
+    error_message = "endpoint_display_name must be RFC-1034 compliant: start with a letter, contain only letters/numbers/hyphens, end with a letter or number, max 63 chars."
+  }
+}
+
+variable "private_endpoint_name" {
+  type        = string
+  description = "Name of the Azure private endpoint."
+  default     = "pe-service-direct"
+}
+
+variable "subresource_name" {
+  type        = string
+  description = "Target sub-resource (group ID) for the private endpoint connection. Per Microsoft Learn this is service_direct (underscore). Exposed only so it can be overridden if Databricks changes the published group ID during Public Preview."
+  default     = "service_direct"
+}
+
+variable "request_message" {
+  type        = string
+  description = "Request message attached to the manual private-endpoint connection."
+  default     = "Databricks service-direct private endpoint (performance-intensive services)"
+}
+
+variable "create_private_dns_zone" {
+  type        = bool
+  description = "Whether to create the privatelink.azuredatabricks.net private DNS zone. Set false to reuse an existing zone (common when the workspace already uses inbound Private Link); the A record is added to the existing zone."
+  default     = true
+}
+
+variable "private_dns_zone_name" {
+  type        = string
+  description = "Name of the private DNS zone. service-direct shares the workspace front-end Private Link zone."
+  default     = "privatelink.azuredatabricks.net"
+}
+
+variable "private_dns_zone_resource_group_name" {
+  type        = string
+  description = "Resource group that holds the existing private DNS zone. Only used when create_private_dns_zone = false; when null, the resource group created by this module (rg_name) is used."
+  default     = null
+}
+
+variable "vnet_ids_to_link" {
+  type        = list(string)
+  description = "VNet IDs to link to the private DNS zone (only used when create_private_dns_zone = true). When reusing an existing zone, manage links separately."
+  default     = []
+}
+
+variable "dns_a_record_ttl" {
+  type        = number
+  description = "TTL (seconds) for the .service-direct A record."
+  default     = 3600
+}
+
+variable "tags" {
+  type        = map(string)
+  description = "Tags applied to all created resources."
+  default     = {}
+}