From 58726cd5df85febb5641e0568505d1e94a3ddc79 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 1 May 2026 11:24:51 +0000 Subject: [PATCH] Sync plugin files from GitHub-Copilot-for-Azure --- .../azure-skills/.claude-plugin/plugin.json | 2 +- .../azure-skills/.cursor-plugin/plugin.json | 2 +- .../plugins/azure-skills/.plugin/plugin.json | 2 +- .../azure-storage/references/sdk-usage.md | 10 +- .../skills/microsoft-foundry/SKILL.md | 19 +- .../private-network-standard-agent-setup.md | 40 ---- .../private-network/private-network.md | 126 ++++++++++++ .../references/custom-template-adaptation.md | 26 +++ .../private-network/references/deploy.md | 90 +++++++++ .../references/end-to-end-test.md | 117 +++++++++++ .../private-network/references/intake.md | 187 ++++++++++++++++++ .../references/post-deployment-validation.md | 93 +++++++++ .../private-network/references/scaffold.md | 30 +++ .../references/template-index.md | 17 ++ .../references/vpn-dns-setup.bicep | 158 +++++++++++++++ .../references/vpn-dns-setup.md | 161 +++++++++++++++ 16 files changed, 1025 insertions(+), 55 deletions(-) delete mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/references/private-network-standard-agent-setup.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/private-network.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/custom-template-adaptation.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/deploy.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/end-to-end-test.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/intake.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/post-deployment-validation.md 
create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/scaffold.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/template-index.md create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.bicep create mode 100644 .github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.md diff --git a/.github/plugins/azure-skills/.claude-plugin/plugin.json b/.github/plugins/azure-skills/.claude-plugin/plugin.json index 9e6f8b56..e5b35f87 100644 --- a/.github/plugins/azure-skills/.claude-plugin/plugin.json +++ b/.github/plugins/azure-skills/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. Manage your Azure infrastructure, monitor applications, and deploy resources directly from Claude Code.", - "version": "1.1.26", + "version": "1.1.27", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/.cursor-plugin/plugin.json b/.github/plugins/azure-skills/.cursor-plugin/plugin.json index f4866d2e..5f2147e7 100644 --- a/.github/plugins/azure-skills/.cursor-plugin/plugin.json +++ b/.github/plugins/azure-skills/.cursor-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. 
Manage your Azure infrastructure, monitor applications, and deploy resources directly from Cursor.", - "version": "1.1.26", + "version": "1.1.27", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/.plugin/plugin.json b/.github/plugins/azure-skills/.plugin/plugin.json index 65f9bc4e..5ada4764 100644 --- a/.github/plugins/azure-skills/.plugin/plugin.json +++ b/.github/plugins/azure-skills/.plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "azure", "description": "Microsoft Azure MCP and Skills integration for cloud resource management, deployments, and Azure services. Manage your Azure infrastructure, monitor applications, and deploy resources directly from your development environment.", - "version": "1.1.26", + "version": "1.1.27", "author": { "name": "Microsoft", "url": "https://www.microsoft.com" diff --git a/.github/plugins/azure-skills/skills/azure-storage/references/sdk-usage.md b/.github/plugins/azure-skills/skills/azure-storage/references/sdk-usage.md index b96054a9..60f65f1b 100644 --- a/.github/plugins/azure-skills/skills/azure-storage/references/sdk-usage.md +++ b/.github/plugins/azure-skills/skills/azure-storage/references/sdk-usage.md @@ -29,7 +29,6 @@ SDK packages and quick start examples for Azure Storage services. All examples use `DefaultAzureCredential` for authentication, which is recommended for **local development only**. In production, use `ManagedIdentityCredential` β€” see [auth-best-practices.md](auth-best-practices.md). Rust uses `DeveloperToolsCredential` as it doesn't have a `DefaultAzureCredential` equivalent. 
**Python** - Upload Blob: - ```python from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient @@ -41,7 +40,6 @@ blob.upload_blob(b"Hello, Azure Storage!", overwrite=True) ``` **JavaScript** - Upload Blob: - ```javascript import { DefaultAzureCredential } from "@azure/identity"; import { BlobServiceClient } from "@azure/storage-blob"; @@ -53,7 +51,6 @@ await blob.uploadData(Buffer.from("Hello, Azure Storage!")); ``` **C#** - Upload Blob: - ```csharp using Azure.Identity; using Azure.Storage.Blobs; @@ -65,7 +62,6 @@ await blob.UploadAsync(BinaryData.FromString("Hello, Azure Storage!"), overwrite ``` **Java** - Upload Blob: - ```java import com.azure.identity.*; import com.azure.storage.blob.*; @@ -81,7 +77,6 @@ blob.upload(BinaryData.fromString("Hello, Azure Storage!"), true); ``` **Go** - Upload Blob: - ```go package main @@ -102,7 +97,6 @@ func main() { ``` **Rust** - Upload Blob: - ```rust use azure_identity::DeveloperToolsCredential; use azure_storage_blob::{BlobClient, BlobClientOptions}; @@ -113,8 +107,8 @@ let blob_client = BlobClient::new( "my-container", "my-blob.txt", Some(credential), - None + Some(BlobClientOptions::default()), )?; let data = b"Hello, Azure Storage!"; -blob_client.upload(data.to_vec().into(), None).await?; +blob_client.upload(None, data.to_vec().into()).await?; ``` diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md b/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md index 6621af66..03a6d299 100644 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/SKILL.md @@ -4,7 +4,7 @@ description: "Deploy, evaluate, and manage Foundry agents end-to-end: Docker bui license: MIT metadata: author: Microsoft - version: "1.1.8" + version: "1.1.9" --- # Microsoft Foundry Skill @@ -32,19 +32,31 @@ This skill includes specialized sub-skills for specific workflows. 
**Use these i | **eval-datasets** | Harvest production traces into evaluation datasets, manage dataset versions and splits, track evaluation metrics over time, detect regressions, and maintain full lineage from trace to deployment. Use for: create dataset from traces, dataset versioning, evaluation trending, regression detection, dataset comparison, eval lineage. | [eval-datasets](foundry-agent/eval-datasets/eval-datasets.md) | | **project/create** | Creating a new Azure AI Foundry project for hosting agents and models. Use when onboarding to Foundry or setting up new infrastructure. | [project/create/create-foundry-project.md](project/create/create-foundry-project.md) | | **resource/create** | Creating Azure AI Services multi-service resource (Foundry resource) using Azure CLI. Use when manually provisioning AI Services resources with granular control. | [resource/create/create-foundry-resource.md](resource/create/create-foundry-resource.md) | +| **private-network** | Answer questions about Foundry network isolation **and** deploy Foundry with VNet isolation (BYO VNet, Managed VNet, hybrid). Covers architecture concepts, template selection, deployment, and post-deployment validation. | [resource/private-network/private-network.md](resource/private-network/private-network.md) | | **models/deploy-model** | Unified model deployment with intelligent routing. Handles quick preset deployments, fully customized deployments (version/SKU/capacity/RAI), and capacity discovery across regions. Routes to sub-skills: `preset` (quick deploy), `customize` (full control), `capacity` (find availability). | [models/deploy-model/SKILL.md](models/deploy-model/SKILL.md) | | **quota** | Managing quotas and capacity for Microsoft Foundry resources. Use when checking quota usage, troubleshooting deployment failures due to insufficient quota, requesting quota increases, or planning capacity. 
| [quota/quota.md](quota/quota.md) | | **rbac** | Managing RBAC permissions, role assignments, managed identities, and service principals for Microsoft Foundry resources. Use for access control, auditing permissions, and CI/CD setup. | [rbac/rbac.md](rbac/rbac.md) | -> πŸ’‘ **Tip:** For a complete onboarding flow: `project/create` β†’ agent workflows (`deploy` β†’ `invoke`). +> πŸ’‘ **Tip:** For a complete onboarding flow: `project/create` (public) or `private-network` (VNet isolation) β†’ `models/deploy-model` β†’ agent workflows (`create` β†’ `deploy` β†’ `invoke`). > πŸ’‘ **Model Deployment:** Use `models/deploy-model` for all deployment scenarios β€” it intelligently routes between quick preset deployment, customized deployment with full control, and capacity discovery across regions. > πŸ’‘ **Prompt Optimization:** For requests like "optimize my prompt" or "improve my agent instructions," load [observe](foundry-agent/observe/observe.md) and use the `prompt_optimize` MCP tool through that eval-driven workflow. +## Infrastructure Lifecycle + +Match user intent to the correct infrastructure workflow. + +| User Intent | Workflow | +|-------------|---------| +| "Create Foundry" / "Set up Foundry" (ambiguous) | Use `AskUserQuestion`: (a) just an AI Services resource, (b) a project with public access, or (c) a project with network isolation? Route: (a) β†’ [resource/create](resource/create/create-foundry-resource.md), (b) β†’ [project/create](project/create/create-foundry-project.md), (c) β†’ [private-network](resource/private-network/private-network.md) | +| Set up Foundry with VNet isolation | [private-network](resource/private-network/private-network.md) | +| Create a Foundry project (public) | [project/create](project/create/create-foundry-project.md) | +| Create a bare Foundry resource | [resource/create](resource/create/create-foundry-resource.md) | + ## Agent Development Lifecycle -Match user intent to the correct workflow. 
Read each sub-skill in order before executing. +Match user intent to the correct agent workflow. Read each sub-skill in order before executing. | User Intent | Workflow (read in order) | |-------------|------------------------| @@ -79,7 +91,6 @@ Every agent source folder should keep Foundry-specific state under `.foundry/`: ## Agent: Setup References - [Standard Agent Setup](references/standard-agent-setup.md) - Standard capability-host setup with customer-managed data, search, and AI Services resources. -- [Private Network Standard Agent Setup](references/private-network-standard-agent-setup.md) - Standard setup with VNet isolation and private endpoints. ## Agent: Project Context Resolution diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/references/private-network-standard-agent-setup.md b/.github/plugins/azure-skills/skills/microsoft-foundry/references/private-network-standard-agent-setup.md deleted file mode 100644 index 9f77f225..00000000 --- a/.github/plugins/azure-skills/skills/microsoft-foundry/references/private-network-standard-agent-setup.md +++ /dev/null @@ -1,40 +0,0 @@ -# Private Network Standard Agent Setup - -> **MANDATORY:** Read [Standard Agent Setup with Network Isolation docs](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/configure-private-link?tabs=azure-portal&pivots=fdp-project) before proceeding. It covers RBAC requirements, resource provider registration, and role assignments. - -## Overview - -Extends [standard agent setup](standard-agent-setup.md) with full VNet isolation using private endpoints and subnet delegation. All resources communicate over private network only. 
- -## Networking Constraints - -Two subnets required: - -| Subnet | CIDR | Purpose | Delegation | -|--------|------|---------|------------| -| Agent Subnet | /24 (e.g., 192.168.0.0/24) | Agent workloads | `Microsoft.App/environments` (exclusive) | -| Private Endpoint Subnet | /24 (e.g., 192.168.1.0/24) | Private endpoints | None | - -- All Foundry resources **must be in the same region as the VNet**. -- Agent subnet must be exclusive to one Foundry account. -- VNet address space must not overlap with existing networks or reserved ranges. - -> ⚠️ **Warning:** If providing an existing VNet, ensure both subnets exist before deployment. Otherwise the template creates a new VNet with default address spaces. - -## Deployment - -**Always use the official Bicep template:** -[Private Network Standard Agent Setup Bicep](https://github.com/microsoft-foundry/foundry-samples/tree/main/infrastructure/infrastructure-setup-bicep/15-private-network-standard-agent-setup) - -> ⚠️ **Warning:** Capability host provisioning is **asynchronous** (10–20 minutes). Poll deployment status until success before proceeding. - -## Post-Deployment - -1. **Deploy a model** to the new AI Services account (e.g., `gpt-4o`). Fall back to `Standard` SKU if `GlobalStandard` quota is exhausted. -2. **Create the agent** using MCP tools (`agent_update`) or the Python SDK. 
- -## References - -- [Azure AI Foundry Networking](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/configure-private-link?tabs=azure-portal&pivots=fdp-project) -- [Azure AI Foundry RBAC](https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/rbac-azure-ai-foundry?pivots=fdp-project) -- [Standard Agent Setup (public network)](standard-agent-setup.md) diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/private-network.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/private-network.md new file mode 100644 index 00000000..e3389df5 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/private-network.md @@ -0,0 +1,126 @@ +--- +name: private-network +description: "Answer questions about and deploy Microsoft Foundry with network isolation. Covers BYO VNet, Managed VNet, hybrid patterns, private endpoints, and Bicep deployment. WHEN: 'Foundry networking', 'BYO VNet vs managed VNet', 'deploy Foundry in private VNet', 'private endpoints for Foundry'. DO NOT USE FOR: generic Azure networking without Foundry." 
+license: MIT +allowed-tools: Read, Write, Bash, AskUserQuestion, microsoft_docs_search, microsoft_docs_fetch +--- + +# Microsoft Foundry Private Networking + +## Quick Reference + +| Property | Value | +|----------|-------| +| **Best for** | Foundry with VNet isolation, private endpoints, subnet delegation, APIM + Foundry, VPN/Bastion access | +| **Tools** | Azure CLI | +| **MCP Tools** | `AskUserQuestion` - ask user questions; `microsoft_docs_search` - verify facts before presenting; `microsoft_docs_fetch` - fetch full Learn pages for validation | +| **Workflow** | Ground in Learn β†’ Gather β†’ Plan β†’ Scaffold β†’ Validate β†’ Deploy β†’ Test | + +### Key Documentation + +| Topic | URL | +|-------|-----| +| Network isolation | https://learn.microsoft.com/azure/ai-foundry/how-to/configure-private-link | +| Agent Service VNet | https://learn.microsoft.com/azure/ai-services/agents/how-to/virtual-networks | +| Managed VNet | https://learn.microsoft.com/azure/ai-foundry/how-to/configure-managed-network | +| Feature limitations | https://learn.microsoft.com/azure/foundry/how-to/configure-private-link#foundry-feature-limitations | + +## When to Use + +- User asks about Foundry networking, private endpoints, or VNet isolation +- User asks about BYO VNet, Managed VNet, or hybrid patterns +- User wants to deploy Foundry agents in a private network +- User needs APIM integration with private Foundry agents + +**Do NOT use for:** +- Public Foundry setup without VNet β†’ use [project/create](../../project/create/create-foundry-project.md) +- Bare Foundry resource without networking β†’ use [resource/create](../create/create-foundry-resource.md) + +--- + +## Step 0 β€” Ground in Microsoft Learn +Use `microsoft_docs_fetch` to get docs from Key Documentation sources. +Use `microsoft_docs_search` to verify any technical fact before presenting it to the user. If Learn contradicts a reference file, **Learn wins**. Cite the URL. 
If Learn doesn't cover it, say so β€” do not invent facts, limits, flags, or compatibility claims. + +--- + +## End-to-End Deployment Workflow + +> **Important:** All following steps are mandatory. Communicate the plan with the user before acting. + +## Step 1 β€” Gather Requirements + +Read [references/intake.md](references/intake.md). One pass, three tiers: +- **Tier 1 (Core):** Subscription, VNet model, agents, region, RG, VNet β€” determine approach at the end +- **Tier 2 (Architecture):** DNS, topology, NSG, on-prem, identity, BYO resources +- **Tier 3 (Enterprise):** Model, client access, auth, policies, monitoring + +Determine the approach (official template / adapt closest / extend user’s IaC) at the end of Tier 1. Continue through Tiers 2–3. + +--- + +## Step 2 β€” Plan Generation + +Use the confirmed requirements from [references/intake.md](references/intake.md). + +**OFFICIAL path:** Load the template's README from its GitHub URL (via [references/template-index.md](references/template-index.md)). Run `microsoft_docs_search` for its prerequisites. Present a deployment plan using the user's actual values. + +**ADAPT path:** Load the closest template's README. Present a deployment plan highlighting what will be modified from the base template. + +**EXTEND path:** Load [references/custom-template-adaptation.md](references/custom-template-adaptation.md). Read the user's existing template. Follow the gap analysis framework to present what's covered, what's missing, and any issues. Get approval before modifying. + +Get confirmation before proceeding. + +--- + +## Step 3 β€” Scaffold & Parameterize + +Read [references/scaffold.md](references/scaffold.md). + +--- + +## Step 4 β€” Pre-Deployment Validation + +Catch blockers **before** deploying. These checks apply to all paths. + +**Sovereign cloud:** Run `az cloud show --query name -o tsv`. 
If `AzureUSGovernment` or `AzureChinaCloud`, check whether the templates being used (official or user-provided) handle sovereign cloud endpoints. Official templates hardcode `core.windows.net` and Azure Public AAD endpoints. + +**RBAC:** Verify deploying identity has Owner, or Contributor + User Access Administrator. + +**Policy:** Run `az deployment group what-if`. Fix any violations before deploying. + +**Quota:** + +```bash +az cognitiveservices account list-skus --location --kind AIServices -o table +``` + +**Provider Registrations:** `Microsoft.CognitiveServices`, `Microsoft.DocumentDB`, `Microsoft.Search`, `Microsoft.Network`. + +**Feature Flags:** For Managed VNet β€” verify `AI.ManagedVnetPreview` is registered. + +> Do NOT deploy until all pre-flight checks pass. + +--- + +## Step 5 β€” Deploy & Track + +**OFFICIAL / ADAPT path:** Read [references/deploy.md](references/deploy.md) for deployment command, monitoring, and error recovery. + +**EXTEND path:** Deploy using the user's existing deployment workflow (their CLI commands, pipeline, or CI/CD). The monitoring and error recovery guidance in [references/deploy.md](references/deploy.md) still applies. + +--- + +## Step 6 β€” Test & Validate + +Read [references/post-deployment-validation.md](references/post-deployment-validation.md). These checks apply to all paths β€” PE verification, RBAC audit, `publicNetworkAccess` audit, and end-to-end agent test work regardless of how the infrastructure was deployed. + +If any test fails, run `microsoft_docs_search` for the error before attempting remediation. + +--- + +## Error Handling + +> ⚠️ **Critical retry rule:** If a deployment fails after the capability host step starts, the agent subnet gets a `legionservicelink` that cannot be removed. On retry, always use a **new VNet name** β€” never reuse the same agent subnet. See [references/deploy.md](references/deploy.md). + +For all other errors, check `microsoft_docs_search` for current remediation before acting. 
diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/custom-template-adaptation.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/custom-template-adaptation.md new file mode 100644 index 00000000..f0abc11d --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/custom-template-adaptation.md @@ -0,0 +1,26 @@ +# Custom Template Adaptation + +For the EXTEND path — when the user has existing Bicep or Terraform templates. + +## Instructions + +1. **Read** the user's existing template files. Understand the resource graph: what's defined, how resources reference each other, what naming conventions are used. + +2. **Analyze** the template against the user's requirements (from [intake.md](intake.md)) and the Foundry private networking documentation validated in the intake step. Identify: + - Resources already present and correctly configured + - Resources present but misconfigured (wrong settings, missing properties) + - Resources missing entirely + - Dependency or wiring issues (e.g., PEs referencing wrong subnet, DNS zones not linked) + +3. **Present** findings to the user as a gap analysis table: resource, status (✅ present / ⚠️ misconfigured / ❌ missing), and what needs to change. Include any issues found. + +4. **Propose** an end-to-end plan to address all gaps — ordered by dependency. Explain what will be added, what will be modified, and why. Never overwrite existing modules — add alongside and reference existing resources. + +5. **Wait** for user approval before making any changes. + +6. **Implement** the approved changes. After implementation, the flow continues to Step 4 (Pre-Deployment Validation) in the main workflow. 
+ +## Retry Safety + +> ⚠️ If a deployment fails after the capability host step starts, Azure Container Apps leaves a `legionservicelink` service association on the agent subnet that **cannot be removed**. On retry, use a **new subnet or new VNet** — never reuse the same agent subnet. + diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/deploy.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/deploy.md new file mode 100644 index 00000000..c1a1fd2c --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/deploy.md @@ -0,0 +1,90 @@ +# Deploy & Track + +Applies to all private network deployments. + +## Deploy + +```bash +az deployment group create \ + --resource-group <resource-group> \ + --template-file main.bicep \ + --parameters main.bicepparam \ + --name <deployment-name> +``` + +> ⚠️ Capability host provisioning is **asynchronous** (10–20 min). The CLI produces no output during this phase. + +## Monitor Progress + +Use exponential backoff — do NOT poll every 30 seconds. 
+ +| Poll | Wait | +|------|------| +| 1st | 1 min after deploy starts | +| 2nd | 3 min after 1st | +| 3rd | 5 min after 2nd | +| 4th+ | Every 5 min | + +```bash +# Overall state +az deployment group show \ + --resource-group <resource-group> --name <deployment-name> \ + --query "{state:properties.provisioningState,error:properties.error}" -o json + +# Per-resource progress +az deployment operation group list \ + --resource-group <resource-group> --name <deployment-name> \ + --query "[].{resource:properties.targetResource.resourceType,state:properties.provisioningState}" -o table +``` + +Or block with timeout: + +```bash +az deployment group wait \ + --resource-group <resource-group> --name <deployment-name> \ + --created --timeout 1800 +``` + +## Error Recovery + +When a deployment fails, follow this workflow: + +### Step 1 — Identify the error + +```bash +az deployment operation group list \ + --resource-group <resource-group> \ + --name <deployment-name> \ + --query "[?properties.provisioningState=='Failed'].{resource:properties.targetResource.resourceType,error:properties.statusMessage}" \ + -o json +``` + +### Step 2 — Resolve + +Use `microsoft_docs_search` with the error code or message to find current remediation. The legionservicelink retry rule is documented in the main workflow's Error Handling section. 
+ +| Error | Likely cause | Fix | +|-------|-------------|-----| +| `legionservicelink` / subnet in use | Orphaned service link from prior attempt | Use a new `vnetName` — do not reuse the prior VNet | +| `AuthorizationFailed` on `validate/action` | Missing Contributor role | Assign Contributor + User Access Administrator to deploying identity | +| `SubnetDelegationAlreadyExists` | Agent subnet already delegated to another resource | Use a new VNet or open a support ticket to remove the delegation | +| `disableLocalAuth` policy violation | Template defaults to `false` | Set `disableLocalAuth: true` in Bicep params | +| `defaultOutboundAccess` policy violation | Subnets missing the property | Add `defaultOutboundAccess: false` to subnet properties | + +### Step 3 — Present fix to user and get approval + +Before re-deploying, show the user: +- What failed and why +- What file/parameter will be changed +- The new `vnetName` to use (must be different from the failed run) + +### Step 4 — Re-deploy with a new deployment name + +```bash +# Update main.bicepparam: change vnetName to a new unique name +az deployment group create \ + --resource-group <resource-group> \ + --template-file main.bicep \ + --parameters main.bicepparam \ + --name <deployment-name>-retry +``` diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/end-to-end-test.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/end-to-end-test.md new file mode 100644 index 00000000..0df6b53c --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/end-to-end-test.md @@ -0,0 +1,117 @@ +# End-to-End Test (VNet Access Required) + +Continues from [post-deployment-validation.md](post-deployment-validation.md). Steps 1–3 there must be complete first. + +## 4. VNet Access Setup + +> ⚠️ The remaining tests require connectivity to the VNet. + +Use `AskUserQuestion`: **"Steps 1-3 are done. 
The remaining tests need VNet access. How do you want to proceed?"** +Options: +- `I have a Bastion VM / jump box` +- `Set up a point-to-site VPN for me` — read [vpn-dns-setup.md](vpn-dns-setup.md) +- `I have VPN / ExpressRoute already` +- `Skip testing for now` + +**Bastion VM:** User has direct access to all private endpoints from the VM. Setup is complete — do NOT proceed to Step 5. + +--- + +## 5. End-to-End Test (VPN users only) + +Three phases: +1. **Network** — DNS resolution + port 443 reachability +2. **Agent Lifecycle** — Create agent, thread, run, verify, cleanup +3. **Isolation Proof** — Repeat with VPN off — expect 403 + +> ⚠️ Chromium browsers may bypass VPN DNS via Secure DNS (DoH). If portal shows "Error loading agents" but CLI works, disable Secure DNS. + +### Requirements + +```bash +pip install azure-ai-projects azure-identity azure-ai-agents +``` + +### Phase 1: Network Validation + +Resolve DNS and test port 443 for all private endpoints. Substitute actual resource names from the deployment. + +PowerShell: + +```powershell +$endpoints = @( + '<ai-services-account>.services.ai.azure.com', + '<ai-services-account>.openai.azure.com', + '<ai-services-account>.cognitiveservices.azure.com', + '<cosmos-account>.documents.azure.com', + '<storage-account>.blob.core.windows.net', + '<search-service>.search.windows.net' +) +foreach ($h in $endpoints) { + $ip = (Resolve-DnsName $h | Where-Object {$_.IPAddress}).IPAddress + $reach = Test-NetConnection $h -Port 443 -WarningAction SilentlyContinue + Write-Host "$h -> $ip (reachable: $($reach.TcpTestSucceeded))" +} +``` + +Bash: + +```bash +endpoints=( + '<ai-services-account>.services.ai.azure.com' + '<ai-services-account>.openai.azure.com' + '<ai-services-account>.cognitiveservices.azure.com' + '<cosmos-account>.documents.azure.com' + '<storage-account>.blob.core.windows.net' + '<search-service>.search.windows.net' +) +for h in "${endpoints[@]}"; do + ip=$(dig +short "$h" | tail -n1) + nc -z -w 3 "$h" 443 >/dev/null 2>&1 && reach=yes || reach=no + echo "$h -> $ip (reachable: $reach)" +done +``` + +All should resolve to private IPs and be reachable. 
+ +Report results to the user (✅/❌ per endpoint) before proceeding to Phase 2. + +### Phase 2: Agent Lifecycle Test + +Create agent, thread, send message, verify response, cleanup. This exercises all 4 PEs (AI Services, Cosmos DB, Storage, AI Search). + +```python +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient + +endpoint = "https://<ai-services-account>.services.ai.azure.com/api/projects/<project-name>" +client = AIProjectClient(endpoint=endpoint, credential=DefaultAzureCredential()) +agents = client.agents + +agent = agents.create_agent(model="<model-deployment-name>", name="vnet-test", instructions="Reply with 'OK'") +thread = agents.threads.create() +agents.messages.create(thread_id=thread.id, role="user", content="test") +run = agents.runs.create_and_process(thread_id=thread.id, agent_id=agent.id) +msgs = agents.messages.list(thread_id=thread.id) +print(f"Response: {msgs.data[0].content[0].text.value}") +agents.threads.delete(thread.id) +agents.delete_agent(agent.id) +``` + +Report results to the user (which PEs passed, any failures) before proceeding to Phase 3. + +**Phase 3 — Isolation Proof:** Ask user to disconnect VPN. Repeat Phase 2 — it should fail with 403. Report whether isolation is confirmed before proceeding to cross-check. + +### Requirements Cross-Check + +After testing, compare each requirement gathered in [intake.md](intake.md) against the deployed state. Flag any mismatches with remediation steps. + +### Cleanup (VPN users only) + +Ask if user wants to delete VPN Gateway (~$140/month) and DNS Resolver (~$180/month), or keep for ongoing access. 
+ +```bash +az network vnet-gateway delete --resource-group <resource-group> --name vpn-gateway-<suffix> --no-wait +az network dns-resolver delete --resource-group <resource-group> --name dns-resolver-<suffix> --yes +az network public-ip delete --resource-group <resource-group> --name vpn-gateway-pip-<suffix> +``` diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/intake.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/intake.md new file mode 100644 index 00000000..e178c6aa --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/intake.md @@ -0,0 +1,187 @@ +# Intake + +Collect all inputs in one pass, tiered by priority. Extract implicit answers from the user’s message before asking. Use `AskUserQuestion` for unanswered items — batch related questions. + +--- + +## Tier 1 — Core + +### 1.0 Verify Subscription + +Run: + +```bash +az account show --query "{Name:name, Id:id, State:state}" -o table +``` + +Confirm with user. Switch if needed: + +```bash +az account set --subscription "<subscription-id>" +``` + +### 1.1 Extract Known Answers + +Scan the user's message before asking: + +| User Says | Inferred | +|-----------|----------| +| "my existing VNet" / "my VNet" | BYO VNet | +| "managed virtual network" | Managed VNet | +| "user-assigned identity" / "UAI" | User-assigned identity | +| "APIM" / "API Management" | Needs APIM | +| "MCP servers on the VNet" | Needs MCP subnet | +| "I have a Bicep/Terraform template" | Extend existing IaC | +| "add Foundry to my existing infra" | Extend existing IaC | + +### 1.2 Architecture Questions + +For unanswered items, use `AskUserQuestion`: + +**VNet model:** BYO VNet or Managed VNet (preview)? + +**Agents:** Agent workloads, or just models/projects? + +**Region:** Which Azure region? After answer, verify capacity: + +```bash +az cognitiveservices account list-skus --location <region> --kind AIServices -o table +``` + +If empty, warn the user and suggest alternatives. 
+ +**Resource Group:** New or existing? + +**VNet:** New or existing? If new: address space (default `192.168.0.0/16`), subnet CIDRs (agent `/24`, PE `/24`). + +### 1.3 Determine Approach + +Based on the answers collected, select one of three paths: + +``` +User has existing IaC they want to extend? +β”œβ”€β”€ Yes β†’ EXTEND +β”‚ +└── No β†’ check template-index.md + β”œβ”€β”€ Template fits as-is β†’ OFFICIAL + └── Partial or no fit β†’ ADAPT (start from closest template) +``` + +**OFFICIAL:** Load [template-index.md](template-index.md), fetch the best-fit README from GitHub. Present the match using the template's descriptive name. + +**ADAPT:** Fetch the closest template's README. Explain what doesn't fit, present the delta, offer to adapt. + +**EXTEND:** The user has existing Bicep/Terraform β€” no template selection needed yet. Continue to Tier 2. + +Confirm the approach with the user before continuing to Tier 2. + +--- + +## Tier 2 β€” Architecture + +*Skip questions already answered or not applicable.* + +### BYO VNet only + +**Topology:** Standalone, hub-spoke, or Azure vWAN? + +**On-prem connectivity:** VPN Gateway, ExpressRoute, or none? + +**DNS:** Azure-provided, custom DNS resolver, or on-prem DNS forwarding? + +**Address space:** Is `192.168.0.0/16` available, or use a specific range? + +**NSG / Firewall:** Existing rules on the subnets? + +**Deployment executor:** Where will post-deployment commands run? (VM, Bastion, VPN, Cloud Shell) + +**Subscription scope:** Same subscription/tenant, cross-subscription, or cross-tenant? + +**Team ownership:** Same team controls VNet, DNS, NSG, and policy? If different team, block and get pre-approval before deploying. + +### Managed VNet only + +**Feature flag:** Run `az feature show` to verify `AI.ManagedVnetPreview` is registered. If not, register and wait 15–30 min. + +**Outbound mode:** Internet outbound (default) or approved outbound only? + +**MCP:** Public MCP endpoints or private MCP on VNet? 
+ +**Client access:** Where will clients connect from? (Same VNet, peered VNet, on-prem via VPN/ER, Azure-hosted service) + +### Both paths + +**MCP servers:** Needed on VNet? + +**APIM:** Needed? + +**Identity:** System-assigned (default) or user-assigned? + +**BYO resources:** Reuse existing Cosmos DB / Storage / AI Search, or create new? + +> If reusing, confirm all in same region as VNet. + +**Key Vault / App Insights:** If user mentions existing ones, collect resource IDs. Optional. + +--- + +## Tier 3 β€” Enterprise + +**Agent tools:** Which tools? (AI Search, Cosmos DB, Storage, MCP, external APIs, Bing grounding, Code Interpreter) + +**Model:** Name, vendor, version. Verify version format: + +| Vendor | Format | Example | +|--------|--------|---------| +| OpenAI | Date | `2025-04-14` | +| Mistral AI | Integer | `1` | +| Meta | Integer | `9` | + +**Client type:** SDK, web app, Teams bot, other service? + +**Client network path:** Inside VNet, peered VNet, VPN/ExpressRoute? + +**Authentication:** Entra ID (recommended) or API key? + +> Entra ID token audience for Foundry Agents API: `https://ai.azure.com` + +**GitHub access:** Can deployment environment reach `github.com`? If not, pre-stage template. + +**Azure Policy:** Known policies (e.g., `disableLocalAuth`, `defaultOutboundAccess`)? If unknown, `what-if` catches them in Step 4. + +**Monitoring:** Existing Log Analytics workspace, create new, or not needed? + +--- + +## Validate Against Learn + +After collecting all requirements, validate the user's configuration against current documentation. Use `microsoft_docs_fetch` on the relevant pages below, then `microsoft_docs_search` for any requirement-specific concerns not covered. 
+ +### Reference Pages + +| Topic | URL | +|-------|-----| +| Network isolation overview | https://learn.microsoft.com/azure/ai-foundry/how-to/configure-private-link | +| Agent Service private networking | https://learn.microsoft.com/azure/ai-services/agents/how-to/virtual-networks | +| Managed VNet configuration | https://learn.microsoft.com/azure/ai-foundry/how-to/configure-managed-network | +| Agent Service FAQ β€” VNet | https://learn.microsoft.com/azure/foundry/agents/faq#virtual-networking | +| Supported regions & availability | https://learn.microsoft.com/azure/ai-foundry/reference/region-support | +| NSP | https://learn.microsoft.com/en-us/azure/networking/network-security-perimeter | +| Feature Limitations | https://learn.microsoft.com/en-us/azure/foundry/how-to/configure-private-link#foundry-feature-limitations | + +> These URLs may change. If a fetch returns 404, use `microsoft_docs_search` to find the current page. + +If a conflict is found, present: +1. The constraint and its source URL +2. Which requirement it affects +3. Options to resolve + +Do NOT proceed until all conflicts are resolved or accepted. + +--- + +## Confirmation + +Present a summary of all gathered requirements. Ask: **"Confirm this is accurate before I generate a deployment plan."** + +> Do NOT proceed to Plan Generation until you validated requirements against documents and the user confirms. diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/post-deployment-validation.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/post-deployment-validation.md new file mode 100644 index 00000000..202c12e1 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/post-deployment-validation.md @@ -0,0 +1,93 @@ +# Post-Deployment Validation + +Run after deployment succeeds. Steps 1-3 can run from anywhere (management plane). 
Step 4 requires VNet access. + +## 1. Infrastructure Verification + +### 1.1 Resource State + +Verify all resources are in `Succeeded` state: + +```bash +az deployment operation group list \ + --resource-group <resource-group> --name <deployment-name> \ + --query "[].{resource:properties.targetResource.resourceType,state:properties.provisioningState}" -o table +``` + +### 1.2 Private Endpoint Connections + +Verify all PE connections are `Approved`: + +```bash +az network private-endpoint list \ + --resource-group <resource-group> \ + --query "[].{name:name,status:privateLinkServiceConnections[0].privateLinkServiceConnectionState.status,resource:privateLinkServiceConnections[0].groupIds[0]}" -o table +``` + +### 1.3 Public Network Access Audit + +Verify all resources have public access disabled: + +```bash +az cognitiveservices account show --name <account-name> --resource-group <resource-group> \ + --query "properties.publicNetworkAccess" -o tsv + +az cosmosdb show --name <cosmos-account> --resource-group <resource-group> \ + --query "publicNetworkAccess" -o tsv + +az storage account show --name <storage-account> --resource-group <resource-group> \ + --query "publicNetworkAccess" -o tsv + +az search service show --name <search-service> --resource-group <resource-group> \ + --query "publicNetworkAccess" -o tsv +``` + +All should return `Disabled`. + +> **T10 (Private Basic):** Steps 2-4 below do not apply — T10 has no agents, no capability host, and no BYO resources. Setup is complete after Step 1. + +## 2. RBAC Role Assignment (no VNet required) + +The template does not assign data-plane roles automatically. 
+ +Assign `Azure AI Developer` at the **account** scope (management-plane): + +```bash +az role assignment create \ + --role "Azure AI Developer" \ + --assignee \ + --scope /subscriptions//resourceGroups//providers/Microsoft.CognitiveServices/accounts/ +``` + +Assign `Azure AI User` at the **project** scope (data-plane β€” required for `agents/read`, `agents/write`): + +```bash +az role assignment create \ + --role "Azure AI User" \ + --assignee \ + --scope /subscriptions//resourceGroups//providers/Microsoft.CognitiveServices/accounts//projects/ +``` + +> ⚠️ RBAC propagation can take 1–5 minutes. + +## 3. Deploy a Model (no VNet required) + +```bash +az cognitiveservices account deployment create \ + --resource-group \ + --name \ + --deployment-name \ + --model-name \ + --model-version \ + --model-format \ + --sku-name GlobalStandard \ + --sku-capacity 50 +``` + +Fall back to `Standard` SKU if `GlobalStandard` quota is exhausted. + +--- + +## 4. VNet Access & End-to-End Test + +For the remaining steps (VNet access setup, DNS resolution, agent lifecycle test, isolation proof, cleanup), read [end-to-end-test.md](end-to-end-test.md). diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/scaffold.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/scaffold.md new file mode 100644 index 00000000..4ea980c5 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/scaffold.md @@ -0,0 +1,30 @@ +# Scaffold & Parameterize + +Use this reference to fetch the confirmed template and wire up parameters. + +## Path A β€” OFFICIAL / ADAPT + +If the user has no GitHub access, the template must already be present in the workspace. Do NOT attempt to fetch from GitHub. + +Fetch the template from the GitHub URL in [template-index.md](template-index.md). Choose **Bicep or Terraform** based on the user's preference or existing workspace files. 
Fetch the **entire template folder** including subdirectories. Create the files in the user's workspace (e.g., `infra/` folder). + +For ADAPT: after fetching, modify the template to match the user's requirements before parameterizing. + +## Path B β€” EXTEND + +If the user has existing Bicep or Terraform templates they want to extend, load [custom-template-adaptation.md](custom-template-adaptation.md). Follow the gap analysis there: read the user's template, identify what's present, add only the missing mandatory resources. + +Set parameter values using the answers collected in [intake.md](intake.md): + +| Parameter | Source | +|-----------|--------| +| Location | Region (or inferred from existing VNet) | +| VNet name / resource ID | VNet answer (new or existing) | +| VNet address space | Address space from requirements (default `192.168.0.0/16`) | +| Subnet CIDRs | Subnet answers (agent `/24`, PE `/24`, MCP `/24` if needed) | +| Existing Cosmos DB / Storage / AI Search IDs | BYO resource IDs (only if reusing) | +| Isolation mode (T18 only) | Managed VNet outbound mode (`AllowOnlyApprovedOutbound` or `AllowInternetOutbound`) | +| Model name, version, format | Model selection from requirements | +| `disableLocalAuth` | Set `true` if Azure Policy requires it | + +> Do NOT run `az deployment group create` yet β€” validate first (next step). diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/template-index.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/template-index.md new file mode 100644 index 00000000..fb227fae --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/template-index.md @@ -0,0 +1,17 @@ +# Template Index β€” Foundry Private Network + +Official templates for deploying Microsoft Foundry. Each template may be available in Bicep, Terraform, or both β€” use one, not both. 
Choose based on the user's preference or existing workspace files. Use tools to fetch Bicep and Terraform templates to understand available templates and recognize if any matches user's requirements: + +**Bicep templates:** https://github.com/microsoft-foundry/foundry-samples/tree/main/infrastructure/infrastructure-setup-bicep/ + +**Terraform templates:** https://github.com/microsoft-foundry/foundry-samples/tree/main/infrastructure/infrastructure-setup-terraform/ + +Not all templates exist in both Bicep and Terraform. Some have format-specific variants (e.g., Terraform has `15a`/`15b` for new VNet vs BYO VNet; Bicep has `15a` for evaluation-only). + +## How to Use + +1. Fetch the **directory listing** from the relevant repo URL above β€” the folder names are descriptive (e.g., `15-private-network-standard-agent-setup`, `18-managed-virtual-network-preview`) +2. Narrow to 1–2 candidates that match the user's requirements based on folder names +3. Fetch only those candidates' READMEs for full details (prerequisites, parameters, deployment instructions) + +> The root README is incomplete β€” do not rely on it for template discovery. Use the directory listing instead. diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.bicep b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.bicep new file mode 100644 index 00000000..565928a5 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.bicep @@ -0,0 +1,158 @@ +/* + VPN Gateway + DNS Private Resolver + ------------------------------------ + Post-deployment add-on for private network templates (T10, T15–T19). + Creates a P2S VPN Gateway (AAD auth, OpenVPN) and a DNS Private Resolver + so the user can connect from their dev machine and resolve private DNS zones. + + Note: VPN Gateway deployment takes 30-45 minutes. 
+*/ + +@description('Name of the existing VNet from the Foundry deployment') +param vnetName string + +@description('Resource group of the existing VNet. Defaults to the deployment resource group.') +param vnetResourceGroup string = resourceGroup().name + +// ── Existing VNet (referenced, not created; may live in another resource group) ── +resource vnet 'Microsoft.Network/virtualNetworks@2024-05-01' existing = { + name: vnetName + scope: resourceGroup(vnetResourceGroup) +} + +var location = vnet.location + +@description('CIDR for GatewaySubnet — agent must compute from available VNet space') +param gatewaySubnetCidr string + +@description('CIDR for DNS resolver inbound subnet — agent must compute from available VNet space') +param dnsResolverSubnetCidr string + +@description('VPN client address pool — must not overlap with VNet') +param vpnClientAddressPool string = '172.16.201.0/24' + +@description('Azure AD tenant ID for VPN authentication') +param aadTenantId string + +@description('Unique suffix for resource naming') +param suffix string + +// AAD constants for Azure Public cloud only. +// Sovereign clouds (AzureUSGovernment, AzureChinaCloud) require different audience/issuer values. +// Confirm the active cloud is AzureCloud (`az cloud show --query name -o tsv`) before deploying this template. +var aadAudience = 'c632b3df-fb67-4d84-bdcf-b95ad541b5c8' +var aadIssuer = 'https://sts.windows.net/${aadTenantId}/' +var aadTenant = 'https://login.microsoftonline.com/${aadTenantId}/' + +// ── Add subnets to the existing VNet ── +resource gatewaySubnet 'Microsoft.Network/virtualNetworks/subnets@2024-05-01' = { + parent: vnet + name: 'GatewaySubnet' + properties: { + addressPrefix: gatewaySubnetCidr + defaultOutboundAccess: false + } +} + +// NOTE: NRMS policy may auto-deploy an NSG on this subnet. +// Ensure the NSG allows inbound UDP/TCP port 53 (DNS) from the VPN client address pool. 
+resource dnsResolverSubnet 'Microsoft.Network/virtualNetworks/subnets@2024-05-01' = { + parent: vnet + name: 'dns-resolver-inbound' + properties: { + addressPrefix: dnsResolverSubnetCidr + defaultOutboundAccess: false + delegations: [ + { + name: 'dns-resolver-delegation' + properties: { + serviceName: 'Microsoft.Network/dnsResolvers' + } + } + ] + } + dependsOn: [gatewaySubnet] // serialize subnet updates: this subnet is written only after GatewaySubnet +} + +// ── Public IP for VPN Gateway (Standard SKU, static allocation, zones 1-3) ── +resource vpnGatewayPip 'Microsoft.Network/publicIPAddresses@2024-05-01' = { + name: 'vpn-gateway-pip-${suffix}' + location: location + sku: { + name: 'Standard' + } + zones: ['1', '2', '3'] + properties: { + publicIPAllocationMethod: 'Static' + } +} + +// ── VPN Gateway (route-based, point-to-site: OpenVPN protocol with AAD authentication) ── +resource vpnGateway 'Microsoft.Network/virtualNetworkGateways@2024-05-01' = { + name: 'vpn-gateway-${suffix}' + location: location + properties: { + gatewayType: 'Vpn' + vpnType: 'RouteBased' + sku: { + name: 'VpnGw1AZ' + tier: 'VpnGw1AZ' + } + ipConfigurations: [ + { + name: 'default' + properties: { + publicIPAddress: { + id: vpnGatewayPip.id + } + subnet: { + id: gatewaySubnet.id + } + } + } + ] + vpnClientConfiguration: { + vpnClientAddressPool: { + addressPrefixes: [vpnClientAddressPool] + } + vpnClientProtocols: ['OpenVPN'] + vpnAuthenticationTypes: ['AAD'] + aadTenant: aadTenant + aadAudience: aadAudience + aadIssuer: aadIssuer + } + } +} + +// ── DNS Private Resolver (bound to the existing VNet) with one inbound endpoint ── +resource dnsResolver 'Microsoft.Network/dnsResolvers@2022-07-01' = { + name: 'dns-resolver-${suffix}' + location: location + properties: { + virtualNetwork: { + id: vnet.id + } + } +} + +resource dnsInboundEndpoint 'Microsoft.Network/dnsResolvers/inboundEndpoints@2022-07-01' = { + parent: dnsResolver + name: 'inbound' + location: location + properties: { + ipConfigurations: [ + { + privateIpAllocationMethod: 'Dynamic' + subnet: { + id: dnsResolverSubnet.id + } + } + ] + } +} + +// ── Outputs ── +output vpnGatewayName string = vpnGateway.name +output vpnGatewayId string = 
vpnGateway.id +output vpnPublicIpAddress string = vpnGatewayPip.properties.ipAddress +output dnsResolverInboundIp string = dnsInboundEndpoint.properties.ipConfigurations[0].privateIpAddress diff --git a/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.md b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.md new file mode 100644 index 00000000..6abb3769 --- /dev/null +++ b/.github/plugins/azure-skills/skills/microsoft-foundry/resource/private-network/references/vpn-dns-setup.md @@ -0,0 +1,161 @@ +# VPN Gateway & DNS Private Resolver Setup + +Post-deployment add-on for private network templates (T10, T15–T19). Creates a point-to-site VPN Gateway and DNS Private Resolver so the user can connect from their dev machine and resolve private DNS zones. + +## Assumptions + +| Property | Value | Rationale | +|----------|-------|-----------| +| Auth | Microsoft Entra ID (AAD) only | No certificate management | +| Tunnel | OpenVPN | Cross-platform, Azure VPN Client | +| Gateway SKU | VpnGw1AZ | Zone-redundant, same cost as VpnGw1 | +| GatewaySubnet | /24 recommended | Agent computes from available VNet space | +| DNS resolver subnet | /28 minimum | Agent computes from available VNet space | +| Client address pool | `172.16.201.0/24` | Non-overlapping with VNet | + +## Subnet Layout + +Adds two subnets to the existing VNet. Uses the next available range after the agent and PE subnets. + +| Subnet | CIDR (default) | Purpose | Delegation | +|--------|----------------|---------|------------| +| `GatewaySubnet` | Computed | VPN Gateway (name is required by Azure) | None | +| `dns-resolver-inbound` | Computed | DNS Private Resolver inbound endpoint | `Microsoft.Network/dnsResolvers` | + +> ⚠️ **Warning:** `GatewaySubnet` is a reserved name β€” Azure requires this exact name for VPN Gateway. + +## Pre-Deployment + +### 1. 
Discover Available Subnets + +List existing subnets to find free address space: + +```bash +az network vnet subnet list \ + --resource-group --vnet-name \ + --query "[].{name:name,cidr:addressPrefix}" -o table +``` + +Pick the next unused `/24` for `GatewaySubnet` and the next unused `/28` for `dns-resolver-inbound`. Both must not overlap with any existing subnet. + +Example: if subnets `.0.0/24`, `.1.0/24`, `.2.0/24` are in use β†’ use `192.168.3.0/24` for GatewaySubnet, `192.168.4.0/28` for dns-resolver-inbound. + +### 2. Collect Remaining Inputs + +| Parameter | Source | +|-----------|--------| +| `vnetName` | From main deployment | +| `vnetResourceGroup` | Resource group containing the VNet (omit if same as deployment RG) | +| `resourceGroupName` | Resource group for this deployment | +| `gatewaySubnetCidr` | Computed in step 1 | +| `dnsResolverSubnetCidr` | Computed in step 1 | +| `suffix` | From main deployment (or generate unique) | +| `aadTenantId` | From `az account show --query tenantId` | + +### 3. 
Check VPN Gateway Quota + +```bash +az network list-usages --location <region> \ + --query "[?name.value=='VirtualNetworkGateways'].{limit:limit,current:currentValue}" -o table +``` + +## Bicep Template + +Template: [vpn-dns-setup.bicep](vpn-dns-setup.bicep) + +| Parameter | Required | Default | Description | +|-----------|----------|---------|-------------| +| `vnetName` | Yes | — | Name of the existing VNet | +| `vnetResourceGroup` | No | Deployment RG | Resource group of the existing VNet (for BYO VNets in a different RG) | +| `aadTenantId` | Yes | — | Entra ID tenant ID for VPN auth | +| `suffix` | Yes | — | Unique suffix for resource naming | +| `gatewaySubnetCidr` | Yes | — | GatewaySubnet CIDR (computed from VNet) | +| `dnsResolverSubnetCidr` | Yes | — | DNS resolver inbound subnet CIDR (computed from VNet) | +| `vpnClientAddressPool` | No | `172.16.201.0/24` | VPN client address pool | + +**Creates:** GatewaySubnet, dns-resolver-inbound subnet, Public IP (Standard, zone-redundant), VPN Gateway (VpnGw1AZ, P2S AAD/OpenVPN), DNS Private Resolver with inbound endpoint. + +## Deploy + +```bash +az deployment group create \ + --resource-group <resource-group> \ + --template-file vpn-dns-setup.bicep \ + --parameters vnetName='<vnet-name>' aadTenantId='<tenant-id>' suffix='<suffix>' \ + gatewaySubnetCidr='<gateway-subnet-cidr>' dnsResolverSubnetCidr='<dns-resolver-subnet-cidr>' \ + --name vpn-dns-setup +``` + +> ⚠️ **VPN Gateway provisioning takes 20–45 minutes.** This is normal. Do not cancel. + +Monitor: + +```bash +az deployment group show \ + --resource-group <resource-group> --name vpn-dns-setup \ + --query "{state:properties.provisioningState}" -o tsv +``` + +## Post-Deployment + +### 1. Get DNS Resolver Inbound IP + +```bash +az network dns-resolver inbound-endpoint show \ + --resource-group <resource-group> \ + --dns-resolver-name dns-resolver-<suffix> \ + --name inbound \ + --query "ipConfigurations[0].privateIpAddress" -o tsv +``` + +Save this IP — the VPN client needs it as custom DNS. + +### 2. 
Connect via VPN + +Provide the user with these instructions (substitute actual resource name and DNS IP): + +1. Go to **Azure Portal** → `vpn-gateway-<suffix>` → **Point-to-site configuration** → **Download VPN client** +2. Extract the ZIP → edit `AzureVPN/azurevpnconfig.xml` — replace: + ```xml + <clientconfig i:nil="true" /> + ``` + with: + ```xml + <clientconfig> + <dnsservers> + <dnsserver>DNS_RESOLVER_INBOUND_IP</dnsserver> + </dnsservers> + </clientconfig> + ``` +3. Open [Azure VPN Client](https://aka.ms/azvpnclientdownload) → **Import** the modified `azurevpnconfig.xml` → **Connect** + +Use `AskUserQuestion`: **"Let me know when you're connected so I can verify DNS resolution."** + +> Do NOT proceed to verification until the user confirms they are connected. + +### 3. Verify DNS Resolution + +After connecting via VPN, verify private DNS zones resolve correctly: + +```bash +nslookup <account-name>.services.ai.azure.com +nslookup <cosmos-account>.documents.azure.com +nslookup <storage-account>.blob.core.windows.net +``` + +Each should resolve to a private IP inside the VNet address space (`192.168.x.x` with the default range), not a public IP. + +### 4. VPN Setup Complete + +DNS resolves to private IPs — VPN is working. Return to [post-deployment-validation.md](post-deployment-validation.md) **Step 4** to run the end-to-end tests. 
+ +## Troubleshooting + +| Problem | Cause | Fix | +|---------|-------|-----| +| VPN connects but DNS doesn't resolve | Custom DNS not set in VPN client profile | Add DNS resolver inbound IP as custom DNS server | +| `nslookup` returns public IP | Private DNS zones not linked to VNet | Verify DNS zone VNet links: `az network private-dns zone list -g ` | +| VPN client auth fails | Wrong tenant or app not consented | Verify `tenantId`, ensure Azure VPN enterprise app is consented in the tenant | +| Gateway deployment times out | Normal β€” VPN GW takes 20-45 min | Wait and re-check with `az deployment group show` | +| Subnet conflict | CIDR overlaps with existing subnet | Use different CIDRs for `gatewaySubnetCidr` / `dnsResolverSubnetCidr` | +| DNS resolver queries blocked | NRMS auto-deployed NSG missing DNS rules | Add inbound allow rule for UDP/TCP port 53 from VPN client address pool to the `dns-resolver-inbound` subnet NSG |