From 7d5f4fd30fa26b60f8bc1683d715382a2332ef1c Mon Sep 17 00:00:00 2001 From: Chudi Huang Date: Thu, 26 Feb 2026 05:43:26 +0000 Subject: [PATCH 1/5] [aks-preview] subcommand to provision foundry and deploy openclaw --- src/aks-preview/azext_aks_preview/_help.py | 91 +++++ src/aks-preview/azext_aks_preview/_params.py | 56 +++ src/aks-preview/azext_aks_preview/commands.py | 10 + src/aks-preview/azext_aks_preview/custom.py | 46 +++ .../azext_aks_preview/openclaw/__init__.py | 4 + .../azext_aks_preview/openclaw/_consts.py | 23 ++ .../azext_aks_preview/openclaw/_helpers.py | 324 +++++++++++++++ .../azext_aks_preview/openclaw/deploy.py | 377 ++++++++++++++++++ .../tests/latest/test_openclaw.py | 144 +++++++ 9 files changed, 1075 insertions(+) create mode 100644 src/aks-preview/azext_aks_preview/openclaw/__init__.py create mode 100644 src/aks-preview/azext_aks_preview/openclaw/_consts.py create mode 100644 src/aks-preview/azext_aks_preview/openclaw/_helpers.py create mode 100644 src/aks-preview/azext_aks_preview/openclaw/deploy.py create mode 100644 src/aks-preview/azext_aks_preview/tests/latest/test_openclaw.py diff --git a/src/aks-preview/azext_aks_preview/_help.py b/src/aks-preview/azext_aks_preview/_help.py index 1c4864b09cf..81f4afb15f1 100644 --- a/src/aks-preview/azext_aks_preview/_help.py +++ b/src/aks-preview/azext_aks_preview/_help.py @@ -4453,3 +4453,94 @@ - name: Show a specific JWT authenticator configuration text: az aks jwtauthenticator show -g MyResourceGroup --cluster-name MyCluster --name myjwt """ + +helps['aks openclaw'] = """ + type: group + short-summary: Commands to deploy and manage OpenClaw on an AKS cluster. +""" + +helps['aks openclaw deploy'] = """ + type: command + short-summary: Deploy OpenClaw with Azure AI Foundry on an AKS cluster. + long-summary: | + Provisions Azure AI Foundry resources (or uses an existing one), deploys the + openclaw-kubernetes Helm chart with LiteLLM proxy, and configures the web UI. 
+ By default, a new AIServices account is created using the resource group's location + and an auto-generated name. Use --ai-foundry-resource-id or --ai-foundry-endpoint + to bring your own AI Foundry resource instead. + parameters: + - name: --cluster-name + type: string + short-summary: Name of the AKS cluster. + - name: --ai-foundry-resource-id + type: string + short-summary: Full ARM resource ID of an existing AIServices account (BYO mode). + - name: --ai-foundry-endpoint + type: string + short-summary: Endpoint URL of an existing AI Foundry resource (BYO mode). Requires --ai-foundry-api-key. + - name: --ai-foundry-api-key + type: string + short-summary: API key for the AI Foundry endpoint. Required with --ai-foundry-endpoint. + - name: --ai-foundry-location + type: string + short-summary: Azure region for provisioning a new AIServices account. Defaults to the resource group's location. + - name: --model + type: string + short-summary: Model name to deploy. Default is gpt-5.1-chat. + - name: --model-version + type: string + short-summary: Model version to deploy. Default is 2025-11-13. Only used when provisioning new resources. + - name: --deployment-name + type: string + short-summary: Azure model deployment name. Auto-generated if not specified. + - name: --capacity + type: int + short-summary: Tokens-per-minute capacity for the model deployment. Default is 50. Only used when provisioning new resources. + - name: --namespace + type: string + short-summary: Kubernetes namespace for OpenClaw. Default is openclaw. 
+ examples: + - name: Deploy OpenClaw with auto-provisioned AI Foundry (simplest) + text: az aks openclaw deploy -g MyResourceGroup --cluster-name MyCluster + - name: Deploy with a specific model and region override + text: az aks openclaw deploy -g MyResourceGroup --cluster-name MyCluster --model gpt-4o --ai-foundry-location westus + - name: Deploy using an existing AI Foundry resource (BYO by resource ID) + text: az aks openclaw deploy -g MyResourceGroup --cluster-name MyCluster --ai-foundry-resource-id /subscriptions/SUB_ID/resourceGroups/RG/providers/Microsoft.CognitiveServices/accounts/myaccount + - name: Deploy using a raw endpoint and API key (BYO by endpoint) + text: az aks openclaw deploy -g MyResourceGroup --cluster-name MyCluster --ai-foundry-endpoint https://eastus.api.cognitive.microsoft.com --ai-foundry-api-key MY_KEY --deployment-name gpt51chat +""" + +helps['aks openclaw delete'] = """ + type: command + short-summary: Delete OpenClaw deployment from an AKS cluster. + parameters: + - name: --cluster-name + type: string + short-summary: Name of the AKS cluster. + - name: --namespace + type: string + short-summary: Kubernetes namespace where OpenClaw is deployed. Default is openclaw. + - name: --delete-ai-resources + type: bool + short-summary: Also delete the auto-provisioned AIServices account. Default is false. + examples: + - name: Delete OpenClaw deployment + text: az aks openclaw delete -g MyResourceGroup --cluster-name MyCluster --yes + - name: Delete OpenClaw and the provisioned AI Foundry resources + text: az aks openclaw delete -g MyResourceGroup --cluster-name MyCluster --delete-ai-resources --yes +""" + +helps['aks openclaw show'] = """ + type: command + short-summary: Show OpenClaw deployment status on an AKS cluster. + parameters: + - name: --cluster-name + type: string + short-summary: Name of the AKS cluster. + - name: --namespace + type: string + short-summary: Kubernetes namespace where OpenClaw is deployed. Default is openclaw. 
+ examples: + - name: Show OpenClaw status + text: az aks openclaw show -g MyResourceGroup --cluster-name MyCluster +""" diff --git a/src/aks-preview/azext_aks_preview/_params.py b/src/aks-preview/azext_aks_preview/_params.py index cc0cb8fc8a9..0ae178b0334 100644 --- a/src/aks-preview/azext_aks_preview/_params.py +++ b/src/aks-preview/azext_aks_preview/_params.py @@ -3149,6 +3149,62 @@ def load_arguments(self, _): c.argument('config_file', options_list=['--config-file'], type=file_type, completer=FilesCompleter(), help='Path to the JSON configuration file containing JWT authenticator properties.') + # OpenClaw commands + with self.argument_context("aks openclaw") as c: + c.argument("cluster_name", options_list=["--cluster-name"], help="The AKS cluster name.") + c.argument("namespace", help="Kubernetes namespace for OpenClaw. Default is openclaw.") + + with self.argument_context("aks openclaw deploy") as c: + c.argument( + "ai_foundry_resource_id", + options_list=["--ai-foundry-resource-id"], + help="Full ARM resource ID of an existing AIServices account (BYO mode).", + ) + c.argument( + "ai_foundry_endpoint", + options_list=["--ai-foundry-endpoint"], + help="Endpoint URL of an existing AI Foundry resource (BYO mode). Requires --ai-foundry-api-key.", + ) + c.argument( + "ai_foundry_api_key", + options_list=["--ai-foundry-api-key"], + help="API key for the AI Foundry endpoint. Required with --ai-foundry-endpoint.", + ) + c.argument( + "ai_foundry_location", + options_list=["--ai-foundry-location"], + help="Azure region for provisioning a new AIServices account. Defaults to the resource group's location.", + ) + c.argument( + "model", + options_list=["--model"], + help="Model name to deploy. Default is gpt-5.1-chat.", + ) + c.argument( + "model_version", + options_list=["--model-version"], + help="Model version to deploy. Default is 2025-11-13. 
Only used when provisioning new resources.", + ) + c.argument( + "deployment_name", + options_list=["--deployment-name"], + help="Azure model deployment name. Auto-generated from model name if not specified.", + ) + c.argument( + "capacity", + options_list=["--capacity"], + type=int, + help="Tokens-per-minute capacity for the model deployment. Default is 50.", + ) + + with self.argument_context("aks openclaw delete") as c: + c.argument( + "delete_ai_resources", + options_list=["--delete-ai-resources"], + action="store_true", + help="Also delete the auto-provisioned AIServices account.", + ) + def _get_default_install_location(exe_name): system = platform.system() diff --git a/src/aks-preview/azext_aks_preview/commands.py b/src/aks-preview/azext_aks_preview/commands.py index ea34bd3ebc7..d7a64b5dda1 100644 --- a/src/aks-preview/azext_aks_preview/commands.py +++ b/src/aks-preview/azext_aks_preview/commands.py @@ -603,3 +603,13 @@ def load_command_table(self, _): self.command_table["aks safeguards delete"] = Delete(loader=self) self.command_table["aks safeguards list"] = List(loader=self) self.command_table["aks safeguards wait"] = Wait(loader=self) + + # AKS openclaw commands + with self.command_group( + "aks openclaw", + managed_clusters_sdk, + client_factory=cf_managed_clusters, + ) as g: + g.custom_command("deploy", "aks_openclaw_deploy") + g.custom_command("delete", "aks_openclaw_delete", confirmation=True) + g.custom_show_command("show", "aks_openclaw_show") diff --git a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 3b4f1ceb1b6..6d6882d74a7 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -5375,3 +5375,49 @@ def aks_jwtauthenticator_list(cmd, client, resource_group_name, cluster_name, ak def aks_jwtauthenticator_show(cmd, client, resource_group_name, cluster_name, name, aks_custom_headers=None): headers = get_aks_custom_headers(aks_custom_headers) 
return client.get(resource_group_name, cluster_name, name, headers=headers) + + +# openclaw commands +def aks_openclaw_deploy(cmd, client, resource_group_name, cluster_name, + ai_foundry_resource_id=None, + ai_foundry_endpoint=None, + ai_foundry_api_key=None, + ai_foundry_location=None, + model=None, + model_version=None, + deployment_name=None, + capacity=None, + namespace=None): + from azext_aks_preview.openclaw.deploy import deploy_openclaw + return deploy_openclaw( + cmd, resource_group_name, cluster_name, + ai_foundry_resource_id=ai_foundry_resource_id, + ai_foundry_endpoint=ai_foundry_endpoint, + ai_foundry_api_key=ai_foundry_api_key, + ai_foundry_location=ai_foundry_location, + model=model, + model_version=model_version, + deployment_name=deployment_name, + capacity=capacity, + namespace=namespace, + ) + + +def aks_openclaw_delete(cmd, client, resource_group_name, cluster_name, + namespace=None, + delete_ai_resources=False): + from azext_aks_preview.openclaw.deploy import delete_openclaw + return delete_openclaw( + cmd, resource_group_name, cluster_name, + namespace=namespace, + delete_ai_resources=delete_ai_resources, + ) + + +def aks_openclaw_show(cmd, client, resource_group_name, cluster_name, + namespace=None): + from azext_aks_preview.openclaw.deploy import show_openclaw + return show_openclaw( + cmd, resource_group_name, cluster_name, + namespace=namespace, + ) diff --git a/src/aks-preview/azext_aks_preview/openclaw/__init__.py b/src/aks-preview/azext_aks_preview/openclaw/__init__.py new file mode 100644 index 00000000000..34913fb394d --- /dev/null +++ b/src/aks-preview/azext_aks_preview/openclaw/__init__.py @@ -0,0 +1,4 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- diff --git a/src/aks-preview/azext_aks_preview/openclaw/_consts.py b/src/aks-preview/azext_aks_preview/openclaw/_consts.py new file mode 100644 index 00000000000..bf9fe11f216 --- /dev/null +++ b/src/aks-preview/azext_aks_preview/openclaw/_consts.py @@ -0,0 +1,23 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +# Helm chart +CONST_OPENCLAW_HELM_CHART_URL = "oci://ghcr.io/feiskyer/openclaw-kubernetes/openclaw" +CONST_OPENCLAW_DEFAULT_NAMESPACE = "openclaw" + +# Azure AI Foundry defaults +CONST_OPENCLAW_DEFAULT_MODEL = "gpt-5.1-chat" +CONST_OPENCLAW_DEFAULT_MODEL_VERSION = "2025-11-13" +CONST_OPENCLAW_DEFAULT_CAPACITY = 50 +CONST_OPENCLAW_DEFAULT_SKU = "GlobalStandard" +CONST_OPENCLAW_AI_SERVICES_KIND = "AIServices" +CONST_OPENCLAW_AI_SERVICES_SKU = "S0" +CONST_OPENCLAW_COGNITIVE_API_VERSION = "2024-10-01" + +# Storage +CONST_OPENCLAW_STORAGE_CLASS_NAME = "azurefile-openclaw" + +# LiteLLM +CONST_OPENCLAW_LITELLM_API_VERSION = "2024-10-01-preview" diff --git a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py new file mode 100644 index 00000000000..ed47e3ce9ec --- /dev/null +++ b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py @@ -0,0 +1,324 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +import hashlib +import json +import os +import secrets +import subprocess +import tempfile + +import yaml +from knack.log import get_logger +from knack.util import CLIError + +from azext_aks_preview.openclaw._consts import ( + CONST_OPENCLAW_DEFAULT_NAMESPACE, + CONST_OPENCLAW_HELM_CHART_URL, + CONST_OPENCLAW_LITELLM_API_VERSION, + CONST_OPENCLAW_STORAGE_CLASS_NAME, +) + +logger = get_logger(__name__) + + +def ensure_prerequisites(): + """Check that helm and kubectl are on PATH.""" + from shutil import which + + if not which("helm"): + raise CLIError( + "Could not find 'helm' on PATH. " + "Please install Helm v3: https://helm.sh/docs/intro/install/" + ) + if not which("kubectl"): + raise CLIError( + "Could not find 'kubectl' on PATH. " + "Please install kubectl: https://kubernetes.io/docs/tasks/tools/" + ) + + +def run_helm(args, kubeconfig_path=None, timeout=300): + """Run a helm command and return (success, output).""" + cmd = ["helm"] + if kubeconfig_path: + cmd.extend(["--kubeconfig", kubeconfig_path]) + cmd.extend(args) + + logger.debug("Running: %s", " ".join(cmd)) + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True, + timeout=timeout, + ) + return True, result.stdout + except subprocess.TimeoutExpired as e: + return False, f"Helm command timed out after {timeout}s: {e}" + except subprocess.CalledProcessError as e: + return False, e.stderr or e.stdout or str(e) + + +def run_kubectl(args, kubeconfig_path=None, check=True, timeout=120): + """Run a kubectl command and return stdout.""" + cmd = ["kubectl"] + if kubeconfig_path: + cmd.extend(["--kubeconfig", kubeconfig_path]) + cmd.extend(args) + + logger.debug("Running: %s", " ".join(cmd)) + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=check, + timeout=timeout, + ) + return result.stdout + except subprocess.CalledProcessError as e: + if 
check: + raise CLIError(f"kubectl command failed: {e.stderr or e.stdout}") from e + return e.stderr or e.stdout + + +def generate_foundry_name(cluster_name): + """Generate a deterministic AI Foundry account name from cluster name.""" + short_hash = hashlib.sha256(cluster_name.encode()).hexdigest()[:8] + # Cognitive Services account names: 2-64 alphanumeric chars and hyphens + name = f"openclaw-{cluster_name}-{short_hash}" + # Truncate to 64 chars and strip trailing hyphens + return name[:64].rstrip("-") + + +def generate_deployment_name(model_name): + """Generate an Azure deployment name from model name (no dots allowed).""" + return model_name.replace(".", "").replace("-", "") + + +def get_kubeconfig(cmd, resource_group_name, cluster_name): + """Get AKS credentials into a temp kubeconfig file, return path.""" + temp_dir = tempfile.mkdtemp() + kubeconfig_path = os.path.join(temp_dir, "kubeconfig") + + from azure.cli.command_modules.acs.custom import aks_get_credentials + + aks_get_credentials( + cmd, + resource_group_name=resource_group_name, + name=cluster_name, + path=kubeconfig_path, + admin=False, + overwrite_existing=True, + ) + return kubeconfig_path + + +def apply_storage_class(kubeconfig_path): + """Create the azurefile-openclaw StorageClass if it doesn't exist.""" + # Check if it already exists + output = run_kubectl( + ["get", "storageclass", CONST_OPENCLAW_STORAGE_CLASS_NAME, + "-o", "name", "--ignore-not-found"], + kubeconfig_path=kubeconfig_path, + check=False, + ) + if CONST_OPENCLAW_STORAGE_CLASS_NAME in output: + logger.info("StorageClass '%s' already exists, skipping creation.", + CONST_OPENCLAW_STORAGE_CLASS_NAME) + return + + sc_manifest = { + "apiVersion": "storage.k8s.io/v1", + "kind": "StorageClass", + "metadata": {"name": CONST_OPENCLAW_STORAGE_CLASS_NAME}, + "provisioner": "file.csi.azure.com", + "parameters": {"skuName": "Standard_LRS"}, + "mountOptions": [ + "uid=1024", + "gid=1024", + "dir_mode=0755", + "file_mode=0644", + ], + 
"reclaimPolicy": "Retain", + "volumeBindingMode": "Immediate", + } + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(sc_manifest, f) + sc_path = f.name + + try: + run_kubectl(["apply", "-f", sc_path], kubeconfig_path=kubeconfig_path) + logger.info("Created StorageClass '%s'.", CONST_OPENCLAW_STORAGE_CLASS_NAME) + finally: + os.remove(sc_path) + + +def generate_helm_values(endpoint, api_key, deployment_name, model_name, gateway_token=None): + """Generate the openclaw Helm values dict for webui mode with LiteLLM → AI Foundry.""" + if gateway_token is None: + gateway_token = secrets.token_hex(32) + + litellm_master_key = secrets.token_hex(16) + + values = { + "gateway": { + "token": gateway_token, + }, + "litellm": { + "model": model_name, + "configOverride": { + "model_list": [ + { + "model_name": model_name, + "litellm_params": { + "model": f"azure/{deployment_name}", + "api_base": endpoint, + "api_key": "os.environ/AZURE_API_KEY", + "api_version": CONST_OPENCLAW_LITELLM_API_VERSION, + }, + } + ], + "general_settings": { + "master_key": "os.environ/LITELLM_MASTER_KEY", + }, + }, + "extraEnv": [ + {"name": "AZURE_API_KEY", "value": api_key}, + {"name": "LITELLM_MASTER_KEY", "value": litellm_master_key}, + ], + }, + "persistence": { + "storageClass": CONST_OPENCLAW_STORAGE_CLASS_NAME, + }, + "serviceAccount": { + "role": "view", + }, + } + + return values + + +def install_helm_chart(kubeconfig_path, values, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): + """Install or upgrade the openclaw Helm chart.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(values, f) + values_path = f.name + + try: + success, output = run_helm( + [ + "upgrade", "--install", "openclaw", + CONST_OPENCLAW_HELM_CHART_URL, + "-f", values_path, + "--namespace", namespace, + "--create-namespace", + "--wait", + "--timeout", "10m", + ], + kubeconfig_path=kubeconfig_path, + ) + if not success: + raise 
CLIError(f"Helm install failed: {output}") + logger.info("Helm chart installed successfully.") + finally: + os.remove(values_path) + + +def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): + """Patch the openclaw configmap to use openai-completions instead of openai-responses.""" + try: + cm_json = run_kubectl( + ["get", "configmap", "openclaw-config", "-n", namespace, "-o", "json"], + kubeconfig_path=kubeconfig_path, + ) + cm = json.loads(cm_json) + except (CLIError, json.JSONDecodeError) as e: + logger.warning("Could not read openclaw-config configmap, skipping patch: %s", e) + return + + patched = False + if "openclaw.json" in cm.get("data", {}): + original = cm["data"]["openclaw.json"] + updated = original.replace('"api": "openai-responses"', '"api": "openai-completions"') + if original != updated: + cm["data"]["openclaw.json"] = updated + patched = True + + if "codex-config.toml" in cm.get("data", {}): + original = cm["data"]["codex-config.toml"] + updated = original.replace('wire_api = "responses"', 'wire_api = "chat"') + if original != updated: + cm["data"]["codex-config.toml"] = updated + patched = True + + if patched: + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(cm, f) + cm_path = f.name + try: + run_kubectl(["apply", "-f", cm_path, "-n", namespace], + kubeconfig_path=kubeconfig_path) + logger.info("Patched openclaw-config configmap (openai-responses → openai-completions).") + finally: + os.remove(cm_path) + + # Restart to pick up the config change + run_kubectl( + ["rollout", "restart", "statefulset", "openclaw", "-n", namespace], + kubeconfig_path=kubeconfig_path, + check=False, + ) + logger.info("Restarted openclaw statefulset.") + else: + logger.info("No API format patch needed.") + + +def uninstall_helm_chart(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): + """Uninstall the openclaw Helm chart.""" + success, output = run_helm( + ["uninstall", 
"openclaw", "--namespace", namespace], + kubeconfig_path=kubeconfig_path, + ) + if not success: + if "not found" in output.lower(): + logger.warning("Helm release 'openclaw' not found in namespace '%s'.", namespace) + else: + raise CLIError(f"Helm uninstall failed: {output}") + + # Delete namespace + run_kubectl( + ["delete", "namespace", namespace, "--ignore-not-found"], + kubeconfig_path=kubeconfig_path, + check=False, + ) + + +def get_deployment_status(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): + """Get the status of openclaw pods.""" + output = run_kubectl( + ["get", "pods", "-n", namespace, "-o", "json"], + kubeconfig_path=kubeconfig_path, + check=False, + ) + try: + pods = json.loads(output) + result = [] + for pod in pods.get("items", []): + name = pod["metadata"]["name"] + phase = pod.get("status", {}).get("phase", "Unknown") + containers = pod.get("status", {}).get("containerStatuses", []) + ready = all(c.get("ready", False) for c in containers) if containers else False + result.append({ + "name": name, + "phase": phase, + "ready": ready, + }) + return result + except json.JSONDecodeError: + return [] diff --git a/src/aks-preview/azext_aks_preview/openclaw/deploy.py b/src/aks-preview/azext_aks_preview/openclaw/deploy.py new file mode 100644 index 00000000000..88b99f870ee --- /dev/null +++ b/src/aks-preview/azext_aks_preview/openclaw/deploy.py @@ -0,0 +1,377 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +import json + +from knack.log import get_logger +from knack.util import CLIError + +from azext_aks_preview.openclaw._consts import ( + CONST_OPENCLAW_AI_SERVICES_KIND, + CONST_OPENCLAW_AI_SERVICES_SKU, + CONST_OPENCLAW_COGNITIVE_API_VERSION, + CONST_OPENCLAW_DEFAULT_CAPACITY, + CONST_OPENCLAW_DEFAULT_MODEL, + CONST_OPENCLAW_DEFAULT_MODEL_VERSION, + CONST_OPENCLAW_DEFAULT_NAMESPACE, + CONST_OPENCLAW_DEFAULT_SKU, +) +from azext_aks_preview.openclaw._helpers import ( + apply_storage_class, + ensure_prerequisites, + generate_deployment_name, + generate_foundry_name, + generate_helm_values, + get_deployment_status, + get_kubeconfig, + install_helm_chart, + patch_openclaw_api_format, + uninstall_helm_chart, +) + +logger = get_logger(__name__) + + +def _get_resource_group_location(cmd, resource_group_name): + """Get the location of a resource group.""" + from azext_aks_preview._client_factory import get_resource_groups_client + + rg_client = get_resource_groups_client(cmd.cli_ctx) + rg = rg_client.get(resource_group_name) + return rg.location + + +def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, + model_name, model_version, deployment_name, capacity): + """Create a new AIServices account and deploy a model. 
Returns (endpoint, api_key, deployment_name).""" + from azure.cli.core.util import send_raw_request + + subscription_id = cmd.cli_ctx.data["subscription_id"] + + # Create AIServices account + logger.warning("Creating AIServices account '%s' in '%s'...", foundry_name, location) + account_url = ( + f"https://management.azure.com/subscriptions/{subscription_id}" + f"/resourceGroups/{resource_group_name}" + f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" + f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + account_body = { + "kind": CONST_OPENCLAW_AI_SERVICES_KIND, + "sku": {"name": CONST_OPENCLAW_AI_SERVICES_SKU}, + "location": location, + "properties": {}, + } + send_raw_request(cmd.cli_ctx, "PUT", account_url, body=json.dumps(account_body)) + logger.warning("AIServices account '%s' created.", foundry_name) + + # Deploy model + if not deployment_name: + deployment_name = generate_deployment_name(model_name) + + logger.warning("Deploying model '%s' as '%s'...", model_name, deployment_name) + deploy_url = ( + f"https://management.azure.com/subscriptions/{subscription_id}" + f"/resourceGroups/{resource_group_name}" + f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" + f"/deployments/{deployment_name}" + f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + deploy_body = { + "sku": { + "name": CONST_OPENCLAW_DEFAULT_SKU, + "capacity": capacity, + }, + "properties": { + "model": { + "format": "OpenAI", + "name": model_name, + "version": model_version, + } + }, + } + send_raw_request(cmd.cli_ctx, "PUT", deploy_url, body=json.dumps(deploy_body)) + logger.warning("Model deployment '%s' created.", deployment_name) + + # Get API key + keys_url = ( + f"https://management.azure.com/subscriptions/{subscription_id}" + f"/resourceGroups/{resource_group_name}" + f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" + f"/listKeys?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + keys_response = 
send_raw_request(cmd.cli_ctx, "POST", keys_url) + keys = keys_response.json() + api_key = keys.get("key1", "") + + # Build regional endpoint + endpoint = f"https://{location}.api.cognitive.microsoft.com/openai/deployments/{deployment_name}" + + return endpoint, api_key, deployment_name + + +def _resolve_byo_resource_id(cmd, ai_foundry_resource_id, model_name, deployment_name): + """Resolve an existing AIServices account by resource ID. Returns (endpoint, api_key, deployment_name).""" + from azure.cli.core.util import send_raw_request + + # Get account details + account_url = ( + f"https://management.azure.com{ai_foundry_resource_id}" + f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + response = send_raw_request(cmd.cli_ctx, "GET", account_url) + account = response.json() + location = account.get("location", "") + + # Get API key + keys_url = ( + f"https://management.azure.com{ai_foundry_resource_id}" + f"/listKeys?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + keys_response = send_raw_request(cmd.cli_ctx, "POST", keys_url) + keys = keys_response.json() + api_key = keys.get("key1", "") + + # If no deployment name given, try to find one matching the model + if not deployment_name: + deployments_url = ( + f"https://management.azure.com{ai_foundry_resource_id}" + f"/deployments?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + dep_response = send_raw_request(cmd.cli_ctx, "GET", deployments_url) + deployments = dep_response.json().get("value", []) + + for dep in deployments: + dep_model = dep.get("properties", {}).get("model", {}).get("name", "") + if dep_model == model_name: + deployment_name = dep["name"] + break + + if not deployment_name: + available = [d["name"] for d in deployments] + raise CLIError( + f"No deployment found for model '{model_name}' in the account. " + f"Available deployments: {available}. " + f"Use --deployment-name to specify one explicitly." 
+ ) + + endpoint = f"https://{location}.api.cognitive.microsoft.com/openai/deployments/{deployment_name}" + return endpoint, api_key, deployment_name + + +def _resolve_byo_endpoint(ai_foundry_endpoint, ai_foundry_api_key, deployment_name): + """Use user-provided endpoint and API key directly. Returns (endpoint, api_key, deployment_name).""" + if not deployment_name: + raise CLIError( + "--deployment-name is required when using --ai-foundry-endpoint." + ) + # Normalize endpoint: ensure it includes the deployment path if not already + endpoint = ai_foundry_endpoint.rstrip("/") + if "/openai/deployments/" not in endpoint: + endpoint = f"{endpoint}/openai/deployments/{deployment_name}" + return endpoint, ai_foundry_api_key, deployment_name + + +def resolve_or_provision_ai_foundry(cmd, resource_group_name, + ai_foundry_resource_id=None, + ai_foundry_endpoint=None, + ai_foundry_api_key=None, + ai_foundry_location=None, + model_name=None, + model_version=None, + deployment_name=None, + capacity=None): + """Dispatch to the right AI Foundry path. Returns (endpoint, api_key, deployment_name).""" + model_name = model_name or CONST_OPENCLAW_DEFAULT_MODEL + model_version = model_version or CONST_OPENCLAW_DEFAULT_MODEL_VERSION + capacity = capacity or CONST_OPENCLAW_DEFAULT_CAPACITY + + # Validate mutual exclusivity + byo_flags = sum([ + bool(ai_foundry_resource_id), + bool(ai_foundry_endpoint), + ]) + if byo_flags > 1: + raise CLIError( + "Only one of --ai-foundry-resource-id or --ai-foundry-endpoint can be specified." + ) + if ai_foundry_endpoint and not ai_foundry_api_key: + raise CLIError( + "--ai-foundry-api-key is required when using --ai-foundry-endpoint." 
+        )
+
+    if ai_foundry_resource_id:
+        logger.warning("Using existing AI Foundry resource: %s", ai_foundry_resource_id)
+        return _resolve_byo_resource_id(cmd, ai_foundry_resource_id, model_name, deployment_name)
+
+    if ai_foundry_endpoint:
+        logger.warning("Using provided AI Foundry endpoint: %s", ai_foundry_endpoint)
+        return _resolve_byo_endpoint(ai_foundry_endpoint, ai_foundry_api_key, deployment_name)
+
+    # Default: provision new
+    location = ai_foundry_location or _get_resource_group_location(cmd, resource_group_name)
+    foundry_name = generate_foundry_name(resource_group_name)
+    return _provision_ai_foundry(
+        cmd, resource_group_name, location, foundry_name,
+        model_name, model_version, deployment_name, capacity,
+    )
+
+
+def deploy_openclaw(cmd, resource_group_name, cluster_name,
+                    ai_foundry_resource_id=None,
+                    ai_foundry_endpoint=None,
+                    ai_foundry_api_key=None,
+                    ai_foundry_location=None,
+                    model=None,
+                    model_version=None,
+                    deployment_name=None,
+                    capacity=None,
+                    namespace=None):
+    """Full deploy: provision/resolve AI Foundry + install Helm chart."""
+    namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE
+    ensure_prerequisites()
+
+    # Step 1: Resolve or provision AI Foundry
+    endpoint, api_key, resolved_deployment = resolve_or_provision_ai_foundry(
+        cmd, resource_group_name,
+        ai_foundry_resource_id=ai_foundry_resource_id,
+        ai_foundry_endpoint=ai_foundry_endpoint,
+        ai_foundry_api_key=ai_foundry_api_key,
+        ai_foundry_location=ai_foundry_location,
+        model_name=model,
+        model_version=model_version,
+        deployment_name=deployment_name,
+        capacity=capacity,
+    )
+    logger.warning("AI Foundry endpoint: %s", endpoint)
+
+    # Step 2: Get kubeconfig
+    logger.warning("Getting AKS credentials for cluster '%s'...", cluster_name)
+    kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name)
+
+    try:
+        # Step 3: Create StorageClass
+        # Log the StorageClass name (the original logged the namespace constant
+        # by mistake); imported locally since deploy.py's top-level _consts
+        # import does not include it.
+        from azext_aks_preview.openclaw._consts import CONST_OPENCLAW_STORAGE_CLASS_NAME
+        logger.warning("Ensuring StorageClass '%s'...", CONST_OPENCLAW_STORAGE_CLASS_NAME)
+        apply_storage_class(kubeconfig_path)
+
+        # Step 4: Generate values and install chart
+        model_name = model or CONST_OPENCLAW_DEFAULT_MODEL
+        values = generate_helm_values(endpoint, api_key, resolved_deployment, model_name)
+        logger.warning("Installing openclaw Helm chart in namespace '%s'...", namespace)
+        install_helm_chart(kubeconfig_path, values, namespace=namespace)
+
+        # Step 5: Patch API format
+        logger.warning("Patching API format (openai-responses → openai-completions)...")
+        patch_openclaw_api_format(kubeconfig_path, namespace=namespace)
+
+        # Step 6: Show status
+        logger.warning("\nOpenClaw deployed successfully!")
+        logger.warning("Namespace: %s", namespace)
+        logger.warning("AI Foundry endpoint: %s", endpoint)
+        logger.warning("Model: %s (deployment: %s)", model_name, resolved_deployment)
+        logger.warning(
+            "\nTo access the web UI, run:\n"
+            "  kubectl port-forward -n %s svc/openclaw 18789:18789\n"
+            "  Then open http://localhost:18789",
+            namespace,
+        )
+    finally:
+        # Clean up temp kubeconfig. 'os' must be imported here: deploy.py does
+        # not import it at module level, so os.path.dirname would NameError.
+        import os
+        import shutil
+        kubeconfig_dir = os.path.dirname(kubeconfig_path)
+        shutil.rmtree(kubeconfig_dir, ignore_errors=True)
+
+    return {
+        "namespace": namespace,
+        "endpoint": endpoint,
+        "model": model_name,
+        "deployment_name": resolved_deployment,
+    }
+
+
+def delete_openclaw(cmd, resource_group_name, cluster_name,
+                    namespace=None,
+                    delete_ai_resources=False):
+    """Delete openclaw deployment and optionally AI Foundry resources."""
+    namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE
+    ensure_prerequisites()
+
+    kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name)
+
+    try:
+        logger.warning("Uninstalling openclaw from namespace '%s'...", namespace)
+        uninstall_helm_chart(kubeconfig_path, namespace=namespace)
+        logger.warning("OpenClaw uninstalled successfully.")
+
+        if delete_ai_resources:
+            foundry_name = generate_foundry_name(resource_group_name)
+            _delete_ai_foundry(cmd, resource_group_name, foundry_name)
+    finally:
+        # Same cleanup as deploy_openclaw: local 'os' import is required.
+        import os
+        import shutil
+        kubeconfig_dir = os.path.dirname(kubeconfig_path)
+        shutil.rmtree(kubeconfig_dir, ignore_errors=True)
+
+
+def _delete_ai_foundry(cmd, resource_group_name, foundry_name):
+    """Delete the AIServices account."""
+    from azure.cli.core.util import send_raw_request
+
+    subscription_id = cmd.cli_ctx.data["subscription_id"]
+    url = (
+        f"https://management.azure.com/subscriptions/{subscription_id}"
+        f"/resourceGroups/{resource_group_name}"
+        f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}"
+        f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}"
+    )
+    try:
+        send_raw_request(cmd.cli_ctx, "DELETE", url)
+        logger.warning("AIServices account '%s' deleted.", foundry_name)
+    except Exception as e:  # pylint: disable=broad-except
+        logger.warning("Could not delete AIServices account '%s': %s", foundry_name, e)
+
+
+def show_openclaw(cmd, resource_group_name, cluster_name, namespace=None):
+    """Show openclaw deployment status."""
+    namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE
+    ensure_prerequisites()
+
+    kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name)
+
+    try:
+        pods = get_deployment_status(kubeconfig_path, namespace=namespace)
+
+        # Try to get the LiteLLM config to show model info
+        model_info = None
+        try:
+            from azext_aks_preview.openclaw._helpers import run_kubectl
+            cm_json = run_kubectl(
+                ["get", "configmap", "openclaw-litellm-config", "-n", namespace, "-o", "json"],
+                kubeconfig_path=kubeconfig_path,
+                check=False,
+            )
+            import json as json_mod
+            cm = json_mod.loads(cm_json)
+            config_yaml = cm.get("data", {}).get("config.yaml", "")
+            if config_yaml:
+                import yaml
+                config = yaml.safe_load(config_yaml)
+                model_list = config.get("model_list", [])
+                if model_list:
+                    model_info = {
+                        "model_name": model_list[0].get("model_name", ""),
+                        "api_base": model_list[0].get("litellm_params", {}).get("api_base", ""),
+                    }
+        except Exception:  # pylint: disable=broad-except
+            pass
+
+        return {
+            "namespace": namespace,
+            "pods": pods,
+            "model_info": 
model_info, + } + finally: + import shutil + kubeconfig_dir = os.path.dirname(kubeconfig_path) + shutil.rmtree(kubeconfig_dir, ignore_errors=True) diff --git a/src/aks-preview/azext_aks_preview/tests/latest/test_openclaw.py b/src/aks-preview/azext_aks_preview/tests/latest/test_openclaw.py new file mode 100644 index 00000000000..6e10d07d136 --- /dev/null +++ b/src/aks-preview/azext_aks_preview/tests/latest/test_openclaw.py @@ -0,0 +1,144 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +import json +import unittest +from unittest.mock import MagicMock, patch + +from knack.util import CLIError + +from azext_aks_preview.openclaw._consts import ( + CONST_OPENCLAW_DEFAULT_MODEL, + CONST_OPENCLAW_DEFAULT_NAMESPACE, + CONST_OPENCLAW_STORAGE_CLASS_NAME, +) +from azext_aks_preview.openclaw._helpers import ( + generate_deployment_name, + generate_foundry_name, + generate_helm_values, +) +from azext_aks_preview.openclaw.deploy import ( + _resolve_byo_endpoint, + resolve_or_provision_ai_foundry, +) + + +class TestGenerateFoundryName(unittest.TestCase): + def test_basic_name(self): + name = generate_foundry_name("mycluster") + self.assertTrue(name.startswith("openclaw-mycluster-")) + self.assertLessEqual(len(name), 64) + + def test_long_cluster_name(self): + name = generate_foundry_name("a" * 100) + self.assertLessEqual(len(name), 64) + self.assertFalse(name.endswith("-")) + + def test_deterministic(self): + name1 = generate_foundry_name("test") + name2 = generate_foundry_name("test") + self.assertEqual(name1, name2) + + def test_different_clusters_different_names(self): + name1 = generate_foundry_name("cluster1") + name2 = generate_foundry_name("cluster2") + 
class TestGenerateDeploymentName(unittest.TestCase):
    """generate_deployment_name strips characters Azure deployment names disallow."""

    def test_removes_dots_and_hyphens(self):
        result = generate_deployment_name("gpt-5.1-chat")
        self.assertEqual(result, "gpt51chat")

    def test_plain_name(self):
        result = generate_deployment_name("gpt4o")
        self.assertEqual(result, "gpt4o")


class TestGenerateHelmValues(unittest.TestCase):
    """Shape of the Helm values produced for the webui + LiteLLM deployment."""

    def test_basic_values(self):
        helm_values = generate_helm_values(
            endpoint="https://eastus.api.cognitive.microsoft.com/openai/deployments/gpt51chat",
            api_key="test-key",
            deployment_name="gpt51chat",
            model_name="gpt-5.1-chat",
            gateway_token="fixed-token",
        )

        self.assertEqual(helm_values["gateway"]["token"], "fixed-token")
        self.assertEqual(helm_values["litellm"]["model"], "gpt-5.1-chat")
        self.assertEqual(
            helm_values["persistence"]["storageClass"],
            CONST_OPENCLAW_STORAGE_CLASS_NAME,
        )

        models = helm_values["litellm"]["configOverride"]["model_list"]
        self.assertEqual(len(models), 1)
        first = models[0]
        self.assertEqual(first["model_name"], "gpt-5.1-chat")
        self.assertEqual(first["litellm_params"]["model"], "azure/gpt51chat")

        azure_key_entry = next(
            entry for entry in helm_values["litellm"]["extraEnv"]
            if entry["name"] == "AZURE_API_KEY"
        )
        self.assertEqual(azure_key_entry["value"], "test-key")

    def test_generates_token_if_not_provided(self):
        helm_values = generate_helm_values(
            endpoint="https://test.com",
            api_key="key",
            deployment_name="dep",
            model_name="model",
        )
        self.assertGreater(len(helm_values["gateway"]["token"]), 0)


class TestResolveBYOEndpoint(unittest.TestCase):
    """Validation and URL handling in _resolve_byo_endpoint (bring-your-own mode)."""

    def test_requires_deployment_name(self):
        with self.assertRaises(CLIError):
            _resolve_byo_endpoint("https://test.com", "key", None)

    def test_appends_deployment_path(self):
        resolved_endpoint, resolved_key, resolved_dep = _resolve_byo_endpoint(
            "https://eastus.api.cognitive.microsoft.com",
            "mykey",
            "gpt51chat",
        )
        self.assertIn("/openai/deployments/gpt51chat", resolved_endpoint)
        self.assertEqual(resolved_key, "mykey")
        self.assertEqual(resolved_dep, "gpt51chat")

    def test_preserves_existing_deployment_path(self):
        resolved_endpoint, _, _ = _resolve_byo_endpoint(
            "https://eastus.api.cognitive.microsoft.com/openai/deployments/existing",
            "mykey",
            "gpt51chat",
        )
        self.assertIn("/openai/deployments/existing", resolved_endpoint)
        self.assertNotIn("gpt51chat", resolved_endpoint)


class TestResolveOrProvisionMutualExclusivity(unittest.TestCase):
    """Mutual-exclusivity checks on the BYO flags of resolve_or_provision_ai_foundry."""

    def test_rejects_both_byo_flags(self):
        fake_cmd = MagicMock()
        with self.assertRaises(CLIError) as raised:
            resolve_or_provision_ai_foundry(
                fake_cmd,
                resource_group_name="rg",
                ai_foundry_resource_id=(
                    "/subscriptions/sub/resourceGroups/rg/providers"
                    "/Microsoft.CognitiveServices/accounts/acc"
                ),
                ai_foundry_endpoint="https://test.com",
                ai_foundry_api_key="key",
            )
        self.assertIn("Only one of", str(raised.exception))

    def test_endpoint_requires_api_key(self):
        fake_cmd = MagicMock()
        with self.assertRaises(CLIError) as raised:
            resolve_or_provision_ai_foundry(
                fake_cmd,
                resource_group_name="rg",
                ai_foundry_endpoint="https://test.com",
            )
        self.assertIn("--ai-foundry-api-key is required", str(raised.exception))


if __name__ == "__main__":
    unittest.main()
b/src/aks-preview/azext_aks_preview/_help.py @@ -4544,3 +4544,24 @@ - name: Show OpenClaw status text: az aks openclaw show -g MyResourceGroup --cluster-name MyCluster """ + +helps['aks openclaw connect'] = """ + type: command + short-summary: Get OpenClaw gateway token and help with web UI connection. + long-summary: | + Retrieves the OpenClaw gateway authentication token from the K8s secret + and displays instructions for connecting to the web UI via port-forward. + Provides a direct link with the token embedded as a query parameter. + parameters: + - name: --cluster-name + type: string + short-summary: Name of the AKS cluster. + - name: --namespace + type: string + short-summary: Kubernetes namespace where OpenClaw is deployed. Default is openclaw. + examples: + - name: Get connection info and token + text: az aks openclaw connect -g MyResourceGroup --cluster-name MyCluster + - name: Get connection info for custom namespace + text: az aks openclaw connect -g MyResourceGroup --cluster-name MyCluster --namespace custom-ns +""" diff --git a/src/aks-preview/azext_aks_preview/_params.py b/src/aks-preview/azext_aks_preview/_params.py index 0ae178b0334..26998bb6b5d 100644 --- a/src/aks-preview/azext_aks_preview/_params.py +++ b/src/aks-preview/azext_aks_preview/_params.py @@ -3186,7 +3186,7 @@ def load_arguments(self, _): help="Model version to deploy. Default is 2025-11-13. Only used when provisioning new resources.", ) c.argument( - "deployment_name", + "model_deployment_name", options_list=["--deployment-name"], help="Azure model deployment name. 
def aks_openclaw_connect(cmd, client, resource_group_name, cluster_name=None, namespace=None):
    """CLI handler for `az aks openclaw connect`: thin delegation to the openclaw module."""
    # Imported lazily so the extension loads quickly when this command isn't used,
    # matching the pattern of the other aks_openclaw_* handlers in this file.
    from azext_aks_preview.openclaw.deploy import connect_openclaw

    return connect_openclaw(
        cmd,
        resource_group_name,
        cluster_name=cluster_name,
        namespace=namespace,
    )
ed47e3ce9ec..9642fb95eaf 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py +++ b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py @@ -105,10 +105,13 @@ def get_kubeconfig(cmd, resource_group_name, cluster_name): temp_dir = tempfile.mkdtemp() kubeconfig_path = os.path.join(temp_dir, "kubeconfig") - from azure.cli.command_modules.acs.custom import aks_get_credentials + from azext_aks_preview._client_factory import cf_managed_clusters + from azext_aks_preview.custom import aks_get_credentials + client = cf_managed_clusters(cmd.cli_ctx) aks_get_credentials( cmd, + client, resource_group_name=resource_group_name, name=cluster_name, path=kubeconfig_path, @@ -160,34 +163,40 @@ def apply_storage_class(kubeconfig_path): def generate_helm_values(endpoint, api_key, deployment_name, model_name, gateway_token=None): - """Generate the openclaw Helm values dict for webui mode with LiteLLM → AI Foundry.""" + """Generate the openclaw Helm values dict for webui mode with LiteLLM → AI Foundry. + + Returns (values_dict, litellm_master_key). 
+ """ if gateway_token is None: gateway_token = secrets.token_hex(32) litellm_master_key = secrets.token_hex(16) + # configOverride must be a YAML string (the Helm template pipes it through nindent) + config_override = { + "model_list": [ + { + "model_name": model_name, + "litellm_params": { + "model": f"azure/{deployment_name}", + "api_base": endpoint, + "api_key": "os.environ/AZURE_API_KEY", + "api_version": CONST_OPENCLAW_LITELLM_API_VERSION, + }, + } + ], + "general_settings": { + "master_key": "os.environ/LITELLM_MASTER_KEY", + }, + } + values = { - "gateway": { - "token": gateway_token, + "secrets": { + "openclawGatewayToken": gateway_token, }, "litellm": { "model": model_name, - "configOverride": { - "model_list": [ - { - "model_name": model_name, - "litellm_params": { - "model": f"azure/{deployment_name}", - "api_base": endpoint, - "api_key": "os.environ/AZURE_API_KEY", - "api_version": CONST_OPENCLAW_LITELLM_API_VERSION, - }, - } - ], - "general_settings": { - "master_key": "os.environ/LITELLM_MASTER_KEY", - }, - }, + "configOverride": yaml.dump(config_override, default_flow_style=False), "extraEnv": [ {"name": "AZURE_API_KEY", "value": api_key}, {"name": "LITELLM_MASTER_KEY", "value": litellm_master_key}, @@ -201,7 +210,7 @@ def generate_helm_values(endpoint, api_key, deployment_name, model_name, gateway }, } - return values + return values, litellm_master_key def install_helm_chart(kubeconfig_path, values, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): @@ -230,8 +239,10 @@ def install_helm_chart(kubeconfig_path, values, namespace=CONST_OPENCLAW_DEFAULT os.remove(values_path) -def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): - """Patch the openclaw configmap to use openai-completions instead of openai-responses.""" +def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE, + litellm_master_key=None): + """Patch openclaw to use openai-completions API, fix LiteLLM auth, and set 
required gateway config.""" + # --- Step 1: Patch the configmap --- try: cm_json = run_kubectl( ["get", "configmap", "openclaw-config", "-n", namespace, "-o", "json"], @@ -242,22 +253,24 @@ def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_ logger.warning("Could not read openclaw-config configmap, skipping patch: %s", e) return - patched = False + cm_patched = False if "openclaw.json" in cm.get("data", {}): original = cm["data"]["openclaw.json"] updated = original.replace('"api": "openai-responses"', '"api": "openai-completions"') + if litellm_master_key: + updated = updated.replace('"apiKey": "not-needed"', f'"apiKey": "{litellm_master_key}"') if original != updated: cm["data"]["openclaw.json"] = updated - patched = True + cm_patched = True if "codex-config.toml" in cm.get("data", {}): original = cm["data"]["codex-config.toml"] updated = original.replace('wire_api = "responses"', 'wire_api = "chat"') if original != updated: cm["data"]["codex-config.toml"] = updated - patched = True + cm_patched = True - if patched: + if cm_patched: with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(cm, f) cm_path = f.name @@ -268,15 +281,66 @@ def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_ finally: os.remove(cm_path) - # Restart to pick up the config change + # --- Step 2: Patch runtime config files inside the pod --- + pod_name = f"openclaw-0" + _wait_for_pod_ready(kubeconfig_path, namespace, pod_name) + + # Patch API format and apiKey in persistent config files + for cfg_file in [ + "/home/vibe/.openclaw/openclaw.json", + "/home/vibe/.openclaw/agents/main/agent/models.json", + ]: + run_kubectl( + ["exec", pod_name, "-n", namespace, "-c", "openclaw", "--", + "sed", "-i", 's/"openai-responses"/"openai-completions"/g', cfg_file], + kubeconfig_path=kubeconfig_path, + check=False, + ) + if litellm_master_key: + run_kubectl( + ["exec", pod_name, "-n", namespace, "-c", "openclaw", 
"--", + "sed", "-i", f's/"apiKey": "not-needed"/"apiKey": "{litellm_master_key}"/g', cfg_file], + kubeconfig_path=kubeconfig_path, + check=False, + ) + + # Set gateway.mode=local and disable memory search + for config_cmd in [ + ["openclaw", "config", "set", "gateway.mode", "local"], + ["openclaw", "config", "set", "agents.defaults.memorySearch.enabled", "false"], + ]: run_kubectl( - ["rollout", "restart", "statefulset", "openclaw", "-n", namespace], + ["exec", pod_name, "-n", namespace, "-c", "openclaw", "--"] + config_cmd, + kubeconfig_path=kubeconfig_path, + check=False, + ) + + logger.info("Patched runtime config inside pod.") + + # --- Step 3: Restart to apply all changes --- + run_kubectl( + ["rollout", "restart", "statefulset", "openclaw", "-n", namespace], + kubeconfig_path=kubeconfig_path, + check=False, + ) + logger.info("Restarted openclaw statefulset.") + + +def _wait_for_pod_ready(kubeconfig_path, namespace, pod_name, timeout_seconds=120): + """Wait for a specific pod to be ready.""" + import time + deadline = time.time() + timeout_seconds + while time.time() < deadline: + output = run_kubectl( + ["get", "pod", pod_name, "-n", namespace, + "-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}"], kubeconfig_path=kubeconfig_path, check=False, ) - logger.info("Restarted openclaw statefulset.") - else: - logger.info("No API format patch needed.") + if output.strip() == "True": + return + time.sleep(5) + logger.warning("Pod %s did not become ready within %ds, proceeding anyway.", pod_name, timeout_seconds) def uninstall_helm_chart(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_NAMESPACE): diff --git a/src/aks-preview/azext_aks_preview/openclaw/deploy.py b/src/aks-preview/azext_aks_preview/openclaw/deploy.py index 88b99f870ee..3bc4bdb5fdf 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/deploy.py +++ b/src/aks-preview/azext_aks_preview/openclaw/deploy.py @@ -4,6 +4,11 @@ # 
-------------------------------------------------------------------------------------------- import json +import base64 +import subprocess +import os +import shutil +import time from knack.log import get_logger from knack.util import CLIError @@ -47,8 +52,9 @@ def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, model_name, model_version, deployment_name, capacity): """Create a new AIServices account and deploy a model. Returns (endpoint, api_key, deployment_name).""" from azure.cli.core.util import send_raw_request + from azure.cli.core.commands.client_factory import get_subscription_id - subscription_id = cmd.cli_ctx.data["subscription_id"] + subscription_id = get_subscription_id(cmd.cli_ctx) # Create AIServices account logger.warning("Creating AIServices account '%s' in '%s'...", foundry_name, location) @@ -62,9 +68,23 @@ def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, "kind": CONST_OPENCLAW_AI_SERVICES_KIND, "sku": {"name": CONST_OPENCLAW_AI_SERVICES_SKU}, "location": location, - "properties": {}, + "properties": { + "publicNetworkAccess": "Enabled", + }, } send_raw_request(cmd.cli_ctx, "PUT", account_url, body=json.dumps(account_body)) + + # Poll until the account is fully provisioned + for _ in range(60): + resp = send_raw_request(cmd.cli_ctx, "GET", account_url) + state = resp.json().get("properties", {}).get("provisioningState", "") + if state == "Succeeded": + break + if state in ("Failed", "Canceled"): + raise CLIError(f"AIServices account provisioning {state}.") + time.sleep(5) + else: + raise CLIError("Timed out waiting for AIServices account to be provisioned.") logger.warning("AIServices account '%s' created.", foundry_name) # Deploy model @@ -253,18 +273,20 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, try: # Step 3: Create StorageClass - logger.warning("Ensuring StorageClass '%s'...", CONST_OPENCLAW_DEFAULT_NAMESPACE) + from azext_aks_preview.openclaw._consts import 
CONST_OPENCLAW_STORAGE_CLASS_NAME + logger.warning("Ensuring StorageClass '%s'...", CONST_OPENCLAW_STORAGE_CLASS_NAME) apply_storage_class(kubeconfig_path) # Step 4: Generate values and install chart model_name = model or CONST_OPENCLAW_DEFAULT_MODEL - values = generate_helm_values(endpoint, api_key, resolved_deployment, model_name) + values, litellm_master_key = generate_helm_values(endpoint, api_key, resolved_deployment, model_name) logger.warning("Installing openclaw Helm chart in namespace '%s'...", namespace) install_helm_chart(kubeconfig_path, values, namespace=namespace) - # Step 5: Patch API format - logger.warning("Patching API format (openai-responses → openai-completions)...") - patch_openclaw_api_format(kubeconfig_path, namespace=namespace) + # Step 5: Patch API format and auth + logger.warning("Patching API format and LiteLLM auth...") + patch_openclaw_api_format(kubeconfig_path, namespace=namespace, + litellm_master_key=litellm_master_key) # Step 6: Show status logger.warning("\nOpenClaw deployed successfully!") @@ -277,9 +299,15 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, " Then open http://localhost:18789", namespace, ) + + logger.warning("\n💡 Startup Tips:") + logger.warning(" • You're running inside a pod with a service account") + logger.warning(" • Try 'kubectl get pods' inside the pod to verify access") + logger.warning(" • Run 'openclaw configure' to set up integrations (Telegram, Discord, etc.)") + logger.warning(" • Use 'openclaw --help' to explore available commands") + logger.warning(" • Run 'az aks openclaw connect' to get the gateway token and web UI link") finally: # Clean up temp kubeconfig - import shutil kubeconfig_dir = os.path.dirname(kubeconfig_path) shutil.rmtree(kubeconfig_dir, ignore_errors=True) @@ -291,87 +319,79 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, } -def delete_openclaw(cmd, resource_group_name, cluster_name, - namespace=None, - delete_ai_resources=False): - """Delete openclaw 
def connect_openclaw(cmd, resource_group_name, cluster_name=None, namespace=None):
    """Show the OpenClaw gateway token and help the user connect to the web UI.

    When *cluster_name* is given, AKS credentials are fetched into a temporary
    kubeconfig (removed on exit); otherwise the current kubectl context is used.
    Returns a dict with the token, dashboard URL and namespace.
    Raises CLIError when kubectl fails, the secret is missing, or the token is invalid.
    """
    namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE

    # If cluster_name is provided, get kubeconfig
    if cluster_name:
        ensure_prerequisites()
        logger.warning("Getting AKS credentials for cluster '%s'...", cluster_name)
        kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name)
    else:
        # Use current kubectl context if no cluster specified
        kubeconfig_path = None

    try:
        # Retrieve the gateway token from the openclaw secret
        token_cmd = [
            "kubectl", "get", "secret", "openclaw", "-n", namespace,
            "-o", "jsonpath={.data.OPENCLAW_GATEWAY_TOKEN}", "--ignore-not-found"
        ]
        if kubeconfig_path:
            token_cmd.extend(["--kubeconfig", kubeconfig_path])

        result = subprocess.run(token_cmd, capture_output=True, text=True, check=False)

        # Fix: a nonzero kubectl exit (bad context, auth failure, ...) used to be
        # conflated with "secret not found"; surface kubectl's own error instead.
        if result.returncode != 0:
            raise CLIError(
                f"kubectl failed while reading the openclaw secret: {result.stderr.strip()}"
            )

        token_b64 = result.stdout.strip()
        if not token_b64:
            raise CLIError(
                f"Could not find openclaw secret 'openclaw' in namespace '{namespace}'. "
                "Has OpenClaw been deployed? Run 'az aks openclaw deploy' first."
            )

        # Decode the base64 token
        try:
            token = base64.b64decode(token_b64).decode('utf-8')
        except Exception as e:  # pylint: disable=broad-except
            # Fix: chain the cause so the original decode error isn't lost.
            raise CLIError(f"Failed to decode gateway token: {e}") from e

        # Display connection info. NOTE: the token is a secret; it is printed
        # (and embedded in the URL) deliberately for local use — don't share logs.
        logger.warning("\n" + "=" * 70)
        logger.warning("🦞 OpenClaw Gateway Token")
        logger.warning("=" * 70)
        logger.warning("\nGateway Token (use for web UI authentication):")
        logger.warning("  %s", token)

        logger.warning("\n🌐 Web UI Access:")
        dashboard_url = f"http://localhost:18789?token={token}"
        logger.warning("  " + dashboard_url)

        logger.warning("\n🔧 Port Forwarding Setup:")
        logger.warning("  If you haven't set up port-forwarding yet, run:")
        if cluster_name:
            logger.warning("    kubectl port-forward -n %s svc/openclaw 18789:18789", namespace)
        else:
            logger.warning("    kubectl port-forward -n %s statefulset/openclaw 18789:18789", namespace)

        logger.warning("\n📝 Quick Start:")
        logger.warning("  1. Set up port-forwarding (see 🔧 section above)")
        logger.warning("  2. Click the Web UI link above - token is already included")
        logger.warning("  3. If you see a token mismatch error, manually paste the token above")
        logger.warning("  4. (Optional) Run 'openclaw configure' inside the pod to set up integrations")
        logger.warning("\n💡 Note: You're running inside a pod with service account. You should already")
        logger.warning("   have kubectl access. Try 'kubectl get pods' to verify.")

        logger.warning("\n" + "=" * 70 + "\n")
        return {
            "token": token,
            "dashboard_url": dashboard_url,
            "namespace": namespace,
        }

    finally:
        # Clean up temp kubeconfig
        if cluster_name and kubeconfig_path:
            kubeconfig_dir = os.path.dirname(kubeconfig_path)
            shutil.rmtree(kubeconfig_dir, ignore_errors=True)
self.assertIn("azure/gpt51chat", config_override) + self.assertIn("gpt-5.1-chat", config_override) env_vars = values["litellm"]["extraEnv"] api_key_env = next(e for e in env_vars if e["name"] == "AZURE_API_KEY") self.assertEqual(api_key_env["value"], "test-key") def test_generates_token_if_not_provided(self): - values = generate_helm_values( + values, _ = generate_helm_values( endpoint="https://test.com", api_key="key", deployment_name="dep", model_name="model", ) - self.assertTrue(len(values["gateway"]["token"]) > 0) + self.assertTrue(len(values["secrets"]["openclawGatewayToken"]) > 0) class TestResolveBYOEndpoint(unittest.TestCase): From 7d0bd1d7baccbaab33929f6b80334b12430062f7 Mon Sep 17 00:00:00 2001 From: Chudi Huang Date: Thu, 26 Feb 2026 09:26:50 +0000 Subject: [PATCH 3/5] port-forward to pod and update systen instruction --- .../azext_aks_preview/openclaw/_helpers.py | 10 ++ .../azext_aks_preview/openclaw/deploy.py | 166 +++++++++++++++--- 2 files changed, 151 insertions(+), 25 deletions(-) diff --git a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py index 9642fb95eaf..c6afbfda0d0 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py +++ b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py @@ -305,9 +305,19 @@ def patch_openclaw_api_format(kubeconfig_path, namespace=CONST_OPENCLAW_DEFAULT_ ) # Set gateway.mode=local and disable memory search + # Add system instructions to inform the agent about its Kubernetes context + k8s_context_instructions = ( + "You are running inside a Kubernetes pod on an Azure Kubernetes Service (AKS) cluster. " + "You have access to a service account with 'view' role permissions. " + "When you run kubectl commands locally, you are directly accessing the cluster resources. " + "You can list pods, services, deployments, and other resources in your namespace and cluster-wide (for allowed resources). 
" + "Do not assume you need to configure external cluster access - kubectl commands work directly within the cluster." + ) + for config_cmd in [ ["openclaw", "config", "set", "gateway.mode", "local"], ["openclaw", "config", "set", "agents.defaults.memorySearch.enabled", "false"], + ["openclaw", "config", "set", "agents.defaults.systemInstructions", k8s_context_instructions], ]: run_kubectl( ["exec", pod_name, "-n", namespace, "-c", "openclaw", "--"] + config_cmd, diff --git a/src/aks-preview/azext_aks_preview/openclaw/deploy.py b/src/aks-preview/azext_aks_preview/openclaw/deploy.py index 3bc4bdb5fdf..e0b7b07b633 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/deploy.py +++ b/src/aks-preview/azext_aks_preview/openclaw/deploy.py @@ -301,8 +301,10 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, ) logger.warning("\n💡 Startup Tips:") - logger.warning(" • You're running inside a pod with a service account") - logger.warning(" • Try 'kubectl get pods' inside the pod to verify access") + logger.warning(" • The agent is running INSIDE this AKS cluster as a pod") + logger.warning(" • It has a service account with 'view' role for kubectl access") + logger.warning(" • kubectl commands executed by the agent work directly within the cluster") + logger.warning(" • Try asking: 'list all pods in the cluster' or 'show me the deployments'") logger.warning(" • Run 'openclaw configure' to set up integrations (Telegram, Discord, etc.)") logger.warning(" • Use 'openclaw --help' to explore available commands") logger.warning(" • Run 'az aks openclaw connect' to get the gateway token and web UI link") @@ -320,7 +322,7 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, def connect_openclaw(cmd, resource_group_name, cluster_name=None, namespace=None): - """Show OpenClaw gateway token and help user connect to the web UI.""" + """Show OpenClaw gateway token and set up port-forward to the pod.""" namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE # If 
cluster_name is provided, get kubeconfig @@ -356,38 +358,68 @@ def connect_openclaw(cmd, resource_group_name, cluster_name=None, namespace=None except Exception as e: raise CLIError(f"Failed to decode gateway token: {e}") - # Display connection info - logger.warning("\n" + "="*70) - logger.warning("🦞 OpenClaw Gateway Token") - logger.warning("="*70) - logger.warning("\nGateway Token (use for web UI authentication):") - logger.warning(" %s", token) + # Start kubectl port-forward to the pod in the background + logger.warning("Setting up port-forward to openclaw pod...") + forward_port = 18789 + forward_cmd = ["kubectl", "port-forward", "-n", namespace, "pod/openclaw-0", f"{forward_port}:{forward_port}"] + if kubeconfig_path: + forward_cmd.extend(["--kubeconfig", kubeconfig_path]) - logger.warning("\n🌐 Web UI Access:") - dashboard_url = f"http://localhost:18789?token={token}" - logger.warning(" " + dashboard_url) + # Check if port-forward is already running + import socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + port_in_use = sock.connect_ex(('localhost', forward_port)) == 0 + sock.close() - logger.warning("\n🔧 Port Forwarding Setup:") - logger.warning(" If you haven't set up port-forwarding yet, run:") - if cluster_name: - logger.warning(" kubectl port-forward -n %s svc/openclaw 18789:18789", namespace) + if not port_in_use: + # Start port-forward in background (detached) + try: + subprocess.Popen( + forward_cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True + ) + logger.warning("✓ Port-forward started on port %d", forward_port) + time.sleep(2) # Give port-forward time to establish + except Exception as e: + logger.warning("Failed to start port-forward: %s", e) + logger.warning("You may need to run manually: %s", " ".join(forward_cmd)) else: - logger.warning(" kubectl port-forward -n %s statefulset/openclaw 18789:18789", namespace) + logger.warning("✓ Port already in use (port-forward may already be running)") 
- logger.warning("\n📝 Quick Start:") - logger.warning(" 1. Set up port-forwarding (see 🔧 section above)") - logger.warning(" 2. Click the Web UI link above - token is already included") - logger.warning(" 3. If you see a token mismatch error, manually paste the token above") - logger.warning(" 4. (Optional) Run 'openclaw configure' inside the pod to set up integrations") - logger.warning("\n💡 Note: You're running inside a pod with service account. You should already") - logger.warning(" have kubectl access. Try 'kubectl get pods' to verify.") + # Build the dashboard URL with token + dashboard_url = f"http://localhost:{forward_port}?token={token}" - logger.warning("\n" + "="*70 + "\n") + # Display connection info + logger.warning("") + logger.warning("="*70) + logger.warning("🦞 OpenClaw Web UI Access") + logger.warning("="*70) + logger.warning("") + logger.warning("🌐 Access URL (token included):") + logger.warning(" " + dashboard_url) + logger.warning("") + logger.warning("📋 Gateway Token:") + logger.warning(" %s", token) + logger.warning("") + logger.warning("📝 Instructions:") + logger.warning(" 1. Click the URL above to open OpenClaw web UI") + logger.warning(" 2. Token is already included in the URL") + logger.warning(" 3. If connection fails, the token is shown above for manual entry") + logger.warning(" 4. 
To stop port-forward: pkill -f 'kubectl port-forward'") + logger.warning("") + logger.warning("💡 Note: Port-forward connects directly to the openclaw-0 pod") + logger.warning(" This avoids CNI service routing issues.") + logger.warning("") + logger.warning("="*70) + logger.warning("") return { "token": token, "dashboard_url": dashboard_url, "namespace": namespace, + "forward_port": forward_port, } finally: @@ -395,3 +427,87 @@ def connect_openclaw(cmd, resource_group_name, cluster_name=None, namespace=None if cluster_name and kubeconfig_path: kubeconfig_dir = os.path.dirname(kubeconfig_path) shutil.rmtree(kubeconfig_dir, ignore_errors=True) + + +def delete_openclaw(cmd, resource_group_name, cluster_name, + namespace=None, + delete_ai_resources=False): + """Delete openclaw deployment and optionally AI Foundry resources.""" + namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE + ensure_prerequisites() + + kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name) + + try: + logger.warning("Uninstalling openclaw from namespace '%s'...", namespace) + uninstall_helm_chart(kubeconfig_path, namespace=namespace) + logger.warning("OpenClaw uninstalled successfully.") + + if delete_ai_resources: + foundry_name = generate_foundry_name(resource_group_name) + _delete_ai_foundry(cmd, resource_group_name, foundry_name) + finally: + kubeconfig_dir = os.path.dirname(kubeconfig_path) + shutil.rmtree(kubeconfig_dir, ignore_errors=True) + + +def _delete_ai_foundry(cmd, resource_group_name, foundry_name): + """Delete the AIServices account.""" + from azure.cli.core.util import send_raw_request + from azure.cli.core.commands.client_factory import get_subscription_id + + subscription_id = get_subscription_id(cmd.cli_ctx) + url = ( + f"https://management.azure.com/subscriptions/{subscription_id}" + f"/resourceGroups/{resource_group_name}" + f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" + f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" + ) + 
try: + send_raw_request(cmd.cli_ctx, "DELETE", url) + logger.warning("AIServices account '%s' deleted.", foundry_name) + except Exception as e: # pylint: disable=broad-except + logger.warning("Could not delete AIServices account '%s': %s", foundry_name, e) + + +def show_openclaw(cmd, resource_group_name, cluster_name, namespace=None): + """Show openclaw deployment status.""" + namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE + ensure_prerequisites() + + kubeconfig_path = get_kubeconfig(cmd, resource_group_name, cluster_name) + + try: + pods = get_deployment_status(kubeconfig_path, namespace=namespace) + + # Try to get the LiteLLM config to show model info + model_info = None + try: + from azext_aks_preview.openclaw._helpers import run_kubectl + cm_json = run_kubectl( + ["get", "configmap", "openclaw-litellm-config", "-n", namespace, "-o", "json"], + kubeconfig_path=kubeconfig_path, + check=False, + ) + cm = json.loads(cm_json) + config_yaml = cm.get("data", {}).get("config.yaml", "") + if config_yaml: + import yaml + config = yaml.safe_load(config_yaml) + model_list = config.get("model_list", []) + if model_list: + model_info = { + "model_name": model_list[0].get("model_name", ""), + "api_base": model_list[0].get("litellm_params", {}).get("api_base", ""), + } + except Exception: # pylint: disable=broad-except + pass + + return { + "namespace": namespace, + "pods": pods, + "model_info": model_info, + } + finally: + kubeconfig_dir = os.path.dirname(kubeconfig_path) + shutil.rmtree(kubeconfig_dir, ignore_errors=True) From a036d3632edee70b81162a7d3f9b9681a82601ba Mon Sep 17 00:00:00 2001 From: Chudi Huang Date: Thu, 26 Feb 2026 10:05:36 +0000 Subject: [PATCH 4/5] put foundry in node resource group and delete if it is not BYO --- src/aks-preview/azext_aks_preview/custom.py | 4 +- .../azext_aks_preview/openclaw/_helpers.py | 8 +++ .../azext_aks_preview/openclaw/deploy.py | 52 ++++++++++++------- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git 
a/src/aks-preview/azext_aks_preview/custom.py b/src/aks-preview/azext_aks_preview/custom.py index 404bf395fa2..4ec9e60d742 100644 --- a/src/aks-preview/azext_aks_preview/custom.py +++ b/src/aks-preview/azext_aks_preview/custom.py @@ -5404,13 +5404,11 @@ def aks_openclaw_deploy(cmd, client, resource_group_name, cluster_name, def aks_openclaw_delete(cmd, client, resource_group_name, cluster_name, - namespace=None, - delete_ai_resources=False): + namespace=None): from azext_aks_preview.openclaw.deploy import delete_openclaw return delete_openclaw( cmd, resource_group_name, cluster_name, namespace=namespace, - delete_ai_resources=delete_ai_resources, ) diff --git a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py index c6afbfda0d0..61e214e7a1e 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py +++ b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py @@ -100,6 +100,14 @@ def generate_deployment_name(model_name): return model_name.replace(".", "").replace("-", "") +def get_aks_cluster(cmd, resource_group_name, cluster_name): + """Get AKS cluster object.""" + from azext_aks_preview._client_factory import cf_managed_clusters + + client = cf_managed_clusters(cmd.cli_ctx) + return client.get(resource_group_name, cluster_name) + + def get_kubeconfig(cmd, resource_group_name, cluster_name): """Get AKS credentials into a temp kubeconfig file, return path.""" temp_dir = tempfile.mkdtemp() diff --git a/src/aks-preview/azext_aks_preview/openclaw/deploy.py b/src/aks-preview/azext_aks_preview/openclaw/deploy.py index e0b7b07b633..c1d4e91261d 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/deploy.py +++ b/src/aks-preview/azext_aks_preview/openclaw/deploy.py @@ -48,7 +48,7 @@ def _get_resource_group_location(cmd, resource_group_name): return rg.location -def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, +def _provision_ai_foundry(cmd, node_resource_group, location, 
foundry_name, model_name, model_version, deployment_name, capacity): """Create a new AIServices account and deploy a model. Returns (endpoint, api_key, deployment_name).""" from azure.cli.core.util import send_raw_request @@ -56,11 +56,11 @@ def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, subscription_id = get_subscription_id(cmd.cli_ctx) - # Create AIServices account - logger.warning("Creating AIServices account '%s' in '%s'...", foundry_name, location) + # Create AIServices account in nodeResourceGroup + logger.warning("Creating AIServices account '%s' in node resource group '%s'...", foundry_name, node_resource_group) account_url = ( f"https://management.azure.com/subscriptions/{subscription_id}" - f"/resourceGroups/{resource_group_name}" + f"/resourceGroups/{node_resource_group}" f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" ) @@ -94,7 +94,7 @@ def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, logger.warning("Deploying model '%s' as '%s'...", model_name, deployment_name) deploy_url = ( f"https://management.azure.com/subscriptions/{subscription_id}" - f"/resourceGroups/{resource_group_name}" + f"/resourceGroups/{node_resource_group}" f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" f"/deployments/{deployment_name}" f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" @@ -118,7 +118,7 @@ def _provision_ai_foundry(cmd, resource_group_name, location, foundry_name, # Get API key keys_url = ( f"https://management.azure.com/subscriptions/{subscription_id}" - f"/resourceGroups/{resource_group_name}" + f"/resourceGroups/{node_resource_group}" f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" f"/listKeys?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" ) @@ -194,7 +194,7 @@ def _resolve_byo_endpoint(ai_foundry_endpoint, ai_foundry_api_key, deployment_na return endpoint, ai_foundry_api_key, deployment_name 
-def resolve_or_provision_ai_foundry(cmd, resource_group_name, +def resolve_or_provision_ai_foundry(cmd, resource_group_name, cluster_name=None, ai_foundry_resource_id=None, ai_foundry_endpoint=None, ai_foundry_api_key=None, @@ -204,6 +204,8 @@ def resolve_or_provision_ai_foundry(cmd, resource_group_name, deployment_name=None, capacity=None): """Dispatch to the right AI Foundry path. Returns (endpoint, api_key, deployment_name).""" + from azext_aks_preview.openclaw._helpers import get_aks_cluster + model_name = model_name or CONST_OPENCLAW_DEFAULT_MODEL model_version = model_version or CONST_OPENCLAW_DEFAULT_MODEL_VERSION capacity = capacity or CONST_OPENCLAW_DEFAULT_CAPACITY @@ -230,11 +232,19 @@ def resolve_or_provision_ai_foundry(cmd, resource_group_name, logger.warning("Using provided AI Foundry endpoint: %s", ai_foundry_endpoint) return _resolve_byo_endpoint(ai_foundry_endpoint, ai_foundry_api_key, deployment_name) - # Default: provision new + # Default: provision new in the AKS cluster's nodeResourceGroup location = ai_foundry_location or _get_resource_group_location(cmd, resource_group_name) foundry_name = generate_foundry_name(resource_group_name) + + # Get the cluster's nodeResourceGroup + if cluster_name: + cluster = get_aks_cluster(cmd, resource_group_name, cluster_name) + node_resource_group = cluster.node_resource_group + else: + raise CLIError("cluster_name is required for provisioning AI Foundry") + return _provision_ai_foundry( - cmd, resource_group_name, location, foundry_name, + cmd, node_resource_group, location, foundry_name, model_name, model_version, deployment_name, capacity, ) @@ -256,6 +266,7 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, # Step 1: Resolve or provision AI Foundry endpoint, api_key, resolved_deployment = resolve_or_provision_ai_foundry( cmd, resource_group_name, + cluster_name=cluster_name, ai_foundry_resource_id=ai_foundry_resource_id, ai_foundry_endpoint=ai_foundry_endpoint, 
ai_foundry_api_key=ai_foundry_api_key, @@ -430,9 +441,10 @@ def connect_openclaw(cmd, resource_group_name, cluster_name=None, namespace=None def delete_openclaw(cmd, resource_group_name, cluster_name, - namespace=None, - delete_ai_resources=False): - """Delete openclaw deployment and optionally AI Foundry resources.""" + namespace=None): + """Delete openclaw deployment and AI Foundry resources.""" + from azext_aks_preview.openclaw._helpers import get_aks_cluster + namespace = namespace or CONST_OPENCLAW_DEFAULT_NAMESPACE ensure_prerequisites() @@ -443,29 +455,31 @@ def delete_openclaw(cmd, resource_group_name, cluster_name, uninstall_helm_chart(kubeconfig_path, namespace=namespace) logger.warning("OpenClaw uninstalled successfully.") - if delete_ai_resources: - foundry_name = generate_foundry_name(resource_group_name) - _delete_ai_foundry(cmd, resource_group_name, foundry_name) + # Always delete AI Foundry resources from nodeResourceGroup + cluster = get_aks_cluster(cmd, resource_group_name, cluster_name) + node_resource_group = cluster.node_resource_group + foundry_name = generate_foundry_name(resource_group_name) + _delete_ai_foundry(cmd, node_resource_group, foundry_name) finally: kubeconfig_dir = os.path.dirname(kubeconfig_path) shutil.rmtree(kubeconfig_dir, ignore_errors=True) -def _delete_ai_foundry(cmd, resource_group_name, foundry_name): - """Delete the AIServices account.""" +def _delete_ai_foundry(cmd, node_resource_group, foundry_name): + """Delete the AIServices account from the node resource group.""" from azure.cli.core.util import send_raw_request from azure.cli.core.commands.client_factory import get_subscription_id subscription_id = get_subscription_id(cmd.cli_ctx) url = ( f"https://management.azure.com/subscriptions/{subscription_id}" - f"/resourceGroups/{resource_group_name}" + f"/resourceGroups/{node_resource_group}" f"/providers/Microsoft.CognitiveServices/accounts/{foundry_name}" f"?api-version={CONST_OPENCLAW_COGNITIVE_API_VERSION}" ) try: 
send_raw_request(cmd.cli_ctx, "DELETE", url) - logger.warning("AIServices account '%s' deleted.", foundry_name) + logger.warning("AIServices account '%s' deleted from '%s'.", foundry_name, node_resource_group) except Exception as e: # pylint: disable=broad-except logger.warning("Could not delete AIServices account '%s': %s", foundry_name, e) From 89d5f4598246ff5257ca40181248f694f6b1cc84 Mon Sep 17 00:00:00 2001 From: Chudi Huang Date: Thu, 26 Feb 2026 11:56:24 +0000 Subject: [PATCH 5/5] 400 no db issue --- .../azext_aks_preview/openclaw/_helpers.py | 1 + .../azext_aks_preview/openclaw/deploy.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py index 61e214e7a1e..5f9c49f6602 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/_helpers.py +++ b/src/aks-preview/azext_aks_preview/openclaw/_helpers.py @@ -239,6 +239,7 @@ def install_helm_chart(kubeconfig_path, values, namespace=CONST_OPENCLAW_DEFAULT "--timeout", "10m", ], kubeconfig_path=kubeconfig_path, + timeout=660, # must exceed Helm's --timeout (10m = 600s) ) if not success: raise CLIError(f"Helm install failed: {output}") diff --git a/src/aks-preview/azext_aks_preview/openclaw/deploy.py b/src/aks-preview/azext_aks_preview/openclaw/deploy.py index c1d4e91261d..43b9e153f0d 100644 --- a/src/aks-preview/azext_aks_preview/openclaw/deploy.py +++ b/src/aks-preview/azext_aks_preview/openclaw/deploy.py @@ -292,13 +292,28 @@ def deploy_openclaw(cmd, resource_group_name, cluster_name, model_name = model or CONST_OPENCLAW_DEFAULT_MODEL values, litellm_master_key = generate_helm_values(endpoint, api_key, resolved_deployment, model_name) logger.warning("Installing openclaw Helm chart in namespace '%s'...", namespace) - install_helm_chart(kubeconfig_path, values, namespace=namespace) + helm_failed = False + try: + install_helm_chart(kubeconfig_path, values, namespace=namespace) + 
except CLIError as e: + # Helm --wait may time out (e.g. slow image pull) even though the + # release was actually installed. Continue to the patching step so + # the deployment is not left in a half-configured state. + logger.warning("Helm install reported an error (will still attempt patching): %s", e) + helm_failed = True # Step 5: Patch API format and auth logger.warning("Patching API format and LiteLLM auth...") patch_openclaw_api_format(kubeconfig_path, namespace=namespace, litellm_master_key=litellm_master_key) + if helm_failed: + logger.warning( + "\n⚠️ Helm --wait timed out but the chart was installed. " + "Patching was applied. Check pod status with:\n" + " kubectl get pods -n %s", namespace, + ) + # Step 6: Show status logger.warning("\nOpenClaw deployed successfully!") logger.warning("Namespace: %s", namespace)