From c82b7042ee963561aa368478bf7f2421eeb9002d Mon Sep 17 00:00:00 2001 From: kbhos Date: Tue, 16 Jun 2026 12:22:23 +0530 Subject: [PATCH 1/4] feat(): additional log info in k0s script output --- tools/cluster_setup/k0s_cluster_with_stack.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/cluster_setup/k0s_cluster_with_stack.sh b/tools/cluster_setup/k0s_cluster_with_stack.sh index 4f1bdae7..2e267334 100755 --- a/tools/cluster_setup/k0s_cluster_with_stack.sh +++ b/tools/cluster_setup/k0s_cluster_with_stack.sh @@ -5045,6 +5045,36 @@ show_platform_access_info() { log " kubectl get aiservice -n ${AI_NS}" log "" + # LoadBalancer endpoint (only shown when svc type is LoadBalancer) + local _svc_type + _svc_type=$(yq eval '.aiPlatform.serviceTemplate.type // ""' "${CONFIG_FILE}" 2>/dev/null || echo "") + if [[ "${_svc_type}" == "LoadBalancer" ]]; then + log "🌐 AI Platform LoadBalancer URL:" + local _lb_ip="" _retries=0 _lb_svc_name="" + # Find the saia nginx service name (ends with -saia-service and is type LoadBalancer) + _lb_svc_name=$(kubectl get svc -n "${AI_NS}" \ + -o jsonpath='{range .items[?(@.spec.type=="LoadBalancer")]}{.metadata.name}{"\n"}{end}' 2>/dev/null \ + | grep -m1 "saia-service" || echo "") + while [[ -z "${_lb_ip}" || "${_lb_ip}" == "" ]] && (( _retries < 12 )); do + if [[ -n "${_lb_svc_name}" ]]; then + _lb_ip=$(kubectl get svc "${_lb_svc_name}" -n "${AI_NS}" \ + -o jsonpath='{.status.loadBalancer.ingress[0].ip}{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "") + fi + [[ -z "${_lb_ip}" || "${_lb_ip}" == "" ]] && { sleep 5; _retries=$(( _retries + 1 )); } + done + if [[ -n "${_lb_ip}" && "${_lb_ip}" != "" ]]; then + local _lb_port + _lb_port=$(kubectl get svc "${_lb_svc_name}" -n "${AI_NS}" \ + -o jsonpath='{.spec.ports[0].port}' 2>/dev/null || echo "8080") + log " ✅ Endpoint: http://${_lb_ip}:${_lb_port}" + log " 👉 Use this URL in the UI configuration." + else + log " ⚠️ LoadBalancer IP still pending. 
Run to check:" + log " kubectl get svc -n ${AI_NS} -o wide" + fi + log "" + fi + # Splunk information log "📊 Splunk Enterprise:" log " Check Status:" From 2b4909dec6d9d10622fbec8a1985d8357f4d9756 Mon Sep 17 00:00:00 2001 From: kbhos Date: Tue, 16 Jun 2026 15:57:36 +0530 Subject: [PATCH 2/4] feat(): k0s cluster config --- tools/cluster_setup/k0s-cluster-config.yaml | 49 ++++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/tools/cluster_setup/k0s-cluster-config.yaml b/tools/cluster_setup/k0s-cluster-config.yaml index f222386d..a912171c 100644 --- a/tools/cluster_setup/k0s-cluster-config.yaml +++ b/tools/cluster_setup/k0s-cluster-config.yaml @@ -169,7 +169,7 @@ aiPlatform: workerGroupConfig: imageRegistry: "" - # ---------- SAIA public exposure (NodePort-free) ---------- + # ---------- SAIA public exposure ---------- # The SAIA "public" Service (nginx reverse proxy in front of v1 + v2 API # pods) defaults to ClusterIP — only reachable from inside the cluster. Two # call patterns hit it: @@ -180,25 +180,29 @@ aiPlatform: # feedback, admin endpoints). Without external exposure the v2 chat UI # breaks for users; v1 one-shot SPL still works. # - # The supported on-prem path is `type: LoadBalancer` backed by MetalLB - # (allocates a routable VIP from a pool you provide; ARP / BGP-announces it - # on your network). NodePort is intentionally avoided so we never open - # 30000-32767 on every worker node. + # Choose the exposure type based on WHERE k0s runs: # - # The installer: - # * Installs MetalLB (set metallb.install: true below). - # * Applies an IPAddressPool + L2Advertisement (or BGPAdvertisement) from - # the metallb config below. - # * Renders the SAIA Service as type: LoadBalancer; MetalLB allocates a - # VIP from the pool and announces it. - # * Patches the Service with `allocateLoadBalancerNodePorts: false` and - # `externalTrafficPolicy: Local` so kube-proxy does not open a NodePort. 
+ # NodePort — works everywhere (bare-metal, on-prem, AND cloud VMs like + # EC2). Opens a fixed port (default 30080) on every worker + # node; reach SAIA at http://:. On a + # cloud VM, point an external load balancer (AWS NLB, GCP LB) + # at the worker nodes on this port to get a public DNS name. + # This is the recommended default — it has no L2 network + # assumptions and is the only option that works on a cloud VPC. + # + # LoadBalancer — only for TRUE bare-metal / on-prem k0s on a real L2 LAN, + # backed by MetalLB (see the metallb block below). MetalLB + # allocates a routable VIP and announces it via ARP (layer2) + # or BGP. This does NOT work on AWS/GCP/Azure VPCs: cloud + # fabrics drop the gratuitous ARP MetalLB relies on and only + # route to real ENI addresses, so the VIP is unreachable. On a + # cloud VM use NodePort + a cloud load balancer instead. # # To DISABLE external exposure (ClusterIP only), comment out the whole # serviceTemplate block AND set metallb.install: false. serviceTemplate: - type: LoadBalancer # ClusterIP | LoadBalancer (NodePort is not used on k0s) - # No nodePort field — explicitly NodePort-free. + type: NodePort # ClusterIP | NodePort | LoadBalancer + nodePort: 30080 # Fixed NodePort (30000-32767). Required only for NodePort. features: - name: "saia" @@ -217,14 +221,17 @@ aiPlatform: effect: "NoSchedule" # ---------- MetalLB (k0s LoadBalancer provider) ---------- -# Required when aiPlatform.serviceTemplate.type=LoadBalancer on a bare-metal -# / k0s cluster. Pinned chart version for supply-chain reproducibility -# (codeguard-0-supply-chain-security). +# ONLY for true bare-metal / on-prem k0s on a real L2 LAN, and ONLY when +# aiPlatform.serviceTemplate.type=LoadBalancer. Does NOT work on cloud VPCs +# (AWS/GCP/Azure) — see the SAIA public exposure notes above. Pinned chart +# version for supply-chain reproducibility (codeguard-0-supply-chain-security). 
# -# If serviceTemplate.type=NodePort, the installer skips MetalLB entirely even -# when metallb.install=true (NodePort does not use a LoadBalancer provider). +# If serviceTemplate.type=NodePort (the default), the installer skips MetalLB +# entirely even when metallb.install=true (NodePort does not use a +# LoadBalancer provider). Leave install: false unless you switch to +# LoadBalancer on bare-metal. metallb: - install: true # set false if MetalLB is already installed or not needed + install: false # true only for bare-metal LoadBalancer; ignored under NodePort chartVersion: "0.14.8" # metallb/metallb Helm chart (matches MetalLB v0.14.8) namespace: "metallb-system" From 2100a0707e571836ec355a24cf6c679ab5cfb68e Mon Sep 17 00:00:00 2001 From: kbhos Date: Tue, 16 Jun 2026 16:00:34 +0530 Subject: [PATCH 3/4] remove unnecessary files --- tools/cluster_setup/k0s_cluster_with_stack.sh | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/tools/cluster_setup/k0s_cluster_with_stack.sh b/tools/cluster_setup/k0s_cluster_with_stack.sh index 2e267334..4f1bdae7 100755 --- a/tools/cluster_setup/k0s_cluster_with_stack.sh +++ b/tools/cluster_setup/k0s_cluster_with_stack.sh @@ -5045,36 +5045,6 @@ show_platform_access_info() { log " kubectl get aiservice -n ${AI_NS}" log "" - # LoadBalancer endpoint (only shown when svc type is LoadBalancer) - local _svc_type - _svc_type=$(yq eval '.aiPlatform.serviceTemplate.type // ""' "${CONFIG_FILE}" 2>/dev/null || echo "") - if [[ "${_svc_type}" == "LoadBalancer" ]]; then - log "🌐 AI Platform LoadBalancer URL:" - local _lb_ip="" _retries=0 _lb_svc_name="" - # Find the saia nginx service name (ends with -saia-service and is type LoadBalancer) - _lb_svc_name=$(kubectl get svc -n "${AI_NS}" \ - -o jsonpath='{range .items[?(@.spec.type=="LoadBalancer")]}{.metadata.name}{"\n"}{end}' 2>/dev/null \ - | grep -m1 "saia-service" || echo "") - while [[ -z "${_lb_ip}" || "${_lb_ip}" == "" ]] && (( _retries < 12 )); do - if [[ -n 
"${_lb_svc_name}" ]]; then - _lb_ip=$(kubectl get svc "${_lb_svc_name}" -n "${AI_NS}" \ - -o jsonpath='{.status.loadBalancer.ingress[0].ip}{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "") - fi - [[ -z "${_lb_ip}" || "${_lb_ip}" == "" ]] && { sleep 5; _retries=$(( _retries + 1 )); } - done - if [[ -n "${_lb_ip}" && "${_lb_ip}" != "" ]]; then - local _lb_port - _lb_port=$(kubectl get svc "${_lb_svc_name}" -n "${AI_NS}" \ - -o jsonpath='{.spec.ports[0].port}' 2>/dev/null || echo "8080") - log " ✅ Endpoint: http://${_lb_ip}:${_lb_port}" - log " 👉 Use this URL in the UI configuration." - else - log " ⚠️ LoadBalancer IP still pending. Run to check:" - log " kubectl get svc -n ${AI_NS} -o wide" - fi - log "" - fi - # Splunk information log "📊 Splunk Enterprise:" log " Check Status:" From be94b4294fa69753572da132fc7fee5bb55048fb Mon Sep 17 00:00:00 2001 From: kbhos Date: Tue, 16 Jun 2026 16:19:22 +0530 Subject: [PATCH 4/4] =?UTF-8?q?docs(k0s):=20align=20SAIA=20exposure=20guid?= =?UTF-8?q?ance=20=E2=80=94=20NodePort=20as=20portable=20default?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve contradictory messaging between the config template and the installer: the config comment block now presents NodePort as the default/portable option (works on cloud VPCs where MetalLB L2 cannot) and frames LoadBalancer+MetalLB as the bare-metal-only alternative. Replace the stale 'NodePort exposure is discouraged' WARNING in the installer with an informational line that prints the access URL. 
Co-Authored-By: Claude --- tools/cluster_setup/k0s-cluster-config.yaml | 16 +++++++++------- tools/cluster_setup/k0s_cluster_with_stack.sh | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/cluster_setup/k0s-cluster-config.yaml b/tools/cluster_setup/k0s-cluster-config.yaml index a912171c..113f1dc1 100644 --- a/tools/cluster_setup/k0s-cluster-config.yaml +++ b/tools/cluster_setup/k0s-cluster-config.yaml @@ -182,13 +182,15 @@ aiPlatform: # # Choose the exposure type based on WHERE k0s runs: # - # NodePort — works everywhere (bare-metal, on-prem, AND cloud VMs like - # EC2). Opens a fixed port (default 30080) on every worker - # node; reach SAIA at http://:. On a - # cloud VM, point an external load balancer (AWS NLB, GCP LB) - # at the worker nodes on this port to get a public DNS name. - # This is the recommended default — it has no L2 network - # assumptions and is the only option that works on a cloud VPC. + # NodePort — the default/portable option; works everywhere (bare-metal, + # on-prem, AND cloud VMs like EC2). Opens a fixed port + # (default 30080) on every worker node; reach SAIA at + # http://<node-ip>:<nodePort>. On a cloud VM, point an + # external load balancer (AWS NLB, GCP LB) at the worker nodes + # on this port to get a public DNS name. Use this whenever + # MetalLB/LoadBalancer is unavailable (i.e. any cloud VPC), + # since it has no L2 network assumptions. On bare-metal L2 you + # may instead prefer LoadBalancer below for a stable VIP. # # LoadBalancer — only for TRUE bare-metal / on-prem k0s on a real L2 LAN, # backed by MetalLB (see the metallb block below).
MetalLB diff --git a/tools/cluster_setup/k0s_cluster_with_stack.sh b/tools/cluster_setup/k0s_cluster_with_stack.sh index 4f1bdae7..126011e6 100755 --- a/tools/cluster_setup/k0s_cluster_with_stack.sh +++ b/tools/cluster_setup/k0s_cluster_with_stack.sh @@ -3879,7 +3879,7 @@ patch_k0s_saia_public_service_workaround() { if saia_service_template_enabled_k0s; then log "Patching AIService/${aiservice_name} with SAIA public exposure settings (type=${svc_type})..." if [[ "${svc_type}" == "NodePort" && -n "${svc_node_port}" && "${svc_node_port}" != "null" ]]; then - log "WARNING: NodePort exposure is discouraged on k0s. Prefer type=LoadBalancer with metallb.install=true (MetalLB install is skipped automatically when type=NodePort)." >&2 + log "SAIA exposed via NodePort ${svc_node_port} — reach it at http://<node-ip>:${svc_node_port} (front with a cloud LB on cloud VMs). For bare-metal L2 LANs you may alternatively use type=LoadBalancer with metallb.install=true; MetalLB is skipped automatically under NodePort." >&2 kubectl -n "${AI_NS}" patch aiservice "${aiservice_name}" --type merge -p "{ \"spec\": { \"serviceTemplate\": {