From 02554b37e41b40ef8941aff1b2cf511a466dc5b3 Mon Sep 17 00:00:00 2001 From: JooYoung Park Date: Thu, 23 Apr 2026 19:56:41 +0800 Subject: [PATCH 1/2] add go multi-node deployment script Signed-off-by: JooYoung Park --- go.work | 1 + scripts/setup/cluster/create_mulitnode.go | 115 +++++ scripts/setup/cluster/create_multinode.sh | 287 +++++++++++ scripts/setup/cluster/setup_knative.go | 452 ++++++++++++++++++ scripts/setup/cluster/setup_prometheus.go | 141 ++++++ scripts/setup/configs/config.go | 168 +++++++ scripts/setup/configs/node_setup.json | 29 ++ .../configs/prometheus/kubeadm_init.yaml | 46 ++ .../setup/configs/prometheus/prom_config.json | 7 + .../configs/prometheus/prom_pushgateway.yaml | 26 + .../setup/configs/prometheus/prom_values.yaml | 198 ++++++++ scripts/setup/configs/setup.json | 11 + scripts/setup/go.mod | 7 + scripts/setup/go.sum | 4 + scripts/setup/setup.go | 59 +++ scripts/setup/utils/label.go | 44 ++ scripts/setup/utils/utils.go | 132 +++++ scripts/setup/utils/utils_test.go | 79 +++ 18 files changed, 1806 insertions(+) create mode 100644 scripts/setup/cluster/create_mulitnode.go create mode 100755 scripts/setup/cluster/create_multinode.sh create mode 100644 scripts/setup/cluster/setup_knative.go create mode 100644 scripts/setup/cluster/setup_prometheus.go create mode 100644 scripts/setup/configs/config.go create mode 100644 scripts/setup/configs/node_setup.json create mode 100644 scripts/setup/configs/prometheus/kubeadm_init.yaml create mode 100644 scripts/setup/configs/prometheus/prom_config.json create mode 100644 scripts/setup/configs/prometheus/prom_pushgateway.yaml create mode 100644 scripts/setup/configs/prometheus/prom_values.yaml create mode 100644 scripts/setup/configs/setup.json create mode 100644 scripts/setup/go.mod create mode 100644 scripts/setup/go.sum create mode 100644 scripts/setup/setup.go create mode 100644 scripts/setup/utils/label.go create mode 100644 scripts/setup/utils/utils.go create mode 100644 scripts/setup/utils/utils_test.go diff --git a/go.work b/go.work index e8f20deb2..55af4ad97 100644 --- a/go.work +++ b/go.work @@ -2,5 +2,6 @@ go 1.26.2 use ( . 
+ ./scripts/setup ./tools/generateTimeline ) diff --git a/scripts/setup/cluster/create_mulitnode.go b/scripts/setup/cluster/create_mulitnode.go new file mode 100644 index 000000000..027802039 --- /dev/null +++ b/scripts/setup/cluster/create_mulitnode.go @@ -0,0 +1,115 @@ +package cluster + +import ( + "fmt" + "time" + + "github.com/vhive-serverless/loader/scripts/setup/configs" + loaderUtils "github.com/vhive-serverless/loader/scripts/setup/utils" + "github.com/vhive-serverless/vHive/scripts/utils" +) + +func CreateMultiNodeSetup(configDir string, configName string) error { + // Load Configurations + cfg, err := configs.CommonConfigSetup(configDir, configName) + if err != nil { + utils.FatalPrintf("Failed to load configurations: %v\n", err) + return err + } + + // Distribute Loader SSH Key + utils.InfoPrintf("Distributing loader SSH key...\n") + if err := distributeLoaderSSHKey(cfg.LoaderNode, cfg.AllNodes); err != nil { + utils.FatalPrintf("Failed to distribute loader SSH key: %v\n", err) + return err + } + utils.InfoPrintf("Loader SSH key distributed.\n") + + // Determine Operation Mode + var operationMode string + switch cfg.SetupCfg.ClusterMode { + case "container": + operationMode = "stock-only" + case "firecracker", "firecracker_snapshots": + operationMode = "firecracker" + default: + utils.FatalPrintf("Unsupported cluster mode: %s\n", cfg.SetupCfg.ClusterMode) + return fmt.Errorf("unsupported cluster mode: %s", cfg.SetupCfg.ClusterMode) + } + + // Common Initialization on all nodes + utils.InfoPrintf("Starting common initialization on all nodes...\n") + if err := commonInit(cfg.AllNodes, cfg.SetupCfg, operationMode); err != nil { + utils.FatalPrintf("Failed during common initialization: %v\n", err) + return err + } + utils.InfoPrintf("Common initialization completed.\n") + + // Setup Master Node + utils.InfoPrintf("Setting up master node: %s\n", cfg.MasterNode) + joinToken, err := setupMaster(cfg.MasterNode, operationMode) + if err != nil { + utils.FatalPrintf("Failed to setup master node: %v\n", err) + return err + } + utils.InfoPrintf("Master node setup completed.\n") + + // Setup Worker Nodes + utils.InfoPrintf("Setting up worker nodes...\n") + if err := setupWorkers(cfg.WorkerNodes, joinToken, cfg.SetupCfg, operationMode); err != nil { + utils.FatalPrintf("Failed to setup worker nodes: %v\n", err) + return err + } + utils.InfoPrintf("Worker nodes setup completed.\n") + + time.Sleep(5 * time.Second) // Wait for nodes to stabilize + + // Extend CIDR if necessary + if cfg.SetupCfg.PodsPerNode > 240 { + if cfg.SetupCfg.PodsPerNode > 1022 { + utils.FatalPrintf("PODS_PER_NODE value %d is too high to extend CIDR range. Maximum supported is 1022.\n", cfg.SetupCfg.PodsPerNode) + return fmt.Errorf("PODS_PER_NODE value %d is too high to extend CIDR range. Maximum supported is 1022.", cfg.SetupCfg.PodsPerNode) + } + if len(cfg.AllNodes) > 63 { + utils.FatalPrintf("Total number of nodes %d is too high to extend CIDR range. Maximum supported is 63.\n", len(cfg.AllNodes)) + return fmt.Errorf("Total number of nodes %d is too high to extend CIDR range. 
Maximum supported is 63.", len(cfg.AllNodes)) + } + utils.InfoPrintf("Extending CIDR range...\n") + if err := extendCIDR(cfg.MasterNode, cfg.WorkerNodes, joinToken); err != nil { + utils.FatalPrintf("Failed to extend CIDR range: %v\n", err) + return err + } + utils.InfoPrintf("CIDR range extended.\n") + } + + time.Sleep(5 * time.Second) // Wait for nodes to stabilize + + // Finalize Cluster Setup + utils.InfoPrintf("Finalizing cluster setup...\n") + if err := finalizeClusterSetup(cfg.MasterNode, cfg.AllNodes); err != nil { + utils.FatalPrintf("Failed to finalize cluster setup: %v\n", err) + return err + } + utils.InfoPrintf("Cluster setup finalized.\n") + + // Label Nodes + utils.InfoPrintf("Labeling nodes...\n") + if err := loaderUtils.LabelNodes(cfg.MasterNode, configDir, configName); err != nil { + utils.FatalPrintf("Failed to label nodes: %v\n", err) + return err + } + utils.InfoPrintf("Node labeling completed.\n") + + // Deploy Prometheus if enabled + if cfg.SetupCfg.DeployPrometheus { + utils.InfoPrintf("Setting up Prometheus components...\n") + if err := setupPrometheus(cfg.MasterNode, cfg.AllNodes, cfg.PromConfig); err != nil { + utils.FatalPrintf("Failed to setup Prometheus components: %v\n", err) + return err + } + utils.InfoPrintf("Prometheus components setup completed.\n") + } + + utils.InfoPrintf("Multi-node cluster setup finished successfully!\n") + return nil +} diff --git a/scripts/setup/cluster/create_multinode.sh b/scripts/setup/cluster/create_multinode.sh new file mode 100755 index 000000000..513fb9331 --- /dev/null +++ b/scripts/setup/cluster/create_multinode.sh @@ -0,0 +1,287 @@ +#!/usr/bin/env bash + +# +# MIT License +# +# Copyright (c) 2023 EASL and the vHive community +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +MASTER_NODE=$1 +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)" + +source "$DIR/setup.cfg" + +if [ "$CLUSTER_MODE" = "container" ] +then + OPERATION_MODE="stock-only" + FIRECRACKER_SNAPSHOTS="" +elif [ $CLUSTER_MODE = "firecracker" ] +then + OPERATION_MODE="firecracker" + FIRECRACKER_SNAPSHOTS="" +elif [ $CLUSTER_MODE = "firecracker_snapshots" ] +then + OPERATION_MODE="firecracker" + FIRECRACKER_SNAPSHOTS="-snapshots" +else + echo "Unsupported cluster mode" + exit 1 +fi + +if [ $PODS_PER_NODE -gt 1022 ]; then + # CIDR range limitation exceeded + echo "Pods per node cannot be greater than 1022. Cluster deployment has been aborted." 
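+    # a /22 pod CIDR per node (assigned by extend_CIDR below) yields at most 1022 usable pod IPs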
+ exit 1 +fi + +server_exec() { + ssh -oStrictHostKeyChecking=no -p 22 "$1" "$2"; +} + +common_init() { + internal_init() { + server_exec $1 "git clone --branch=$VHIVE_BRANCH $VHIVE_REPO" + + server_exec $1 "pushd ~/vhive/scripts > /dev/null && ./install_go.sh && source /etc/profile && go build -o setup_tool && ./setup_tool setup_node ${OPERATION_MODE} && popd > /dev/null" + + server_exec $1 'tmux new -s containerd -d' + server_exec $1 'tmux send -t containerd "sudo containerd 2>&1 | tee ~/containerd_log.txt" ENTER' + # install precise NTP clock synchronizer + server_exec $1 'sudo apt-get update && sudo apt-get install -y chrony htop sysstat' + # synchronize clock across nodes + server_exec $1 "echo \"server ops.emulab.net iburst prefer\" > >(sudo tee -a /etc/chrony/chrony.conf >/dev/null)" + server_exec $1 'sudo systemctl restart chronyd' + # dump clock info + server_exec $1 'sudo chronyc tracking' + + clone_loader $1 + server_exec $1 '~/loader/scripts/setup/stabilize.sh' + } + + for node in "$@" + do + internal_init "$node" & + done + + wait +} + +function setup_master() { + echo "Setting up master node: $MASTER_NODE" + + server_exec "$MASTER_NODE" 'tmux new -s runner -d' + server_exec "$MASTER_NODE" 'tmux new -s kwatch -d' + server_exec "$MASTER_NODE" 'tmux new -s master -d' + + server_exec $MASTER_NODE '~/loader/scripts/setup/rewrite_yaml_files.sh' + + MN_CLUSTER="pushd ~/vhive/scripts > /dev/null && ./setup_tool create_multinode_cluster ${OPERATION_MODE} && popd > /dev/null" + server_exec "$MASTER_NODE" "tmux send -t master \"$MN_CLUSTER\" ENTER" + + # Get the join token from k8s. + while ! server_exec "$MASTER_NODE" "[ -e ~/vhive/scripts/masterKey.yaml ]"; do + sleep 1 + done + + LOGIN_TOKEN=$(server_exec "$MASTER_NODE" \ + 'awk '\''/^ApiserverAdvertiseAddress:/ {ip=$2} \ + /^ApiserverPort:/ {port=$2} \ + /^ApiserverToken:/ {token=$2} \ + /^ApiserverTokenHash:/ {token_hash=$2} \ + END {print "sudo kubeadm join " ip ":" port " --token " token " --discovery-token-ca-cert-hash " token_hash}'\'' ~/vhive/scripts/masterKey.yaml') +} + +function setup_vhive_firecracker_daemon() { + node=$1 + + server_exec $node 'cd vhive; source /etc/profile && go build' + server_exec $node 'tmux new -s firecracker -d' + server_exec $node 'tmux send -t firecracker "sudo PATH=$PATH /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 2>&1 | tee ~/firecracker_log.txt" ENTER' + server_exec $node 'tmux new -s vhive -d' + server_exec $node 'tmux send -t vhive "cd vhive" ENTER' + RUN_VHIVE_CMD="sudo ./vhive ${FIRECRACKER_SNAPSHOTS} 2>&1 | tee ~/vhive_log.txt" + server_exec $node "tmux send -t vhive \"$RUN_VHIVE_CMD\" ENTER" +} + +function setup_workers() { + internal_setup() { + node=$1 + + echo "Setting up worker node: $node" + + server_exec $node "pushd ~/vhive/scripts > /dev/null && ./setup_tool setup_worker_kubelet ${OPERATION_MODE} && popd > /dev/null" + + if [ "$OPERATION_MODE" = "firecracker" ]; then + setup_vhive_firecracker_daemon $node + fi + + server_exec $node "sudo ${LOGIN_TOKEN}" + echo "Worker node $node has joined the cluster." + + # Stretch the capacity of the worker node to 240 (k8s default: 110) + # Empirically, this gives us a max. #pods being 240-40=200 + echo "Stretching node capacity for $node." 
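+        # append kubelet overrides (maxPods, containerLogMaxSize) and restart the kubelet so they take effect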
+ server_exec $node "echo \"maxPods: ${PODS_PER_NODE}\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)" + server_exec $node "echo \"containerLogMaxSize: 512Mi\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)" + server_exec $node 'sudo systemctl restart kubelet' + server_exec $node 'sleep 10' + + # Rejoin has to be performed although errors will be thrown. Otherwise, restarting the kubelet will cause the node unreachable for some reason + server_exec $node "sudo ${LOGIN_TOKEN} > /dev/null 2>&1" + echo "Worker node $node joined the cluster (again :P)." + } + + for node in "$@" + do + internal_setup "$node" & + done + + wait +} + +function extend_CIDR() { + #* Get node name list. + readarray -t NODE_NAMES < <(server_exec $MASTER_NODE 'kubectl get no' | tail -n +2 | awk '{print $1}') + + if [ ${#NODE_NAMES[@]} -gt 63 ]; then + echo "Cannot extend CIDR range for more than 63 nodes. Cluster deployment has been aborted." + exit 1 + fi + + for i in "${!NODE_NAMES[@]}"; do + NODE_NAME=${NODE_NAMES[i]} + #* Compute subnet: 00001010.10101000.000000 00.00000000 -> about 1022 IPs per worker. + #* To be safe, we change both master and workers with an offset of 0.0.4.0 (4 * 2^8) + # (NB: zsh indices start from 1.) + #* Assume less than 63 nodes in total. + let SUBNET=i*4+4 + #* Extend pod ip range, delete and create again. + server_exec $MASTER_NODE "kubectl get node $NODE_NAME -o json | jq '.spec.podCIDR |= \"10.168.$SUBNET.0/22\"' > node.yaml" + server_exec $MASTER_NODE "kubectl delete node $NODE_NAME && kubectl create -f node.yaml" + + echo "Changed pod CIDR for worker $NODE_NAME to 10.168.$SUBNET.0/22" + sleep 5 + done + + #* Join the cluster for the 3rd time. + for node in "$@" + do + server_exec $node "sudo ${LOGIN_TOKEN} > /dev/null 2>&1" + echo "Worker node $node joined the cluster (again^2 :D)." 
+ done +} + +function clone_loader() { + server_exec $1 "git clone --depth=1 --branch=$LOADER_BRANCH $LOADER_REPO loader" + server_exec $1 'echo -en "\n\n" | sudo apt-get install -y python3-pip' + # server_exec $1 'cd; cd loader; pip install -r config/requirements.txt --break-system-packages' +} + +function copy_k8s_certificates() { + function internal_copy() { + server_exec $1 "mkdir -p ~/.kube" + rsync ./kubeconfig $1:~/.kube/config + } + + echo $MASTER_NODE + rsync $MASTER_NODE:~/.kube/config ./kubeconfig + + for node in "$@" + do + internal_copy "$node" & + done + + wait + + rm ./kubeconfig +} + +# Distributes loader public sshkey to all other nodes +function distribute_loader_ssh_key() { + LOADER_NODE=$1 + ALL_NODES="$MASTER_NODE $@" + + # Generate SSH key pair for loader node + server_exec $LOADER_NODE 'echo -e "\n\n\n" | ssh-keygen -t rsa > /dev/null' + server_exec $LOADER_NODE 'chmod 600 ~/.ssh/id_rsa' + server_exec $LOADER_NODE 'eval "$(ssh-agent -s)" && ssh-add' + # Copy public key into all nodes authorized_keys + rsync $LOADER_NODE:~/.ssh/id_rsa.pub ./loader_sshpub + for node in $ALL_NODES; do + cat ./loader_sshpub | ssh $node 'cat >> ~/.ssh/authorized_keys' & + done + + wait + # clean up + rm ./loader_sshpub +} + +############################################### +######## MAIN SETUP PROCEDURE IS BELOW ######## +############################################### + +{ + # Set up all nodes including the master + common_init "$@" + + shift # make argument list only contain worker nodes (drops master node) + + setup_master + setup_workers "$@" + + if [ $PODS_PER_NODE -gt 240 ]; then + extend_CIDR "$@" + fi + + # Untaint master to schedule knative control plane there + server_exec $MASTER_NODE "kubectl taint nodes \$(hostname) node-role.kubernetes.io/control-plane-" + + # Notify the master that all nodes have joined the cluster + server_exec $MASTER_NODE 'tmux send -t master "y" ENTER' + + namespace_info=$(server_exec $MASTER_NODE "kubectl get namespaces") + while [[ ${namespace_info} != *'knative-serving'* ]]; do + sleep 60 + namespace_info=$(server_exec $MASTER_NODE "kubectl get namespaces") + done + + echo "Master node $MASTER_NODE finalised." 
+ + # Copy API server certificates from master to each worker node + copy_k8s_certificates "$@" + + # Create and distribute loader RSA key to other nodes + distribute_loader_ssh_key "$@" + + server_exec $MASTER_NODE 'cd loader; bash scripts/setup/patch_init_scale.sh' + + source $DIR/label.sh + + # Force placement of metrics collectors and instrumentation on the loader node and control plane on master + label_nodes $MASTER_NODE $1 # loader node is second on the list, becoming first after arg shift + + server_exec $MASTER_NODE "kubectl patch configmap -n knative-serving config-features -p '{\"data\": {\"kubernetes.podspec-affinity\": \"enabled\"}}'" + + + if [[ "$DEPLOY_PROMETHEUS" == true ]]; then + $DIR/expose_infra_metrics.sh $MASTER_NODE + fi +} diff --git a/scripts/setup/cluster/setup_knative.go b/scripts/setup/cluster/setup_knative.go new file mode 100644 index 000000000..d0d3be733 --- /dev/null +++ b/scripts/setup/cluster/setup_knative.go @@ -0,0 +1,452 @@ +package cluster + +import ( + "fmt" + "strings" + "sync" + "time" + + "github.com/vhive-serverless/loader/scripts/setup/configs" + loaderUtils "github.com/vhive-serverless/loader/scripts/setup/utils" + "github.com/vhive-serverless/vHive/scripts/utils" +) + +func commonInit(nodes []string, cfg *configs.SetupConfig, operationMode string) error { + var wg sync.WaitGroup + errChan := make(chan error, len(nodes)) + + for _, node := range nodes { + wg.Add(1) + go func(node string) { + defer wg.Done() + utils.InfoPrintf("Initializing node: %s\n", node) + // Clone vHive repository + utils.WaitPrintf("Cloning vHive repository on node %s...\n", node) + _, err := loaderUtils.ServerExec(node, fmt.Sprintf("git clone --branch=%s %s", cfg.HiveBranch, cfg.HiveRepo)) + if !utils.CheckErrorWithMsg(err, "Failed to clone vHive repository on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Install Go and setup node + utils.WaitPrintf("Installing Go and setting up node %s...\n", node) + _, err = loaderUtils.ServerExec(node, fmt.Sprintf("pushd ~/vhive/scripts > /dev/null && ./install_go.sh && source /etc/profile && go build -o setup_tool && ./setup_tool setup_node %s && popd > /dev/null", operationMode)) + if !utils.CheckErrorWithMsg(err, "Failed to install Go and setup node %s: %v \n", node, err) { + errChan <- err + return + } + + // Start containerd in tmux + utils.WaitPrintf("Starting containerd in tmux on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, "tmux new -s containerd -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session for containerd on node %s: %v \n", node, err) { + errChan <- err + return + } + _, err = loaderUtils.ServerExec(node, `tmux send -t containerd "sudo containerd 2>&1 | tee ~/containerd_log.txt" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to start containerd in tmux on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Install chrony, htop, sysstat + utils.WaitPrintf("Installing chrony, htop, sysstat on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, "sudo apt-get update && sudo apt-get install -y chrony htop sysstat etcd-client") + if !utils.CheckErrorWithMsg(err, "Failed to install chrony, htop, sysstat on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Configure chrony + utils.WaitPrintf("Configuring chrony on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, `echo "server ops.emulab.net iburst prefer" | sudo tee -a /etc/chrony/chrony.conf >/dev/null`) + if !utils.CheckErrorWithMsg(err, "Failed to configure 
chrony on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Restart chronyd + utils.WaitPrintf("Restarting chronyd on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, "sudo systemctl restart chronyd") + if !utils.CheckErrorWithMsg(err, "Failed to restart chronyd on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Check chrony tracking + utils.WaitPrintf("Checking chrony tracking on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, "sudo chronyc tracking") + if !utils.CheckErrorWithMsg(err, "Failed to check chrony tracking on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Clone loader repository + utils.WaitPrintf("Cloning loader repository on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, fmt.Sprintf("git clone --depth=1 --branch=%s %s loader", cfg.LoaderBranch, cfg.LoaderRepo)) + if !utils.CheckErrorWithMsg(err, "Failed to clone loader repository on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Install python3-pip + utils.WaitPrintf("Installing python3-pip on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, `echo -en "\n\n" | sudo apt-get install -y python3-pip`) + if !utils.CheckErrorWithMsg(err, "Failed to install python3-pip on node %s: %v \n", node, err) { + errChan <- err + return + } + + // Run stabilize script + utils.WaitPrintf("Running stabilize script on node %s...\n", node) + _, err = loaderUtils.ServerExec(node, "~/loader/scripts/setup/stabilize.sh") + if !utils.CheckErrorWithMsg(err, "Failed to run stabilize script on node %s: %v \n", node, err) { + errChan <- err + return + } + }(node) + } + + wg.Wait() + close(errChan) + + for err := range errChan { + return err + } + return nil +} + +func setupMaster(masterNode string, operationMode string) (string, error) { + setupCmd := fmt.Sprintf("pushd ~/vhive/scripts > /dev/null && ./setup_tool create_multinode_cluster %s && popd > /dev/null", operationMode) + // Create tmux sessions + _, err := loaderUtils.ServerExec(masterNode, "tmux new -s runner -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session 'runner' on master node %s: %v \n", masterNode, err) { + return "", err + } + _, err = loaderUtils.ServerExec(masterNode, "tmux new -s master -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session 'master' on master node %s: %v\n", masterNode, err) { + return "", err + } + + // Rewrite YAML files + _, err = loaderUtils.ServerExec(masterNode, "~/loader/scripts/setup/rewrite_yaml_files.sh") + if !utils.CheckErrorWithMsg(err, "Failed to rewrite YAML files on master node %s: %v\n", masterNode, err) { + return "", err + } + + // Execute setup command in master tmux session + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf(`tmux send -t master "%s" ENTER`, setupCmd)) + if !utils.CheckErrorWithMsg(err, "Failed to send setup command to master tmux session on master node %s: %v\n", masterNode, err) { + return "", err + } + + // Wait for masterKey.yaml to be created + utils.InfoPrintf("Waiting for master key to be generated...\n") + for { + _, err := loaderUtils.ServerExec(masterNode, "stat ~/vhive/scripts/masterKey.yaml") + if err == nil { + break + } + time.Sleep(2 * time.Second) + } + utils.InfoPrintf("Master key found.\n") + + // Get the join token + getTokenCmd := `awk '/^ApiserverAdvertiseAddress:/ {ip=$2} /^ApiserverPort:/ {port=$2} /^ApiserverToken:/ {token=$2} /^ApiserverTokenHash:/ {token_hash=$2} END {print "sudo kubeadm join " ip ":" port " --token " token " 
--discovery-token-ca-cert-hash " token_hash}' ~/vhive/scripts/masterKey.yaml` + joinToken, err := loaderUtils.ServerExec(masterNode, getTokenCmd) + if !utils.CheckErrorWithMsg(err, "Failed to get join token from master: %v\n", err) { + return "", err + } + + return strings.TrimSpace(joinToken), nil +} + +func setupWorkers(workerNodes []string, joinToken string, cfg *configs.SetupConfig, operationMode string) error { + var wg sync.WaitGroup + errChan := make(chan error, len(workerNodes)) + + for _, node := range workerNodes { + wg.Add(1) + go func(node string) { + defer wg.Done() + utils.InfoPrintf("Setting up worker: %s\n", node) + + // Setup worker kubelet + utils.WaitPrintf("Setting up worker kubelet on node %s...\n", node) + setupKubeletCmd := fmt.Sprintf("pushd ~/vhive/scripts > /dev/null && ./setup_tool setup_worker_kubelet %s && popd > /dev/null", operationMode) + _, err := loaderUtils.ServerExec(node, setupKubeletCmd) + if !utils.CheckErrorWithMsg(err, "Failed to setup worker kubelet on worker %s: %v\n", node, err) { + errChan <- err + return + } + + // Setup vHive Firecracker daemon if operation mode is firecracker + if operationMode == "firecracker" { + err = setupVhiveFirecrackerDaemon(node, cfg.ClusterMode) + if !utils.CheckErrorWithMsg(err, "Failed to setup vHive Firecracker daemon on worker %s: %v\n", node, err) { + errChan <- err + return + } + } + + // Join token + _, err = loaderUtils.ServerExec(node, joinToken) + if !utils.CheckErrorWithMsg(err, "Failed to join worker %s with token: %v\n", node, err) { + errChan <- err + return + } + + // Configure maxPods + _, err = loaderUtils.ServerExec(node, fmt.Sprintf("echo \"maxPods: %d\" | sudo tee -a /var/lib/kubelet/config.yaml >/dev/null", cfg.PodsPerNode)) + if !utils.CheckErrorWithMsg(err, "Failed to set maxPods on worker %s: %v\n", node, err) { + errChan <- err + return + } + + // Configure containerLogMaxSize + _, err = loaderUtils.ServerExec(node, `echo "containerLogMaxSize: 512Mi" | sudo tee -a /var/lib/kubelet/config.yaml >/dev/null`) + if !utils.CheckErrorWithMsg(err, "Failed to set containerLogMaxSize on worker %s: %v\n", node, err) { + errChan <- err + return + } + + // Restart kubelet + utils.WaitPrintf("Restarting kubelet on worker %s...\n", node) + _, err = loaderUtils.ServerExec(node, "sudo systemctl restart kubelet") + if !utils.CheckErrorWithMsg(err, "Failed to restart kubelet on worker %s: %v\n", node, err) { + errChan <- err + return + } + + time.Sleep(10 * time.Second) + + // Rejoin after restart + utils.WaitPrintf("Rejoining worker %s after kubelet restart...\n", node) + _, _ = loaderUtils.ServerExec(node, joinToken) + utils.InfoPrintf("Worker node %s has joined the cluster.\n", node) + }(node) + } + + wg.Wait() + close(errChan) + + for err := range errChan { + return err + } + return nil +} + +func extendCIDR(masterNode string, workerNodes []string, joinToken string) error { + utils.InfoPrintf("Extending CIDR for all nodes...\n") + + // Get node names first + nodeList, err := loaderUtils.ServerExec(masterNode, "kubectl get no | tail -n +2 | awk '{print $1}'") + if !utils.CheckErrorWithMsg(err, "Failed to get node names from master node %s: %v\n", masterNode, err) { + return err + } + nodeNames := strings.Fields(nodeList) + if len(nodeNames) == 0 { + return fmt.Errorf("could not retrieve any node names") + } + + for i, nodeName := range nodeNames { + subnet := i*4 + 4 + // Get node JSON, modify podCIDR, and save to node.yaml + getNodeCmd := fmt.Sprintf("kubectl get node %s -o json | jq '.spec.podCIDR |= 
\"10.168.%d.0/22\"' > node.yaml", nodeName, subnet) + _, err := loaderUtils.ServerExec(masterNode, getNodeCmd) + if !utils.CheckErrorWithMsg(err, "Failed to get and modify node %s JSON on master node %s: %v\n", nodeName, masterNode, err) { + return err + } + + // Delete the node + deleteNodeCmd := fmt.Sprintf("kubectl delete node %s", nodeName) + _, err = loaderUtils.ServerExec(masterNode, deleteNodeCmd) + if !utils.CheckErrorWithMsg(err, "Failed to delete node %s on master node %s: %v\n", nodeName, masterNode, err) { + return err + } + + // Create the node with updated CIDR + createNodeCmd := "kubectl create -f node.yaml" + _, err = loaderUtils.ServerExec(masterNode, createNodeCmd) + if !utils.CheckErrorWithMsg(err, "Failed to create node %s with updated CIDR on master node %s: %v\n", nodeName, masterNode, err) { + return err + } + utils.InfoPrintf("Changed pod CIDR for node %s to 10.168.%d.0/22\n", nodeName, subnet) + } + + // Rejoin the cluster for the 3rd time + for _, node := range workerNodes { + _, _ = loaderUtils.ServerExec(node, joinToken) + } + + return nil +} + +func finalizeClusterSetup(masterNode string, allNodes []string) error { + // Untaint master node + untaintCmd := "kubectl taint nodes $(hostname) node-role.kubernetes.io/control-plane-" + var err error // Declare err once + _, err = loaderUtils.ServerExec(masterNode, untaintCmd) + if !utils.CheckErrorWithMsg(err, "Failed to untaint master node %s: %v\n", masterNode, err) { + return err + } + + // Notify the master that all nodes have joined + _, err = loaderUtils.ServerExec(masterNode, `tmux send -t master "y" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to notify master node %s: %v\n", masterNode, err) { + return err + } + + // Wait for knative-serving namespace + utils.InfoPrintf("Waiting for knative-serving namespace to be created...\n") + for { + out, innerErr := loaderUtils.ServerExec(masterNode, "kubectl get namespaces") // Use innerErr for loop scope + if innerErr == nil && strings.Contains(out, "knative-serving") { + break + } + time.Sleep(15 * time.Second) + } + utils.InfoPrintf("Knative-serving namespace is ready.\n") + + // Copy kubeconfig from master to all other nodes + err = copyK8sCertificates(masterNode, allNodes) + if !utils.CheckErrorWithMsg(err, "Failed to copy kubeconfig: %v\n", err) { + return err + } + + // Patch init scale + _, err = loaderUtils.ServerExec(masterNode, "cd ~/loader; bash scripts/setup/patch_init_scale.sh") + if !utils.CheckErrorWithMsg(err, "Failed to run patch_init_scale.sh on master node %s: %v\n", masterNode, err) { + return err + } + + // Enable affinity + _, err = loaderUtils.ServerExec(masterNode, `kubectl patch configmap -n knative-serving config-features -p '{"data": {"kubernetes.podspec-affinity": "enabled"}}'`) + if !utils.CheckErrorWithMsg(err, "Failed to enable affinity on master node %s: %v\n", masterNode, err) { + return err + } + + return nil +} + +func copyK8sCertificates(masterNode string, allNodes []string) error { + utils.InfoPrintf("Copying K8s certificates from master to all nodes...\n") + kubeconfig, err := loaderUtils.ServerExec(masterNode, "cat ~/.kube/config") + if !utils.CheckErrorWithMsg(err, "Failed to get kubeconfig from master node %s: %v\n", masterNode, err) { + return err + } + + var wg sync.WaitGroup + errChan := make(chan error, len(allNodes)-1) + for _, node := range allNodes { + if node == masterNode { + continue + } + wg.Add(1) + go func(node string) { + defer wg.Done() + // Create .kube directory + mkdirCmd := "mkdir -p ~/.kube" + _, err 
:= loaderUtils.ServerExec(node, mkdirCmd) + if !utils.CheckErrorWithMsg(err, "Failed to create .kube directory on node %s: %v\n", node, err) { + errChan <- err + return + } + + // Write kubeconfig to file + echoKubeconfigCmd := fmt.Sprintf("echo '%s' > ~/.kube/config", kubeconfig) + _, err = loaderUtils.ServerExec(node, echoKubeconfigCmd) + if !utils.CheckErrorWithMsg(err, "Failed to write kubeconfig to node %s: %v\n", node, err) { + errChan <- err + } + }(node) + } + wg.Wait() + close(errChan) + for err := range errChan { + return err + } + utils.InfoPrintf("Successfully copied K8s certificates.\n") + return nil +} + +func distributeLoaderSSHKey(loaderNode string, allNodes []string) error { + _, err := loaderUtils.ServerExec(loaderNode, `echo -e "\n\n\n" | ssh-keygen -t rsa -N ""`) + if !utils.CheckErrorWithMsg(err, "Failed to generate SSH key on loader node %s: %v\n", loaderNode, err) { + return err + } + + pubKey, err := loaderUtils.ServerExec(loaderNode, "cat ~/.ssh/id_rsa.pub") + if !utils.CheckErrorWithMsg(err, "Failed to get public key from loader node %s: %v\n", loaderNode, err) { + return err + } + + var wg sync.WaitGroup + errChan := make(chan error, len(allNodes)) + for _, node := range allNodes { + wg.Add(1) + go func(node string) { + defer wg.Done() + cmd := fmt.Sprintf("echo '%s' >> ~/.ssh/authorized_keys", strings.TrimSpace(pubKey)) + _, err := loaderUtils.ServerExec(node, cmd) + if !utils.CheckErrorWithMsg(err, "Failed to distribute SSH key to node %s: %v\n", node, err) { + errChan <- err + } + }(node) + } + wg.Wait() + close(errChan) + for err := range errChan { + return err + } + utils.InfoPrintf("Successfully distributed loader SSH key.\n") + return nil +} + +// setupVhiveFirecrackerDaemon sets up the vHive Firecracker daemon on a given node. 
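+// It builds vHive from source, starts firecracker-containerd and the vhive daemon in dedicated tmux sessions, and passes -snapshots when the cluster mode is "firecracker_snapshots".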
+func setupVhiveFirecrackerDaemon(node string, clusterMode string) error { + // Build vHive + _, err := loaderUtils.ServerExec(node, "cd vhive; source /etc/profile && go build") + if !utils.CheckErrorWithMsg(err, "Failed to build vHive on node %s: %v\n", node, err) { + return err + } + + // Create tmux session for firecracker + _, err = loaderUtils.ServerExec(node, "tmux new -s firecracker -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session 'firecracker' on node %s: %v\n", node, err) { + return err + } + + // Start firecracker-containerd + _, err = loaderUtils.ServerExec(node, `tmux send -t firecracker "sudo PATH=$PATH /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 2>&1 | tee ~/firecracker_log.txt" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to start firecracker-containerd on node %s: %v\n", node, err) { + return err + } + + // Create tmux session for vhive + _, err = loaderUtils.ServerExec(node, "tmux new -s vhive -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session 'vhive' on node %s: %v\n", node, err) { + return err + } + + // Change directory to vhive in tmux + _, err = loaderUtils.ServerExec(node, `tmux send -t vhive "cd vhive" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to change directory to vhive in tmux on node %s: %v\n", node, err) { + return err + } + + // Determine firecracker snapshots argument based on cluster mode + firecrackerSnapshots := "" + if clusterMode == "firecracker_snapshots" { + firecrackerSnapshots = "-snapshots" + } + + // Run vhive + runVhiveCmd := fmt.Sprintf("sudo ./vhive %s 2>&1 | tee ~/vhive_log.txt", firecrackerSnapshots) + _, err = loaderUtils.ServerExec(node, fmt.Sprintf(`tmux send -t vhive "%s" ENTER`, runVhiveCmd)) + if !utils.CheckErrorWithMsg(err, "Failed to run vhive on node %s: %v\n", node, err) { + return err + } + + return nil +} diff --git a/scripts/setup/cluster/setup_prometheus.go b/scripts/setup/cluster/setup_prometheus.go new file mode 100644 index 000000000..e674eefcf --- /dev/null +++ b/scripts/setup/cluster/setup_prometheus.go @@ -0,0 +1,141 @@ +package cluster + +import ( + "fmt" + "time" + + "github.com/vhive-serverless/loader/scripts/setup/configs" + loaderUtils "github.com/vhive-serverless/loader/scripts/setup/utils" + "github.com/vhive-serverless/vHive/scripts/utils" +) + +// setupPrometheus sets up Prometheus components on the master node. 
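+// It deploys the Kubernetes Metrics Server, installs the kube-prometheus-stack and pushgateway Helm charts into the "monitoring" namespace, applies the Knative monitors and dashboards, and port-forwards Prometheus (9090) and Grafana (3000) from tmux sessions.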
+func setupPrometheus(masterNode string, allNode []string, promConfig *configs.PrometheusConfig) error { + // Install htop + utils.WaitPrintf("Installing htop on master node %s...\n", masterNode) + _, err := loaderUtils.ServerExec(masterNode, "sudo apt install htop") + if !utils.CheckErrorWithMsg(err, "Failed to install htop on master node %s: %v\n", masterNode, err) { + return err + } + + for _, node := range allNode { + utils.WaitPrintf("Set Paranoid level on node %s...\n", node) + _, err := loaderUtils.ServerExec(node, "sudo sysctl -w kernel.perf_event_paranoid=-1") + if !utils.CheckErrorWithMsg(err, "Failed to set Paranoid level on node %s: %v\n", node, err) { + return err + } + } + + // Deploy Metrics Server + utils.WaitPrintf("Deploying Metrics Server version %s on master node %s...\n", promConfig.MetricsServerVersion, masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/%s/components.yaml", promConfig.MetricsServerVersion)) + if !utils.CheckErrorWithMsg(err, "Failed to deploy Metrics Server on master node %s: %v\n", masterNode, err) { + return err + } + + // Patch Metrics Server deployment to allow insecure TLS + utils.WaitPrintf("Patching Metrics Server deployment for insecure TLS on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, `kubectl patch deployment metrics-server -n kube-system --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--kubelet-insecure-tls=true"}]'`) + if !utils.CheckErrorWithMsg(err, "Failed to patch Metrics Server deployment on master node %s: %v\n", masterNode, err) { + return err + } + + // Install Helm + utils.WaitPrintf("Installing Helm on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash") + if !utils.CheckErrorWithMsg(err, "Failed to install Helm on master node %s: %v\n", masterNode, err) { + return err + } + + time.Sleep(1 * time.Second) + + // Add Prometheus Helm repository + utils.WaitPrintf("Adding Prometheus Helm repository on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "helm repo add prometheus-community https://prometheus-community.github.io/helm-charts") + if !utils.CheckErrorWithMsg(err, "Failed to add Prometheus Helm repository on master node %s: %v\n", masterNode, err) { + return err + } + + // Update Helm repositories + utils.WaitPrintf("Updating Helm repositories on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "helm repo update") + if !utils.CheckErrorWithMsg(err, "Failed to update Helm repositories on master node %s: %v\n", masterNode, err) { + return err + } + + // Create monitoring namespace + utils.WaitPrintf("Creating 'monitoring' namespace on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "kubectl create namespace monitoring") + if !utils.CheckErrorWithMsg(err, "Failed to create 'monitoring' namespace on master node %s: %v\n", masterNode, err) { + return err + } + + // Install and start Prometheus stack using helm + utils.WaitPrintf("Installing Prometheus stack (version %s) on master node %s...\n", promConfig.PromChartVersion, masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("helm install -n monitoring prometheus --version %s prometheus-community/kube-prometheus-stack -f %s/prom_values.yaml", promConfig.PromChartVersion, 
promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to install Prometheus stack on master node %s: %v\n", masterNode, err) { + return err + } + + // Install Prometheus pushgateway using helm + utils.WaitPrintf("Installing Prometheus pushgateway (version %s) on master node %s...\n", promConfig.PushgatewayChartVersion, masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("helm install -n monitoring prometheus-pushgateway --version %s prometheus-community/prometheus-pushgateway -f %s/prom_pushgateway.yaml", promConfig.PushgatewayChartVersion, promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to install Prometheus pushgateway on master node %s: %v\n", masterNode, err) { + return err + } + + // Apply ServiceMonitors/PodMonitors for Knative metrics + utils.WaitPrintf("Applying Knative ServiceMonitors/PodMonitors on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("curl -sL %s/config/serving-monitors.yaml | sed 's/interval: 30s/interval: 2s/g' | kubectl apply -f -", promConfig.KnativePromURL)) + if !utils.CheckErrorWithMsg(err, "Failed to apply Knative ServiceMonitors/PodMonitors on master node %s: %v\n", masterNode, err) { + return err + } + + // Apply Grafana dashboards for Knative + utils.WaitPrintf("Applying Knative Grafana dashboards on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf(`curl -sL %s/config/configmap-serving-dashboard.json | sed 's/"namespace": "knative-serving"/"namespace": "monitoring"/g' | kubectl apply -f -`, promConfig.KnativePromURL)) + if !utils.CheckErrorWithMsg(err, "Failed to apply Knative Grafana dashboards on master node %s: %v\n", masterNode, err) { + return err + } + + // Bind addresses of the control manager and scheduler to "0.0.0.0" + utils.WaitPrintf("Binding control manager and scheduler addresses on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("sudo kubeadm upgrade apply --config %s/kubeadm_init.yaml --ignore-preflight-errors all --force --v=7", promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to bind control manager and scheduler addresses on master node %s: %v\n", masterNode, err) { + return err + } + + // Restart the kube-proxy + utils.WaitPrintf("Restarting kube-proxy on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "kubectl delete pod -l k8s-app=kube-proxy -n kube-system") + if !utils.CheckErrorWithMsg(err, "Failed to restart kube-proxy on master node %s: %v\n", masterNode, err) { + return err + } + + time.Sleep(5 * time.Second) + + // Set up Prometheus port-forwarding in tmux + utils.WaitPrintf("Setting up Prometheus port-forwarding on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, "tmux new -s prometheusd -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session for Prometheus on master node %s: %v\n", masterNode, err) { + return err + } + _, err = loaderUtils.ServerExec(masterNode, `tmux send -t prometheusd "while true; do kubectl port-forward -n monitoring service/prometheus-kube-prometheus-prometheus 9090; done" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to set up Prometheus port-forwarding in tmux on master node %s: %v\n", masterNode, err) { + return err + } + + // Set up Grafana dashboard port-forwarding in tmux (id: admin, pwd: prom-operator) + utils.WaitPrintf("Setting up Grafana dashboard port-forwarding on master node %s...\n", masterNode) + _, err = 
loaderUtils.ServerExec(masterNode, "tmux new -s grafanad -d") + if !utils.CheckErrorWithMsg(err, "Failed to create tmux session for Grafana on master node %s: %v\n", masterNode, err) { + return err + } + _, err = loaderUtils.ServerExec(masterNode, `tmux send -t grafanad "while true; do kubectl -n monitoring port-forward deployment/prometheus-grafana 3000; done" ENTER`) + if !utils.CheckErrorWithMsg(err, "Failed to set up Grafana dashboard port-forwarding in tmux on master node %s: %v\n", masterNode, err) { + return err + } + + utils.InfoPrintf("Done setting up Prometheus components.\n") + return nil +} diff --git a/scripts/setup/configs/config.go b/scripts/setup/configs/config.go new file mode 100644 index 000000000..637fbd46e --- /dev/null +++ b/scripts/setup/configs/config.go @@ -0,0 +1,168 @@ +package configs + +import ( + "encoding/json" + "os" + "path/filepath" + "strconv" + + "github.com/vhive-serverless/vHive/scripts/utils" +) + +type InvitroSetupConfig struct { + MasterNode string + LoaderNode string + WorkerNodes []string + AllNodes []string + + SetupCfg *SetupConfig + PromConfig *PrometheusConfig +} + +type NodeSetup struct { + NodeSetup struct { + MasterNode []string `json:"MASTER_NODE"` + LoaderNode []string `json:"LOADER_NODE"` + WorkerNode []string `json:"WORKER_NODE"` + } `json:"NODE_SETUP"` + NodeLabel map[string][]string `json:"NODE_LABEL"` + NodeURL []string `json:"NODE_URL"` +} + +type SetupConfig struct { + HiveRepo string `json:"VHIVE_REPO"` + HiveBranch string `json:"VHIVE_BRANCH"` + LoaderRepo string `json:"LOADER_REPO"` + LoaderBranch string `json:"LOADER_BRANCH"` + KhalaRepo string `json:"KHALA_REPO"` + KhalaBranch string `json:"KHALA_BRANCH"` + ClusterMode string `json:"CLUSTER_MODE"` + PodsPerNode int `json:"PODS_PER_NODE"` + DeployPrometheus bool `json:"DEPLOY_PROMETHEUS"` + DeployRDMA bool `json:"DEPLOY_RDMA"` +} + +type PrometheusConfig struct { + MetricsServerVersion string `json:"MetricsServerVersion"` + PromChartVersion string `json:"PromChartVersion"` + PushgatewayChartVersion string `json:"PushgatewayChartVersion"` + PromValuePath string `json:"PromValuePath"` + KnativePromURL string `json:"KnativePromURL"` +} + +func CommonConfigSetup(configDir string, configName string) (*InvitroSetupConfig, error) { + // Load Configurations + _, extNodeSetup, err := GetNodeSetup(configDir, configName) + if err != nil { + utils.FatalPrintf("Failed to get node setup config: %v\n", err) + return nil, err + } + + setupCfg, err := GetSetupJSON(configDir) + if err != nil { + utils.FatalPrintf("Failed to get setup config: %v\n", err) + return nil, err + } + + promConfig, err := GetPromConfig(configDir) + if err != nil { + utils.FatalPrintf("Failed to get Prometheus config: %v\n", err) + return nil, err + } + + masterNode := extNodeSetup.NodeSetup.MasterNode[0] + loaderNode := extNodeSetup.NodeSetup.LoaderNode[0] + workerNodes := extNodeSetup.NodeSetup.WorkerNode + allNodes := append([]string{masterNode}, workerNodes...) 
+ + return &InvitroSetupConfig{ + MasterNode: masterNode, + LoaderNode: loaderNode, + WorkerNodes: workerNodes, + AllNodes: allNodes, + SetupCfg: setupCfg, + PromConfig: promConfig, + }, nil +} + +func GetNodeSetup(path string, configName string) (*NodeSetup, *NodeSetup, error) { + configPath := filepath.Join(path, configName) + configFile, err := os.ReadFile(configPath) + if err != nil { + return nil, nil, err + } + + var intNodeSetup NodeSetup + var extlNodeSetup NodeSetup + err = json.Unmarshal(configFile, &intNodeSetup) + if err != nil { + return nil, nil, err + } + + // Map internal IPs to real world URLs + ipToURL := mapNodeURLs(&intNodeSetup) + + extlNodeSetup.NodeSetup.MasterNode = swapIPs(intNodeSetup.NodeSetup.MasterNode, ipToURL) + extlNodeSetup.NodeSetup.LoaderNode = swapIPs(intNodeSetup.NodeSetup.LoaderNode, ipToURL) + extlNodeSetup.NodeSetup.WorkerNode = swapIPs(intNodeSetup.NodeSetup.WorkerNode, ipToURL) + + extlNodeSetup.NodeLabel = make(map[string][]string) + for k, v := range intNodeSetup.NodeLabel { + extlNodeSetup.NodeLabel[k] = swapIPs(v, ipToURL) + } + + return &intNodeSetup, &extlNodeSetup, nil +} + +func GetSetupJSON(path string) (*SetupConfig, error) { + configPath := filepath.Join(path, "setup.json") + configFile, err := os.ReadFile(configPath) + if err != nil { + return nil, err + } + + var setupConfig SetupConfig + err = json.Unmarshal(configFile, &setupConfig) + if err != nil { + return nil, err + } + + return &setupConfig, nil +} + +func GetPromConfig(path string) (*PrometheusConfig, error) { + configPath := filepath.Join(path, "prometheus/prom_config.json") + configFile, err := os.ReadFile(configPath) + if err != nil { + return nil, err + } + + var promConfig PrometheusConfig + err = json.Unmarshal(configFile, &promConfig) + if err != nil { + return nil, err + } + + return &promConfig, nil +} + +func mapNodeURLs(nodeSetup *NodeSetup) map[string]string { + mapping := make(map[string]string) + for i, url := range nodeSetup.NodeURL { + ip := "10.0.1." 
+ strconv.Itoa(i+1) + mapping[ip] = url + } + return mapping +} + +func swapIPs(nodes []string, ipToURL map[string]string) []string { + swapped := make([]string, len(nodes)) + for i, ip := range nodes { + if url, ok := ipToURL[ip]; ok { + swapped[i] = url + } else { + swapped[i] = ip + } + } + return swapped +} diff --git a/scripts/setup/configs/node_setup.json b/scripts/setup/configs/node_setup.json new file mode 100644 index 000000000..534df101a --- /dev/null +++ b/scripts/setup/configs/node_setup.json @@ -0,0 +1,29 @@ +{ + "NODE_SETUP": { + "MASTER_NODE": [ + "10.0.1.1" + ], + "LOADER_NODE": [ + "10.0.1.2" + ], + "WORKER_NODE": [ + "10.0.1.3", + "10.0.1.4" + ] + }, + + "NODE_LABEL": { + "loader-nodetype=master": [ + "10.0.1.1" + ], + "loader-nodetype=monitoring": [ + "10.0.1.2" + ], + "loader-nodetype=worker": [ + "10.0.1.3", + "10.0.1.4" + ] + }, + + "NODE_URL": ["username@example.com", "username@example.com", "username@example.com", "username@example.com"] +} \ No newline at end of file diff --git a/scripts/setup/configs/prometheus/kubeadm_init.yaml b/scripts/setup/configs/prometheus/kubeadm_init.yaml new file mode 100644 index 000000000..7193a13b2 --- /dev/null +++ b/scripts/setup/configs/prometheus/kubeadm_init.yaml @@ -0,0 +1,46 @@ +apiVersion: kubeadm.k8s.io/v1beta3 +kind: InitConfiguration +localAPIEndpoint: + advertiseAddress: 10.0.1.1 + +--- +apiVersion: kubeadm.k8s.io/v1beta3 +kind: ClusterConfiguration +apiServer: + extraArgs: + authorization-mode: Node,RBAC + timeoutForControlPlane: 4m0s # The timeout that we wait for the API server to appear +certificatesDir: /etc/kubernetes/pki +clusterName: kubernetes +dns: {} +etcd: + local: + dataDir: /var/lib/etcd + extraArgs: + listen-metrics-urls: http://0.0.0.0:2381 +imageRepository: registry.k8s.io +kubernetesVersion: v1.29.1 +networking: + dnsDomain: cluster.local + podSubnet: 10.168.0.0/16 + serviceSubnet: 10.96.0.0/12 +controllerManager: + extraArgs: + bind-address: 0.0.0.0 # This allows scheduler and controllers to be scraped on other ports and nodes. +scheduler: + extraArgs: + bind-address: 0.0.0.0 + +--- +# Configure kubelet for large pod limits. 
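+# kubeAPIQPS/kubeAPIBurst are raised above the kubelet defaults (5/10) so nodes running hundreds of pods are not throttled when syncing with the API server.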
+apiVersion: kubelet.config.k8s.io/v1beta1 +kind: KubeletConfiguration +kubeAPIQPS: 50 # Default: 5 +kubeAPIBurst: 100 # Default: 10 +configMapAndSecretChangeDetectionStrategy: "Watch" + + +--- +apiVersion: kubeproxy.config.k8s.io/v1alpha1 +kind: KubeProxyConfiguration +metricsBindAddress: 0.0.0.0:10249 \ No newline at end of file diff --git a/scripts/setup/configs/prometheus/prom_config.json b/scripts/setup/configs/prometheus/prom_config.json new file mode 100644 index 000000000..b8c8a7c9d --- /dev/null +++ b/scripts/setup/configs/prometheus/prom_config.json @@ -0,0 +1,7 @@ +{ + "MetricsServerVersion": "v0.7.2", + "PromChartVersion": "72.6.2", + "PushgatewayChartVersion": "3.4.1", + "PromValuePath": "~/loader/scripts/setup/configs/prometheus", + "KnativePromURL": "https://raw.githubusercontent.com/knative-extensions/monitoring/2427116e8e3cf0c65118c42a7f586490a1bb3ec2" +} \ No newline at end of file diff --git a/scripts/setup/configs/prometheus/prom_pushgateway.yaml b/scripts/setup/configs/prometheus/prom_pushgateway.yaml new file mode 100644 index 000000000..abc142db9 --- /dev/null +++ b/scripts/setup/configs/prometheus/prom_pushgateway.yaml @@ -0,0 +1,26 @@ +enabled: true +serviceMonitor: + enabled: true + additionalLabels: + release: prometheus + interval: "1s" +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode +ingress: + enabled: true + ingressClassName: "" + annotations: + kubernetes.io/ingress.class: "istio" + hosts: + - prom-pushgateway.monitoring.10.200.3.4.sslip.io + paths: + - / + pathType: Prefix \ No newline at end of file diff --git a/scripts/setup/configs/prometheus/prom_values.yaml b/scripts/setup/configs/prometheus/prom_values.yaml new file mode 100644 index 000000000..02ae4c78d --- /dev/null +++ b/scripts/setup/configs/prometheus/prom_values.yaml @@ -0,0 +1,198 @@ +alertmanager: + config: + route: + group_by: ['job'] + serviceMonitor: + interval: "15s" + alertmanagerSpec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + +grafana: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + serviceMonitor: + interval: "15s" + scrapeTimeout: "15s" # Cannot be larger than Prometheus scrape intervals + +kubeApiServer: + serviceMonitor: + interval: "15s" + metricRelabelings: [] + +kubelet: + serviceMonitor: + interval: "15s" + cAdvisorMetricRelabelings: [] + +kubeControllerManager: + service: + targetPort: 10257 + serviceMonitor: + interval: "15s" + https: true + insecureSkipVerify: true + +coreDns: + serviceMonitor: + interval: "15s" + +kubeEtcd: + serviceMonitor: + interval: "15s" + +kubeScheduler: + service: + targetPort: 10259 + serviceMonitor: + interval: "15s" + https: true + insecureSkipVerify: true + +kubeProxy: + serviceMonitor: + interval: "15s" + +kube-state-metrics: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + metricLabelsAllowlist: + - pods=[*] + - deployments=[app.kubernetes.io/name,app.kubernetes.io/component,app.kubernetes.io/instance] + prometheus: + monitor: + 
interval: "15s" + +prometheus-node-exporter: + service: + listenOnAllInterfaces: false + prometheus: + monitor: + interval: "15s" + # extraArgs: + # # enable perf collector + # - --collector.perf + # - --collector.perf.cpus=0-27 # adjust according to number of cores (xl170 has 10 cores, c6620 has 28 cores) + +prometheusOperator: + admissionWebhooks: + deployment: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + patch: + resources: + limits: + cpu: 1 + memory: 5Gi + requests: + memory: 150Mi + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + + serviceMonitor: + interval: "15s" + resources: + limits: + cpu: 2 + memory: 5Gi + requests: + cpu: 100m + memory: 100Mi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + + prometheusConfigReloader: + resources: + requests: + cpu: 200m + memory: 50Mi + limits: + cpu: 1 + memory: 2Gi + +prometheus: + serviceMonitor: + interval: "15s" + + prometheusSpec: + maximumStartupDurationSeconds: 60 + scrapeInterval: "15s" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: loader-nodetype + operator: In + values: + - monitoring + - singlenode + serviceMonitorSelectorNilUsesHelmValues: false + podMonitorSelectorNilUsesHelmValues: false + walCompression: false + resources: + limits: + cpu: 3 + memory: 10Gi + requests: + memory: 150Mi + enableAdminAPI: true + +# additionalPrometheusRulesMap: +# perf-custom: +# groups: +# - name: "custom:perf_rules" +# rules: +# - record: "custom_perf_instructions_per_cycle:rate1m" +# expr: | +# sum by (cpu,instance) (rate(node_perf_instructions_total[1m])) / +# sum by (cpu,instance) (rate(node_perf_cpucycles_total[1m])) +# labels: +# unit: "instructions/cycle" \ No newline at end of file diff --git a/scripts/setup/configs/setup.json b/scripts/setup/configs/setup.json new file mode 100644 index 000000000..26360ef64 --- /dev/null +++ b/scripts/setup/configs/setup.json @@ -0,0 +1,11 @@ +{ + "VHIVE_REPO": "https://github.com/vhive-serverless/vhive", + "VHIVE_BRANCH": "v1.8.2", + "LOADER_REPO": "https://github.com/vhive-serverless/invitro", + "LOADER_BRANCH": "main", + "KHALA_REPO": "https://github.com/hyscale-lab/khala.git", + "KHALA_BRANCH": "jy/nexus-knative-impl-rdma", + "CLUSTER_MODE": "container", + "PODS_PER_NODE": 240, + "DEPLOY_PROMETHEUS": false +} \ No newline at end of file diff --git a/scripts/setup/go.mod b/scripts/setup/go.mod new file mode 100644 index 000000000..de15ddbae --- /dev/null +++ b/scripts/setup/go.mod @@ -0,0 +1,7 @@ +module github.com/vhive-serverless/loader/scripts/setup + +go 1.22.7 + +require github.com/vhive-serverless/vHive/scripts/configs v0.0.0-20231114022852-400a49d284cb // indirect + +require github.com/vhive-serverless/vHive/scripts/utils v0.0.0-20231114022852-400a49d284cb diff --git a/scripts/setup/go.sum b/scripts/setup/go.sum new file mode 100644 index 000000000..8a34ec0f9 --- /dev/null +++ b/scripts/setup/go.sum @@ -0,0 +1,4 @@ +github.com/vhive-serverless/vHive/scripts/configs v0.0.0-20231114022852-400a49d284cb h1:JoudbjIOvkLiDY7WjaQosP0IKipHnlxFOCIeICQrtRQ= 
+github.com/vhive-serverless/vHive/scripts/configs v0.0.0-20231114022852-400a49d284cb/go.mod h1:nJSon4Eng7PdZ4HJX9dnZ7H4qxVm/r5zseFPfom7Jto= +github.com/vhive-serverless/vHive/scripts/utils v0.0.0-20231114022852-400a49d284cb h1:SmFLKJc4wAhr61ige8hoLiLWDKAI2uewBQUNejI63Hs= +github.com/vhive-serverless/vHive/scripts/utils v0.0.0-20231114022852-400a49d284cb/go.mod h1:xyjKlPn6JqSQtzKOCu8L4DW4rQcNmhxK9f97cOPo0Sg= diff --git a/scripts/setup/setup.go b/scripts/setup/setup.go new file mode 100644 index 000000000..e47326b60 --- /dev/null +++ b/scripts/setup/setup.go @@ -0,0 +1,59 @@ +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" + + "github.com/vhive-serverless/loader/scripts/setup/cluster" + loaderUtils "github.com/vhive-serverless/loader/scripts/setup/utils" + "github.com/vhive-serverless/vHive/scripts/utils" +) + +var ( + Setup = flag.String("setup-type", "create_multinode_cluster", "Type of setup to perform") + configName = flag.String("config", "node_setup.json", "Configuration file name") + configDir = flag.String("config-dir", "", "Path to setup config directory") +) + +func main() { + flag.Parse() + + availableCmds := []string{ + "create_multinode_cluster", + } + + dir, err := loaderUtils.ResolveConfigDir(*configDir, *configName) + if err != nil { + utils.FatalPrintf("%v\n", err) + os.Exit(1) + } + + switch *Setup { + case "create_multinode_cluster": + selectedConfigName := *configName + if flag.NArg() > 0 { + selectedConfigName, err = loaderUtils.CreateTempNodeSetup(dir, flag.Args()) + if err != nil { + utils.FatalPrintf("Failed to create temporary node setup config: %v\n", err) + os.Exit(1) + } + defer os.Remove(filepath.Join(dir, selectedConfigName)) + } + + err := cluster.CreateMultiNodeSetup(dir, selectedConfigName) + if err != nil { + utils.FatalPrintf("Failed to create multinode cluster: %v\n", err) + os.Exit(1) + } + // Call the function to create a multinode cluster + + default: + utils.FatalPrintf("Invalid subcommand --> %s! Available subcommands list: \n", *Setup) + for _, subCmd := range availableCmds { + fmt.Printf("%s\n", subCmd) + } + os.Exit(1) + } +} diff --git a/scripts/setup/utils/label.go b/scripts/setup/utils/label.go new file mode 100644 index 000000000..1f57a8eb2 --- /dev/null +++ b/scripts/setup/utils/label.go @@ -0,0 +1,44 @@ +package utils + +import ( + "fmt" + "strings" + + "github.com/vhive-serverless/loader/scripts/setup/configs" + "github.com/vhive-serverless/vHive/scripts/utils" +) + +// LabelNodes applies labels to Kubernetes nodes based on the configuration file using kubectl commands. +// masterNode is the IP address of the master node where kubectl commands will be executed. +// configDir specifies the directory where `node_setup.json` is located. 
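+// configName is the node setup file name (e.g. node_setup.json); each label (such as "loader-nodetype=worker") is applied with "kubectl label nodes <hostname> <label> --overwrite" on the master.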
+func LabelNodes(masterNode, configDir, configName string) error { + _, extNodeSetup, err := configs.GetNodeSetup(configDir, configName) + if !utils.CheckErrorWithMsg(err, "Failed to get node setup config from %s: %v", configDir, err) { + return err + } + + for label, nodeIPs := range extNodeSetup.NodeLabel { + for _, nodeIP := range nodeIPs { + // Get the hostname of the node from its IP + nodeName, err := ServerExec(nodeIP, "hostname") + if !utils.CheckErrorWithMsg(err, "Failed to get hostname for node IP %s: %v", nodeIP, err) { + // Continue labeling other nodes even if one fails + utils.FatalPrintf("Error getting hostname for node IP %s: %v\n", nodeIP, err) + continue + } + nodeName = strings.TrimSpace(nodeName) + + // Apply the label using kubectl on the master node + labelCmd := fmt.Sprintf("kubectl label nodes %s %s --overwrite", nodeName, label) + _, err = ServerExec(masterNode, labelCmd) + if !utils.CheckErrorWithMsg(err, "Failed to label node %s with %s on master node %s: %v", nodeName, label, masterNode, err) { + // Continue labeling other nodes even if one fails + utils.FatalPrintf("Error labeling node %s with %s: %v\n", nodeName, label, err) + } else { + utils.InfoPrintf("Successfully labeled node %s with %s\n", nodeName, label) + } + } + } + + return nil +} diff --git a/scripts/setup/utils/utils.go b/scripts/setup/utils/utils.go new file mode 100644 index 000000000..b60f503f0 --- /dev/null +++ b/scripts/setup/utils/utils.go @@ -0,0 +1,132 @@ +package utils + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +func ReadSetupCfg(path string) (map[string]string, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer file.Close() + + config := make(map[string]string) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "=") { + parts := strings.SplitN(line, "=", 2) + key := strings.TrimSpace(parts[0]) + value := strings.Trim(strings.TrimSpace(parts[1]), "'") + config[key] = value + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return config, nil +} + +func ServerExec(node, command string) (string, error) { + cmd := exec.Command("ssh", "-oStrictHostKeyChecking=no", "-p", "22", node, command) + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to execute command on node %s: %v, output: %s", node, err, string(output)) + } + return string(output), nil +} + +func ResolveConfigDir(input string, configName string) (string, error) { + if input != "" { + if isValidConfigDir(input) { + return input, nil + } + return "", fmt.Errorf("config directory %q does not contain setup.json and prometheus/prom_config.json", input) + } + + candidates := []string{ + "configs", + "scripts/setup/configs", + } + + for _, dir := range candidates { + if isValidConfigDir(dir) { + return dir, nil + } + } + + return "", fmt.Errorf("could not locate config directory; pass -config-dir") +} + +type tempNodeSetup struct { + NodeSetup struct { + MasterNode []string `json:"MASTER_NODE"` + LoaderNode []string `json:"LOADER_NODE"` + WorkerNode []string `json:"WORKER_NODE"` + } `json:"NODE_SETUP"` + NodeLabel map[string][]string `json:"NODE_LABEL"` + NodeURL []string `json:"NODE_URL"` +} + +func CreateTempNodeSetup(configDir string, nodes []string) (string, error) { + if len(nodes) < 2 { + return "", fmt.Errorf("expected at least ") + } + + const tempConfigName = "node_setup_temp.json" + + setup := tempNodeSetup{ + 
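+		// NodeLabel is filled in below using the internal 10.0.1.x addresses; NodeURL keeps the caller-supplied SSH endpoints in their original order.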
NodeLabel: make(map[string][]string), + NodeURL: append([]string(nil), nodes...), + } + + setup.NodeSetup.MasterNode = []string{"10.0.1.1"} + setup.NodeSetup.LoaderNode = []string{"10.0.1.2"} + + workerNodes := make([]string, 0, len(nodes)-1) + workerLabelNodes := make([]string, 0, len(nodes)-2) + + for i := 1; i < len(nodes); i++ { + internalIP := fmt.Sprintf("10.0.1.%d", i+1) + workerNodes = append(workerNodes, internalIP) + if i > 1 { + workerLabelNodes = append(workerLabelNodes, internalIP) + } + } + + setup.NodeSetup.WorkerNode = workerNodes + setup.NodeLabel["loader-nodetype=master"] = []string{"10.0.1.1"} + setup.NodeLabel["loader-nodetype=monitoring"] = []string{"10.0.1.2"} + setup.NodeLabel["loader-nodetype=worker"] = workerLabelNodes + + configData, err := json.MarshalIndent(setup, "", " ") + if err != nil { + return "", err + } + + configPath := filepath.Join(configDir, tempConfigName) + if err := os.WriteFile(configPath, append(configData, '\n'), 0o644); err != nil { + return "", err + } + + return tempConfigName, nil +} + +func isValidConfigDir(dir string) bool { + if _, err := os.Stat(filepath.Join(dir, "setup.json")); err != nil { + return false + } + if _, err := os.Stat(filepath.Join(dir, "prometheus", "prom_config.json")); err != nil { + return false + } + return true +} diff --git a/scripts/setup/utils/utils_test.go b/scripts/setup/utils/utils_test.go new file mode 100644 index 000000000..0a4b74b77 --- /dev/null +++ b/scripts/setup/utils/utils_test.go @@ -0,0 +1,79 @@ +package utils + +import ( + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/vhive-serverless/loader/scripts/setup/configs" +) + +func TestCreateTempNodeSetupPreservesNodeSetupJSONContract(t *testing.T) { + t.Parallel() + + configDir := t.TempDir() + nodes := []string{ + "master@example.com", + "loader@example.com", + "worker1@example.com", + "worker2@example.com", + } + + configName, err := CreateTempNodeSetup(configDir, nodes) + if err != nil { + t.Fatalf("CreateTempNodeSetup returned error: %v", err) + } + + if configName != "node_setup_temp.json" { + t.Fatalf("CreateTempNodeSetup returned unexpected file name: %s", configName) + } + + intNodeSetup, extNodeSetup, err := configs.GetNodeSetup(configDir, configName) + if err != nil { + t.Fatalf("GetNodeSetup returned error for generated temp config: %v", err) + } + + if _, err := os.Stat(filepath.Join(configDir, configName)); err != nil { + t.Fatalf("generated config file is missing: %v", err) + } + + expectedInternalWorkers := []string{"10.0.1.2", "10.0.1.3", "10.0.1.4"} + if !reflect.DeepEqual(intNodeSetup.NodeSetup.MasterNode, []string{"10.0.1.1"}) { + t.Fatalf("unexpected internal master node: %#v", intNodeSetup.NodeSetup.MasterNode) + } + if !reflect.DeepEqual(intNodeSetup.NodeSetup.LoaderNode, []string{"10.0.1.2"}) { + t.Fatalf("unexpected internal loader node: %#v", intNodeSetup.NodeSetup.LoaderNode) + } + if !reflect.DeepEqual(intNodeSetup.NodeSetup.WorkerNode, expectedInternalWorkers) { + t.Fatalf("unexpected internal worker nodes: %#v", intNodeSetup.NodeSetup.WorkerNode) + } + + if !reflect.DeepEqual(extNodeSetup.NodeSetup.MasterNode, []string{"master@example.com"}) { + t.Fatalf("unexpected external master node: %#v", extNodeSetup.NodeSetup.MasterNode) + } + if !reflect.DeepEqual(extNodeSetup.NodeSetup.LoaderNode, []string{"loader@example.com"}) { + t.Fatalf("unexpected external loader node: %#v", extNodeSetup.NodeSetup.LoaderNode) + } + if !reflect.DeepEqual(extNodeSetup.NodeSetup.WorkerNode, []string{"loader@example.com", 
"worker1@example.com", "worker2@example.com"}) { + t.Fatalf("unexpected external worker nodes: %#v", extNodeSetup.NodeSetup.WorkerNode) + } + + expectedLabels := map[string][]string{ + "loader-nodetype=master": {"master@example.com"}, + "loader-nodetype=monitoring": {"loader@example.com"}, + "loader-nodetype=worker": {"worker1@example.com", "worker2@example.com"}, + } + if !reflect.DeepEqual(extNodeSetup.NodeLabel, expectedLabels) { + t.Fatalf("unexpected external node labels: %#v", extNodeSetup.NodeLabel) + } +} + +func TestCreateTempNodeSetupRequiresMasterAndLoader(t *testing.T) { + t.Parallel() + + _, err := CreateTempNodeSetup(t.TempDir(), []string{"master@example.com"}) + if err == nil { + t.Fatal("CreateTempNodeSetup returned nil error for incomplete node list") + } +} From afe14e39af2cf572d3a5f999881d50391c253dae Mon Sep 17 00:00:00 2001 From: JooYoung Park Date: Thu, 23 Apr 2026 21:29:16 +0800 Subject: [PATCH 2/2] fix control plane update for prometheus, remove unused yaml files Signed-off-by: JooYoung Park --- config/kubeadm_init.yaml | 46 --- config/prometh_values_kn.yaml | 182 ----------- go.work.sum | 16 - scripts/konk-ci/03-kind-metrics.sh | 4 +- scripts/setup/cluster/create_multinode.sh | 287 ------------------ scripts/setup/cluster/setup_prometheus.go | 24 +- scripts/setup/configs/config.go | 3 - scripts/setup/configs/node_setup.json | 1 + .../configs/prometheus/kubeadm_init.yaml | 31 +- scripts/setup/configs/setup.json | 2 - scripts/setup/expose_infra_metrics.sh | 7 +- scripts/setup/go.mod | 2 +- 12 files changed, 46 insertions(+), 559 deletions(-) delete mode 100644 config/kubeadm_init.yaml delete mode 100644 config/prometh_values_kn.yaml delete mode 100755 scripts/setup/cluster/create_multinode.sh diff --git a/config/kubeadm_init.yaml b/config/kubeadm_init.yaml deleted file mode 100644 index 31334b009..000000000 --- a/config/kubeadm_init.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: kubeadm.k8s.io/v1beta3 -kind: InitConfiguration -localAPIEndpoint: - advertiseAddress: 10.0.1.1 - ---- -apiVersion: kubeadm.k8s.io/v1beta3 -kind: ClusterConfiguration -apiServer: - extraArgs: - authorization-mode: Node,RBAC - timeoutForControlPlane: 4m0s # The timeout that we wait for the API server to appear -certificatesDir: /etc/kubernetes/pki -clusterName: kubernetes -dns: {} -etcd: - local: - dataDir: /var/lib/etcd - extraArgs: - listen-metrics-urls: http://0.0.0.0:2381 -imageRepository: registry.k8s.io -kubernetesVersion: v1.34.3 -networking: - dnsDomain: cluster.local - podSubnet: 10.168.0.0/16 - serviceSubnet: 10.96.0.0/12 -controllerManager: - extraArgs: - bind-address: 0.0.0.0 # This allows scheduler and controllers to be scraped on other ports and nodes. -scheduler: - extraArgs: - bind-address: 0.0.0.0 - ---- -# Configure kubelet for large pod limits. 
-apiVersion: kubelet.config.k8s.io/v1beta1 -kind: KubeletConfiguration -kubeAPIQPS: 50 # Default: 5 -kubeAPIBurst: 100 # Default: 10 -configMapAndSecretChangeDetectionStrategy: "Watch" - - ---- -apiVersion: kubeproxy.config.k8s.io/v1alpha1 -kind: KubeProxyConfiguration -metricsBindAddress: 0.0.0.0:10249 \ No newline at end of file diff --git a/config/prometh_values_kn.yaml b/config/prometh_values_kn.yaml deleted file mode 100644 index a50618b12..000000000 --- a/config/prometh_values_kn.yaml +++ /dev/null @@ -1,182 +0,0 @@ -alertmanager: - config: - route: - group_by: ['job'] - serviceMonitor: - interval: "15s" - alertmanagerSpec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - -grafana: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - serviceMonitor: - interval: "15s" - scrapeTimeout: "15s" # Cannot be larger than Prometheus scrape intervals - -kubeApiServer: - serviceMonitor: - interval: "15s" - metricRelabelings: [] - -kubelet: - serviceMonitor: - interval: "15s" - cAdvisorMetricRelabelings: [] - -kubeControllerManager: - service: - targetPort: 10257 - serviceMonitor: - interval: "15s" - https: true - insecureSkipVerify: true - -coreDns: - serviceMonitor: - interval: "15s" - -kubeEtcd: - serviceMonitor: - interval: "15s" - -kubeScheduler: - service: - targetPort: 10259 - serviceMonitor: - interval: "15s" - https: true - insecureSkipVerify: true - -kubeProxy: - serviceMonitor: - interval: "15s" - -kube-state-metrics: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - metricLabelsAllowlist: - - pods=[*] - - deployments=[app.kubernetes.io/name,app.kubernetes.io/component,app.kubernetes.io/instance] - prometheus: - monitor: - interval: "15s" - -prometheus-node-exporter: - service: - listenOnAllInterfaces: false - prometheus: - monitor: - interval: "15s" - -prometheusOperator: - admissionWebhooks: - deployment: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - patch: - resources: - limits: - cpu: 1 - memory: 5Gi - requests: - memory: 150Mi - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - - serviceMonitor: - interval: "15s" - resources: - limits: - cpu: 2 - memory: 5Gi - requests: - cpu: 100m - memory: 100Mi - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - - prometheusConfigReloader: - resources: - requests: - cpu: 200m - memory: 50Mi - limits: - cpu: 1 - memory: 2Gi - -prometheus: - serviceMonitor: - interval: "15s" - - prometheusSpec: - maximumStartupDurationSeconds: 60 - scrapeInterval: "15s" - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: loader-nodetype - operator: In - values: - - monitoring - - singlenode - 
serviceMonitorSelectorNilUsesHelmValues: false - podMonitorSelectorNilUsesHelmValues: false - walCompression: false - resources: - limits: - cpu: 3 - memory: 10Gi - requests: - memory: 150Mi - enableAdminAPI: true diff --git a/go.work.sum b/go.work.sum index bf3d7145b..8956b7a2d 100644 --- a/go.work.sum +++ b/go.work.sum @@ -6,22 +6,16 @@ github.com/IBM/sarama v1.43.1/go.mod h1:GG5q1RURtDNPz8xxJs3mgX6Ytak8Z9eLhAkJPObe github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= -github.com/containerd/containerd v1.5.2/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/eapache/go-resiliency v1.6.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho= github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3/go.mod h1:YvSRo5mw33fLEx1+DlK6L2VV43tJt5Eyel9n9XBcR+0= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/ease-lab/vhive/utils/tracing/go v0.0.0-20211206145412-3a4607297428/go.mod h1:7a477TAJimC1AkzEynida5yXncx5l2FwL1YqEaxLNI8= github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= -github.com/go-fonts/liberation v0.3.1/go.mod h1:jdJ+cqF+F4SUL2V+qxBth8fvBpBDS7yloUL5Fi8GTGY= github.com/go-fonts/liberation v0.3.3/go.mod h1:eUAzNRuJnpSnd1sm2EyloQfSOT79pdw7X7++Ri+3MCU= github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= -github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9/go.mod h1:gWuR/CrFDDeVRFQwHPvsv9soJVB/iqymhuZQuJ3a9OM= github.com/go-latex/latex v0.0.0-20240709081214-31cef3c7570e/go.mod h1:J4SAGzkcl+28QWi7yz72tyC/4aGnppOvya+AEv4TaAQ= -github.com/go-pdf/fpdf v0.8.0/go.mod h1:gfqhcNwXrsd3XYKte9a7vM3smvU/jB4ZRDrmWSxpfdc= github.com/go-pdf/fpdf v0.9.0/go.mod h1:oO8N111TkmKb9D7VvWGLvLJlaZUQVPM+6V42pp3iV4Y= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/goccmack/gocc v1.0.2/go.mod h1:LXX2tFVUggS/Zgx/ICPOr3MLyusuM7EcbfkPvNsjdO8= @@ -42,34 +36,24 @@ github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7 github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc= github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= 
-github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/vhive-serverless/loader v0.0.0-20260415061204-0512e9eaa2a3/go.mod h1:yET1r9lcWPzTJl8W4RJ7xK+5G4iQXx+aSUWjBArdT5E= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= -go.opentelemetry.io/contrib v0.20.0 h1:ubFQUn0VCZ0gPwIoJfBJVpeBlyRMxu8Mm/huKWYd9p0= -go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= -go.opentelemetry.io/otel/exporters/trace/zipkin v0.20.0/go.mod h1:QnYEWBA4wTy/15vvmj7Poeklp6xndAMcdejvzZNUtvM= golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= golang.org/x/tools/go/expect v0.1.1-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY= golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated/go.mod h1:RVAQXBGNv1ib0J382/DPCRS/BPnsGebyM1Gj5VSDpG8= gonum.org/v1/tools v0.0.0-20200318103217-c168b003ce8c/go.mod h1:fy6Otjqbk477ELp8IXTpw1cObQtLbRCBVonY+bTTfcM= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:p3MLuOwURrGBRoEyFHBT3GjUwaCQVKeNqqWxlcISGdw= diff --git a/scripts/konk-ci/03-kind-metrics.sh b/scripts/konk-ci/03-kind-metrics.sh index 17ac1238b..e365f0376 100644 --- a/scripts/konk-ci/03-kind-metrics.sh +++ b/scripts/konk-ci/03-kind-metrics.sh @@ -33,7 +33,7 @@ yq eval ' .prometheus.prometheusSpec.resources.limits.memory = "512Mi" | .prometheus.prometheusSpec.resources.requests.cpu = "100m" | .prometheus.prometheusSpec.resources.requests.memory = "256Mi" -' -i "./config/prometh_values_kn.yaml" +' -i "./scripts/setup/configs/prometheus/prom_values.yaml" # Install prometheus stack release_label="prometheus" @@ -42,7 +42,7 @@ prometheus_chart_version="60.1.0" helm install \ -n monitoring $release_label \ --version $prometheus_chart_version prometheus-community/kube-prometheus-stack \ - -f ./config/prometh_values_kn.yaml + -f ./scripts/setup/configs/prometheus/prom_values.yaml # Configure kubectl config for non-root user docker exec knative-control-plane sh -c "mkdir -p /home/$(whoami)/.kube" diff --git 
a/scripts/setup/cluster/create_multinode.sh b/scripts/setup/cluster/create_multinode.sh deleted file mode 100755 index 513fb9331..000000000 --- a/scripts/setup/cluster/create_multinode.sh +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env bash - -# -# MIT License -# -# Copyright (c) 2023 EASL and the vHive community -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# - -MASTER_NODE=$1 -DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)" - -source "$DIR/setup.cfg" - -if [ "$CLUSTER_MODE" = "container" ] -then - OPERATION_MODE="stock-only" - FIRECRACKER_SNAPSHOTS="" -elif [ $CLUSTER_MODE = "firecracker" ] -then - OPERATION_MODE="firecracker" - FIRECRACKER_SNAPSHOTS="" -elif [ $CLUSTER_MODE = "firecracker_snapshots" ] -then - OPERATION_MODE="firecracker" - FIRECRACKER_SNAPSHOTS="-snapshots" -else - echo "Unsupported cluster mode" - exit 1 -fi - -if [ $PODS_PER_NODE -gt 1022 ]; then - # CIDR range limitation exceeded - echo "Pods per node cannot be greater than 1022. Cluster deployment has been aborted." 
- exit 1 -fi - -server_exec() { - ssh -oStrictHostKeyChecking=no -p 22 "$1" "$2"; -} - -common_init() { - internal_init() { - server_exec $1 "git clone --branch=$VHIVE_BRANCH $VHIVE_REPO" - - server_exec $1 "pushd ~/vhive/scripts > /dev/null && ./install_go.sh && source /etc/profile && go build -o setup_tool && ./setup_tool setup_node ${OPERATION_MODE} && popd > /dev/null" - - server_exec $1 'tmux new -s containerd -d' - server_exec $1 'tmux send -t containerd "sudo containerd 2>&1 | tee ~/containerd_log.txt" ENTER' - # install precise NTP clock synchronizer - server_exec $1 'sudo apt-get update && sudo apt-get install -y chrony htop sysstat' - # synchronize clock across nodes - server_exec $1 "echo \"server ops.emulab.net iburst prefer\" > >(sudo tee -a /etc/chrony/chrony.conf >/dev/null)" - server_exec $1 'sudo systemctl restart chronyd' - # dump clock info - server_exec $1 'sudo chronyc tracking' - - clone_loader $1 - server_exec $1 '~/loader/scripts/setup/stabilize.sh' - } - - for node in "$@" - do - internal_init "$node" & - done - - wait -} - -function setup_master() { - echo "Setting up master node: $MASTER_NODE" - - server_exec "$MASTER_NODE" 'tmux new -s runner -d' - server_exec "$MASTER_NODE" 'tmux new -s kwatch -d' - server_exec "$MASTER_NODE" 'tmux new -s master -d' - - server_exec $MASTER_NODE '~/loader/scripts/setup/rewrite_yaml_files.sh' - - MN_CLUSTER="pushd ~/vhive/scripts > /dev/null && ./setup_tool create_multinode_cluster ${OPERATION_MODE} && popd > /dev/null" - server_exec "$MASTER_NODE" "tmux send -t master \"$MN_CLUSTER\" ENTER" - - # Get the join token from k8s. - while ! server_exec "$MASTER_NODE" "[ -e ~/vhive/scripts/masterKey.yaml ]"; do - sleep 1 - done - - LOGIN_TOKEN=$(server_exec "$MASTER_NODE" \ - 'awk '\''/^ApiserverAdvertiseAddress:/ {ip=$2} \ - /^ApiserverPort:/ {port=$2} \ - /^ApiserverToken:/ {token=$2} \ - /^ApiserverTokenHash:/ {token_hash=$2} \ - END {print "sudo kubeadm join " ip ":" port " --token " token " --discovery-token-ca-cert-hash " token_hash}'\'' ~/vhive/scripts/masterKey.yaml') -} - -function setup_vhive_firecracker_daemon() { - node=$1 - - server_exec $node 'cd vhive; source /etc/profile && go build' - server_exec $node 'tmux new -s firecracker -d' - server_exec $node 'tmux send -t firecracker "sudo PATH=$PATH /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 2>&1 | tee ~/firecracker_log.txt" ENTER' - server_exec $node 'tmux new -s vhive -d' - server_exec $node 'tmux send -t vhive "cd vhive" ENTER' - RUN_VHIVE_CMD="sudo ./vhive ${FIRECRACKER_SNAPSHOTS} 2>&1 | tee ~/vhive_log.txt" - server_exec $node "tmux send -t vhive \"$RUN_VHIVE_CMD\" ENTER" -} - -function setup_workers() { - internal_setup() { - node=$1 - - echo "Setting up worker node: $node" - - server_exec $node "pushd ~/vhive/scripts > /dev/null && ./setup_tool setup_worker_kubelet ${OPERATION_MODE} && popd > /dev/null" - - if [ "$OPERATION_MODE" = "firecracker" ]; then - setup_vhive_firecracker_daemon $node - fi - - server_exec $node "sudo ${LOGIN_TOKEN}" - echo "Worker node $node has joined the cluster." - - # Stretch the capacity of the worker node to 240 (k8s default: 110) - # Empirically, this gives us a max. #pods being 240-40=200 - echo "Stretching node capacity for $node." 
- server_exec $node "echo \"maxPods: ${PODS_PER_NODE}\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)" - server_exec $node "echo \"containerLogMaxSize: 512Mi\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)" - server_exec $node 'sudo systemctl restart kubelet' - server_exec $node 'sleep 10' - - # Rejoin has to be performed although errors will be thrown. Otherwise, restarting the kubelet will cause the node unreachable for some reason - server_exec $node "sudo ${LOGIN_TOKEN} > /dev/null 2>&1" - echo "Worker node $node joined the cluster (again :P)." - } - - for node in "$@" - do - internal_setup "$node" & - done - - wait -} - -function extend_CIDR() { - #* Get node name list. - readarray -t NODE_NAMES < <(server_exec $MASTER_NODE 'kubectl get no' | tail -n +2 | awk '{print $1}') - - if [ ${#NODE_NAMES[@]} -gt 63 ]; then - echo "Cannot extend CIDR range for more than 63 nodes. Cluster deployment has been aborted." - exit 1 - fi - - for i in "${!NODE_NAMES[@]}"; do - NODE_NAME=${NODE_NAMES[i]} - #* Compute subnet: 00001010.10101000.000000 00.00000000 -> about 1022 IPs per worker. - #* To be safe, we change both master and workers with an offset of 0.0.4.0 (4 * 2^8) - # (NB: zsh indices start from 1.) - #* Assume less than 63 nodes in total. - let SUBNET=i*4+4 - #* Extend pod ip range, delete and create again. - server_exec $MASTER_NODE "kubectl get node $NODE_NAME -o json | jq '.spec.podCIDR |= \"10.168.$SUBNET.0/22\"' > node.yaml" - server_exec $MASTER_NODE "kubectl delete node $NODE_NAME && kubectl create -f node.yaml" - - echo "Changed pod CIDR for worker $NODE_NAME to 10.168.$SUBNET.0/22" - sleep 5 - done - - #* Join the cluster for the 3rd time. - for node in "$@" - do - server_exec $node "sudo ${LOGIN_TOKEN} > /dev/null 2>&1" - echo "Worker node $node joined the cluster (again^2 :D)." 
- done -} - -function clone_loader() { - server_exec $1 "git clone --depth=1 --branch=$LOADER_BRANCH $LOADER_REPO loader" - server_exec $1 'echo -en "\n\n" | sudo apt-get install -y python3-pip' - # server_exec $1 'cd; cd loader; pip install -r config/requirements.txt --break-system-packages' -} - -function copy_k8s_certificates() { - function internal_copy() { - server_exec $1 "mkdir -p ~/.kube" - rsync ./kubeconfig $1:~/.kube/config - } - - echo $MASTER_NODE - rsync $MASTER_NODE:~/.kube/config ./kubeconfig - - for node in "$@" - do - internal_copy "$node" & - done - - wait - - rm ./kubeconfig -} - -# Distributes loader public sshkey to all other nodes -function distribute_loader_ssh_key() { - LOADER_NODE=$1 - ALL_NODES="$MASTER_NODE $@" - - # Generate SSH key pair for loader node - server_exec $LOADER_NODE 'echo -e "\n\n\n" | ssh-keygen -t rsa > /dev/null' - server_exec $LOADER_NODE 'chmod 600 ~/.ssh/id_rsa' - server_exec $LOADER_NODE 'eval "$(ssh-agent -s)" && ssh-add' - # Copy public key into all nodes authorized_keys - rsync $LOADER_NODE:~/.ssh/id_rsa.pub ./loader_sshpub - for node in $ALL_NODES; do - cat ./loader_sshpub | ssh $node 'cat >> ~/.ssh/authorized_keys' & - done - - wait - # clean up - rm ./loader_sshpub -} - -############################################### -######## MAIN SETUP PROCEDURE IS BELOW ######## -############################################### - -{ - # Set up all nodes including the master - common_init "$@" - - shift # make argument list only contain worker nodes (drops master node) - - setup_master - setup_workers "$@" - - if [ $PODS_PER_NODE -gt 240 ]; then - extend_CIDR "$@" - fi - - # Untaint master to schedule knative control plane there - server_exec $MASTER_NODE "kubectl taint nodes \$(hostname) node-role.kubernetes.io/control-plane-" - - # Notify the master that all nodes have joined the cluster - server_exec $MASTER_NODE 'tmux send -t master "y" ENTER' - - namespace_info=$(server_exec $MASTER_NODE "kubectl get namespaces") - while [[ ${namespace_info} != *'knative-serving'* ]]; do - sleep 60 - namespace_info=$(server_exec $MASTER_NODE "kubectl get namespaces") - done - - echo "Master node $MASTER_NODE finalised." 
- - # Copy API server certificates from master to each worker node - copy_k8s_certificates "$@" - - # Create and distribute loader RSA key to other nodes - distribute_loader_ssh_key "$@" - - server_exec $MASTER_NODE 'cd loader; bash scripts/setup/patch_init_scale.sh' - - source $DIR/label.sh - - # Force placement of metrics collectors and instrumentation on the loader node and control plane on master - label_nodes $MASTER_NODE $1 # loader node is second on the list, becoming first after arg shift - - server_exec $MASTER_NODE "kubectl patch configmap -n knative-serving config-features -p '{\"data\": {\"kubernetes.podspec-affinity\": \"enabled\"}}'" - - - if [[ "$DEPLOY_PROMETHEUS" == true ]]; then - $DIR/expose_infra_metrics.sh $MASTER_NODE - fi -} diff --git a/scripts/setup/cluster/setup_prometheus.go b/scripts/setup/cluster/setup_prometheus.go index e674eefcf..fd8e92015 100644 --- a/scripts/setup/cluster/setup_prometheus.go +++ b/scripts/setup/cluster/setup_prometheus.go @@ -98,10 +98,26 @@ func setupPrometheus(masterNode string, allNode []string, promConfig *configs.Pr return err } - // Bind addresses of the control manager and scheduler to "0.0.0.0" - utils.WaitPrintf("Binding control manager and scheduler addresses on master node %s...\n", masterNode) - _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("sudo kubeadm upgrade apply --config %s/kubeadm_init.yaml --ignore-preflight-errors all --force --v=7", promConfig.PromValuePath)) - if !utils.CheckErrorWithMsg(err, "Failed to bind control manager and scheduler addresses on master node %s: %v\n", masterNode, err) { + // Re-render the controller manager manifest so Prometheus can scrape it on + // all interfaces using the existing kubeadm init configuration file. + utils.WaitPrintf("Binding controller manager address on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("sudo kubeadm init phase control-plane controller-manager --config %s/kubeadm_init.yaml --v=7", promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to bind controller manager address on master node %s: %v\n", masterNode, err) { + return err + } + + // Re-render the scheduler manifest using the same single kubeadm config file. + utils.WaitPrintf("Binding scheduler address on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("sudo kubeadm init phase control-plane scheduler --config %s/kubeadm_init.yaml --v=7", promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to bind scheduler address on master node %s: %v\n", masterNode, err) { + return err + } + + // kube-proxy is configured via the kube-system/kube-proxy ConfigMap, so + // re-apply it from the init config before restarting the DaemonSet pods. 
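+	// Re-applying the addon updates the kube-proxy ConfigMap but does not restart the existing DaemonSet pods, so the new metricsBindAddress takes effect only once the pods are recreated.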
+ utils.WaitPrintf("Updating kube-proxy metrics bind address on master node %s...\n", masterNode) + _, err = loaderUtils.ServerExec(masterNode, fmt.Sprintf("sudo kubeadm init phase addon kube-proxy --config %s/kubeadm_init.yaml --v=7", promConfig.PromValuePath)) + if !utils.CheckErrorWithMsg(err, "Failed to update kube-proxy configuration on master node %s: %v\n", masterNode, err) { return err } diff --git a/scripts/setup/configs/config.go b/scripts/setup/configs/config.go index 637fbd46e..612b0c42b 100644 --- a/scripts/setup/configs/config.go +++ b/scripts/setup/configs/config.go @@ -34,12 +34,9 @@ type SetupConfig struct { HiveBranch string `json:"VHIVE_BRANCH"` LoaderRepo string `json:"LOADER_REPO"` LoaderBranch string `json:"LOADER_BRANCH"` - KhalaRepo string `json:"KHALA_REPO"` - KhalaBranch string `json:"KHALA_BRANCH"` ClusterMode string `json:"CLUSTER_MODE"` PodsPerNode int `json:"PODS_PER_NODE"` DeployPrometheus bool `json:"DEPLOY_PROMETHEUS"` - DeployRDMA bool `json:"DEPLOY_RDMA"` } type PrometheusConfig struct { diff --git a/scripts/setup/configs/node_setup.json b/scripts/setup/configs/node_setup.json index 534df101a..ab777aa14 100644 --- a/scripts/setup/configs/node_setup.json +++ b/scripts/setup/configs/node_setup.json @@ -7,6 +7,7 @@ "10.0.1.2" ], "WORKER_NODE": [ + "10.0.1.2", "10.0.1.3", "10.0.1.4" ] diff --git a/scripts/setup/configs/prometheus/kubeadm_init.yaml b/scripts/setup/configs/prometheus/kubeadm_init.yaml index 7193a13b2..d7e7aff6e 100644 --- a/scripts/setup/configs/prometheus/kubeadm_init.yaml +++ b/scripts/setup/configs/prometheus/kubeadm_init.yaml @@ -1,15 +1,17 @@ -apiVersion: kubeadm.k8s.io/v1beta3 +apiVersion: kubeadm.k8s.io/v1beta4 kind: InitConfiguration localAPIEndpoint: advertiseAddress: 10.0.1.1 +timeouts: + controlPlaneComponentHealthCheck: 4m0s --- -apiVersion: kubeadm.k8s.io/v1beta3 +apiVersion: kubeadm.k8s.io/v1beta4 kind: ClusterConfiguration apiServer: extraArgs: - authorization-mode: Node,RBAC - timeoutForControlPlane: 4m0s # The timeout that we wait for the API server to appear + - name: authorization-mode + value: Node,RBAC certificatesDir: /etc/kubernetes/pki clusterName: kubernetes dns: {} @@ -17,30 +19,31 @@ etcd: local: dataDir: /var/lib/etcd extraArgs: - listen-metrics-urls: http://0.0.0.0:2381 + - name: listen-metrics-urls + value: http://0.0.0.0:2381 imageRepository: registry.k8s.io -kubernetesVersion: v1.29.1 +kubernetesVersion: v1.34.3 networking: dnsDomain: cluster.local podSubnet: 10.168.0.0/16 serviceSubnet: 10.96.0.0/12 controllerManager: extraArgs: - bind-address: 0.0.0.0 # This allows scheduler and controllers to be scraped on other ports and nodes. + - name: bind-address + value: 0.0.0.0 scheduler: extraArgs: - bind-address: 0.0.0.0 + - name: bind-address + value: 0.0.0.0 --- -# Configure kubelet for large pod limits. 
apiVersion: kubelet.config.k8s.io/v1beta1 kind: KubeletConfiguration -kubeAPIQPS: 50 # Default: 5 -kubeAPIBurst: 100 # Default: 10 -configMapAndSecretChangeDetectionStrategy: "Watch" - +kubeAPIQPS: 50 +kubeAPIBurst: 100 +configMapAndSecretChangeDetectionStrategy: Watch --- apiVersion: kubeproxy.config.k8s.io/v1alpha1 kind: KubeProxyConfiguration -metricsBindAddress: 0.0.0.0:10249 \ No newline at end of file +metricsBindAddress: 0.0.0.0:10249 diff --git a/scripts/setup/configs/setup.json b/scripts/setup/configs/setup.json index 26360ef64..ab6e5b7b9 100644 --- a/scripts/setup/configs/setup.json +++ b/scripts/setup/configs/setup.json @@ -3,8 +3,6 @@ "VHIVE_BRANCH": "v1.8.2", "LOADER_REPO": "https://github.com/vhive-serverless/invitro", "LOADER_BRANCH": "main", - "KHALA_REPO": "https://github.com/hyscale-lab/khala.git", - "KHALA_BRANCH": "jy/nexus-knative-impl-rdma", "CLUSTER_MODE": "container", "PODS_PER_NODE": 240, "DEPLOY_PROMETHEUS": false diff --git a/scripts/setup/expose_infra_metrics.sh b/scripts/setup/expose_infra_metrics.sh index d6dec0034..c5222f6ea 100755 --- a/scripts/setup/expose_infra_metrics.sh +++ b/scripts/setup/expose_infra_metrics.sh @@ -47,7 +47,7 @@ server_exec() { server_exec 'kubectl create namespace monitoring' release_label="prometheus" prometheus_chart_version="72.6.2" - server_exec "cd loader; helm install -n monitoring $release_label --version $prometheus_chart_version prometheus-community/kube-prometheus-stack -f config/prometh_values_kn.yaml" + server_exec "cd loader; helm install -n monitoring $release_label --version $prometheus_chart_version prometheus-community/kube-prometheus-stack -f ~/loader/scripts/setup/configs/prometheus/prom_values.yaml" #* Apply the ServiceMonitors/PodMonitors to collect metrics from Knative. #* The ports of the control manager and scheduler are mapped in a way that prometheus default installation can find them. @@ -57,7 +57,10 @@ server_exec() { server_exec "curl -sL $commit_version/config/configmap-serving-dashboard.json | sed 's/"namespace": "knative-serving"/"namespace": "monitoring"/g' | kubectl apply -f -" #* Bind addresses of the control manager and scheduler to "0.0.0.0" so that prometheus can scrape them from any domains. - server_exec 'cd loader; sudo kubeadm upgrade apply --config config/kubeadm_init.yaml --ignore-preflight-errors all --yes --v=7' + server_exec 'sudo kubeadm init phase control-plane controller-manager --config ~/loader/scripts/setup/configs/prometheus/kubeadm_init.yaml --v=7' + server_exec 'sudo kubeadm init phase control-plane scheduler --config ~/loader/scripts/setup/configs/prometheus/kubeadm_init.yaml --v=7' + #* Re-apply kube-proxy configuration so Prometheus can scrape kube-proxy metrics. + server_exec 'sudo kubeadm init phase addon kube-proxy --config ~/loader/scripts/setup/configs/prometheus/kubeadm_init.yaml --v=7' #* Restart the kube-proxy to apply the changes. server_exec 'kubectl delete pod -l k8s-app=kube-proxy -n kube-system' diff --git a/scripts/setup/go.mod b/scripts/setup/go.mod index de15ddbae..9e3e26382 100644 --- a/scripts/setup/go.mod +++ b/scripts/setup/go.mod @@ -1,6 +1,6 @@ module github.com/vhive-serverless/loader/scripts/setup -go 1.22.7 +go 1.26.2 require github.com/vhive-serverless/vHive/scripts/configs v0.0.0-20231114022852-400a49d284cb // indirect