diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 9f1314e3c..01cfb8cc6 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -25,6 +25,7 @@ Fixes ${ISSUE_URL} Add one or more labels to trigger offline builds: - `build-default` - Full production build (ansible, terraform, all packages) - `build-demo` - Demo/WIAB build +- `build-wiab-staging` - WIAB-staging build - `build-min` - Minimal build (fastest, essential charts only) - `build-all` - Run all three builds diff --git a/.github/workflows/deploy-wiab.yml b/.github/workflows/deploy-wiab.yml deleted file mode 100644 index ad4b0a17f..000000000 --- a/.github/workflows/deploy-wiab.yml +++ /dev/null @@ -1,39 +0,0 @@ -# This playbook is not-up-to-date, requires to be updated to match with current developments -# A new WIAB (wire in a box) dev solution has been created https://docs.wire.com/latest/how-to/install/demo-wiab.html and can be used until this (wiab-staging) gets updated -name: Deploy on Hetzner WIAB setup -on: - workflow_run: - workflows: ["Prepare custom offline package"] - types: - - completed - -jobs: - deploy: - runs-on: ubuntu-latest - concurrency: - group: autodeploy-script - cancel-in-progress: false - - steps: - # Step 1: Checkout the repository code - - name: Checkout code - uses: actions/checkout@v3 - - # Step 2: Set up SSH key for remote access - - name: Set up SSH key - uses: webfactory/ssh-agent@v0.5.3 - with: - ssh-private-key: ${{ secrets.WIAB_PRIVATE_SSH_KEY }} - - # Step 3: Get the latest commit SHA, for the artifact - - name: Get latest commit SHA - id: get_commit_sha - run: | - COMMIT_SHA=$(git rev-parse HEAD) - echo "commit_sha=$COMMIT_SHA" >> $GITHUB_ENV - - # Step 4: Run the autodeploy script - - name: Run Auto Deploy Script - run: | - cd bin - ./autodeploy.sh --artifact-hash ${{ env.COMMIT_SHA }} --target-domain wiab-test-box.wire.link --force-redeploy diff --git a/.github/workflows/offline.yml b/.github/workflows/offline.yml index aa947a33b..32beb5cf4 100644 --- a/.github/workflows/offline.yml +++ b/.github/workflows/offline.yml @@ -9,6 +9,7 @@ # - No label: No builds run (must add label to trigger builds) # - 'build-default': Builds only default profile # - 'build-demo': Builds only demo profile +# - 'build-wiab-staging' - Builds only wiab-staging profile # - 'build-min': Builds only min profile # - 'build-all': Explicitly builds all profiles (useful for workflow changes) # @@ -16,14 +17,14 @@ # on: push: - branches: [master, develop] + branches: ["**"] tags: [v*] paths-ignore: - "*.md" - "**/*.md" pull_request: types: [synchronize, reopened, labeled] - branches: [master, develop] + branches: ["**"] paths-ignore: - "*.md" - "**/*.md" @@ -32,9 +33,9 @@ jobs: build-default: name: Build default profile if: | - github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'build-all') || - contains(github.event.pull_request.labels.*.name, 'build-default') + contains(github.event.pull_request.labels.*.name, 'build-default') || + contains(github.event.pull_request.labels.*.name, 'build-wiab-staging') runs-on: group: wire-server-deploy outputs: @@ -72,6 +73,27 @@ jobs: AWS_SECRET_ACCESS_KEY: '${{ secrets.AWS_SECRET_ACCESS_KEY }}' AWS_REGION: "eu-west-1" + verify-default: + name: Verify default profile + needs: build-default + if: | + contains(github.event.pull_request.labels.*.name, 'build-all') || + contains(github.event.pull_request.labels.*.name, 'build-default') + runs-on: + group: wire-server-deploy + steps: + - uses: 
actions/checkout@v2 + with: + submodules: true + - uses: cachix/install-nix-action@v27 + - uses: cachix/cachix-action@v15 + with: + name: wire-server + signingKey: "${{ secrets.CACHIX_SIGNING_KEY }}" + + - name: Install nix environment + run: nix-env -f default.nix -iA env + - name: Install terraform uses: hashicorp/setup-terraform@v3 with: @@ -89,6 +111,45 @@ jobs: env: HCLOUD_TOKEN: '${{ secrets.HCLOUD_TOKEN }}' + # verify wiab-staging profile + verify-wiab-staging: + name: Verify wiab staging profile + needs: build-default + if: | + contains(github.event.pull_request.labels.*.name, 'build-all') || + contains(github.event.pull_request.labels.*.name, 'build-wiab-staging') + runs-on: + group: wire-server-deploy + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: cachix/install-nix-action@v27 + - uses: cachix/cachix-action@v15 + with: + name: wire-server + signingKey: "${{ secrets.CACHIX_SIGNING_KEY }}" + + - name: Install nix environment + run: nix-env -f default.nix -iA env + + - name: Install terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "^1.3.7" + terraform_wrapper: false + + - name: Deploy offline wiab-staging environment to hetzner + run: ./offline/cd_staging.sh + env: + HCLOUD_TOKEN: '${{ secrets.HCLOUD_TOKEN }}' + + - name: Clean up hetzner wiab-staging environment; just in case + if: always() + run: (cd terraform/examples/wiab-staging-hetzner ; terraform init && terraform destroy -auto-approve) + env: + HCLOUD_TOKEN: '${{ secrets.HCLOUD_TOKEN }}' + # Build container in parallel build-container: name: Build container @@ -118,7 +179,6 @@ jobs: build-demo: name: Build demo profile if: | - github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'build-all') || contains(github.event.pull_request.labels.*.name, 'build-demo') runs-on: @@ -179,7 +239,6 @@ jobs: build-min: name: Build min profile if: | - github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'build-all') || contains(github.event.pull_request.labels.*.name, 'build-min') runs-on: diff --git a/ansible/inventory/offline/99-static b/ansible/inventory/offline/99-static index 42098caa0..f6e469574 100644 --- a/ansible/inventory/offline/99-static +++ b/ansible/inventory/offline/99-static @@ -83,7 +83,19 @@ [postgresql:vars] postgresql_network_interface = enp1s0 - +repmgr_node_config: + postgresql1: # Maps to postgresql_rw group + node_id: 1 + priority: 150 + role: primary + postgresql2: # Maps to first postgresql_ro + node_id: 2 + priority: 100 + role: standby + postgresql3: # Maps to second postgresql_ro + node_id: 3 + priority: 50 + role: standby [elasticsearch:vars] # elasticsearch_network_interface = enp1s0 diff --git a/ansible/inventory/offline/group_vars/postgresql/postgresql.yml b/ansible/inventory/offline/group_vars/postgresql/postgresql.yml index decd66b19..351b99539 100644 --- a/ansible/inventory/offline/group_vars/postgresql/postgresql.yml +++ b/ansible/inventory/offline/group_vars/postgresql/postgresql.yml @@ -24,19 +24,8 @@ repmgr_namespace: "{{ wire_namespace | default('default') }}" wire_pg_secret_name: "wire-postgresql-external-secret" # Node configuration for repmgr -repmgr_node_config: - postgresql1: # Maps to postgresql_rw group - node_id: 1 - priority: 150 - role: primary - postgresql2: # Maps to first postgresql_ro - node_id: 2 - priority: 100 - role: standby - postgresql3: # Maps to second postgresql_ro - node_id: 3 - priority: 50 - role: standby +# NOTE: repmgr_node_config is defined in the inventory file 
ansible/inventory/offline/99-static, ansible/inventory/offline/staging.yml and terraform/examples/wire-server-deploy-offline-hetzner/outputs.tf +# to allow environment-specific node mappings. Do not define here. # repmgr settings # repmgrd monitoring and reconnection configuration diff --git a/ansible/inventory/offline/staging.yml b/ansible/inventory/offline/staging.yml index f5c8fd8f7..4c4bf4dbf 100644 --- a/ansible/inventory/offline/staging.yml +++ b/ansible/inventory/offline/staging.yml @@ -38,8 +38,6 @@ k8s-cluster: kube-master: {} datanodes: - # host names here must match each node's actual hostname - # its a requirement for rabbitmq hosts: datanode1: ansible_host: "datanode1_ip" @@ -76,6 +74,8 @@ minio: rmq-cluster: children: + # host names here must match each node's actual hostname + # its a requirement for rabbitmq datanodes: {} vars: rabbitmq_network_interface: enp1s0 @@ -88,6 +88,19 @@ postgresql: vars: wire_dbname: wire-server postgresql_network_interface: enp1s0 + repmgr_node_config: + datanode1: # Maps to postgresql_rw group + node_id: 1 + priority: 150 + role: primary + datanode2: # Maps to first postgresql_ro + node_id: 2 + priority: 100 + role: standby + datanode3: # Maps to second postgresql_ro + node_id: 3 + priority: 50 + role: standby postgresql_rw: hosts: diff --git a/ansible/wiab-staging-provision.yml b/ansible/wiab-staging-provision.yml index d72b8622f..39c24027d 100644 --- a/ansible/wiab-staging-provision.yml +++ b/ansible/wiab-staging-provision.yml @@ -258,6 +258,10 @@ yq -i ".rmq-cluster.vars.rabbitmq_network_interface |= \"$GATEWAY_NAME\"" "{{ inventory_file }}" yq -i ".rmq-cluster.vars.rabbitmq_cluster_master |= \"datanode1\"" "{{ inventory_file }}" yq -i ".postgresql.vars.postgresql_network_interface |= \"$GATEWAY_NAME\"" "{{ inventory_file }}" + + # setting up ansible_ssh_private_key_file to access VMs for secondary ansible operations + yq -i ".all.vars.ansible_ssh_private_key_file |= \"ssh/id_ed25519\"" "{{ inventory_file }}" + args: executable: /bin/bash diff --git a/bin/autodeploy.sh b/bin/autodeploy.sh deleted file mode 100755 index d7506cf3b..000000000 --- a/bin/autodeploy.sh +++ /dev/null @@ -1,450 +0,0 @@ -#!/usr/bin/env bash -# This script is not-up-to-date, requires to be updated to match with current developments -# A new WIAB (wire in a box) dev solution has been created https://docs.wire.com/latest/how-to/install/demo-wiab.html and can be used until this (wiab-staging) gets updated - -# shellcheck disable=SC2087 - -# This script can be replaced with a simpler solution of wiab-demo installtion -# https://docs.wire.com/latest/how-to/install/demo-wiab.html - -set -Eeuo pipefail - -msg() { - echo >&2 -e "${1-}" -} - -trap cleanup SIGINT SIGTERM ERR EXIT - -usage() { - cat </dev/null 2>&1 ; then - msg "INFO: DNS A record exists: $SUBDOMAIN.$TARGET_SYSTEM" - else - die "ERROR: DNS A record for $SUBDOMAIN.$TARGET_SYSTEM does not exist. Exiting. Please check DNS record set." - fi -done - -if ssh -q -o StrictHostKeyChecking=no -o ConnectTimeout=5 -p "$SSH_PORT" "$SSH_USER"@webapp."$TARGET_SYSTEM" id | grep -q "$SSH_USER"; then - msg "" - msg "INFO: Successfully logged into $TARGET_SYSTEM as $SSH_USER" -else - die "ERROR: Can't log into $TARGET_SYSTEM via SSH, please check SSH connectivity." 
-fi - - -if curl --head --silent --fail https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/wire-server-deploy-static-"$ARTIFACT_HASH".tgz >/dev/null 2>&1 ; then - msg "INFO: Artifact exists https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/wire-server-deploy-static-$ARTIFACT_HASH.tgz" -else - die "ERROR: No artifact found via https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/wire-server-deploy-static-$ARTIFACT_HASH.tgz" -fi - -system_cleanup_meta() { - msg "" - msg "INFO: Cleaning up all VMs, docker resources and wire-server-deploy files on $TARGET_SYSTEM." - msg "" - sleep 5 - ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no "$SSH_USER"@webapp."$TARGET_SYSTEM" "bash -s" < /dev/null; then - for VM in $(virsh list --all --name); do virsh destroy "$VM"; virsh undefine "$VM" --remove-all-storage; done - fi - if which docker > /dev/null; then - docker system prune -a -f - fi - rm -f /home/$DEMO_USER/.ssh/known_hosts - rm -rf /home/$DEMO_USER/wire-server-deploy - rm -f /home/$DEMO_USER/wire-server-deploy-static-*.tgz -} - -preprovision_hetzner() { - msg "" - msg "INFO: running local ansible playbook for inital server deployment." - msg "INFO: This will setup up the Hetzner system with basic defaults, download and unpack the wire-server-deploy artifact." - sleep 5 - # on Mac devices C.UTF-8 is not available - if [[ $(uname) == "Darwin" ]]; then - export LC_ALL=en_US.UTF-8 - else - export LC_ALL=C.UTF-8 - fi - ansible-playbook ../ansible/hetzner-single-deploy.yml -e "artifact_hash=$ARTIFACT_HASH" -e "ansible_ssh_common_args='-o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ControlMaster=auto -o ControlPersist=180m'" -i $SSH_USER@webapp."$TARGET_SYSTEM", --diff -} - -remote_deployment() { - msg() { - echo >&2 -e "${1-}" - } - cd $SCRIPT_DIR &>/dev/null || exit 1 - - bash bin/offline-vm-setup.sh - msg "" - while sudo virsh list --all | grep -Fq running; do - sleep 20 - msg "INFO: VM deployment still in progress ..." - done - sleep 20 - msg "" - msg "INFO: VM deployment done. Starting all VMs:" - msg "" - for VM in $(sudo virsh list --all --name); do sudo virsh start "$VM"; done - sleep 60 - - msg "" - msg "INFO: Setting up offline environment (this will take a while)." - msg "" - # Rather than sourcing wire-server-deploy/bin/offline-env.sh, we invoke - # the relevant commands below, declaring "d" as a function instead of an alias. - ZAUTH_CONTAINER=$(sudo docker load -i "$SCRIPT_DIR"/containers-adminhost/quay.io_wire_zauth_*.tar | awk '{print $3}') - export ZAUTH_CONTAINER - WSD_CONTAINER=$(sudo docker load -i "$SCRIPT_DIR"/containers-adminhost/container-wire-server-deploy.tgz | awk '{print $3}') - d() { - sudo docker run --network=host -v "${SSH_AUTH_SOCK:-nonexistent}":/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent -v "$HOME"/.ssh:/root/.ssh -v "$PWD":/wire-server-deploy "$WSD_CONTAINER" "$@" - } - export -f d - - bash bin/offline-secrets.sh - - HOST_IP=$(dig @resolver4.opendns.com myip.opendns.com +short) - - cat >ansible/inventory/offline/hosts.ini</dev/null) - if [[ $? -eq 0 && -n "$podCIDR" ]]; then - sed -i "s|RELAY_NETWORKS: \".*\"|RELAY_NETWORKS: \":${podCIDR}\"|" $SMTP_VALUES_FILE - else - echo "Failed to fetch podSubnet. 
Attention using the default value: $(grep -i RELAY_NETWORKS $SMTP_VALUES_FILE)" - fi - d helm install smtp ./charts/smtp --values $SMTP_VALUES_FILE - - d helm install reaper ./charts/reaper - - cp values/wire-server/prod-values.example.yaml values/wire-server/values.yaml - sed -i "s/example.com/$TARGET_SYSTEM/g" values/wire-server/values.yaml - sed -i "s/# - \"turn::3478\"/- \"turn:$HOST_IP:3478\"/g" values/wire-server/values.yaml - sed -i "s/# - \"turn::3478?transport=tcp\"/- \"turn:$HOST_IP:3478?transport=tcp\"/g" values/wire-server/values.yaml - - d helm install wire-server ./charts/wire-server --timeout=15m0s --values ./values/wire-server/values.yaml --values ./values/wire-server/secrets.yaml - - sed -i "s/example.com/$TARGET_SYSTEM/g" values/webapp/prod-values.example.yaml - d helm install webapp ./charts/webapp --values ./values/webapp/prod-values.example.yaml - - sed -i "s/example.com/$TARGET_SYSTEM/g" values/team-settings/prod-values.example.yaml - d helm install team-settings ./charts/team-settings --values ./values/team-settings/prod-values.example.yaml --values ./values/team-settings/prod-secrets.example.yaml - - sed -i "s/example.com/$TARGET_SYSTEM/g" values/account-pages/prod-values.example.yaml - d helm install account-pages ./charts/account-pages --values ./values/account-pages/prod-values.example.yaml - - cp values/ingress-nginx-controller/prod-values.example.yaml ./values/ingress-nginx-controller/values.yaml - d helm install ingress-nginx-controller ./charts/ingress-nginx-controller --values ./values/ingress-nginx-controller/values.yaml - - KUBENODEIP=$(d kubectl get pods -l app.kubernetes.io/name=ingress-nginx -o=custom-columns=IP:.status.hostIP --no-headers) - sudo sed -i "s/define KUBENODEIP.*/define KUBENODEIP = $KUBENODEIP/" /etc/nftables.conf - sudo systemctl restart nftables - - INGRESSNODE=$(d kubectl get pods -l app.kubernetes.io/name=ingress-nginx -o=custom-columns=NODE:.spec.nodeName --no-headers) - d kubectl cordon "$INGRESSNODE" - - cp ./values/nginx-ingress-services/prod-values.example.yaml ./values/nginx-ingress-services/values.yaml - cp ./values/nginx-ingress-services/prod-secrets.example.yaml ./values/nginx-ingress-services/secrets.yaml - sed -i 's/useCertManager: false/useCertManager: true/g' values/nginx-ingress-services/values.yaml - sed -i 's/certmasterEmail:/certmasterEmail: backend+wiabautodeploy@wire.com/g' values/nginx-ingress-services/values.yaml - sed -i "s/example.com/$TARGET_SYSTEM/" values/nginx-ingress-services/values.yaml - - d kubectl create namespace cert-manager-ns - d helm upgrade --install -n cert-manager-ns --set 'installCRDs=true' cert-manager charts/cert-manager --values values/cert-manager/prod-values.example.yaml - - d kubectl uncordon "$INGRESSNODE" - - d helm upgrade --install nginx-ingress-services charts/nginx-ingress-services -f values/nginx-ingress-services/values.yaml - - d kubectl get certificate - - cp values/sftd/prod-values.example.yaml values/sftd/values.yaml - sed -i "s/webapp.example.com/webapp.$TARGET_SYSTEM/" values/sftd/values.yaml - sed -i "s/sftd.example.com/sftd.$TARGET_SYSTEM/" values/sftd/values.yaml - sed -i 's/name: letsencrypt-prod/name: letsencrypt-http01/' values/sftd/values.yaml - sed -i "s/replicaCount: 3/replicaCount: 1/" values/sftd/values.yaml - d kubectl label node kubenode1 wire.com/role=sftd - d helm upgrade --install sftd ./charts/sftd --set 'nodeSelector.wire\.com/role=sftd' --set 'node_annotations="{'wire\.com/external-ip': '"$HOST_IP"'}"' --values values/sftd/values.yaml - - 
ZREST_SECRET=$(grep -A1 turn values/wire-server/secrets.yaml | grep secret | tr -d '"' | awk '{print $NF}') - - cat >values/coturn/values.yaml<values/coturn/secrets.yaml</dev/null" || echo "false") -EXISTING_VMS=$(ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no "$SSH_USER"@webapp."$TARGET_SYSTEM" "virsh list --all --name" || echo "false") -EXISTING_CONTAINERS=$(ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no "$SSH_USER"@webapp."$TARGET_SYSTEM" "docker ps -q --all" || echo "false") - -if [[ "$EXISTING_INSTALL" != "false" && -n "$EXISTING_INSTALL" ]]; then - msg "" - msg "WARNING: existing wire-server-deploy installation found: $EXISTING_INSTALL" - DO_SYSTEM_CLEANUP=true -fi -if [[ "$EXISTING_VMS" != "false" && -n "$EXISTING_VMS" ]]; then - msg "" - msg "WARNING: existing libvirt VMs found: $EXISTING_VMS" - DO_SYSTEM_CLEANUP=true -fi -if [[ "$EXISTING_CONTAINERS" != "false" && -n "$EXISTING_CONTAINERS" ]]; then - echo "$EXISTING_CONTAINERS" - msg "" - msg "WARNING: existing Docker containers found." - DO_SYSTEM_CLEANUP=true -fi - -if [ "$DO_SYSTEM_CLEANUP" = false ]; then - msg "" - msg "INFO: Target system clean, no previous wire-server-deploy installation found." -fi -if [ "$DO_SYSTEM_CLEANUP" = true ] && [ "$FORCE_REDEPLOY" = 0 ]; then - msg "" - IFS= read -r -p "Do you want to wipe all wire-server-deploy components from $TARGET_SYSTEM? (y/n) " PROMPT_CLEANUP - if [[ $PROMPT_CLEANUP == "n" || $PROMPT_CLEANUP == "N" ]]; then - msg "" - die "Aborting, not cleaning up $TARGET_SYSTEM" - fi - system_cleanup_meta -fi -if [ "$DO_SYSTEM_CLEANUP" = true ] && [ "$FORCE_REDEPLOY" = 1 ]; then - system_cleanup_meta -fi - -msg "INFO: Commencing Wire-in-a-box deployment on $TARGET_SYSTEM." -preprovision_hetzner -ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o ServerAliveInterval=30 -o ServerAliveCountMax=10 "$DEMO_USER"@webapp."$TARGET_SYSTEM" "bash -s" < ssh_private_key +chmod 400 ssh_private_key # TO-DO: make changes to test the deployment with demo user in terraform output -json static-inventory > inventory.json yq eval -o=yaml '.' inventory.json > inventory.yml -ssh -oStrictHostKeyChecking=accept-new -oConnectionAttempts=10 "root@$adminhost" tar xzv < "$ARTIFACTS_DIR/assets.tgz" +ssh $SSH_OPTS "root@$adminhost" wget -q "https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/${ARTIFACT}.tgz" + +ssh $SSH_OPTS "root@$adminhost" tar xzf "$ARTIFACT.tgz" # override for ingress-nginx-controller values for hetzner environment $TF_DIR/setup_nodes.yml -scp -A "$VALUES_DIR/ingress-nginx-controller/hetzner-ci.example.yaml" "root@$adminhost:./values/ingress-nginx-controller/prod-values.example.yaml" +scp $SSH_OPTS "$VALUES_DIR/ingress-nginx-controller/hetzner-ci.example.yaml" "root@$adminhost:./values/ingress-nginx-controller/prod-values.example.yaml" -scp inventory.yml "root@$adminhost":./ansible/inventory/offline/inventory.yml +scp $SSH_OPTS inventory.yml "root@$adminhost":./ansible/inventory/offline/inventory.yml -ssh "root@$adminhost" cat ./ansible/inventory/offline/inventory.yml || true +ssh $SSH_OPTS "root@$adminhost" cat ./ansible/inventory/offline/inventory.yml || true echo "Running ansible playbook setup_nodes.yml via adminhost ($adminhost)..." 
-ansible-playbook -i inventory.yml setup_nodes.yml --private-key "ssh_private_key" \ - -e "ansible_ssh_common_args='-o ProxyCommand=\"ssh -W %h:%p -q root@$adminhost -i ssh_private_key\" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'" +ansible-playbook -i inventory.yml setup_nodes.yml --private-key "ssh_private_key" # NOTE: Agent is forwarded; so that the adminhost can provision the other boxes -ssh -A "root@$adminhost" ./bin/offline-deploy.sh +ssh $SSH_OPTS -A "root@$adminhost" ./bin/offline-deploy.sh echo "" echo "Wire offline deployment completed successfully!" diff --git a/offline/cd_staging.sh b/offline/cd_staging.sh new file mode 100755 index 000000000..d70028468 --- /dev/null +++ b/offline/cd_staging.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash + +set -euo pipefail + +CD_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TF_DIR="${CD_DIR}/../terraform/examples/wiab-staging-hetzner" +ARTIFACTS_DIR="${CD_DIR}/default-build/output" +VALUES_DIR="${CD_DIR}/../values" +COMMIT_HASH="${GITHUB_SHA}" +ARTIFACT="wire-server-deploy-static-${COMMIT_HASH}" + +# Retry configuration +MAX_RETRIES=3 +RETRY_DELAY=30 + +echo "Wire Offline Deployment with Retry Logic" +echo "========================================" + +function cleanup { + (cd "$TF_DIR" && terraform destroy -auto-approve) + echo "Cleanup completed" +} +trap cleanup EXIT + +cd "$TF_DIR" +terraform init + +# Retry loop for terraform apply +echo "Starting deployment with automatic retry on resource unavailability..." +for attempt in $(seq 1 $MAX_RETRIES); do + echo "" + echo "Deployment attempt $attempt of $MAX_RETRIES" + date + + if terraform apply -auto-approve; then + echo "Infrastructure deployment successful on attempt $attempt!" + break + else + echo "Infrastructure deployment failed on attempt $attempt" + + if [[ $attempt -lt $MAX_RETRIES ]]; then + echo "Will retry with different configuration..." + + # Clean up partial deployment + echo "Cleaning up partial deployment..." + terraform destroy -auto-approve || true + + # Wait for resources to potentially become available + echo "Waiting ${RETRY_DELAY}s for resources to become available..." + sleep $RETRY_DELAY + + # Modify configuration for better availability + echo "Adjusting server type preferences for attempt $((attempt + 1))..." + case $attempt in + 1) + # Attempt 2: Prioritize cpx22 and cx53 + sed -i.bak 's/"cx33", "cpx22", "cx43"/"cpx22", "cx43", "cx33"/' main.tf + sed -i.bak 's/"cx43", "cx53", "cpx42"/"cx53", "cpx42", "cx43"/' main.tf + echo " -> Prioritizing cpx22 and cx53 server types" + ;; + 2) + # Attempt 3: Use biggest available types + sed -i.bak 's/"cpx22", "cx43", "cx33"/"cx43", "cx33", "cpx22"/' main.tf + sed -i.bak 's/"cx53", "cpx42", "cx43"/"cpx42", "cx43", "cx53"/' main.tf + echo " -> Using Biggest available server types" + ;; + esac + + terraform init -reconfigure + else + echo "All deployment attempts failed after $MAX_RETRIES tries" + echo "" + echo "This usually means:" + echo " 1. High demand for Hetzner Cloud resources in EU regions" + echo " 2. Your account may have resource limits" + echo " 3. Try again later when resources become available" + echo "" + echo "Manual solutions:" + echo " 1. Check Hetzner Console for resource limits" + echo " 2. Try different server types manually" + echo " 3. 
Contact Hetzner support for resource availability" + + # Restore original config + if [[ -f main.tf.bak ]]; then + mv main.tf.bak main.tf + terraform init -reconfigure + fi + + exit 1 + fi + fi +done + +# Restore original config after successful deployment +if [[ -f main.tf.bak ]]; then + mv main.tf.bak main.tf + terraform init -reconfigure +fi + +echo "" +echo "Infrastructure ready! Proceeding with application deployment..." + +# Common SSH options for all ssh and scp commands +SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=10 -o ConnectTimeout=15 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 -o TCPKeepAlive=yes" + +# Continue with the rest of the original cd.sh logic +adminhost=$(terraform output -raw adminhost) +ssh_private_key=$(terraform output ssh_private_key) + +eval "$(ssh-agent)" +ssh-add - <<< "$ssh_private_key" +rm -f ssh_private_key || true +echo "$ssh_private_key" > ssh_private_key +chmod 400 ssh_private_key + +terraform output -json static-inventory > inventory.json +yq eval -o=yaml '.' inventory.json > inventory.yml + +echo "Running ansible playbook setup_nodes.yml via adminhost ($adminhost)..." +ansible-playbook -i inventory.yml setup_nodes.yml --private-key "ssh_private_key" + +# user demo needs to exist +ssh $SSH_OPTS "demo@$adminhost" wget -q "https://s3-eu-west-1.amazonaws.com/public.wire.com/artifacts/${ARTIFACT}.tgz" + +ssh $SSH_OPTS "demo@$adminhost" tar xzf "$ARTIFACT.tgz" + +# override for ingress-nginx-controller values for hetzner environment $TF_DIR/setup_nodes.yml +scp $SSH_OPTS "$VALUES_DIR/ingress-nginx-controller/hetzner-ci.example.yaml" "demo@$adminhost:./values/ingress-nginx-controller/prod-values.example.yaml" + +# Source and target files +SOURCE="inventory.yml" +cp "${CD_DIR}/../ansible/inventory/offline/staging.yml" "inventory-secondary.yml" +TARGET="inventory-secondary.yml" + +# Read assethost IP +ASSETHOST_IP=$(yq eval '.assethost.hosts.assethost.ansible_host' "$SOURCE") +yq eval -i ".assethost.hosts.assethost.ansible_host = \"$ASSETHOST_IP\"" "$TARGET" + +# Read kube-node IPs using to_entries +KUBENODE1_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[0].value.ansible_host' "$SOURCE") +KUBENODE2_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[1].value.ansible_host' "$SOURCE") +KUBENODE3_IP=$(yq eval '.["kube-node"].hosts | to_entries | .[2].value.ansible_host' "$SOURCE") + +yq eval -i ".kube-node.hosts.kubenode1.ansible_host = \"$KUBENODE1_IP\"" "$TARGET" +yq eval -i ".kube-node.hosts.kubenode2.ansible_host = \"$KUBENODE2_IP\"" "$TARGET" +yq eval -i ".kube-node.hosts.kubenode3.ansible_host = \"$KUBENODE3_IP\"" "$TARGET" + +# Read datanodes IPs using to_entries +DATANODE1_IP=$(yq eval '.datanode.hosts | to_entries | .[0].value.ansible_host' "$SOURCE") +DATANODE2_IP=$(yq eval '.datanode.hosts | to_entries | .[1].value.ansible_host' "$SOURCE") +DATANODE3_IP=$(yq eval '.datanode.hosts | to_entries | .[2].value.ansible_host' "$SOURCE") + +# Read datanodes names using to_entries +DATANODE1_NAME=$(yq eval '.datanode.hosts | keys | .[0]' "$SOURCE") +DATANODE2_NAME=$(yq eval '.datanode.hosts | keys | .[1]' "$SOURCE") +DATANODE3_NAME=$(yq eval '.datanode.hosts | keys | .[2]' "$SOURCE") + +# clean old hosts for datanodes +yq eval -i '.datanodes.hosts = {}' "$TARGET" + +# re-create the datanodes group with actual names from SOURCE +yq eval -i ".datanodes.hosts[\"${DATANODE1_NAME}\"].ansible_host = \"${DATANODE1_IP}\"" "$TARGET" +yq eval -i ".datanodes.hosts[\"${DATANODE2_NAME}\"].ansible_host = 
\"${DATANODE2_IP}\"" "$TARGET" +yq eval -i ".datanodes.hosts[\"${DATANODE3_NAME}\"].ansible_host = \"${DATANODE3_IP}\"" "$TARGET" + +# Override network_interface from SOURCE to TARGET for all service groups +NETWORK_INTERFACE=$(yq eval '.datanode.vars.datanode_network_interface' "$SOURCE") +yq eval -i ".cassandra.vars.cassandra_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET" +yq eval -i ".elasticsearch.vars.elasticsearch_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET" +yq eval -i ".minio.vars.minio_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET" +yq eval -i ".postgresql.vars.postgresql_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET" +yq eval -i ".rmq-cluster.vars.rabbitmq_network_interface = \"$NETWORK_INTERFACE\"" "$TARGET" + +# re-writing sub-groups for rabbitmq_cluster_master, cassandra_seed, postgresql_rw and postgresql_ro +yq eval -i ".rmq-cluster.vars.rabbitmq_cluster_master = \"${DATANODE1_NAME}\"" "$TARGET" + +yq eval -i '.cassandra_seed.hosts = {}' "$TARGET" +yq eval -i ".cassandra_seed.hosts.[\"${DATANODE1_NAME}\"] = \"\"" "$TARGET" + +yq eval -i '.postgresql_rw.hosts = {}' "$TARGET" +yq eval -i '.postgresql_ro.hosts = {}' "$TARGET" +yq eval -i ".postgresql_rw.hosts.[\"${DATANODE1_NAME}\"] = \"\"" "$TARGET" +yq eval -i ".postgresql_ro.hosts.[\"${DATANODE2_NAME}\"] = \"\"" "$TARGET" +yq eval -i ".postgresql_ro.hosts.[\"${DATANODE3_NAME}\"] = \"\"" "$TARGET" + +# re-populate the postgresql.vars.repmgr_node_config group with actual names from SOURCE +i=1 +while IFS= read -r actual_name; do + yq eval -i " + .postgresql.vars.repmgr_node_config[\"${actual_name}\"] = + .postgresql.vars.repmgr_node_config.datanode${i} + | del(.postgresql.vars.repmgr_node_config.datanode${i}) + " "$TARGET" + i=$((i+1)) +done < <(yq eval -r '.datanode.hosts | keys | .[]' "$SOURCE") + +# Extract all kube-node vars from SOURCE and merge into TARGET +KUBE_NODE_VARS_FILE=$(mktemp) +yq eval '.["kube-node"].vars' "$SOURCE" > "$KUBE_NODE_VARS_FILE" +yq eval -i '.kube-node.vars |= load("'"$KUBE_NODE_VARS_FILE"'")' "$TARGET" + +rm -f "$KUBE_NODE_VARS_FILE" + +echo "created secondary inventory file $TARGET successfully" + +scp $SSH_OPTS "$TARGET" "demo@$adminhost":./ansible/inventory/offline/inventory.yml + +ssh $SSH_OPTS "demo@$adminhost" cat ./ansible/inventory/offline/inventory.yml || true + +# NOTE: Agent is forwarded; so that the adminhost can provision the other boxes +ssh $SSH_OPTS -A "demo@$adminhost" ./bin/offline-deploy.sh + +echo "" +echo "Wire offline deployment completed successfully!" +cleanup diff --git a/offline/coturn.md b/offline/coturn.md index ffee8d583..dcdf74558 100644 --- a/offline/coturn.md +++ b/offline/coturn.md @@ -10,8 +10,8 @@ This document explains how to install Coturn on a newly deployed Wire-Server ins This presumes you already have: -* Followed the [single Hetzner machine installation](single_hetzner_machine_installation.md) guide or otherwise have a machine ready to accept a Wire-Server deployment. -* Have followed the [Wire-Server installation](docs_ubuntu_22.04.md) guide and have Wire-Server deployed and working. +* Followed the [WIAB Staging](wiab-staging.md) guide to setup a Wire in a Box staging solution. +* OR, Have followed the [Wire-Server installation](docs_ubuntu_22.04.md) guide and have Wire-Server deployed and working. ## Plan. @@ -250,7 +250,7 @@ Calling and TURN services (Coturn, SFT) require being reachable on a range of po Here we have decided the following distribution of ports: -* Coturn will operate between ports 49152 and 65535. 
+* Coturn will operate between ports 32768 and 65535. We will configure the port redirection in Nftables to allow traffic to reach Coturn. @@ -293,7 +293,7 @@ table ip nat { iifname { $INF_WAN, virbr0 } tcp dport 3478 fib daddr type local dnat to $COTURNIP comment "COTURN control TCP" iifname { $INF_WAN, virbr0 } udp dport 3478 fib daddr type local dnat to $COTURNIP comment "COTURN control UDP" - iifname { $INF_WAN, virbr0 } udp dport 49152-65535 fib daddr type local dnat to $COTURNIP comment "COTURN UDP range" + iifname { $INF_WAN, virbr0 } udp dport 32768-65535 fib daddr type local dnat to $COTURNIP comment "COTURN UDP range" fib daddr type local counter jump DOCKER } @@ -312,7 +312,7 @@ This is used for the HTTP(S) ingress: This is the part that routes the UDP packets (media/calling traffic) to the calling services: ```nft - iifname { $INF_WAN, virbr0 } udp dport 49152-65535 fib daddr type local dnat to $COTURNIP comment "COTURN UDP range" + iifname { $INF_WAN, virbr0 } udp dport 32768-65535 fib daddr type local dnat to $COTURNIP comment "COTURN UDP range" ``` This is the part that redirects the control traffic to the Coturn port: diff --git a/offline/docs_ubuntu_22.04.md b/offline/docs_ubuntu_22.04.md index e72f8cc26..7cb4c68e9 100644 --- a/offline/docs_ubuntu_22.04.md +++ b/offline/docs_ubuntu_22.04.md @@ -6,7 +6,7 @@ install Wire. ## Demo / Testing installation -To install a self-hosted instance of Wire deployed on one Server ("Wire in a box") for testing purposes, we recommend the [autodeploy.sh](../bin/autodeploy.sh) script. See also: [Automated full install](single_hetzner_machine_installation.md#automated-full-install) section in the Single Hetzner Machine installation readme. +To install a self-hosted instance of Wire deployed on one Server ("Wire in a box") for testing purposes, we recommend the [WIAB Staging](wiab-staging.md) or [WIAB Dev](https://docs.wire.com/latest/how-to/install/demo-wiab.html) solution. ## Installing docker @@ -58,8 +58,6 @@ If you see the curent docker version and no error, it means that Docker is now c ## Downloading and extracting the artifact -Note: If you have followed the Ubuntu installation instructions (`single_hetzner_machine_installation.md`) before following this page, you already have a wire-server-deploy folder with an artifact extracted into it, and you can simply use that. - Create a fresh workspace to download the artifacts: ``` @@ -743,7 +741,6 @@ ufw allow in on $OUTBOUNDINTERFACE proto tcp to any port 80; " ``` -For wire-in-a-box deployments based on single_hetzner_machine_installation.md, an nftables based firewall including a predefined ruleset should already exist. By default, the predefined ruleset forwards ingress traffic to kubenode1 (192.168.122.21). 
To check on which node the ingress controller has been deployed, get the node IP via kubectl: ``` d kubectl get pods -l app.kubernetes.io/name=ingress-nginx -o=custom-columns=NAME:.metadata.name,NODE:.spec.nodeName,IP:.status.hostIP diff --git a/offline/postgresql-cluster.md b/offline/postgresql-cluster.md index caaac441f..003c2b7c0 100644 --- a/offline/postgresql-cluster.md +++ b/offline/postgresql-cluster.md @@ -177,7 +177,19 @@ postgresql3 ansible_host=192.168.122.206 [postgresql:vars] postgresql_network_interface = enp1s0 - +repmgr_node_config: + postgresql1: # Maps to postgresql_rw group + node_id: 1 + priority: 150 + role: primary + postgresql2: # Maps to first postgresql_ro + node_id: 2 + priority: 100 + role: standby + postgresql3: # Maps to second postgresql_ro + node_id: 3 + priority: 50 + role: standby # All PostgreSQL nodes [postgresql] diff --git a/offline/single_hetzner_machine_installation.md b/offline/single_hetzner_machine_installation.md deleted file mode 100644 index 55e912b7c..000000000 --- a/offline/single_hetzner_machine_installation.md +++ /dev/null @@ -1,120 +0,0 @@ -# Scope - -This document gives exact instructions for performing an offline demo installation of Wire on a single dedicated Hetzner server. It uses the KVM based virtual machine system to create all of the required virtual machines. - -Bootstrapping a single dedicated Hetzner server for virtual machine deployment, the wire-server-deploy artifact download as well as the wire-server k8s installation have been fully automated. - -## Use the hetzner robot console to create a new server. - -Select Ubuntu 22.04.2 on an ax101 dedicated server. Make sure you provide a public key in the Hetzner console which can be used for ansible deployment. - -If not using Hetzner, for reference, the specs of the ax101 server are: - -- AMD Ryzen™ 9 5950X -- 128 GB DDR4 ECC RAM -- 2 x 3.84 TB NVMe SSD Datacenter Edition (software RAID 1) -- 1 GBit/s port - -The main public IPv4 address of the Hetzner server to connect to with SSH / ansible can be found in the "Server" tab in the Hetzner Robot console, next to the Server Name. -As soon as the initial Hetzner server deployment is finished, we'll use Ansible to further provision the system. - -## Automated full install - -If you wish to set up "Wire in a box" for demo or testing purposes, use the script [autodeploy.sh](../bin/autodeploy.sh). It supports several config flags, which can be reviewed by calling the script using a helper flag: - -```bash -autodeploy.sh -h -``` - -Running the script against a valid dedicated (Hetzner) server will install a fully functioning "Wire in a box" demo environment, based on the instructions provided in [docs_ubuntu_22.04.md](docs_ubuntu_22.04.md) and [coturn.md](coturn.md). - -This process takes approximately 90 minutes. If this script suits your needs and the installation is a success, there's no need to follow the individualized instructions below. - - -## Adjust ansible playbook vars as needed - -Take a look at the "vars:" section in wire-server-deploy/ansible/hetzner-single-deploy.yml and adjust vars as needed. Example: -``` - vars: - artifact_hash: 452c8d41b519a3b41f22d93110cfbcf269697953 - ubuntu_version: 22.04.3 - ssh_pubkey: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDPTGTo1lTqd3Ym/75MRyQvj8xZINO/GI6FzfIadSe5c backend+hetzner-dedicated-operator@wire.com" -``` - -The variable 'artifact_hash' above is the hash of your deployment artifact, given to you by Wire, or acquired by looking at the build job. 
- -## Run ansible playbook for server bootstrapping - -Navigate to the ansible folder in wire-server-deploy and execute the playbook using valid vars as described above. -``` -~ ❯ cd wire-server-deploy/ansible -~ ❯ ansible-playbook hetzner-single-deploy.yml -i root@$HETZNER_IP, --diff -``` -Please note and include the trailing comma when invoking the playbook. Playbook execution might take a few minutes, especially when downloading and unpacking a new artifact. - -The playbook will install baseline defaults (packages, firewall, SSH config, SSH key(s), user(s)), download & extract wire-server-deploy and download the specified ubuntu ISO. -The playbook is written to be idempotent; eg. files won't be redownloaded as long as they already exist on the target host. Deploying a new version of "wire-server-deploy" is as easy as removing the folder from the target host and updating the "artifact_hash" variable in the playbook. - -At this point it's recommended to reboot the server once. - -## Create VMs - -SSH into the target host as demo@$HETZNER_IP and execute wire-server-deploy/bin/offline-vm-setup.sh -``` -demo@Ubuntu-2204-jammy-amd64-base:~$ cd wire-server-deploy/ -demo@Ubuntu-2204-jammy-amd64-base:~/wire-server-deploy$ bin/offline-vm-setup.sh -``` -Without arguments, the script will deploy seven VMs behind the default libvirt network (virbr0, 192.168.122.0/24). - - * assethost - IP: 192.168.122.10 - * kubenode1 - IP: 192.168.122.21 - * kubenode2 - IP: 192.168.122.22 - * kubenode3 - IP: 192.168.122.23 - * ansnode1 - IP: 192.168.122.31 - * ansnode2 - IP: 192.168.122.32 - * ansnode3 - IP: 192.168.122.33 - -This will take up to 15 min (longer if the server still builds its MD RAID in the background). Once all VMs are deployed, they should be shut off. Status can be checked with: -``` -demo@Ubuntu-2204-jammy-amd64-base:~$ sudo virsh list --all -``` - -Hint: If your local machine is running Linux, use "virt-manager" to connect to the Hetzner server and make VM administration more comfortable. - -Start all VMs: - -``` -demo@Ubuntu-2204-jammy-amd64-base:~$ sudo bash -c " -set -e; -virsh start assethost; -virsh start kubenode1; -virsh start kubenode2; -virsh start kubenode3; -virsh start ansnode1; -virsh start ansnode2; -virsh start ansnode3; -" -``` - -## Access VMs - -VMs created with offline-vm-setup.sh are accessible via SSH with two public keys. - * Existing key from ~/.ssh/authorized_keys (externally via ProxyJump) - * Local keypair key from ~/.ssh/id_ed25519 (Keypair on dedicated server) - -To use your own key, use SSH with ProxyJump, as it's the more secure alternative compared to Key Forwarding ("ssh -A"): -``` -~ ❯ ssh demo@192.168.122.XXX -J demo@$HETZNER_IP -``` - -Or just use the local keypair, created by offline-vm-setup.sh inside the dedicated server: -``` -demo@Ubuntu-2204-jammy-amd64-base:~$ ssh assethost -``` - -Hint: resolving VM hostnames from inside the dedicated server should work, since the script is appending entries to /etc/hosts during VM creation. -But this does not work for resolving hostnames between VMs at this point. We'll be using IP addresses only going forward. 
-
-### From this point:
-
-Switch to [the Ubuntu 22.04 Wire install docs](docs_ubuntu_22.04.md)
diff --git a/offline/wiab-staging.md b/offline/wiab-staging.md
new file mode 100644
index 000000000..1cc13a656
--- /dev/null
+++ b/offline/wiab-staging.md
@@ -0,0 +1,313 @@
+# Scope
+
+**Wire in a Box (WIAB) Staging** is a demo installation of Wire running on a single physical machine using KVM-based virtual machines. This setup replicates the multi-node production Wire architecture in a consolidated environment suitable for testing, evaluation, and learning about Wire's infrastructure—but **not for production use**.
+
+**Important:** This is a sandbox environment. Data from a staging installation cannot be migrated to production. WIAB Staging is designed for experimentation, validation, and understanding Wire's deployment model.
+
+## Requirements
+
+**Architecture Overview:**
+- Multiple VMs (7) are deployed to simulate production infrastructure with separate roles (Kubernetes, data services, asset storage)
+- All VMs share the same physical node and storage, creating a single failure domain
+- [Calling services](https://docs.wire.com/latest/understand/overview.html#calling) share the same k8s cluster as the Wire services; hence, all infrastructure sits in a DMZ (demilitarized zone).
+- This solution helps developers understand Wire's infrastructure requirements and test deployment processes
+
+**Resource Requirements:**
+- One physical machine with hypervisor support:
+  - **Memory:** 55 GiB RAM
+  - **Compute:** 29 vCPUs
+  - **Storage:** 850 GB disk space (thin-provisioned)
+  - 7 VMs with [Ubuntu 22.04](https://releases.ubuntu.com/jammy/), as described in [VM Provisioning](#vm-provisioning)
+- **DNS Records**:
+  - A way to create DNS records for your domain name (e.g. wire.example.com)
+  - Find a detailed explanation at [How to set up DNS records](https://docs.wire.com/latest/how-to/install/demo-wiab.html#dns-requirements)
+- **SSL/TLS certificates**:
+  - A way to create SSL/TLS certificates for your domain name (to allow connecting via https://)
+  - To ease the process of managing certificates, we recommend using [Let's Encrypt](https://letsencrypt.org/getting-started/) & [cert-manager](https://cert-manager.io/docs/tutorials/acme/http-validation/)
+- **Network**: No interference from UFW or other system-specific firewalls, and IP forwarding enabled between network interfaces. An IP address reachable via SSH that can act as the entry point for Wire traffic.
+- **Wire-server-deploy artifact**: A tar bundle containing all the required bash scripts, deb packages, ansible playbooks, helm charts and docker images to help with the installation. Reach out to [Wire support](https://support.wire.com/) to get access to the latest stable Wire artifact.
+
+## VM Provisioning
+
+Seven VMs are required, as detailed below. You can either manage the VMs with your own hypervisor or run our [WIAB staging ansible playbook](https://github.com/wireapp/wire-server-deploy/blob/master/ansible/wiab-staging-provision.yml) against your physical node to set up the VMs.
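+
+If you plan to manage the VMs with your own hypervisor, it is worth confirming up front that the physical machine exposes hardware virtualization and that libvirt is usable. The commands below are a minimal sanity check, not part of the documented procedure; they assume an Ubuntu host on which the qemu/libvirt packages are already installed (the playbook described later installs them for you):
+
+```bash
+# Count CPU virtualization flags; a value greater than 0 means VT-x/AMD-V is exposed
+grep -Ec '(vmx|svm)' /proc/cpuinfo
+
+# /dev/kvm exists once the kvm kernel module is loaded
+ls -l /dev/kvm
+
+# libvirt is reachable; after provisioning, this should list the 7 VMs
+sudo virsh list --all
+```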
+ +**VM Architecture and Resource Allocation:** + +| Hostname | Role | RAM | vCPUs | Disk | +|----------|------|-----|-------|------| +| assethost | Asset/Storage Server | 4 GiB | 2 | 100 GB | +| kubenode1 | Kubernetes Node 1 | 9 GiB | 5 | 150 GB | +| kubenode2 | Kubernetes Node 2 | 9 GiB | 5 | 150 GB | +| kubenode3 | Kubernetes Node 3 | 9 GiB | 5 | 150 GB | +| datanode1 | Data Node 1 | 8 GiB | 4 | 100 GB | +| datanode2 | Data Node 2 | 8 GiB | 4 | 100 GB | +| datanode3 | Data Node 3 | 8 GiB | 4 | 100 GB | +| **Total** | | **55 GiB** | **29** | **850 GB** | + +*Note: These specifications are optimized for testing and validation purposes, not for performance benchmarking.* + +**VM Service Distribution:** + +- **kubenodes (kubenode1, kubenode2, kubenode3):** Run the Kubernetes cluster and host Wire backend services +- **datanodes (datanode1, datanode2, datanode3):** Run distributed data services: + - Cassandra (distributed database) + - PostgreSQL (operational database) + - Elasticsearch (search engine) + - Minio (S3-compatible object storage) + - RabbitMQ (message broker) +- **assethost:** Hosts static assets to be used by kubenodes and datanodes + +## WIAB staging ansible playbook + +The ansible playbook will perform the following operations for you: + +**System Setup & Networking**: + - Updates all system packages and installs required tools (git, curl, docker, qemu, libvirt, yq, etc.) + - Configures SSH, firewall (nftables), and user permissions (sudo, kvm, docker groups) + +**wire-server-deploy Artifact & Ubuntu Cloud Image**: + - Downloads wire-server-deploy static artifact and Ubuntu cloud image + - Extracts artifacts and sets proper file permissions + - *Note: The wire-server-deploy artifact downloaded corresponds to the currently supported version* + +**Libvirt Network Setup and VM Creation**: + - Removes default libvirt network and creates custom "wirebox" network + - Launches VMs using the `offline-vm-setup.sh` script with KVM + - Creates an SSH key directory at `/home/ansible_user/wire-server-deploy/ssh` for VM access + +**Ansible Inventory Generation**: + - Generates inventory.yml with actual VM IPs replacing placeholders + - Configures network interface variables for all k8s-nodes and datanodes + + +*Note: Skip the Ansible playbook step if you are managing VMs with your own hypervisor.* + +### Getting started with Ansible playbook + +**Step 1: Obtain the ansible directory** + +We need the whole ansible directory as ansible-playbook uses some templates for its operations. Choose one method to download the `wire-server-deploy/ansible` directory: + +**Option A: Download as ZIP** +```bash +wget https://github.com/wireapp/wire-server-deploy/archive/refs/heads/master.zip +unzip master.zip +cd wire-server-deploy-master +``` + +**Option B: Clone with Git** +```bash +git clone https://github.com/wireapp/wire-server-deploy.git +cd wire-server-deploy +``` + +**Step 2: Configure your Ansible inventory for your physical machine** + +A sample inventory is available at [ansible/inventory/demo/wiab-staging.yml](https://github.com/wireapp/wire-server-deploy/blob/master/ansible/inventory/demo/wiab-staging.yml). 
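+
+Before running the provisioning step, you can optionally check that Ansible parses your edited copy of the inventory and can reach the physical machine over SSH. This is a minimal sketch using standard Ansible tooling and the inventory path from this guide; it is not required by the procedure (see the note below about replacing example.com):
+
+```bash
+# Show how Ansible parses the inventory (hosts, groups and variables)
+ansible-inventory -i ansible/inventory/demo/wiab-staging.yml --list
+
+# Confirm SSH access and a working Python interpreter on the physical machine
+ansible -i ansible/inventory/demo/wiab-staging.yml all -m ping
+
+# This guide expects ansible-core 2.16.3 or a compatible release
+ansible --version
+```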
+
+*Note: Replace example.com with the address of your physical machine where KVM is available, and adjust other variables accordingly.*
+
+**Step 3: Run the VM and network provision**
+
+```bash
+ansible-playbook -i ansible/inventory/demo/wiab-staging.yml ansible/wiab-staging-provision.yml
+```
+
+*Note: Ansible core version 2.16.3 or a compatible release is required for this step.*
+
+## Ensure secondary ansible inventory for VMs
+
+Now you should have 7 VMs running on your physical machine. If you used the ansible playbook, you should also have a directory `/home/ansible_user/wire-server-deploy` with all resources required for further deployment. If you didn't use the above playbook, download the `wire-server-deploy` artifact shared by Wire support and unpack it (it is a `.tgz` tarball, e.g. `tar xzf <artifact>.tgz`).
+
+Ensure the inventory file `ansible/inventory/offline/inventory.yml` in the directory `/home/ansible_user/wire-server-deploy` contains values corresponding to your VMs. If you have already used the [Ansible playbook above](#getting-started-with-ansible-playbook) to set up the VMs, this file should have been prepared for you.
+
+## Next steps
+
+Since the inventory is ready, please continue with the following steps:
+
+### Environment Setup
+
+- **[Making tooling available in your environment](docs_ubuntu_22.04.md#making-tooling-available-in-your-environment)**
+  - Source the `bin/offline-env.sh` shell script to set up a `d` alias that runs commands inside a Docker container with all necessary tools for offline deployment.
+
+- **[Generating secrets](docs_ubuntu_22.04.md#generating-secrets)**
+  - Run `./bin/offline-secrets.sh` to generate fresh secrets for Minio and coturn services. This creates two secret files: `ansible/inventory/group_vars/all/secrets.yaml` and `values/wire-server/secrets.yaml`.
+
+### Kubernetes & Data Services Deployment
+
+- **[Deploying Kubernetes and stateful services](docs_ubuntu_22.04.md#deploying-kubernetes-and-stateful-services)**
+  - Run `d ./bin/offline-cluster.sh` to deploy Kubernetes and stateful services (Cassandra, PostgreSQL, Elasticsearch, Minio, RabbitMQ). This script deploys all infrastructure needed for Wire backend operations.
+
+*Note: Ensure all Helm charts use the values and secrets files in their `values/` directories—do not run `helm install` without them, or it will fall back to defaults and the artifact-provided values won't apply. Sample commands can be found at [offline-helm.sh](https://github.com/wireapp/wire-server-deploy/blob/master/bin/offline-helm.sh).*
+
+### Wire Components Deployment
+
+- **Deploying Helm charts**
+  - **[Deploying stateless services and other dependencies](docs_ubuntu_22.04.md#deploying-stateless-dependencies)**
+    - Deploy the cassandra-external, elasticsearch-external, minio-external, rabbitmq-external and databases-ephemeral helm charts to set up connections to external data services and stateless database dependencies.
+
+  - **[Deploying Wire Server](docs_ubuntu_22.04.md#deploying-wire-server)**
+    - Install the core Wire backend platform with `d helm install wire-server ./charts/wire-server`. Update `values/wire-server/values.yaml` with your domain and inspect `values/wire-server/secrets.yaml` for required secrets.
+
+  - **[Deploying webapp](docs_ubuntu_22.04.md#deploying-webapp)**
+    - Deploy the Wire web application frontend. Set your domain name and configure it for user access to the Wire interface.
+ + - **[Deploying team-settings](docs_ubuntu_22.04.md#deploying-team-settings)** + - Install team management and settings services for enterprise features and team administration. + + - **[Deploying account-pages](docs_ubuntu_22.04.md#deploying-account-pages)** + - Deploy account management pages for user profile, password reset, and account-related functionalities. + + - **[Deploying smallstep-accomp](docs_ubuntu_22.04.md#deploying-smallstep-accomp)** + - Install the smallstep ACME companion for certificate management integration. + +### Network & Security + +- **[Enabling emails for wire](smtp.md)** + - Configure SMTP for user onboarding via email. Deploy either a temporary SMTP service included in the bundle or integrate with your existing SMTP relay, and ensure proper network configuration for email delivery. + +- **[Deploy ingress-nginx-controller](docs_ubuntu_22.04.md#deploy-ingress-nginx-controller)** + - Install nginx ingress controller as the entry point for HTTP/HTTPS traffic routing to Wire services. This component is required for all traffic forwarding methods. + +- **[Acquiring / Deploying SSL Certificates](docs_ubuntu_22.04.md#acquiring--deploying-ssl-certificates)** + - Configure SSL/TLS certificates either by bringing your own or using cert-manager with Let's Encrypt. SSL certificates are required by the nginx-ingress-services helm chart for secure HTTPS connections. + + > **Note (cert-manager & hairpin NAT):** When cert-manager performs HTTP-01 self-checks inside the cluster, traffic can hairpin (Pod → Node → host public IP → DNAT → Node → Ingress). If your nftables rules DNAT in PREROUTING without a matching SNAT on virbr0→virbr0, return packets may bypass the host and break conntrack, causing HTTP-01 timeouts. Also, strict rp_filter can drop asymmetric return packets. If cert-manager is deployed, verify whether hairpin handling is needed: + > + > - Enable hairpin SNAT for DNATed traffic (forces return traffic through the host): + > ```bash + > sudo nft insert rule ip nat POSTROUTING position 0 \ + > iifname "virbr0" oifname "virbr0" \ + > ct status dnat counter masquerade + > ``` + > - Relax reverse-path filtering to loose mode to allow asymmetric flows: + > ```bash + > sudo sysctl -w net.ipv4.conf.all.rp_filter=2 + > sudo sysctl -w net.ipv4.conf.virbr0.rp_filter=2 + > ``` + > These settings help conntrack reverse DNAT correctly and avoid drops during cert-manager’s HTTP-01 challenges in NAT/bridge (virbr0) environments. + +### Calling Services + +- **[Installing SFTD](docs_ubuntu_22.04.md#installing-sftd)** + - Deploy the Selective Forwarding Unit (SFT) calling server for Wire's voice and video calling capabilities. Optionally enable cooperation with TURN servers and configure appropriate node annotations for external IPs. + +- **[Installing Coturn](coturn.md)** + - Deploy TURN/STUN servers for WebRTC connectivity, enabling peer-to-peer communication for calling services and ensuring connectivity through firewalls and NATs. + +## Network Traffic Configuration + +### Bring traffic from Physical machine to Wire services in k8s cluster + +If you used the Ansible playbook earlier, nftables firewall rules are pre-configured to forward traffic. If you set up VMs manually with your own hypervisor, you must manually configure network traffic flow using nftables. + +**Required Network Configuration:** + +The physical machine must forward traffic from external clients to the Kubernetes cluster running Wire services. This involves: + +1. 
**HTTP/HTTPS Traffic (Ingress)** - Forward ports 80 and 443 to the nginx-ingress-controller running on a Kubernetes node
+   - Port 80 (HTTP) → Kubernetes node port 31772
+   - Port 443 (HTTPS) → Kubernetes node port 31773
+
+2. **Calling Services Traffic (Coturn/SFT)** - Forward media and TURN protocol traffic to Coturn/SFT
+   - Port 3478 (TCP/UDP) → Coturn control traffic
+   - Ports 32768-65535 (UDP) → Media relay traffic for WebRTC calling
+
+**Implementation:**
+
+Use the nftables rules in [../ansible/files/wiab_server_nftables.conf.j2](../ansible/files/wiab_server_nftables.conf.j2) as the template. It covers:
+- Defining your network variables (Coturn IP, Kubernetes node IP, WAN interface)
+- Creating NAT rules for HTTP/HTTPS ingress traffic
+- Setting up TURN protocol forwarding for Coturn
+- Restarting nftables to apply changes
+
+You can also apply these rules with the following Ansible playbook:
+
+```bash
+ansible-playbook -i inventory.yml ansible/wiab-staging-nftables.yml
+```
+
+*Note: If you ran the wiab-staging-provision.yml playbook, these rules may already be configured for you; confirm before running.*
+
+The inventory should define the following variables:
+
+```ini
+[all:vars]
+# Kubernetes node IPs
+kubenode1_ip=192.168.122.11
+kubenode2_ip=192.168.122.12
+kubenode3_ip=192.168.122.13
+
+# Calling services node (usually kubenode3)
+calling_node_ip=192.168.122.13
+
+# Host WAN interface name
+inf_wan=eth0
+```
+
+> **Note (cert-manager & hairpin NAT):**
+> When cert-manager performs HTTP-01 self-checks inside the cluster, traffic can hairpin (Pod → Node → host public IP → DNAT → Node → Ingress).
+> If your nftables rules DNAT in `PREROUTING` without a matching SNAT on `virbr0 → virbr0`, return packets may bypass the host and break conntrack, causing HTTP-01 timeouts and, in turn, certificate verification failures.
+> Additionally, strict `rp_filter` can drop asymmetric return packets.
+> If cert-manager is deployed in a NAT/bridge (`virbr0`) environment, first verify whether certificate issuance is actually failing before applying any hairpin handling.
+> Check whether certificates are successfully issued:
+> ```bash
+> d kubectl get certificates
+> ```
+> If certificates are not in `Ready=True` state, inspect the cert-manager logs for HTTP-01 self-check or timeout errors:
+> ```bash
+> d kubectl logs -n cert-manager-ns <cert-manager-pod>
+> ```
+> If you observe HTTP-01 challenge timeouts or self-check failures in a NAT/bridge environment, hairpin SNAT and relaxed reverse-path filtering may be required.
+>
+> - Relax reverse-path filtering to loose mode to allow asymmetric flows:
+>   ```bash
+>   sudo sysctl -w net.ipv4.conf.all.rp_filter=2
+>   sudo sysctl -w net.ipv4.conf.virbr0.rp_filter=2
+>   ```
+>   These settings help conntrack reverse DNAT correctly and avoid drops during cert-manager's HTTP-01 challenges in NAT/bridge (virbr0) environments.
+>
+> - Enable hairpin SNAT (temporary, for cert-manager HTTP-01):
+>   ```bash
+>   sudo nft insert rule ip nat POSTROUTING position 0 \
+>     iifname "virbr0" oifname "virbr0" \
+>     ip daddr 192.168.122.0/24 ct status dnat \
+>     counter masquerade \
+>     comment "wire-hairpin-dnat-virbr0"
+>   ```
+>   This forces DNATed traffic that hairpins over the bridge to be masqueraded, ensuring return traffic flows back through the host and conntrack can correctly reverse the DNAT.
+ > Verify the rule was added: + > ```bash + > sudo nft list chain ip nat POSTROUTING + > ``` + > You should see a rule similar to: + > ``` + > iifname "virbr0" oifname "virbr0" ip daddr 192.168.122.0/24 ct status dnat counter masquerade # handle + > ``` + > + > - Remove the rule after certificates are issued + > ```bash + > d kubectl get certificates + > ``` + > - Once Let's Encrypt validation completes and certificates are issued, remove the temporary hairpin SNAT rule. Use the following pipeline to locate the rule handle and delete it safely: + > ```bash + > sudo nft list chain ip nat POSTROUTING | \ + > grep wire-hairpin-dnat-virbr0 | \ + > sed -E 's/.*handle ([0-9]+).*/\1/' | \ + > xargs -r -I {} sudo nft delete rule ip nat POSTROUTING handle {} + > ``` + + +## Further Reading + +- **[Deploying stateless services and other dependencies](docs_ubuntu_22.04.md#deploying-stateless-dependencies)**: Read more about external datastores and stateless dependencies. +- **[Deploying Wire Server](docs_ubuntu_22.04.md#deploying-wire-server)**: Read more about core Wire backend deployment and required values/secrets. +- **[Deploying webapp](docs_ubuntu_22.04.md#deploying-webapp)**: Read more about webapp deployment and domain configuration. +- **[Deploying team-settings](docs_ubuntu_22.04.md#deploying-team-settings)**: Read more about team settings services. +- **[Deploying account-pages](docs_ubuntu_22.04.md#deploying-account-pages)**: Read more about account management services. +- **[Deploying smallstep-accomp](docs_ubuntu_22.04.md#deploying-smallstep-accomp)**: Read more about the ACME companion. +- **[Enabling emails for wire](smtp.md)**: Read more about SMTP options for onboarding email delivery and relay setup. +- **[Deploy ingress-nginx-controller](docs_ubuntu_22.04.md#deploy-ingress-nginx-controller)**: Read more about ingress configuration and traffic forwarding requirements. +- **[Acquiring / Deploying SSL Certificates](docs_ubuntu_22.04.md#acquiring--deploying-ssl-certificates)**: Read more about TLS options (Bring Your Own or cert-manager) and certificate requirements. +- **[Installing SFTD](docs_ubuntu_22.04.md#installing-sftd)**: Read more about the Selective Forwarding Unit (SFT) and related configuration. +- **[Installing Coturn](coturn.md)**: Read more about TURN/STUN setup for WebRTC connectivity and NAT traversal. 
+- **[Configure the port redirection in Nftables](coturn.md#configure-the-port-redirection-in-nftables)**: Read more about configuring Nftables rules diff --git a/terraform/examples/wire-server-deploy-offline-hetzner/outputs.tf b/terraform/examples/wire-server-deploy-offline-hetzner/outputs.tf index 6441b1c83..5bfebe101 100644 --- a/terraform/examples/wire-server-deploy-offline-hetzner/outputs.tf +++ b/terraform/examples/wire-server-deploy-offline-hetzner/outputs.tf @@ -46,7 +46,6 @@ output "static-inventory" { ansible_user = "root" private_interface = "enp7s0" adminhost_ip = tolist(hcloud_server.adminhost.network)[0].ip - ansible_ssh_common_args = "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s" } } adminhost = { @@ -55,10 +54,28 @@ output "static-inventory" { ansible_host = hcloud_server.adminhost.ipv4_address } } + vars = { + ansible_ssh_common_args = "-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3" + } + } + private = { + children = { + adminhost_local = {} + assethost = {} + "kube-node" = {} + cassandra = {} + elasticsearch = {} + minio = {} + postgresql = {} + rmq-cluster = {} + } + vars = { + ansible_ssh_common_args = "-o ProxyCommand=\"ssh -i ssh_private_key -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -W %h:%p -q root@${hcloud_server.adminhost.ipv4_address}\" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o ControlMaster=auto -o ControlPersist=60s -o BatchMode=yes -o ConnectionAttempts=10 -o ServerAliveInterval=60 -o ServerAliveCountMax=3" + } } adminhost_local = { hosts = { - "adminhost" = { + "adminhost_local" = { ansible_host = tolist(hcloud_server.adminhost.network)[0].ip } } @@ -145,6 +162,23 @@ output "static-inventory" { vars = { wire_dbname = "wire-server" postgresql_network_interface = "enp7s0" + repmgr_node_config = { + postgresql1 = { + node_id = 1 + priority = 150 + role = "primary" + } + postgresql2 = { + node_id = 2 + priority = 100 + role = "standby" + } + postgresql3 = { + node_id = 3 + priority = 50 + role = "standby" + } + } } } postgresql_rw = {