From 66bdf8783ec1a6001392650817bc7cc8a875f623 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Jun 2026 08:35:52 +0200 Subject: [PATCH] fix(build): align container engine selection Signed-off-by: Evan Lezar --- architecture/build.md | 17 ++ deploy/helm/openshell/skaffold.yaml | 4 +- e2e/rust/src/harness/container.rs | 134 +++++++++-- e2e/rust/tests/gpu_device_selection.rs | 16 +- e2e/rust/tests/local_driver_token_restart.rs | 18 +- e2e/with-docker-gateway.sh | 34 ++- e2e/with-podman-gateway.sh | 34 ++- tasks/scripts/container-engine.sh | 232 ++++++++++++++++--- 8 files changed, 425 insertions(+), 64 deletions(-) diff --git a/architecture/build.md b/architecture/build.md index 74952ad36..da4e17f69 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -94,6 +94,23 @@ the macOS user's shared home directory. Local image work should use `mise` tasks rather than direct Docker commands so the same staging and tagging assumptions are used locally and in CI. +Container-engine selection is centralized in `tasks/scripts/container-engine.sh`. +`CONTAINER_ENGINE=docker|podman` is the only explicit override. Docker- and +Podman-backed e2e wrappers validate that override against their lane, set +`OPENSHELL_E2E_DRIVER`, and reject the removed +`OPENSHELL_E2E_CONTAINER_ENGINE` selector so build helpers and Rust e2e support +containers use the same engine. When no explicit override is present, an e2e +driver requirement wins, then a local-cluster requirement, then host +auto-detection. + +Local Kubernetes image workflows opt into cluster-aware selection with +`CONTAINER_ENGINE_TARGET=local-k8s-cluster`. The hint is intentionally scoped to +Skaffold-style `push: false` builds where the image must land in the engine +backing the active local cluster: `k3d-*` contexts require Docker, `kind-*` +contexts use Docker unless `KIND_EXPERIMENTAL_PROVIDER=podman`, and unknown +contexts require an explicit `CONTAINER_ENGINE`. Other image builds do not infer +from kube context. + ## CI and E2E Required checks run on GitHub Actions. Workflows that use NVIDIA self-hosted runners trigger from copy-pr-bot mirror branches, so trusted PRs are mirrored into `pull-request/` branches before those workflows run. diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index d4608da5e..37a21fbac 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -26,7 +26,7 @@ build: context: ../../.. custom: buildCommand: | - CONTAINER_ENGINE=docker \ + CONTAINER_ENGINE_TARGET=local-k8s-cluster \ IMAGE_NAME="${IMAGE%:*}" \ IMAGE_TAG="${IMAGE##*:}" \ tasks/scripts/docker-build-image.sh gateway @@ -43,7 +43,7 @@ build: context: ../../.. custom: buildCommand: | - CONTAINER_ENGINE=docker \ + CONTAINER_ENGINE_TARGET=local-k8s-cluster \ IMAGE_NAME="${IMAGE%:*}" \ IMAGE_TAG="${IMAGE##*:}" \ tasks/scripts/docker-build-image.sh supervisor diff --git a/e2e/rust/src/harness/container.rs b/e2e/rust/src/harness/container.rs index 8d9f03fb3..c37d54ad1 100644 --- a/e2e/rust/src/harness/container.rs +++ b/e2e/rust/src/harness/container.rs @@ -15,8 +15,7 @@ use tokio::time::{interval, timeout}; use super::port::find_free_port; -const DEFAULT_TEST_SERVER_IMAGE: &str = - "ghcr.io/nvidia/openshell-community/sandboxes/base:latest"; +const DEFAULT_TEST_SERVER_IMAGE: &str = "ghcr.io/nvidia/openshell-community/sandboxes/base:latest"; #[must_use] pub fn e2e_driver() -> Option { @@ -37,18 +36,16 @@ pub struct ContainerEngine { } impl ContainerEngine { - #[must_use] - pub fn from_env() -> Self { - let binary = std::env::var("OPENSHELL_E2E_CONTAINER_ENGINE") - .ok() - .filter(|value| !value.trim().is_empty()) - .or_else(|| match e2e_driver().as_deref() { - Some("podman") => Some("podman".to_string()), - _ => Some("docker".to_string()), - }) - .expect("container engine fallback should be set"); + pub fn from_env() -> Result { + let binary = resolve_container_engine( + std::env::var("OPENSHELL_E2E_CONTAINER_ENGINE") + .ok() + .as_deref(), + std::env::var("CONTAINER_ENGINE").ok().as_deref(), + e2e_driver().as_deref(), + )?; - Self { binary } + Ok(Self { binary }) } #[must_use] @@ -87,7 +84,7 @@ pub struct ContainerHttpServer { impl ContainerHttpServer { pub async fn start_python(alias: &str, script: &str) -> Result { - let engine = ContainerEngine::from_env(); + let engine = ContainerEngine::from_env()?; let host_port = find_free_port(); let network = e2e_network_name(); let host = network.as_ref().map_or_else( @@ -179,6 +176,60 @@ impl ContainerHttpServer { } } +fn normalized_env(value: Option<&str>) -> Option { + value + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_ascii_lowercase) +} + +fn validate_container_engine(name: &str, value: String) -> Result { + match value.as_str() { + "docker" | "podman" => Ok(value), + _ => Err(format!( + "{name}={value} is invalid; expected docker or podman" + )), + } +} + +fn required_engine_from_driver(driver: Option<&str>) -> Option { + match normalized_env(driver).as_deref() { + Some("docker") => Some("docker".to_string()), + Some("podman") => Some("podman".to_string()), + _ => None, + } +} + +fn resolve_container_engine( + legacy_selector: Option<&str>, + explicit_engine: Option<&str>, + e2e_driver: Option<&str>, +) -> Result { + if normalized_env(legacy_selector).is_some() { + return Err( + "OPENSHELL_E2E_CONTAINER_ENGINE is no longer supported; set CONTAINER_ENGINE=docker|podman instead" + .to_string(), + ); + } + + let explicit_engine = normalized_env(explicit_engine) + .map(|value| validate_container_engine("CONTAINER_ENGINE", value)) + .transpose()?; + let required_engine = required_engine_from_driver(e2e_driver); + + if let (Some(explicit), Some(required)) = (&explicit_engine, &required_engine) + && explicit != required + { + return Err(format!( + "CONTAINER_ENGINE={explicit} conflicts with OPENSHELL_E2E_DRIVER={required}; use CONTAINER_ENGINE={required} or unset CONTAINER_ENGINE" + )); + } + + Ok(explicit_engine + .or(required_engine) + .unwrap_or_else(|| "docker".to_string())) +} + impl Drop for ContainerHttpServer { fn drop(&mut self) { let _ = self @@ -188,3 +239,58 @@ impl Drop for ContainerHttpServer { .output(); } } + +#[cfg(test)] +mod tests { + use super::resolve_container_engine; + + #[test] + fn defaults_to_docker() { + assert_eq!( + resolve_container_engine(None, None, None).as_deref(), + Ok("docker") + ); + } + + #[test] + fn explicit_container_engine_wins() { + assert_eq!( + resolve_container_engine(None, Some("podman"), None).as_deref(), + Ok("podman") + ); + } + + #[test] + fn driver_selects_container_engine_without_explicit_engine() { + assert_eq!( + resolve_container_engine(None, None, Some("podman")).as_deref(), + Ok("podman") + ); + } + + #[test] + fn rejects_removed_selector() { + let err = resolve_container_engine(Some("podman"), None, None).unwrap_err(); + assert!(err.contains("OPENSHELL_E2E_CONTAINER_ENGINE")); + } + + #[test] + fn rejects_invalid_explicit_engine() { + let err = resolve_container_engine(None, Some("containerd"), None).unwrap_err(); + assert!(err.contains("CONTAINER_ENGINE=containerd")); + } + + #[test] + fn rejects_explicit_driver_conflict() { + let err = resolve_container_engine(None, Some("docker"), Some("podman")).unwrap_err(); + assert!(err.contains("CONTAINER_ENGINE=docker conflicts")); + } + + #[test] + fn ignores_non_container_drivers() { + assert_eq!( + resolve_container_engine(None, None, Some("kubernetes")).as_deref(), + Ok("docker") + ); + } +} diff --git a/e2e/rust/tests/gpu_device_selection.rs b/e2e/rust/tests/gpu_device_selection.rs index 4fb0fa76a..08e77ce2b 100644 --- a/e2e/rust/tests/gpu_device_selection.rs +++ b/e2e/rust/tests/gpu_device_selection.rs @@ -167,8 +167,12 @@ fn docker_info_reports_wsl2(info: &Value) -> bool { .any(text_reports_wsl2) } +fn container_engine() -> ContainerEngine { + ContainerEngine::from_env().unwrap_or_else(|err| panic!("resolve e2e container engine: {err}")) +} + fn all_gpu_default_allowed() -> bool { - let engine = ContainerEngine::from_env(); + let engine = container_engine(); if uses_local_podman_inventory(&engine) { return local_podman_all_gpu_default_supported(); } @@ -177,7 +181,7 @@ fn all_gpu_default_allowed() -> bool { } fn discovered_cdi_gpu_device_ids() -> Vec { - let engine = ContainerEngine::from_env(); + let engine = container_engine(); if uses_local_podman_inventory(&engine) { let device_ids = local_podman_cdi_gpu_device_ids(); assert!( @@ -216,8 +220,7 @@ fn default_cdi_gpu_device_id(device_ids: &[String], allow_all_devices: bool) -> let mut named = device_ids .iter() .filter(|device_id| { - device_id.starts_with(CDI_GPU_DEVICE_PREFIX) - && device_id.as_str() != CDI_GPU_DEVICE_ALL + device_id.starts_with(CDI_GPU_DEVICE_PREFIX) && device_id.as_str() != CDI_GPU_DEVICE_ALL }) .cloned() .collect::>(); @@ -256,7 +259,7 @@ fn cdi_devices_driver_config_json(device_ids: &[&str]) -> String { } fn runtime_gpu_lines(gpu_device: &str) -> Vec { - let engine = ContainerEngine::from_env(); + let engine = container_engine(); let image = gpu_probe_image(); let output = engine .command() @@ -339,7 +342,8 @@ async fn sandbox_create_output(args: &[&str]) -> String { #[tokio::test] async fn gpu_request_without_device_matches_plain_default_gpu_container() { let device_ids = discovered_cdi_gpu_device_ids(); - let Some(default_gpu_device) = default_cdi_gpu_device_id(&device_ids, all_gpu_default_allowed()) + let Some(default_gpu_device) = + default_cdi_gpu_device_id(&device_ids, all_gpu_default_allowed()) else { eprintln!("skipping default GPU request test because no selectable GPU ID was discovered"); return; diff --git a/e2e/rust/tests/local_driver_token_restart.rs b/e2e/rust/tests/local_driver_token_restart.rs index 2a7a54603..28e08965c 100644 --- a/e2e/rust/tests/local_driver_token_restart.rs +++ b/e2e/rust/tests/local_driver_token_restart.rs @@ -292,10 +292,7 @@ async fn restart_vm_sandbox(gateway: &ManagedGateway, sandbox_name: &str) -> Res wait_for_healthy(Duration::from_secs(120)).await } -async fn wait_for_driver_reconnect( - driver: LocalDriver, - sandbox_name: &str, -) -> Result<(), String> { +async fn wait_for_driver_reconnect(driver: LocalDriver, sandbox_name: &str) -> Result<(), String> { match driver { LocalDriver::Docker | LocalDriver::Podman => { wait_for_sandbox_exec_contains( @@ -321,7 +318,9 @@ async fn wait_for_driver_reconnect( #[tokio::test] async fn local_driver_sandbox_restarts_with_non_expiring_bootstrap_jwt() { let Some(driver) = LocalDriver::from_env() else { - eprintln!("Skipping local-driver token restart test: e2e driver is not Docker, Podman, or VM"); + eprintln!( + "Skipping local-driver token restart test: e2e driver is not Docker, Podman, or VM" + ); return; }; let namespace = if driver.is_container() { @@ -338,7 +337,14 @@ async fn local_driver_sandbox_restarts_with_non_expiring_bootstrap_jwt() { } else { None }; - let engine = driver.is_container().then(ContainerEngine::from_env); + let engine = if driver.is_container() { + Some( + ContainerEngine::from_env() + .unwrap_or_else(|err| panic!("resolve e2e container engine: {err}")), + ) + } else { + None + }; let gateway = if driver == LocalDriver::Vm { let Some(gateway) = ManagedGateway::from_env().expect("load managed e2e gateway metadata") else { diff --git a/e2e/with-docker-gateway.sh b/e2e/with-docker-gateway.sh index f8e17661d..5dc61e6c1 100755 --- a/e2e/with-docker-gateway.sh +++ b/e2e/with-docker-gateway.sh @@ -35,6 +35,36 @@ source "${ROOT}/e2e/support/gateway-common.sh" e2e_preserve_mise_dirs +require_container_engine_lane() { + local lane=$1 + local label=$2 + local selected_engine selected_driver + + if [ -n "${OPENSHELL_E2E_CONTAINER_ENGINE:-}" ]; then + echo "ERROR: OPENSHELL_E2E_CONTAINER_ENGINE is no longer supported." >&2 + echo " Set CONTAINER_ENGINE=${lane} for the ${label} e2e lane, or unset it." >&2 + exit 2 + fi + selected_engine="$(printf '%s' "${CONTAINER_ENGINE:-}" | tr '[:upper:]' '[:lower:]')" + selected_driver="$(printf '%s' "${OPENSHELL_E2E_DRIVER:-}" | tr '[:upper:]' '[:lower:]')" + + if [ -n "${selected_engine}" ] && [ "${selected_engine}" != "${lane}" ]; then + echo "ERROR: CONTAINER_ENGINE=${CONTAINER_ENGINE} conflicts with the ${label} e2e lane." >&2 + echo " Set CONTAINER_ENGINE=${lane} or unset CONTAINER_ENGINE." >&2 + exit 2 + fi + if [ -n "${selected_driver}" ] && [ "${selected_driver}" != "${lane}" ]; then + echo "ERROR: OPENSHELL_E2E_DRIVER=${OPENSHELL_E2E_DRIVER} conflicts with the ${label} e2e lane." >&2 + echo " Set OPENSHELL_E2E_DRIVER=${lane} or unset OPENSHELL_E2E_DRIVER." >&2 + exit 2 + fi + + export CONTAINER_ENGINE="${lane}" + export OPENSHELL_E2E_DRIVER="${lane}" +} + +require_container_engine_lane docker Docker + github_actions_host_docker_tmpdir() { if [ "${GITHUB_ACTIONS:-}" != "true" ] \ || [ ! -S /var/run/docker.sock ] \ @@ -234,8 +264,7 @@ if [ -n "${OPENSHELL_GATEWAY_ENDPOINT:-}" ]; then "$(e2e_endpoint_port "${OPENSHELL_GATEWAY_ENDPOINT}")" export OPENSHELL_GATEWAY="${GATEWAY_NAME}" export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-180}" - export OPENSHELL_E2E_DRIVER="${OPENSHELL_E2E_DRIVER:-docker}" - export OPENSHELL_E2E_CONTAINER_ENGINE="${OPENSHELL_E2E_CONTAINER_ENGINE:-docker}" + export OPENSHELL_E2E_DRIVER="docker" echo "Using existing e2e gateway endpoint: ${OPENSHELL_GATEWAY_ENDPOINT}" "$@" @@ -453,7 +482,6 @@ export OPENSHELL_E2E_DOCKER_NETWORK_NAME="${DOCKER_NETWORK_NAME}" export OPENSHELL_E2E_NETWORK_NAME="${DOCKER_NETWORK_NAME}" export OPENSHELL_E2E_SANDBOX_NAMESPACE="${E2E_NAMESPACE}" export OPENSHELL_E2E_DRIVER="docker" -export OPENSHELL_E2E_CONTAINER_ENGINE="docker" if connect_current_container_to_docker_network "${DOCKER_NETWORK_NAME}"; then echo "Connected CI job container to Docker network ${DOCKER_NETWORK_NAME} (${GATEWAY_HOST_ALIAS_IP})." else diff --git a/e2e/with-podman-gateway.sh b/e2e/with-podman-gateway.sh index 94ec217c1..77b4c1324 100755 --- a/e2e/with-podman-gateway.sh +++ b/e2e/with-podman-gateway.sh @@ -25,6 +25,36 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # shellcheck source=e2e/support/gateway-common.sh source "${ROOT}/e2e/support/gateway-common.sh" +require_container_engine_lane() { + local lane=$1 + local label=$2 + local selected_engine selected_driver + + if [ -n "${OPENSHELL_E2E_CONTAINER_ENGINE:-}" ]; then + echo "ERROR: OPENSHELL_E2E_CONTAINER_ENGINE is no longer supported." >&2 + echo " Set CONTAINER_ENGINE=${lane} for the ${label} e2e lane, or unset it." >&2 + exit 2 + fi + selected_engine="$(printf '%s' "${CONTAINER_ENGINE:-}" | tr '[:upper:]' '[:lower:]')" + selected_driver="$(printf '%s' "${OPENSHELL_E2E_DRIVER:-}" | tr '[:upper:]' '[:lower:]')" + + if [ -n "${selected_engine}" ] && [ "${selected_engine}" != "${lane}" ]; then + echo "ERROR: CONTAINER_ENGINE=${CONTAINER_ENGINE} conflicts with the ${label} e2e lane." >&2 + echo " Set CONTAINER_ENGINE=${lane} or unset CONTAINER_ENGINE." >&2 + exit 2 + fi + if [ -n "${selected_driver}" ] && [ "${selected_driver}" != "${lane}" ]; then + echo "ERROR: OPENSHELL_E2E_DRIVER=${OPENSHELL_E2E_DRIVER} conflicts with the ${label} e2e lane." >&2 + echo " Set OPENSHELL_E2E_DRIVER=${lane} or unset OPENSHELL_E2E_DRIVER." >&2 + exit 2 + fi + + export CONTAINER_ENGINE="${lane}" + export OPENSHELL_E2E_DRIVER="${lane}" +} + +require_container_engine_lane podman Podman + PODMAN_XDG_CONFIG_HOME_WAS_SET=0 PODMAN_XDG_CONFIG_HOME="" if [ "${XDG_CONFIG_HOME+x}" = x ]; then @@ -302,8 +332,7 @@ if [ -n "${OPENSHELL_GATEWAY_ENDPOINT:-}" ]; then "$(e2e_endpoint_port "${OPENSHELL_GATEWAY_ENDPOINT}")" export OPENSHELL_GATEWAY="${GATEWAY_NAME}" export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}" - export OPENSHELL_E2E_DRIVER="${OPENSHELL_E2E_DRIVER:-podman}" - export OPENSHELL_E2E_CONTAINER_ENGINE="${OPENSHELL_E2E_CONTAINER_ENGINE:-podman}" + export OPENSHELL_E2E_DRIVER="podman" echo "Using existing Podman e2e gateway endpoint: ${OPENSHELL_GATEWAY_ENDPOINT}" "$@" @@ -350,7 +379,6 @@ PODMAN_NETWORK_NAME="${E2E_NAMESPACE}" ensure_e2e_podman_network "${PODMAN_NETWORK_NAME}" export OPENSHELL_E2E_DRIVER="podman" -export OPENSHELL_E2E_CONTAINER_ENGINE="podman" export OPENSHELL_E2E_NETWORK_NAME="${PODMAN_NETWORK_NAME}" export OPENSHELL_E2E_SANDBOX_NAMESPACE="${E2E_NAMESPACE}" diff --git a/tasks/scripts/container-engine.sh b/tasks/scripts/container-engine.sh index 64c644d0b..502a6880c 100755 --- a/tasks/scripts/container-engine.sh +++ b/tasks/scripts/container-engine.sh @@ -20,7 +20,10 @@ # ce_build_multiarch — multi-arch build + push workflow # # Override the auto-detected engine by setting CONTAINER_ENGINE=docker or -# CONTAINER_ENGINE=podman before sourcing. +# CONTAINER_ENGINE=podman before sourcing. Scripts that build images directly +# into a local Kubernetes cluster can also set +# CONTAINER_ENGINE_TARGET=local-k8s-cluster so the helper validates the active +# k3d/kind context before choosing an engine. # Suppress the detection log line with CONTAINER_ENGINE_QUIET=1 (useful in # CI pipelines or scripts that source this file multiple times in a pipeline). @@ -34,39 +37,181 @@ _CONTAINER_ENGINE_LOADED=1 # Detection # --------------------------------------------------------------------------- -_detect_container_engine() { - # Honour explicit override. - if [[ -n "${CONTAINER_ENGINE:-}" ]]; then - if ! command -v "${CONTAINER_ENGINE}" >/dev/null 2>&1; then - echo "Error: CONTAINER_ENGINE=${CONTAINER_ENGINE} is not installed or not in PATH" >&2 - exit 1 - fi - _CE_EXPLICIT_OVERRIDE=1 - return - fi +_ce_lower() { + printf '%s' "$1" | tr '[:upper:]' '[:lower:]' +} + +_ce_error() { + echo "Error: $*" >&2 + exit 1 +} + +_ce_validate_engine_value() { + local name=$1 + local value=$2 + + case "${value}" in + docker|podman) + ;; + *) + _ce_error "${name}=${value} is invalid; expected docker or podman" + ;; + esac +} + +_ce_docker_is_podman_shim() { + command -v docker >/dev/null 2>&1 && docker --version 2>/dev/null | grep -qi podman +} + +_ce_require_engine_available() { + local engine=$1 + case "${engine}" in + docker) + if ! command -v docker >/dev/null 2>&1; then + _ce_error "CONTAINER_ENGINE=docker requires the docker CLI to be installed and in PATH" + fi + if _ce_docker_is_podman_shim; then + _ce_error "CONTAINER_ENGINE=docker was requested, but docker appears to be a Podman compatibility shim" + fi + ;; + podman) + if command -v podman >/dev/null 2>&1 || _ce_docker_is_podman_shim; then + return + fi + _ce_error "CONTAINER_ENGINE=podman requires the podman CLI or a docker-compatible Podman shim to be installed and in PATH" + ;; + esac +} + +_ce_auto_detect_engine() { # Prefer podman when available. if command -v podman >/dev/null 2>&1; then - CONTAINER_ENGINE=podman + echo "podman" return fi # Fall back to docker — but detect the podman-masquerading-as-docker shim # shipped by some distros (e.g. Fedora, RHEL). if command -v docker >/dev/null 2>&1; then - if docker --version 2>/dev/null | grep -qi podman; then - CONTAINER_ENGINE=podman + if _ce_docker_is_podman_shim; then + echo "podman" else - CONTAINER_ENGINE=docker + echo "docker" fi return fi echo "Error: neither podman nor docker is installed." >&2 - echo " Install one of them and try again." >&2 + echo " Install one of them, or set CONTAINER_ENGINE=docker|podman." >&2 exit 1 } +_ce_required_engine_from_e2e_driver() { + local driver + driver="$(_ce_lower "${OPENSHELL_E2E_DRIVER:-}")" + + case "${driver}" in + docker|podman) + echo "${driver}" + ;; + esac +} + +_ce_required_engine_from_local_cluster() { + local explicit_engine=$1 + + if [[ "${CONTAINER_ENGINE_TARGET:-}" != "local-k8s-cluster" ]]; then + return + fi + + local context="" + if command -v kubectl >/dev/null 2>&1; then + context="$(kubectl config current-context 2>/dev/null || true)" + fi + + case "${context}" in + k3d-*) + echo "docker" + ;; + kind-*) + if [[ "$(_ce_lower "${KIND_EXPERIMENTAL_PROVIDER:-}")" == "podman" ]]; then + echo "podman" + else + echo "docker" + fi + ;; + "") + if [[ -z "${explicit_engine}" ]]; then + _ce_error "CONTAINER_ENGINE_TARGET=local-k8s-cluster requires an active k3d/kind Kubernetes context, or an explicit CONTAINER_ENGINE=docker|podman" + fi + ;; + *) + if [[ -z "${explicit_engine}" ]]; then + _ce_error "cannot infer container engine for Kubernetes context '${context}'; set CONTAINER_ENGINE=docker|podman" + fi + ;; + esac +} + +_detect_container_engine() { + if [[ -n "${OPENSHELL_E2E_CONTAINER_ENGINE:-}" ]]; then + _ce_error "OPENSHELL_E2E_CONTAINER_ENGINE is no longer supported; set CONTAINER_ENGINE=docker|podman instead" + fi + + case "${CONTAINER_ENGINE_TARGET:-}" in + ""|local-k8s-cluster) + ;; + *) + _ce_error "CONTAINER_ENGINE_TARGET=${CONTAINER_ENGINE_TARGET} is invalid; expected local-k8s-cluster" + ;; + esac + + local explicit_engine="" + if [[ -n "${CONTAINER_ENGINE:-}" ]]; then + explicit_engine="$(_ce_lower "${CONTAINER_ENGINE}")" + _ce_validate_engine_value "CONTAINER_ENGINE" "${explicit_engine}" + fi + + local e2e_required="" + if ! e2e_required="$(_ce_required_engine_from_e2e_driver)"; then + exit 1 + fi + + local local_cluster_required="" + if ! local_cluster_required="$(_ce_required_engine_from_local_cluster "${explicit_engine}")"; then + exit 1 + fi + + if [[ -n "${e2e_required}" && -n "${local_cluster_required}" && "${e2e_required}" != "${local_cluster_required}" ]]; then + _ce_error "OPENSHELL_E2E_DRIVER=${OPENSHELL_E2E_DRIVER} requires ${e2e_required}, but CONTAINER_ENGINE_TARGET=local-k8s-cluster requires ${local_cluster_required}" + fi + + if [[ -n "${explicit_engine}" ]]; then + if [[ -n "${e2e_required}" && "${explicit_engine}" != "${e2e_required}" ]]; then + _ce_error "CONTAINER_ENGINE=${explicit_engine} conflicts with OPENSHELL_E2E_DRIVER=${OPENSHELL_E2E_DRIVER}; use CONTAINER_ENGINE=${e2e_required} or unset CONTAINER_ENGINE" + fi + if [[ -n "${local_cluster_required}" && "${explicit_engine}" != "${local_cluster_required}" ]]; then + _ce_error "CONTAINER_ENGINE=${explicit_engine} conflicts with CONTAINER_ENGINE_TARGET=local-k8s-cluster; active local cluster requires ${local_cluster_required}" + fi + CONTAINER_ENGINE="${explicit_engine}" + _CE_SELECTION_REASON=explicit + elif [[ -n "${e2e_required}" ]]; then + CONTAINER_ENGINE="${e2e_required}" + _CE_SELECTION_REASON=e2e-driver + elif [[ -n "${local_cluster_required}" ]]; then + CONTAINER_ENGINE="${local_cluster_required}" + _CE_SELECTION_REASON=local-k8s-cluster + else + if ! CONTAINER_ENGINE="$(_ce_auto_detect_engine)"; then + exit 1 + fi + _CE_SELECTION_REASON=auto + fi + + _ce_require_engine_available "${CONTAINER_ENGINE}" +} + _detect_container_engine # The actual binary to invoke — usually equals CONTAINER_ENGINE, but when @@ -193,22 +338,40 @@ ce_host_arch() { # # Docker: docker info --format '{{.Architecture}}' # Podman: podman info --format '{{.Host.Arch}}' -# Falls back to the kernel architecture when the daemon query is unavailable so -# local builds do not accidentally target amd64 on arm64 hosts. +# Fails directly when the engine query is unavailable or returns malformed +# metadata. Silently falling back to the host architecture can make cross-VM +# builds use the wrong target. # --------------------------------------------------------------------------- ce_info_arch() { - local arch="" + local info_output arch normalized if ce_is_docker; then - arch=$("${_CE_BIN}" info --format '{{.Architecture}}' 2>/dev/null || true) + if ! info_output=$("${_CE_BIN}" info --format '{{.Architecture}}' 2>&1); then + echo "Error: failed to query ${CONTAINER_ENGINE} architecture with '${_CE_BIN} info':" >&2 + printf '%s\n' "${info_output}" >&2 + exit 1 + fi else - arch=$("${_CE_BIN}" info --format '{{.Host.Arch}}' 2>/dev/null || true) + if ! info_output=$("${_CE_BIN}" info --format '{{.Host.Arch}}' 2>&1); then + echo "Error: failed to query ${CONTAINER_ENGINE} architecture with '${_CE_BIN} info':" >&2 + printf '%s\n' "${info_output}" >&2 + exit 1 + fi fi - if [[ -n "${arch}" ]]; then - ce_normalize_arch "${arch}" - else - ce_host_arch + arch="$(printf '%s\n' "${info_output}" | awk 'NF { print; exit }')" + if [[ -z "${arch}" ]]; then + _ce_error "${CONTAINER_ENGINE} info did not report a host architecture" fi + + normalized="$(ce_normalize_arch "${arch}")" + case "${normalized}" in + amd64|arm64) + echo "${normalized}" + ;; + *) + _ce_error "unsupported ${CONTAINER_ENGINE} architecture '${arch}' reported by '${_CE_BIN} info'" + ;; + esac } # --------------------------------------------------------------------------- @@ -345,10 +508,19 @@ _ce_log_detected() { if [[ "${CONTAINER_ENGINE_QUIET:-}" == "1" ]]; then return fi - if [[ -n "${_CE_EXPLICIT_OVERRIDE:-}" ]]; then - echo "[container-engine] using ${CONTAINER_ENGINE} (set via CONTAINER_ENGINE env)" >&2 - else - echo "[container-engine] auto-detected: ${CONTAINER_ENGINE} (override with CONTAINER_ENGINE=docker|podman)" >&2 - fi + case "${_CE_SELECTION_REASON:-auto}" in + explicit) + echo "[container-engine] using ${CONTAINER_ENGINE} (set via CONTAINER_ENGINE env)" >&2 + ;; + e2e-driver) + echo "[container-engine] using ${CONTAINER_ENGINE} (required by OPENSHELL_E2E_DRIVER=${OPENSHELL_E2E_DRIVER})" >&2 + ;; + local-k8s-cluster) + echo "[container-engine] using ${CONTAINER_ENGINE} (required by CONTAINER_ENGINE_TARGET=local-k8s-cluster)" >&2 + ;; + *) + echo "[container-engine] auto-detected: ${CONTAINER_ENGINE} (override with CONTAINER_ENGINE=docker|podman)" >&2 + ;; + esac } _ce_log_detected