diff --git a/.gitignore b/.gitignore index 5c539396f0..0c1079078b 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,11 @@ CmakeLists.txt provisioning/osxsierra.legally.ok *.gdb_history *vgcore.* -tmpfiles.init.setup +tmpfiles.init.setup + +# Nix build outputs +result +result-* +test-results/ /CLAUDE.local.md diff --git a/CLAUDE.md b/CLAUDE.md index c0e7bb8d57..a41eca2cb0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -65,7 +65,7 @@ pmprobe -v - **libpcp**: Main PCP library providing core functionality - **pmcd**: Performance Metrics Collection Daemon - central coordinator -- **pmlogger**: Performance data logging daemon +- **pmlogger**: Performance data logging daemon - **pmie**: Performance Metrics Inference Engine - rule-based monitoring - **pmproxy**: Web API proxy and time series interface @@ -136,7 +136,7 @@ cd qa && ./check 000 # Check specific functionality cd qa && ./check -g pmcd # PMCD tests -cd qa && ./check -g pmda # PMDA tests +cd qa && ./check -g pmda # PMDA tests cd qa && ./check -g archive # Archive tests ``` @@ -147,7 +147,7 @@ PCP supports Linux, macOS, Windows (MinGW), AIX, and Solaris. Tests should be wr When developing on macOS (`Platform: darwin` in environment): -### ⚠️ Critical Constraints +### Critical Constraints 1. **Stage changes before VM tests** - The Tart VM accesses the filesystem directly — `git add` is the minimum required @@ -181,3 +181,14 @@ cd build/mac/test && ./run-all-tests.sh # Daily: build + test (20-30s) - `build/mac/CLAUDE.md` - Development workflow, test commands - `build/mac/MACOS_DEVELOPMENT.md` - Tart VM clean-room builds + +## Nix Development + +Nix packaging is provided via `flake.nix` and the `nix/` directory. +See `docs/HowTos/nix/index.rst` for full documentation. 
+ +```bash +nix build # Build PCP package +nix develop # Development shell +nix flake show # List all packages and apps +``` diff --git a/docs/HowTos/nix/index.rst b/docs/HowTos/nix/index.rst index 54327835dd..a3c5b10ba3 100644 --- a/docs/HowTos/nix/index.rst +++ b/docs/HowTos/nix/index.rst @@ -50,6 +50,8 @@ and easy dependency management. This documentation covers: - Using the flake to build from source - Available build options and feature flags +- MicroVM variants for development and testing +- Lifecycle testing framework for fine-grained VM validation - Technical details of Nix-specific patches - Known limitations and future improvements @@ -57,6 +59,111 @@ The Nix packaging work was initiated to bring PCP into the `nixpkgs `_ repository, enabling NixOS users to easily install and run PCP. +Modular Architecture +-------------------- + +The Nix packaging uses a modular design with separate files for different concerns. +This separation makes the codebase easier to maintain, test, and extend. 
+ +File Structure +^^^^^^^^^^^^^^ + +:: + + nix/ + ├── package.nix # PCP derivation (version from VERSION.pcp) + ├── nixos-module.nix # NixOS module for services.pcp + ├── constants.nix # Shared configuration constants + ├── flake.nix # Orchestrator (at repo root) + │ + ├── bpf.nix # BPF PMDA module (pre-compiled eBPF) + ├── bcc.nix # BCC PMDA module (DEPRECATED) + │ + ├── microvm.nix # Parametric MicroVM configuration (all variants) + ├── microvm-scripts.nix # VM management scripts (check, stop, ssh) + ├── pmie-test.nix # pmie testing module (stress-ng workload) + │ + ├── container.nix # OCI container image + ├── network-setup.nix # TAP/bridge network scripts + ├── shell.nix # Development shell + ├── vm-test.nix # NixOS VM integration test + ├── test-lib.nix # Shared test functions + │ + ├── patches/ # Nix-specific patches + │ ├── gnumakefile-nix-fixes.patch + │ ├── python-libpcp-nix.patch + │ ├── python-pmapi-no-reconnect.patch + │ └── shell-portable-pwd.patch + │ + ├── lifecycle/ # Modular lifecycle testing framework + │ ├── default.nix # Entry point, generates scripts for all variants + │ ├── constants.nix # Lifecycle-specific configuration + │ ├── lib.nix # Script generators (polling, connections, phases) + │ ├── pcp-checks.nix # PCP-specific verification (services, metrics) + │ └── scripts/ # Expect scripts for console interaction + │ ├── vm-expect.exp + │ ├── vm-debug.exp + │ └── vm-verify-pcp.exp + │ + └── tests/ + ├── microvm-test.nix # MicroVM test script builder + └── test-all-microvms.nix # Comprehensive test runner + +Module Descriptions +^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :header-rows: 1 + :widths: 25 75 + + * - File + - Purpose + * - ``package.nix`` + - PCP derivation with all build logic. Parses version from ``VERSION.pcp``, + applies patches, configures features, and wraps Python scripts for NixOS. + * - ``nixos-module.nix`` + - NixOS module providing ``services.pcp`` options. 
Configures systemd + services (pmcd, pmlogger, pmie, pmproxy), tmpfiles, user/group, and firewall. + * - ``constants.nix`` + - Central configuration constants (ports, network IPs, VM resources, PMDA + domain IDs, serial console ports, test thresholds). Imported by other + modules to ensure consistency. + * - ``bpf.nix`` + - NixOS module for pre-compiled BPF PMDA (pmdabpf). Uses CO-RE eBPF programs + that load quickly without runtime compilation. Low memory (~512MB). + * - ``bcc.nix`` + - NixOS module for BCC PMDA (pmdabcc). **DEPRECATED** - use pmdabpf instead. + BCC used runtime eBPF compilation which is slower and less reliable than + the pre-compiled BPF PMDA CO-RE approach. + * - ``microvm.nix`` + - Parametric MicroVM configuration. Single module handles all variants via + parameters (networking, debugMode, enableEvalTools, enableGrafana, etc.). + * - ``microvm-scripts.nix`` + - VM management scripts that work with ALL variants: ``pcp-vm-check`` (list), + ``pcp-vm-stop`` (stop), ``pcp-vm-ssh`` (connect). Detects VMs by hostname. + * - ``pmie-test.nix`` + - Synthetic workload module: stress-ng service + dedicated pmie instance + with rules to detect CPU spikes and log alerts. + * - ``container.nix`` + - OCI container image with layered build for Docker/Podman deployment. + Runs as non-root ``pcp`` user (UID 990). + * - ``network-setup.nix`` + - Scripts to create/destroy TAP networking: bridge, NAT rules, vhost-net + permissions for direct VM network access. + * - ``shell.nix`` + - Development shell with PCP build dependencies plus debugging tools + (gdb, valgrind on Linux, lldb on macOS). + * - ``vm-test.nix`` + - NixOS VM integration test using ``pkgs.testers.nixosTest``. Verifies + services start and metrics are queryable. + * - ``test-lib.nix`` + - Shared bash functions for MicroVM tests: SSH polling, service checks, + metric verification, security analysis. + * - ``lifecycle/`` + - Modular lifecycle testing framework for fine-grained MicroVM validation. 
+ Provides individual phase scripts (build, process, console, services, + metrics, shutdown) and full lifecycle tests per variant. + Quick Start ----------- @@ -65,15 +172,153 @@ Building with Flakes (Recommended) From the PCP repository root:: - # Build PCP + # Build PCP package nix build # Run pminfo nix run .#pcp -- pminfo --version - # Enter a development shell + # Enter a development shell (includes gdb, valgrind) nix develop + # Run the NixOS VM integration test + nix flake check + +MicroVM Quick Start +^^^^^^^^^^^^^^^^^^^ + +Build and run a MicroVM for local testing (all variants have password SSH enabled):: + + # Build evaluation VM (includes pmie testing, node_exporter, below) + nix build .#pcp-microvm-eval -o result-eval + + # Or build with pre-compiled BPF PMDA for eBPF metrics + nix build .#pcp-microvm-bpf -o result-bpf + + # Start the VM (runs in foreground) + ./result-eval/bin/microvm-run + + # In another terminal, manage the VM: + nix run .#pcp-vm-ssh # SSH into VM as root (password: pcp) + nix run .#pcp-vm-check # List running PCP MicroVMs + nix run .#pcp-vm-stop # Stop all running PCP MicroVMs + +See :ref:`nixos-microvms` for full MicroVM documentation. + +OCI Container Quick Start +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Build and run PCP in a container:: + + # Build the container image + nix build .#pcp-container + + # Load into Docker + docker load < result + + # Run pmcd (exposes ports 44321, 44322) + docker run -d -p 44321:44321 -p 44322:44322 --name pcp pcp:latest + + # Query metrics from host + pminfo -h localhost kernel.all.load + +Flake Outputs Reference +^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :header-rows: 1 + :widths: 35 65 + + * - Output + - Description + * - **Packages** + - + * - ``pcp`` (default) + - PCP package built from source + * - **Base VMs** (pmcd, pmlogger, pmproxy) + - + * - ``pcp-microvm`` + - Base MicroVM (user networking) + * - ``pcp-microvm-tap`` + - Base MicroVM with TAP networking + * - **Evaluation VMs** (+ node_exporter, below, pmie testing) + - + * - ``pcp-microvm-eval`` + - Evaluation VM (user networking) + * - ``pcp-microvm-eval-tap`` + - Evaluation VM with TAP networking + * - **Grafana VMs** (+ Prometheus + Grafana dashboards) + - + * - ``pcp-microvm-grafana`` + - Grafana VM (user networking, localhost:13000) + * - ``pcp-microvm-grafana-tap`` + - Grafana VM with TAP networking (10.177.0.20:3000) + * - **eBPF VMs** (kernel tracing) + - + * - ``pcp-microvm-bpf`` + - Pre-compiled BPF PMDA (fast startup, 1GB) + * - **Other** + - + * - ``pcp-container`` + - OCI container image for Docker/Podman + * - **Apps** + - + * - ``pcp-vm-ssh`` + - SSH into running MicroVM + * - ``pcp-vm-stop`` + - Stop all running MicroVMs + * - ``pcp-vm-check`` + - List running MicroVMs + * - ``pcp-network-setup`` + - Create TAP bridge and NAT rules + * - ``pcp-network-teardown`` + - Remove TAP bridge and NAT rules + * - ``pcp-check-host`` + - Verify host environment for TAP networking + * - ``pcp-test-base-user`` + - Test base VM (user networking) + * - ``pcp-test-base-tap`` + - Test base VM (TAP networking) + * - ``pcp-test-eval-user`` + - Test eval VM (user networking) + * - ``pcp-test-eval-tap`` + - Test eval VM (TAP networking) + * - **Lifecycle Testing** + - + * - ``pcp-lifecycle-full-test-`` + - Full lifecycle test for variant (base, eval, grafana, grafana-tap, bpf) + * - ``pcp-lifecycle-test-all`` + - Test all variants sequentially (TAP skipped if network not set up) + * - ``pcp-lifecycle-status-`` + - Check VM status (process, consoles, SSH) + * - ``pcp-lifecycle-force-kill-`` + - Force kill a stuck VM + * - **Testing - Run All** + - 
+ * - ``pcp-test-all`` + - Run all tests sequentially (container + k8s + microvm) + * - **Container Testing** + - + * - ``pcp-container-test`` + - Full container lifecycle test (build, run, verify, cleanup) + * - ``pcp-container-test-quick`` + - Quick test (skip build, assume image loaded) + * - **Kubernetes Testing** + - + * - ``pcp-k8s-test`` + - Full K8s DaemonSet lifecycle test (requires minikube) + * - ``pcp-k8s-test-quick`` + - Quick test (skip build, assume image loaded) + * - ``pcp-minikube-start`` + - Start minikube with optimal settings for PCP testing + * - **Checks** + - + * - ``vm-test`` + - NixOS VM integration test + +All MicroVM variants have ``debugMode=true`` by default, enabling password SSH +(root:pcp) for interactive testing convenience. + Feature Flags ------------- @@ -89,7 +334,7 @@ The following features are enabled by default: - **Transparent archive decompression**: via xz/lzma - **Systemd integration**: unit files, tmpfiles, sysusers - **Performance events**: via libpfm4 -- **BPF/BCC PMDAs**: kernel tracing via eBPF +- **BPF PMDA**: kernel tracing via pre-compiled CO-RE eBPF - **SNMP PMDA**: network device monitoring - **Device mapper metrics**: LVM thin/cache via lvm2 - **RRDtool Perl bindings**: for RRD-based PMDAs @@ -284,6 +529,43 @@ hardening flags that the BPF backend doesn't support:: This approach was adapted from the ``xdp-tools`` package in nixpkgs, which faces similar BPF compilation challenges. +.. 
_bpf-vs-bcc: + +BPF vs BCC PMDAs +^^^^^^^^^^^^^^^^ + +PCP provides two approaches for eBPF-based metrics collection: + +**pmdabpf (Pre-compiled CO-RE eBPF)** - Recommended: + +- Uses pre-compiled CO-RE (Compile Once, Run Everywhere) eBPF bytecode +- Fast startup: No runtime compilation needed +- Low memory: ~512MB VM (no clang/LLVM required) +- Requires BTF-enabled kernel (``CONFIG_DEBUG_INFO_BTF=y``) +- Available modules: biolatency, runqlat, netatop, oomkill, execsnoop, exitsnoop, + opensnoop, vfsstat, tcpconnlat, tcpconnect, biosnoop, fsslower, statsnoop, + mountsnoop, bashreadline + +The BPF PMDA provides metrics like ``bpf.runq.latency`` (scheduler run queue +latency histogram) and ``bpf.disk.all.latency`` (block I/O latency histogram). +Additional modules can be enabled in ``bpf.conf``. + +**pmdabcc (Runtime BCC eBPF)** - **DEPRECATED**: + +.. warning:: + + BCC PMDA is deprecated upstream and will be removed in a future PCP release. + Use pmdabpf instead. The ``pcp-microvm-bcc`` variant has been removed. + + From pmdabcc(1): "This PMDA is now deprecated and will be removed in a + future release, transition to using its replacement pmdabpf(1) instead." + +**Enabling in MicroVM:** + +For pmdabpf:: + + nix build .#pcp-microvm-bpf + Nix Sandbox /var/tmp Fix ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -534,8 +816,1012 @@ The VM is configured with: - Runtime directories (``/var/lib/pcp``, ``/var/log/pcp``, ``/run/pcp``) created via systemd tmpfiles -This test configuration serves as a reference for how to deploy PCP on NixOS -until a proper NixOS module is created. +MicroVM Tests +^^^^^^^^^^^^^ + +Interactive MicroVM tests provide more comprehensive validation than the +automated VM test. These tests verify services, metrics, HTTP endpoints, +journal health, and pmie functionality. 
+ +**Running MicroVM tests**:: + + # Start a VM first + nix build .#pcp-microvm-eval -o result-eval + ./result-eval/bin/microvm-run & + + # Run the test suite (waits for VM to boot) + nix run .#pcp-test-eval-user + + # Clean up + nix run .#pcp-vm-stop + +**Test phases** (from ``nix/tests/microvm-test.nix``): + +1. **SSH connectivity** - Wait for VM to accept SSH connections +2. **Service status** - Verify pmcd, pmproxy, node_exporter are active +3. **PCP metrics** - Query kernel.all.load, cpu.user, mem.physmem +4. **HTTP endpoints** - Test pmproxy REST API and node_exporter +5. **Journal health** - Check for errors in service journals +6. **TUI smoke test** - Run ``pcp dstat`` briefly +7. **Metric parity** - Compare PCP vs node_exporter values (eval VMs) +8. **pmie testing** - Verify alerts.log has CPU elevation entries (eval VMs) + +Test results are saved to ``test-results//results.txt``. + +.. seealso:: + + For fine-grained, phase-by-phase MicroVM testing with individual control over + build, boot, console, service, and metric verification phases, see the + :ref:`lifecycle-testing` section under NixOS MicroVMs. + +Running All Tests +^^^^^^^^^^^^^^^^^ + +The ``pcp-test-all`` command runs all test suites sequentially:: + + nix run .#pcp-test-all + +This executes three test suites in order: + +1. **Container test** - Builds and tests PCP in Docker/Podman +2. **Kubernetes test** - Deploys PCP DaemonSet to minikube +3. **MicroVM tests** - Tests all MicroVM variants (skips TAP) + +**Prerequisites:** + +- Docker or Podman installed and running +- Minikube available (will be started automatically if not running) + +**Individual test suites:** + +.. 
list-table:: + :header-rows: 1 + :widths: 40 60 + + * - Command + - Description + * - ``nix run .#pcp-container-test`` + - Container lifecycle only + * - ``nix run .#pcp-k8s-test`` + - Kubernetes DaemonSet only + * - ``nix run .#pcp-test-all-microvms`` + - All MicroVM variants only + +**Quick tests** (skip build phase, faster iteration):: + + nix run .#pcp-container-test-quick + nix run .#pcp-k8s-test-quick + +.. _nixos-microvms: + +NixOS MicroVMs +-------------- + +The Nix packaging includes MicroVM configurations for development, testing, and +evaluation. These lightweight virtual machines provide isolated PCP environments +that can be built and run entirely from the flake. + +MicroVM variants are built using `microvm.nix `_, +providing fast boot times (~2s) and efficient resource sharing via 9p filesystem. + +MicroVM Variants (7 total) +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All variants have ``debugMode=true`` by default, enabling password SSH (root:pcp) +for interactive testing convenience. + +.. list-table:: + :header-rows: 1 + :widths: 30 50 20 + + * - Variant + - Purpose + - Memory + * - ``pcp-microvm`` + - Base PCP (pmcd, pmlogger, pmproxy) + - 1GB + * - ``pcp-microvm-tap`` + - Base with TAP networking + - 1GB + * - ``pcp-microvm-eval`` + - Eval tools (node_exporter, below, pmie-test) + - 1GB + * - ``pcp-microvm-eval-tap`` + - Eval with TAP + - 1GB + * - ``pcp-microvm-grafana`` + - Full demo (Grafana + Prometheus + eval tools) + - 1GB + * - ``pcp-microvm-grafana-tap`` + - Full demo with TAP + - 1GB + * - ``pcp-microvm-bpf`` + - Pre-compiled eBPF (CO-RE, fast startup) + - 1GB + +**Choosing a variant:** + +- Use ``pcp-microvm`` for basic PCP testing with archive logging +- Use ``pcp-microvm-eval`` for comparing PCP vs node_exporter +- Use ``pcp-microvm-grafana`` for visual demos with dashboards +- Use ``pcp-microvm-bpf`` for eBPF metrics (see :ref:`bpf-vs-bcc`) + +.. note:: + + The ``pcp-microvm-bcc`` variant has been removed. 
BCC PMDA is deprecated + upstream - use pmdabpf instead. + +Custom Variants +^^^^^^^^^^^^^^^ + +The ``mkMicroVM`` function in ``flake.nix`` accepts these parameters: + +.. list-table:: + :header-rows: 1 + :widths: 25 15 60 + + * - Parameter + - Default + - Description + * - ``networking`` + - ``"user"`` + - ``"user"`` (port forwarding) or ``"tap"`` (direct network) + * - ``debugMode`` + - ``true`` + - Enable password SSH (root:pcp) + * - ``enablePmlogger`` + - ``true`` + - Enable archive logging + * - ``enableEvalTools`` + - ``false`` + - Enable node_exporter + below + * - ``enablePmieTest`` + - ``false`` + - Enable stress-ng workload + pmie rules + * - ``enableGrafana`` + - ``false`` + - Enable Grafana + Prometheus + * - ``enableBpf`` + - ``false`` + - Enable pre-compiled BPF PMDA (CO-RE eBPF) + +Example custom variant in ``flake.nix``:: + + pcp-microvm-custom = mkMicroVM { + networking = "tap"; + enableEvalTools = true; + enableGrafana = true; + enableBpf = true; + }; + +VM Management Scripts +^^^^^^^^^^^^^^^^^^^^^ + +Helper scripts are provided to manage running MicroVMs. These scripts work +with **all MicroVM variants** - they detect VMs by hostname pattern. + +.. list-table:: + :header-rows: 1 + :widths: 40 60 + + * - Command + - Description + * - ``nix run .#pcp-vm-check`` + - List all running PCP MicroVMs and show count + * - ``nix run .#pcp-vm-ssh`` + - SSH into VM as root (password: pcp) + * - ``nix run .#pcp-vm-ssh -- --variant=eval`` + - SSH to a specific variant (base, eval, grafana, bpf) + * - ``nix run .#pcp-vm-stop`` + - Stop all running PCP MicroVMs (SIGTERM, then SIGKILL) + * - ``nix run .#pcp-test-all-microvms`` + - Run comprehensive tests on all variants + +The scripts detect VMs by matching the hostname pattern in the process list: +``pcp-vm``, ``pcp-eval-vm``, ``pcp-grafana-vm``, ``pcp-bpf-vm``. + +**Port allocation** - Each variant uses unique ports to avoid conflicts: + +.. 
list-table:: + :header-rows: 1 + :widths: 20 20 20 20 20 + + * - Variant + - SSH + - pmcd + - pmproxy + - Offset + * - base + - 22022 + - 44321 + - 44322 + - 0 + * - eval + - 22122 + - 44421 + - 44422 + - +100 + * - grafana + - 22222 + - 44521 + - 44522 + - +200 + * - bpf + - 22322 + - 44621 + - 44622 + - +300 + +Serial Console Debugging +^^^^^^^^^^^^^^^^^^^^^^^^ + +Each MicroVM exposes two serial consoles via TCP for debugging early boot issues +and network problems. These are invaluable when SSH is not available. + +**Console types:** + +- **ttyS0 (serial)**: Traditional UART console, slow but available immediately + at boot. Use for debugging kernel boot, initrd, and early systemd issues. +- **hvc0 (virtio)**: High-speed virtio console, available after virtio drivers + load. Faster for interactive use once the system is booted. + +**Serial console port allocation:** + +.. list-table:: + :header-rows: 1 + :widths: 20 25 25 30 + + * - Variant + - Serial (ttyS0) + - Virtio (hvc0) + - Description + * - base + - 24500 + - 24501 + - Base PCP VM + * - eval + - 24510 + - 24511 + - Evaluation VM + * - grafana + - 24520 + - 24521 + - Grafana VM + * - bpf + - 24530 + - 24531 + - BPF PMDA VM + +**Connecting to serial consoles:** + +Basic connection using netcat:: + + # Connect to serial console (slow, early boot) + nc localhost 24500 + + # Connect to virtio console (fast, after boot) + nc localhost 24501 + +For better terminal handling (proper line editing, raw mode):: + + # Using socat for raw terminal mode + socat -,rawer tcp:localhost:24500 + + # With escape sequence support (Ctrl-] to exit) + socat -,rawer,escape=0x1d tcp:localhost:24500 + +**Debugging scenarios:** + +1. **Kernel boot issues** - Connect to serial (ttyS0) before starting the VM + to capture kernel boot messages from the very beginning:: + + # In terminal 1: connect to serial first + nc localhost 24500 + + # In terminal 2: start the VM + ./result/bin/microvm-run + +2. 
**Network problems** - When SSH isn't working, use serial to investigate:: + + # Connect via serial + nc localhost 24500 + + # Check network status inside VM + ip addr show + systemctl status network-online.target + journalctl -u dhcpcd + +3. **Service failures** - Use serial to check why pmcd or other services aren't + starting:: + + nc localhost 24500 + + # Inside VM + systemctl status pmcd + journalctl -u pmcd -e + +**Multiple VMs:** + +All variants can run simultaneously without port conflicts. Each variant's +serial ports are offset by 10 from the base port (24500):: + + # Terminal 1: Base VM serial + nc localhost 24500 + + # Terminal 2: Eval VM serial + nc localhost 24510 + + # Terminal 3: Grafana VM serial + nc localhost 24520 + +**Example workflow**:: + + # Build and start any variant + nix build .#pcp-microvm-grafana -o result + ./result/bin/microvm-run & + + # Check it's running + nix run .#pcp-vm-check + + # SSH into the VM + nix run .#pcp-vm-ssh + + # Inside the VM, explore PCP + pminfo -f kernel.all.load + systemctl status pmcd + + # Exit SSH and stop the VM + exit + nix run .#pcp-vm-stop + +The SSH script uses user-mode networking port forwarding. Use ``--variant=`` to +connect to the correct port for each variant. For TAP networking, SSH directly +to the VM IP (``ssh root@10.177.0.20``). 
+ +Comprehensive Test Runner +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``pcp-test-all-microvms`` app builds and tests all MicroVM variants sequentially:: + + # Test all variants (skip TAP if network not set up) + nix run .#pcp-test-all-microvms -- --skip-tap + + # Test only a specific variant + nix run .#pcp-test-all-microvms -- --only=grafana + + # Show help + nix run .#pcp-test-all-microvms -- --help + +The test runner: + +- **Polling-based builds** - Polls every 10s for build completion (supports slow machines) +- **Sequential execution** - Builds one variant at a time to leverage Nix caching +- **Variant-specific checks** - Tests appropriate services for each variant +- **Continue on failure** - Reports all results at end instead of stopping on first failure + +**Checks per variant:** + +- **base**: pmcd, pmproxy, pmlogger +- **eval**: pmcd, pmproxy, node_exporter +- **grafana**: pmcd, pmproxy, node_exporter, Grafana HTTP, Prometheus HTTP, BPF metrics +- **grafana-tap**: Same as grafana, using TAP networking (direct IP: 10.177.0.20) +- **bpf**: pmcd, pmproxy, node_exporter, BPF metrics (runq.latency, disk.all.latency) + +.. _lifecycle-testing: + +Lifecycle Testing Framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The lifecycle testing framework provides fine-grained control over MicroVM testing, +with individual phases that can be run separately for debugging. This is useful for +diagnosing boot issues, service failures, or metric collection problems. 
+ +**Full lifecycle test for a variant:** + +:: + + # Test a specific variant through all phases + nix run .#pcp-lifecycle-full-test-base + nix run .#pcp-lifecycle-full-test-eval + nix run .#pcp-lifecycle-full-test-grafana + nix run .#pcp-lifecycle-full-test-bpf + + # Test with TAP networking (requires host network setup first) + nix run .#pcp-network-setup # Create TAP bridge + nix run .#pcp-lifecycle-full-test-grafana-tap # Test grafana with direct IP + nix run .#pcp-network-teardown # Clean up when done + + # Test all variants sequentially + nix run .#pcp-lifecycle-test-all + + # Test only specific variant + nix run .#pcp-lifecycle-test-all -- --only=grafana + nix run .#pcp-lifecycle-test-all -- --only=grafana-tap + +**Lifecycle phases:** + +.. list-table:: + :header-rows: 1 + :widths: 10 25 65 + + * - Phase + - Name + - Description + * - 0 + - Build VM + - Build the MicroVM derivation via ``nix build`` + * - 1 + - Start VM + - Start QEMU process and verify it's running + * - 2 + - Serial Console + - Verify serial console (ttyS0) is responsive + * - 2b + - Virtio Console + - Verify virtio console (hvc0) is responsive + * - 3 + - Verify Services + - Check PCP and related services are active + * - 4 + - Verify Metrics + - Check PCP metrics are available via pminfo + * - 5 + - Shutdown + - Send shutdown command via console + * - 6 + - Wait Exit + - Wait for VM process to exit cleanly + +**Utility scripts:** + +:: + + # Check VM status (process, consoles, SSH) + nix run .#pcp-lifecycle-status-base + nix run .#pcp-lifecycle-status-bpf + + # Force kill a stuck VM + nix run .#pcp-lifecycle-force-kill-base + +**Example output:** + +:: + + ======================================== + PCP MicroVM Full Lifecycle Test (base) + ======================================== + + Description: Base PCP (pmcd, pmlogger, pmproxy) + Hostname: pcp-vm + SSH Port: 22022 + + --- Phase 0: Build VM (timeout: 600s) --- + PASS: VM built (4066ms) + + --- Phase 1: Start VM (timeout: 5s) --- + PASS: 
VM process running (PID: 12345) (54ms) + + --- Phase 2: Check Serial Console (timeout: 30s) --- + PASS: Serial console available (port 24500) (6ms) + + --- Phase 2b: Check Virtio Console (timeout: 45s) --- + PASS: Virtio console available (port 24501) (6ms) + + --- Phase 3: Verify PCP Services (timeout: 60s) --- + SSH connected (2341ms) + PASS: pmcd active (42ms) + PASS: pmproxy active (38ms) + PASS: pmlogger active (41ms) + + --- Phase 4: Verify PCP Metrics (timeout: 30s) --- + PASS: kernel.all.load (52ms) + PASS: kernel.all.cpu.user (48ms) + PASS: mem.physmem (45ms) + + --- Phase 5: Shutdown (timeout: 30s) --- + PASS: Shutdown command sent (123ms) + + --- Phase 6: Wait for Exit (timeout: 60s) --- + PASS: VM exited cleanly (5234ms) + + Timing Summary + ───────────────────────────────────── + Phase Time (ms) + ───────────────────────────────────── + build 4066 + start 54 + serial 6 + virtio 6 + services 2462 + metrics 145 + shutdown 123 + exit 5234 + ───────────────────────────────────── + TOTAL 12096 + ───────────────────────────────────── + + ======================================== + Result: ALL PHASES PASSED + Total time: 12.0s + ======================================== + +**Full lifecycle test output (all variants):** + +Running ``nix run .#pcp-lifecycle-test-all`` tests all 4 variants sequentially +and produces a summary report at the end:: + + ======================================== + PCP MicroVM Full Lifecycle Test (base) + ======================================== + + Description: Base PCP (pmcd, pmlogger, pmproxy) + Hostname: pcp-vm + SSH Port: 22022 + + --- Phase 0: Build VM (timeout: 600s) --- + PASS: VM built (2669ms) + + --- Phase 1: Start VM (timeout: 5s) --- + PASS: VM process running (PID: 2929155) (72ms) + + --- Phase 2: Check Serial Console (timeout: 30s) --- + PASS: Serial console available (port 24500) (8ms) + + --- Phase 2b: Check Virtio Console (timeout: 45s) --- + PASS: Virtio console available (port 24501) (7ms) + + --- Phase 3: Verify PCP 
Services (timeout: 60s) --- + SSH connected (19063ms) + PASS: pmcd active (160ms) + PASS: pmproxy active (141ms) + PASS: pmlogger active (140ms) + + --- Phase 4: Verify PCP Metrics (timeout: 30s) --- + PASS: kernel.all.load (235ms) + PASS: kernel.all.cpu.user (129ms) + PASS: mem.physmem (127ms) + + --- Phase 5: Shutdown (timeout: 30s) --- + PASS: Shutdown command sent (142ms) + + --- Phase 6: Wait for Exit (timeout: 60s) --- + PASS: VM exited cleanly (13284ms) + + Timing Summary + ───────────────────────────────────── + Phase Time (ms) + ───────────────────────────────────── + build 2669 + start 72 + serial 8 + virtio 7 + services 19504 + metrics 491 + shutdown 142 + exit 13284 + ───────────────────────────────────── + TOTAL 36177 + ───────────────────────────────────── + + ======================================== + Result: ALL PHASES PASSED + Total time: 36.1s + ======================================== + + ... (eval, grafana, bpf variants follow with similar output) ... + + ╔══════════════════════════════════════════════════════════════════════════════╗ + ║ LIFECYCLE TEST SUMMARY ║ + ╠══════════════════════════════════════════════════════════════════════════════╣ + ║ Variant Result Build Start Serial Virtio Services Exit ║ + ╠══════════════════════════════════════════════════════════════════════════════╣ + ║ base PASS 2669ms 72ms 8ms 7ms 19504ms 13284ms + ║ eval PASS 2588ms 63ms 7ms 7ms 26323ms 3805ms + ║ grafana PASS 2687ms 60ms 9ms 7ms 21594ms 4070ms + ║ grafana-tap PASS 6897ms 139ms 9ms 7ms 22630ms 3324ms + ║ bpf PASS 2617ms 65ms 7ms 8ms 21037ms 3582ms + ╠══════════════════════════════════════════════════════════════════════════════╣ + ║ TOTAL: 5 passed, 0 failed ║ + ║ Total time: 3m 5s ║ + ╚══════════════════════════════════════════════════════════════════════════════╝ + +**Timing observations:** + +- **SSH connection (19-26s)**: This is VM boot time - the test waits for SSH to + become available, which requires QEMU to start, NixOS to boot (kernel, initrd, + 
systemd), network to come up, and SSH daemon to accept connections. + +- **Serial/virtio console checks (7-9ms)**: Very fast because they only verify + the TCP socket is listening, not that boot is complete. + +- **Base variant longer exit (13s vs 3-4s)**: The base variant has pmlogger + enabled, which needs to flush archive buffers, write the final volume, and + clean up metadata files before shutdown completes. + +- **Grafana longer services**: Grafana takes additional time to initialize + compared to other services. + +- **TAP variant (grafana-tap)**: Uses direct network access (10.177.0.20) instead + of port forwarding. Build time is slightly longer on first run due to different + network configuration. Verifies BPF metrics (bpf.runq.latency, bpf.disk.all.latency) + in addition to Grafana/Prometheus services. + +**Variant-specific timeouts:** + +Grafana variants have longer service timeouts (90s) as Grafana takes +additional time to initialize. + +TAP Networking +^^^^^^^^^^^^^^ + +By default, MicroVMs use QEMU user-mode networking with port forwarding. For +direct network access (no port forwarding), use TAP networking: + +:: + + # Verify host environment + nix run .#pcp-check-host + + # Create bridge, TAP device, and NAT rules (requires sudo) + sudo nix run .#pcp-network-setup + + # Build and run TAP-enabled VM + nix build .#pcp-microvm-eval-tap + ./result/bin/microvm-run + + # VM is now accessible at 10.177.0.20 + ssh root@10.177.0.20 + pminfo -h 10.177.0.20 kernel.all.load + + # Cleanup when done + nix run .#pcp-vm-stop + sudo nix run .#pcp-network-teardown + +TAP networking is useful for testing network-facing scenarios or when port +forwarding is insufficient. + +Grafana MicroVM Example +^^^^^^^^^^^^^^^^^^^^^^^ + +The Grafana variant includes Prometheus and pre-configured dashboards. With TAP +networking, you can access the Grafana web interface directly:: + + # 1. Verify host environment + nix run .#pcp-check-host + + # 2. 
Setup TAP networking (requires sudo) + sudo nix run .#pcp-network-setup + + # 3. Build and run Grafana VM + nix build .#pcp-microvm-grafana-tap -o result-grafana + ./result-grafana/bin/microvm-run & + + # 4. Access services (VM IP: 10.177.0.20) + # Grafana: http://10.177.0.20:3000 (admin/admin) + # Prometheus: http://10.177.0.20:9090 + # pmproxy: http://10.177.0.20:44322 + # SSH: ssh root@10.177.0.20 (password: pcp) + + # 5. Cleanup + nix run .#pcp-vm-stop + sudo nix run .#pcp-network-teardown + +Services in the MicroVM +^^^^^^^^^^^^^^^^^^^^^^^ + +The evaluation MicroVM runs several services for comprehensive PCP testing: + +**PCP Services:** + +- **pmcd** - Performance Metrics Collection Daemon (port 44321) +- **pmproxy** - REST API gateway (port 44322) +- **pmie-test** - Dedicated pmie instance running custom test rules +- **stress-ng-test** - Synthetic CPU workload for pmie testing + +**Comparison Tools:** + +- **node_exporter** - Prometheus metrics exporter (port 9100) +- **below** - Meta's time-traveling resource monitor + +Verify services are running:: + + # SSH into the VM + nix run .#pcp-vm-ssh + + # Check PCP services + systemctl status pmcd pmproxy pmie-test stress-ng-test + + # Query PCP metrics + pminfo -f kernel.all.load + + # Compare with node_exporter + curl -s localhost:9100/metrics | grep node_load + + # Use below for time-traveling analysis + below live + +pmie Testing +^^^^^^^^^^^^ + +The evaluation MicroVM includes automated pmie (Performance Metrics Inference +Engine) testing using a synthetic workload. This verifies that pmie can: + +1. Monitor live metrics from pmcd +2. Evaluate rules against those metrics +3. 
Trigger actions when conditions are met + +**Architecture:** + +:: + + ┌─────────────────────────────────────────────────────────────────┐ + │ NixOS MicroVM │ + │ │ + │ ┌─────────────────┐ ┌─────────────────┐ │ + │ │ stress-ng-test │───▶│ pmcd │◀───┐ │ + │ │ (systemd) │ │ (metrics) │ │ │ + │ │ │ └─────────────────┘ │ │ + │ │ - 20s stress │ │ │ │ + │ │ - 10s idle │ ▼ │ │ + │ │ - loop forever │ ┌─────────────────┐ │ │ + │ └─────────────────┘ │ pmie-test │────┘ │ + │ │ │ │ + │ │ Rule: detect │ │ + │ │ CPU elevation │ │ + │ │ │ │ + │ │ Action: log to │ │ + │ │ alerts.log │ │ + │ └─────────────────┘ │ + └─────────────────────────────────────────────────────────────────┘ + +**pmie Test Rules:** + +The ``pmie-test`` service monitors for CPU spikes caused by stress-ng: + +- **cpu_elevated** - Detects when ``kernel.all.cpu.nice`` exceeds 10% of total + CPU (stress-ng runs at Nice=19, so its CPU time goes to the ``nice`` metric) +- **heartbeat** - Touches a file every 5 seconds to confirm pmie is evaluating + +**Verifying pmie Testing:** + +:: + + # SSH into the VM + nix run .#pcp-vm-ssh + + # Check pmie alerts (generated during stress cycles) + cat /var/log/pcp/pmie/alerts.log + + # Expected output during stress: + # 2026-02-11T22:27:43+00:00 [ALERT] CPU elevated + # 2026-02-11T22:27:48+00:00 [ALERT] CPU elevated + + # Check heartbeat is being updated + ls -la /var/log/pcp/pmie/heartbeat + + # View pmie-test service status + systemctl status pmie-test + +**Key Implementation Details:** + +- stress-ng runs at Nice=19 with CPUQuota=50% to avoid overwhelming the VM +- CPU time appears in ``kernel.all.cpu.nice`` (not ``user``) due to Nice level +- The threshold (10%) accounts for multi-CPU systems using ``hinv.ncpu`` +- pmie evaluates rules every 5 seconds (``delta = 5 sec``) + +Fedora vs NixOS Comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The NixOS MicroVM deployment differs from traditional Fedora installations in +several key ways: + +**Path Differences:** + +.. 
list-table:: + :header-rows: 1 + :widths: 25 35 40 + + * - Item + - Fedora + - NixOS MicroVM + * - PCP config + - ``/etc/pcp.conf`` + - ``/nix/store/<hash>-pcp/share/pcp/etc/pcp.conf`` + * - Binaries + - ``/usr/bin/pminfo`` + - ``/nix/store/<hash>-pcp/bin/pminfo`` + * - Libraries + - ``/usr/lib64/libpcp.so`` + - ``/nix/store/<hash>-pcp/lib/libpcp.so`` + * - PMDAs + - ``/var/lib/pcp/pmdas/`` + - ``/nix/store/<hash>-pcp/var/lib/pcp/pmdas/`` + +**FHS Compatibility:** + +To ease the transition for users familiar with Fedora/RHEL, the NixOS module +creates symlinks at standard FHS paths: + +- ``/etc/pcp.conf`` → Nix store pcp.conf +- ``/etc/pcp.env`` → Nix store pcp.env (can be sourced) +- ``/etc/pcp/`` → Nix store config directory + +This allows Fedora-style commands like ``source /etc/pcp.env`` to work. + +**Service Differences:** + +.. list-table:: + :header-rows: 1 + :widths: 25 35 40 + + * - Aspect + - Fedora + - NixOS + * - Service type + - ``Type=notify`` + - ``Type=forking`` + * - Directory management + - Manual or package scripts + - systemd ``RuntimeDirectory``, ``StateDirectory`` + * - Security + - Default + - Hardening options (``NoNewPrivileges``, ``ProtectHome``, etc.) + * - Environment + - Read from ``/etc/pcp.conf`` + - Set via systemd ``Environment=`` + +**Patches Required for NixOS:** + +1. **gnumakefile-nix-fixes.patch** - Build system fixes for Nix paths +2. **python-libpcp-nix.patch** - Python ctypes library loading via LD_LIBRARY_PATH +3. **python-pmapi-no-reconnect.patch** - Fix IPC table corruption with derived metrics +4. **shell-portable-pwd.patch** - Use portable pwd detection instead of ``/bin/pwd`` + +**Metric Parity:** + +Both deployments provide equivalent functionality: + +- ~2700 metrics available +- pmcd and pmproxy services running +- Python tools (``pcp dstat``, ``pmrep``) working correctly +- Derived metrics configured + +OCI Container +------------- + +The ``pcp-container`` output produces an OCI-compatible container image for +Docker or Podman deployment. 
+ +Building and Running +^^^^^^^^^^^^^^^^^^^^ + +:: + + # Build the container image + nix build .#pcp-container + + # Load into Docker + docker load < result + + # Run pmcd in foreground + docker run -d -p 44321:44321 -p 44322:44322 --name pcp pcp:latest + + # Query metrics from host + pminfo -h localhost kernel.all.load + + # Interactive shell + docker exec -it pcp /bin/bash + +Container Security +^^^^^^^^^^^^^^^^^^ + +The container follows security best practices: + +- **Non-root execution**: Runs as ``pcp`` user (UID 990), not root +- **Minimal image**: Contains only PCP and essential dependencies +- **Layered build**: Uses ``buildLayeredImage`` for efficient Docker layer caching +- **Explicit ports**: Only exposes pmcd (44321) and pmproxy (44322) + +Container Structure +^^^^^^^^^^^^^^^^^^^ + +:: + + / # Image root + ├── bin/ # PCP binaries + ├── lib/ # Libraries + ├── var/lib/pcp/ # PCP state (owned by pcp:pcp) + ├── var/log/pcp/ # Logs (owned by pcp:pcp) + ├── run/pcp/ # Runtime (owned by pcp:pcp) + └── etc/ + ├── passwd # Contains pcp user + └── group # Contains pcp group + +The container is configured with: + +- ``PCP_CONF`` pointing to the bundled pcp.conf +- Working directory set to ``/var/lib/pcp`` +- Default command: ``pmcd -f`` (foreground mode) + +Container Testing +^^^^^^^^^^^^^^^^^ + +The container test verifies the full lifecycle:: + + nix run .#pcp-container-test + +**Test phases:** + +1. Build image (``nix build .#pcp-container``) +2. Load into Docker/Podman +3. Start container with port mappings +4. Verify pmcd process running +5. Verify port 44321 listening +6. Verify kernel metrics (kernel.all.load, cpu.user, mem.physmem) +7. Verify BPF metrics (if available) +8. Graceful shutdown +9. 
Cleanup + +**Quick test** (assumes image already loaded):: + + nix run .#pcp-container-test-quick + +Kubernetes Testing +^^^^^^^^^^^^^^^^^^ + +The Kubernetes test deploys PCP as a privileged DaemonSet in minikube:: + + # Start minikube (if not running) + nix run .#pcp-minikube-start + + # Run the full test + nix run .#pcp-k8s-test + +**Test phases:** + +1. Verify minikube is running +2. Build container image +3. Load image into minikube's Docker +4. Deploy DaemonSet to ``pcp-test`` namespace +5. Wait for pods to be ready (one per node) +6. Verify pmcd process in each pod +7. Verify ports 44321, 44322 listening +8. Verify kernel metrics from each node +9. Verify BPF metrics (if BTF available) +10. Cleanup namespace + +**Quick test** (skip build, assumes image loaded):: + + nix run .#pcp-k8s-test-quick + +**Minikube setup helper:** + +The ``pcp-minikube-start`` app configures minikube with optimal settings +for PCP testing (4 CPUs, 8GB RAM, docker driver):: + + nix run .#pcp-minikube-start + +Development Shell +----------------- + +The development shell provides PCP build dependencies plus debugging tools:: + + # Enter the shell + nix develop + + # Available tools: + # - All PCP build dependencies (autoconf, bison, flex, etc.) + # - gdb for debugging + # - valgrind for memory analysis (Linux only) + # - lldb for debugging (macOS only) + + # Build from source manually + ./configure --prefix=$PWD/install + make -j$(nproc) + make install + +The shell displays the PCP icon on entry (via jp2a) and provides hints for +getting started. Future Improvements ------------------- @@ -573,29 +1859,29 @@ PMDA Extensibility Consider using ``lib.makeSearchPath`` to allow users to add custom PMDAs without rebuilding the package, via ``PCP_PMDA_PATH`` or similar mechanism. -NixOS Module -^^^^^^^^^^^^ +NixOS Module Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^ -A NixOS module (``services.pcp``) would provide: +The current NixOS module (``nix/nixos-module.nix``) provides basic functionality. 
+Future enhancements could include: -- Systemd service management for ``pmcd``, ``pmlogger``, ``pmproxy`` -- Declarative PMDA configuration -- Proper user/group creation -- Log rotation and retention policies -- Integration with ``services.prometheus`` for exporters +- Declarative PMDA configuration (enable/disable specific PMDAs) +- Log rotation and retention policies via systemd +- Integration with ``services.prometheus`` for metric export +- Custom archive retention settings for pmlogger -Integrate NixOS Test -^^^^^^^^^^^^^^^^^^^^ +Integrate into Nixpkgs +^^^^^^^^^^^^^^^^^^^^^^ -The VM test currently exists as a standalone file. To fully integrate into -nixpkgs: +To fully integrate into the nixpkgs repository: -1. Add to ``nixos/tests/all-tests.nix`` -2. Reference via ``passthru.tests = { inherit (nixosTests) pcp; };`` in the - package +1. Submit the package to ``pkgs/by-name/pc/pcp/`` +2. Add NixOS module to ``nixos/modules/services/monitoring/`` +3. Add VM test to ``nixos/tests/all-tests.nix`` +4. Reference via ``passthru.tests = { inherit (nixosTests) pcp; };`` -This would allow running ``nix-build -A nixosTests.pcp`` and ensure the test -runs automatically in nixpkgs CI. +This would enable ``nix-build -A nixosTests.pcp`` and ensure tests run +automatically in nixpkgs CI. 
Summary ------- @@ -604,10 +1890,17 @@ The Nix packaging of PCP provides: ✅ Reproducible builds from source ✅ All core PMDAs and tools -✅ BPF/BCC kernel tracing support +✅ Pre-compiled BPF PMDA (CO-RE eBPF, fast startup, low memory) ✅ Python and Perl language bindings -✅ Systemd integration +✅ Systemd integration (services, tmpfiles, sysusers) ✅ Split outputs for minimal installations +✅ NixOS module (``services.pcp``) with pmcd, pmlogger, pmie, pmproxy +✅ MicroVMs for development and testing (user-mode and TAP networking) +✅ Lifecycle testing framework for fine-grained VM validation (7 phases) +✅ Serial console debugging (ttyS0 slow/early, hvc0 fast/virtio) +✅ pmie testing with synthetic workload verification +✅ OCI container for Docker/Podman deployment +✅ Development shell with debugging tools Some features require enabling optional flags, and a few PMDAs need packages not yet available in nixpkgs. diff --git a/flake.lock b/flake.lock index d8142e7a81..18301f739f 100644 --- a/flake.lock +++ b/flake.lock @@ -18,6 +18,27 @@ "type": "github" } }, + "microvm": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "spectrum": "spectrum" + }, + "locked": { + "lastModified": 1771802632, + "narHash": "sha256-UAH8YfrHRvXAMeFxUzJ4h4B1loz1K1wiNUNI8KiPqOg=", + "owner": "astro", + "repo": "microvm.nix", + "rev": "b67e3d80df3ec35bdfd3a00ad64ee437ef4fcded", + "type": "github" + }, + "original": { + "owner": "astro", + "repo": "microvm.nix", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1767640445, @@ -37,9 +58,26 @@ "root": { "inputs": { "flake-utils": "flake-utils", + "microvm": "microvm", "nixpkgs": "nixpkgs" } }, + "spectrum": { + "flake": false, + "locked": { + "lastModified": 1759482047, + "narHash": "sha256-H1wiXRQHxxPyMMlP39ce3ROKCwI5/tUn36P8x6dFiiQ=", + "ref": "refs/heads/main", + "rev": "c5d5786d3dc938af0b279c542d1e43bce381b4b9", + "revCount": 996, + "type": "git", + "url": "https://spectrum-os.org/git/spectrum" + }, + "original": { + "type": "git", 
+ "url": "https://spectrum-os.org/git/spectrum" + } + }, "systems": { "locked": { "lastModified": 1681028828, diff --git a/flake.nix b/flake.nix index 7008717377..b44de14ef2 100644 --- a/flake.nix +++ b/flake.nix @@ -1,13 +1,39 @@ # # flake.nix - PCP Nix packaging # +# Quick Start: +# nix build # Build PCP package +# nix develop # Development shell +# nix flake show # List all outputs +# +# Run All Tests: +# nix run .#pcp-test-all # Container + K8s + MicroVM tests +# +# Individual Tests: +# nix run .#pcp-container-test # Docker/Podman lifecycle +# nix run .#pcp-k8s-test # Kubernetes DaemonSet (needs minikube) +# nix run .#pcp-test-all-microvms # All MicroVM variants +# +# MicroVM with TAP networking (for Grafana dashboards): +# nix run .#pcp-check-host # Verify host environment +# sudo nix run .#pcp-network-setup # Create TAP bridge (requires sudo) +# nix build .#pcp-microvm-grafana-tap && ./result/bin/microvm-run +# # Access Grafana at http://10.177.0.20:3000 +# nix run .#pcp-vm-stop # Stop VM +# sudo nix run .#pcp-network-teardown # Cleanup (requires sudo) +# # See also: ./docs/HowTos/nix/index.rst +# { description = "Performance Co-Pilot (PCP) - system performance monitoring toolkit"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-utils.url = "github:numtide/flake-utils"; + microvm = { + url = "github:astro/microvm.nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; outputs = @@ -15,6 +41,7 @@ self, nixpkgs, flake-utils, + microvm, }: flake-utils.lib.eachDefaultSystem ( system: @@ -22,255 +49,122 @@ pkgs = nixpkgs.legacyPackages.${system}; lib = pkgs.lib; - pcp = pkgs.stdenv.mkDerivation rec { - pname = "pcp"; - version = "7.0.5"; - - src = ./.; - - outputs = [ - "out" - "man" - "doc" - ]; - - nativeBuildInputs = with pkgs; [ - autoconf - automake - pkg-config - bison - flex - which - perl - python3 - python3.pkgs.setuptools - ] ++ lib.optionals withBpf [ - llvmPackages.clang - llvmPackages.llvm - ]; - - buildInputs = with pkgs; [ - zlib - 
ncurses - readline - openssl - libuv - cyrus_sasl - inih - xz - python3 - perl - rrdtool - ] ++ lib.optionals pkgs.stdenv.isLinux [ - avahi - lvm2 - ] ++ lib.optionals withSystemd [ - systemd - ] ++ lib.optionals withPfm [ - libpfm - ] ++ lib.optionals withBpf [ - libbpf - bcc - elfutils - ] ++ lib.optionals withSnmp [ - net-snmp - ] ++ lib.optionals withPythonHttp [ - python3.pkgs.requests - ] ++ lib.optionals withPerlHttp [ - perlPackages.JSON - perlPackages.LWPUserAgent - ]; - - withSystemd = pkgs.stdenv.isLinux; - withPfm = pkgs.stdenv.isLinux; - withBpf = pkgs.stdenv.isLinux; - withSnmp = true; - withPythonHttp = true; - withPerlHttp = true; - - configureFlags = lib.concatLists [ - - [ - "--prefix=${placeholder "out"}" - "--sysconfdir=${placeholder "out"}/etc" - "--localstatedir=${placeholder "out"}/var" - "--with-rcdir=${placeholder "out"}/etc/init.d" - "--with-tmpdir=/tmp" - "--with-logdir=${placeholder "out"}/var/log/pcp" - "--with-rundir=/run/pcp" - ] - - [ - "--with-user=pcp" - "--with-group=pcp" - ] - - [ - "--with-make=make" - "--with-tar=tar" - "--with-python3=${lib.getExe pkgs.python3}" - ] - - [ - "--with-perl=yes" - "--with-threads=yes" - ] - - [ - "--with-secure-sockets=yes" - "--with-transparent-decompression=yes" - ] - - (if pkgs.stdenv.isLinux then [ "--with-discovery=yes" ] else [ "--with-discovery=no" ]) - - [ - "--with-dstat-symlink=no" - "--with-pmdamongodb=no" - "--with-pmdamysql=no" - "--with-pmdanutcracker=no" - "--with-qt=no" - "--with-infiniband=no" - "--with-selinux=no" - ] - - (if withSystemd then [ "--with-systemd=yes" ] else [ "--with-systemd=no" ]) - (if withPfm then [ "--with-perfevent=yes" ] else [ "--with-perfevent=no" ]) - ( - if withBpf then - [ - "--with-pmdabcc=yes" - "--with-pmdabpf=yes" - "--with-pmdabpftrace=yes" - ] - else - [ - "--with-pmdabcc=no" - "--with-pmdabpf=no" - "--with-pmdabpftrace=no" - ] - ) - - (if pkgs.stdenv.isLinux then [ "--with-devmapper=yes" ] else [ "--with-devmapper=no" ]) - - (if withSnmp then [ 
"--with-pmdasnmp=yes" ] else [ "--with-pmdasnmp=no" ]) - ]; - - - patches = [ - ./nix/patches/gnumakefile-nix-fixes.patch - ]; - - postPatch = '' - # Fix shebangs (can't be done as static patch - needs Nix store paths) - patchShebangs src build configure scripts man - ''; - - hardeningDisable = lib.optionals withBpf [ "zerocallusedregs" ]; - - BPF_CFLAGS = lib.optionalString withBpf "-fno-stack-protector -Wno-error=unused-command-line-argument"; - CLANG = lib.optionalString withBpf (lib.getExe pkgs.llvmPackages.clang); - - SYSTEMD_SYSTEMUNITDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/systemd/system"; - SYSTEMD_TMPFILESDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/tmpfiles.d"; - SYSTEMD_SYSUSERSDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/sysusers.d"; - - postInstall = '' - # Build the combined PMNS root file - # The individual root_* files exist but pmcd needs a combined 'root' file - # Use pmnsmerge to combine all the root_* files into one - ( - cd $out/var/lib/pcp/pmns - export PCP_DIR=$out - export PCP_CONF=$out/etc/pcp.conf - . 
$out/etc/pcp.env - - # Merge all the root_* files into the combined root file - # Order matters: root_root first (base), then others - $out/libexec/pcp/bin/pmnsmerge -a \ - $out/libexec/pcp/pmns/root_root \ - $out/libexec/pcp/pmns/root_pmcd \ - $out/libexec/pcp/pmns/root_linux \ - $out/libexec/pcp/pmns/root_proc \ - $out/libexec/pcp/pmns/root_xfs \ - $out/libexec/pcp/pmns/root_jbd2 \ - $out/libexec/pcp/pmns/root_kvm \ - $out/libexec/pcp/pmns/root_mmv \ - $out/libexec/pcp/pmns/root_bpf \ - $out/libexec/pcp/pmns/root_pmproxy \ - root - ) - - # Remove runtime state directories - rm -rf $out/var/{run,log} $out/var/lib/pcp/tmp || true - - # Move vendor config to share - if [ -d "$out/etc" ]; then - mkdir -p $out/share/pcp/etc - mv $out/etc/* $out/share/pcp/etc/ - rmdir $out/etc || true - - # Fix paths in pcp.conf to point to new locations - substituteInPlace $out/share/pcp/etc/pcp.conf \ - --replace-quiet "$out/etc/pcp" "$out/share/pcp/etc/pcp" \ - --replace-quiet "$out/etc/sysconfig" "$out/share/pcp/etc/sysconfig" \ - --replace-quiet "PCP_ETC_DIR=$out/etc" "PCP_ETC_DIR=$out/share/pcp/etc" - - # Fix symlinks that pointed to /etc/pcp/... 
- find $out/var/lib/pcp -type l | while read link; do - target=$(readlink "$link") - if [[ "$target" == *"/etc/pcp/"* ]]; then - suffix="''${target#*/etc/pcp/}" - rm "$link" - ln -sf "$out/share/pcp/etc/pcp/$suffix" "$link" - fi - done - fi - - # Fix broken symlinks with double /nix/store prefix - # These occur when the build system prepends a path to an already-absolute path - for broken_link in "$out/share/pcp/etc/pcp/pm"{search/pmsearch,series/pmseries}.conf; do - [[ -L "$broken_link" ]] && rm "$broken_link" && \ - ln -sf "$out/share/pcp/etc/pcp/pmproxy/pmproxy.conf" "$broken_link" - done - - # Fix pmcd/rc.local symlink (points to libexec/pcp/services/local) - if [[ -L "$out/share/pcp/etc/pcp/pmcd/rc.local" ]]; then - rm "$out/share/pcp/etc/pcp/pmcd/rc.local" - ln -sf "$out/libexec/pcp/services/local" "$out/share/pcp/etc/pcp/pmcd/rc.local" - fi - - # Move man pages to $man output - if [ -d "$out/share/man" ]; then - mkdir -p $man/share - mv $out/share/man $man/share/ - fi - - # Move documentation to $doc output - for docdir in $out/share/doc/pcp*; do - if [ -d "$docdir" ]; then - mkdir -p $doc/share/doc - mv "$docdir" $doc/share/doc/ - fi - done - ''; + # Import modular package definition + # Pass self for stable source hashing - see nix/package.nix for details + pcp = import ./nix/package.nix { inherit pkgs; src = self; }; + + # Import shared constants and variant definitions + constants = import ./nix/constants.nix; + variants = import ./nix/variants.nix { inherit constants; }; + nixosModule = import ./nix/nixos-module.nix; + + # ─── MicroVM Generator ─────────────────────────────────────────── + # Creates a MicroVM runner with the specified configuration. + # See nix/microvm.nix for full parameter documentation. + mkMicroVM = { + networking ? "user", + debugMode ? true, + enablePmlogger ? true, + enableEvalTools ? false, + enablePmieTest ? false, + enableGrafana ? false, + enableBpf ? false, + enableBcc ? false, + portOffset ? 0, + variant ? 
"base", + }: + import ./nix/microvm.nix { + inherit pkgs lib pcp microvm nixosModule nixpkgs system; + inherit networking debugMode enablePmlogger enableEvalTools + enablePmieTest enableGrafana enableBpf enableBcc + portOffset variant; + }; - doCheck = false; - enableParallelBuilding = true; + # ─── Variant Package Generator ─────────────────────────────────── + # Generates MicroVM packages for all variants and networking modes. + mkVariantPackages = lib.foldl' (acc: variantName: + let + def = variants.definitions.${variantName}; + portOffset = constants.variantPortOffsets.${variantName}; + + # User-mode networking variant + userPkg = { + name = variants.mkPackageName variantName "user"; + value = mkMicroVM ({ + networking = "user"; + inherit portOffset; + variant = variantName; + } // def.config); + }; + + # TAP networking variant (if supported) + tapPkg = lib.optionalAttrs def.supportsTap { + name = variants.mkPackageName variantName "tap"; + value = mkMicroVM ({ + networking = "tap"; + inherit portOffset; + variant = variantName; + } // def.config); + }; + in + acc // { ${userPkg.name} = userPkg.value; } + // lib.optionalAttrs (tapPkg ? 
name) { ${tapPkg.name} = tapPkg.value; } + ) {} variants.variantNames; + + # Import lifecycle testing framework (Linux only) + lifecycle = lib.optionalAttrs pkgs.stdenv.isLinux ( + import ./nix/lifecycle { inherit pkgs lib; } + ); + + # Import container module (Linux only) - returns { image, inputsHash } + containerModule = lib.optionalAttrs pkgs.stdenv.isLinux ( + import ./nix/container.nix { inherit pkgs pcp; } + ); + + # Import container testing framework (Linux only) + containerTest = lib.optionalAttrs pkgs.stdenv.isLinux ( + import ./nix/container-test { + inherit pkgs lib pcp; + containerInputsHash = containerModule.inputsHash or ""; + } + ); + + # Import Kubernetes testing framework (Linux only) + k8sTest = lib.optionalAttrs pkgs.stdenv.isLinux ( + import ./nix/k8s-test { + inherit pkgs lib pcp; + containerInputsHash = containerModule.inputsHash or ""; + } + ); + + # Import test-all runner (Linux only) + testAll = lib.optionalAttrs pkgs.stdenv.isLinux ( + import ./nix/test-all { + inherit pkgs lib containerTest k8sTest; + } + ); - meta = with lib; { - description = "Performance Co-Pilot - system performance monitoring toolkit"; - homepage = "https://pcp.io"; - license = licenses.gpl2Plus; - platforms = platforms.linux ++ platforms.darwin; - mainProgram = "pminfo"; - }; - }; in { packages = { default = pcp; inherit pcp; - }; + } // lib.optionalAttrs pkgs.stdenv.isLinux ( + { + # OCI container image (Linux only) + pcp-container = containerModule.image; + } + # MicroVM packages for all variants + // mkVariantPackages + # Lifecycle testing packages + // lifecycle.packages + # Container testing packages + // containerTest.packages + # Kubernetes testing packages + // k8sTest.packages + # Test-all runner + // testAll.packages + ); checks = lib.optionalAttrs pkgs.stdenv.isLinux { vm-test = import ./nix/vm-test.nix { @@ -278,28 +172,91 @@ }; }; - devShells.default = pkgs.mkShell { - inputsFrom = [ pcp ]; - packages = with pkgs; [ - gdb - jp2a - ] ++ lib.optionals 
pkgs.stdenv.isLinux [ - valgrind - ] ++ lib.optionals pkgs.stdenv.isDarwin [ - lldb - ]; - - shellHook = '' - # Display PCP logo on shell entry - if [[ -f ./images/pcpicon-light.png ]]; then - jp2a --colors ./images/pcpicon-light.png 2>/dev/null || true - fi - echo "PCP Development Shell" - echo "Run './configure --help' to see build options" - echo "Otherwise use 'nix build' to build the package" - ''; - }; + # Import modular development shell + devShells.default = import ./nix/shell.nix { inherit pkgs pcp; }; + + # ─── Apps (Linux only) ───────────────────────────────────────────── + apps = lib.optionalAttrs pkgs.stdenv.isLinux ( + let + networkScripts = import ./nix/network-setup.nix { inherit pkgs; }; + vmScripts = import ./nix/microvm-scripts.nix { inherit pkgs; }; + + # ─── MicroVM Test Apps ──────────────────────────────────────────── + # Generate test apps for each variant + mkTestApp = variant: networkMode: + let + testName = variants.mkTestAppName variant networkMode; + isTap = networkMode == "tap"; + portOffset = constants.variantPortOffsets.${variant}; + sshPort = constants.ports.sshForward + portOffset; + host = if isTap then constants.network.vmIp else "localhost"; + in { + name = testName; + value = { + type = "app"; + program = "${import ./nix/tests/microvm-test.nix { + inherit pkgs lib; + variant = "${variant}-${networkMode}"; + inherit host sshPort; + }}/bin/pcp-test-${variant}-${networkMode}"; + }; + }; + + # Generate test apps for all variants + testApps = lib.foldl' (acc: variantName: + let + def = variants.definitions.${variantName}; + userTest = mkTestApp variantName "user"; + tapTest = lib.optionalAttrs def.supportsTap (mkTestApp variantName "tap"); + in + acc // { ${userTest.name} = userTest.value; } + // lib.optionalAttrs (tapTest ? 
name) { ${tapTest.name} = tapTest.value; } + ) {} variants.variantNames; + + in { + # Network management + pcp-check-host = { + type = "app"; + program = "${networkScripts.check}/bin/pcp-check-host"; + }; + pcp-network-setup = { + type = "app"; + program = "${networkScripts.setup}/bin/pcp-network-setup"; + }; + pcp-network-teardown = { + type = "app"; + program = "${networkScripts.teardown}/bin/pcp-network-teardown"; + }; + # VM management + pcp-vm-check = { + type = "app"; + program = "${vmScripts.check}/bin/pcp-vm-check"; + }; + pcp-vm-stop = { + type = "app"; + program = "${vmScripts.stop}/bin/pcp-vm-stop"; + }; + pcp-vm-ssh = { + type = "app"; + program = "${vmScripts.ssh}/bin/pcp-vm-ssh"; + }; + # Comprehensive test runner + pcp-test-all-microvms = { + type = "app"; + program = "${import ./nix/tests/test-all-microvms.nix { inherit pkgs lib; }}/bin/pcp-test-all-microvms"; + }; + } + # Per-variant test apps + // testApps + # Lifecycle testing apps + // lifecycle.apps + # Container testing apps + // containerTest.apps + # Kubernetes testing apps + // k8sTest.apps + # Test-all runner + // testAll.apps + ); } ); } - diff --git a/nix/bcc.nix b/nix/bcc.nix new file mode 100644 index 0000000000..c129a33cde --- /dev/null +++ b/nix/bcc.nix @@ -0,0 +1,304 @@ +# nix/bcc.nix +# +# NixOS module for BCC PMDA (eBPF metrics). +# +# ══════════════════════════════════════════════════════════════════════════════ +# DEPRECATED: pmdabcc is deprecated upstream and will be removed in a future +# PCP release. Use pmdabpf instead (see nix/bpf.nix). +# +# From pmdabcc(1): +# "This PMDA is now deprecated and will be removed in a future release, +# transition to using its replacement pmdabpf(1) instead." +# +# This module is provided for reference but is NOT functional on NixOS. +# No further development will be done on this module. 
+# ══════════════════════════════════════════════════════════════════════════════ +# +# ─── Known Issues (will not be fixed) ───────────────────────────────────────── +# +# BCC compilation works but metrics don't register with pmcd: +# - pminfo bcc returns "Unknown metric name" +# - Modules compile but don't register metrics via pmcd pipe protocol +# - Log file path doesn't work correctly +# +# ─── Use pmdabpf Instead ────────────────────────────────────────────────────── +# +# pmdabpf is the supported replacement: +# - Pre-compiled CO-RE eBPF (no runtime compilation) +# - Works reliably on NixOS +# - Provides runqlat, biolatency, oomkill, execsnoop, etc. +# - Fast startup, ~512MB memory vs ~2GB for BCC +# - Actively maintained +# +# ══════════════════════════════════════════════════════════════════════════════ +# +# Architecture (for reference): +# pmcd reads pmcd.conf which includes bcc PMDA entry +# pmcd spawns pmdabcc as a pipe PMDA (binary protocol) +# pmdabcc loads eBPF modules via BCC library +# Grafana PCP Vector datasource queries pmproxy → pmcd → pmdabcc +# +{ config, lib, pkgs, ... }: +with lib; +let + constants = import ./constants.nix; + cfg = config.services.pcp; + bccCfg = cfg.bcc; + + # Use the kernel from pkgs.linuxPackages (NixOS default). + # Note: We can't use config.boot.kernelPackages.kernel here because it creates + # an infinite recursion with fileSystems evaluation. + # + # This works for MicroVMs because they use the default NixOS kernel (pkgs.linuxPackages). + # If you customize boot.kernelPackages, ensure it matches pkgs.linuxPackages or + # the bind mounts will be at the wrong path. 
+ # + # BCC uses uname -r to find headers at /lib/modules/$(uname -r)/build + kernel = pkgs.linuxPackages.kernel; + + # All modules (defaults + extras) + allModules = bccCfg.modules ++ bccCfg.extraModules; + + # Generate bcc.conf from enabled modules + moduleConfigs = concatStringsSep "\n" (map (mod: '' + [${mod.name}] + module = ${if mod.module != null then mod.module else mod.name} + cluster = ${toString mod.cluster} + ${optionalString (mod.extraConfig != "") mod.extraConfig} + '') allModules); + + bccConf = pkgs.writeText "bcc.conf" '' + # + # PCP BCC PMDA config - generated by NixOS + # See pmdabcc(1) for configuration options + # + + [pmda] + modules = ${concatMapStringsSep "," (m: m.name) allModules} + prefix = bcc. + process_refresh = ${toString bccCfg.processRefresh} + module_failure_fatal = ${if bccCfg.moduleFailureFatal then "True" else "False"} + + ${moduleConfigs} + ''; + + # Python path for BCC - use the same Python as PCP + python = pkgs.python3; + pythonVersion = python.pythonVersion; + +in { + # ═══════════════════════════════════════════════════════════════════════ + # Options interface + # ═══════════════════════════════════════════════════════════════════════ + + options.services.pcp.bcc = { + enable = mkEnableOption "BCC PMDA for eBPF metrics (requires kernel eBPF support)"; + + modules = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Module name (used in config section header)"; + }; + module = mkOption { + type = types.nullOr types.str; + default = null; + description = "Module to load (defaults to name if not specified)"; + }; + cluster = mkOption { + type = types.int; + description = "Unique cluster ID (0-99 for BCC tools, 100-199 for PCP-specific)"; + }; + extraConfig = mkOption { + type = types.str; + default = ""; + description = "Additional config lines for this module"; + }; + }; + }); + default = [ + # Modules required for eBPF/BCC Overview dashboard + { name = 
"runqlat"; cluster = 4; } + { name = "biolatency"; cluster = 0; } + { name = "tcptop"; cluster = 12; } + { name = "tcplife"; cluster = 3; } + ]; + description = '' + BCC modules to enable. Default includes modules required for + the Grafana eBPF/BCC Overview dashboard. + ''; + }; + + processRefresh = mkOption { + type = types.int; + default = 60; + description = "Interval in seconds to refresh monitored processes (0 to disable)"; + }; + + moduleFailureFatal = mkOption { + type = types.bool; + default = false; + description = '' + Whether BCC PMDA should exit when a module fails to compile. + Set to false for development (partial functionality) or true for production. + ''; + }; + + extraModules = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { type = types.str; }; + module = mkOption { type = types.nullOr types.str; default = null; }; + cluster = mkOption { type = types.int; }; + extraConfig = mkOption { type = types.str; default = ""; }; + }; + }); + default = []; + description = "Additional BCC modules beyond the dashboard defaults"; + example = literalExpression '' + [ + { name = "netproc"; cluster = 40; } + { name = "bioperpid"; cluster = 1; } + ] + ''; + }; + }; + + # ═══════════════════════════════════════════════════════════════════════ + # Implementation + # ═══════════════════════════════════════════════════════════════════════ + + config = mkIf (cfg.enable && bccCfg.enable) { + # Warning about deprecation + warnings = [ + "pmdabcc is DEPRECATED and will be removed in a future PCP release. Use pmdabpf instead (services.pcp.bpf.enable = true)." + "BCC PMDA is NOT functional on NixOS - metrics will not register with pmcd." 
+  ];
+
+  # Add BCC to system packages for debugging
+  environment.systemPackages = [ pkgs.bcc ];
+
+  # Register BCC PMDA with pmcd via the pmdas option
+  services.pcp.pmdas.bcc = {
+    domain = 149; # BCC domain ID
+    type = "pipe";
+    flags = "binary notready";
+    command = let
+      # Create wrapper script that sets up BCC environment
+      # Note: BCC requires clang for runtime eBPF compilation
+      # We use clang-unwrapped to avoid NixOS clang-wrapper validation issues
+      # that cause objdump to fail during pmdabcc initialization
+
+      # Create an ld wrapper that prepends glibc library path
+      # Problem: cffi runs `ld -t -L -o /dev/null -lc` to verify libraries
+      # This fails because glibc isn't in ld's default search path. Our wrapper injects -L<glibc>/lib
+      ldWrapper = pkgs.writeShellScript "ld" ''
+        exec ${pkgs.llvmPackages.bintools-unwrapped}/bin/ld -L${pkgs.glibc}/lib "$@"
+      '';
+
+      # Create a clang wrapper that adds kernel include paths for BPF compilation
+      # BCC calls clang directly without respecting environment variables for includes
+      # This wrapper prepends the NixOS kernel dev paths
+      kernelDev = "${kernel.dev}/lib/modules/${kernel.version}";
+      clangWrapper = pkgs.writeShellScript "clang" ''
+        exec ${pkgs.llvmPackages.clang-unwrapped}/bin/clang \
+          -I${kernelDev}/build/include/generated \
+          -I${kernelDev}/build/include \
+          -I${kernelDev}/source/include \
+          -I${kernelDev}/source/arch/x86/include \
+          -I${kernelDev}/build/arch/x86/include/generated \
+          -I${kernelDev}/source/include/uapi \
+          -I${kernelDev}/source/arch/x86/include/uapi \
+          "$@"
+      '';
+
+      # Directory containing our wrappers (must be first in PATH)
+      wrapperDir = pkgs.runCommand "bcc-wrapper-dir" {} ''
+        mkdir -p $out/bin
+        ln -s ${ldWrapper} $out/bin/ld
+        ln -s ${clangWrapper} $out/bin/clang
+      '';
+
+      bccWrapper = pkgs.writeShellScript "pmdabcc-wrapper" ''
+        # BCC Python bindings
+        export PYTHONPATH="${pkgs.bcc}/lib/python${pythonVersion}/site-packages:${cfg.package}/lib/python${pythonVersion}/site-packages"
+
+        # Put our wrappers FIRST in PATH:
+        # - clang wrapper: adds kernel include paths for BPF compilation
+        # - ld wrapper: adds -L<glibc>/lib for cffi library checks
+        # Also add kmod for modprobe (needed by BCC for loading kernel modules)
+        # See: nix/tests/BCC_PMDA_DEFECT.md for details
+        export PATH="${wrapperDir}/bin:${pkgs.llvmPackages.llvm}/bin:${pkgs.llvmPackages.bintools-unwrapped}/bin:${pkgs.bcc}/bin:${pkgs.kmod}/bin:$PATH"
+
+        # Library paths for dynamic linking (required for pcp.pmapi to find libpcp.so)
+        export LD_LIBRARY_PATH="${cfg.package}/lib:${pkgs.glibc}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+        # Provide library paths that would normally come from the wrapper
+        export NIX_CFLAGS_COMPILE="-isystem ${pkgs.glibc.dev}/include -isystem ${kernel.dev}/include"
+        export NIX_LDFLAGS="-L${pkgs.glibc}/lib"
+
+        # Library search paths (for tools that respect these variables)
+        export LIBRARY_PATH="${pkgs.glibc}/lib:${cfg.package}/lib"
+        export LDFLAGS="-L${pkgs.glibc}/lib"
+
+        # Kernel headers for eBPF compilation
+        # DO NOT set BCC_KERNEL_SOURCE - this breaks split-layout detection!
+ # Instead, we create symlinks at the NixOS kernel modules path to our bind mounts + # See systemd.tmpfiles.rules for the symlink creation + + # Disable wrapper purity enforcement + export NIX_ENFORCE_PURITY=0 + + # Override PCP_PMDAS_DIR so pmdabcc.python finds our custom config + # pmdabcc.python uses: pmGetConfig('PCP_PMDAS_DIR') + '/bcc/bcc.conf' + # Our config is at: /var/lib/pcp/config/bcc/bcc.conf + # So we set PCP_PMDAS_DIR=/var/lib/pcp/config + export PCP_PMDAS_DIR=/var/lib/pcp/config + + # Log file for debugging (pmdabcc.python uses the -l flag) + exec ${cfg.package}/bin/pmpython \ + ${cfg.package}/var/lib/pcp/pmdas/bcc/pmdabcc.python \ + -l /var/log/pcp/pmcd/bcc.log \ + "$@" + ''; + in "${bccWrapper} -d 149"; + }; + + # Create BCC config directory and files + # Note: /var/lib/pcp/config is owned by pcp:pcp, so subdirs must also be pcp-owned + # to avoid "unsafe path transition" errors from systemd-tmpfiles + systemd.tmpfiles.rules = [ + # Writable config directory for BCC (must be pcp:pcp to match parent) + "d /var/lib/pcp/config/bcc 0755 pcp pcp -" + "d /var/log/pcp/pmcd 0755 root root -" + + # Create kernel headers mount points for BCC at /lib/modules/VERSION + "d /lib/modules 0755 root root -" + "d /lib/modules/${kernel.version} 0755 root root -" + "d /lib/modules/${kernel.version}/build 0755 root root -" + "d /lib/modules/${kernel.version}/source 0755 root root -" + + # Copy BCC config (using L+ to create symlink, since C doesn't work well here) + # The config is read-only which is fine for the PMDA + "L+ /var/lib/pcp/config/bcc/bcc.conf - - - - ${bccConf}" + + # Symlink BCC module Python files from package + "L+ /var/lib/pcp/config/bcc/modules - - - - ${cfg.package}/var/lib/pcp/pmdas/bcc/modules" + ]; + + # Bind mount kernel dev headers to /lib/modules/VERSION/{build,source} + # NixOS BCC uses KERNEL_MODULES_DIR=/run/booted-system/... 
but that's a symlink we can't mount into
+  # So we mount at /lib/modules/${kernel.version}/ where BCC's split-layout detection finds them (BCC_KERNEL_SOURCE is deliberately left unset - see wrapper above)
+  fileSystems."/lib/modules/${kernel.version}/build" = {
+    device = "${kernel.dev}/lib/modules/${kernel.version}/build";
+    fsType = "none";
+    options = [ "bind" "ro" ];
+  };
+  fileSystems."/lib/modules/${kernel.version}/source" = {
+    device = "${kernel.dev}/lib/modules/${kernel.version}/source";
+    fsType = "none";
+    options = [ "bind" "ro" ];
+  };
+  };
+}
diff --git a/nix/bpf.nix b/nix/bpf.nix
new file mode 100644
index 0000000000..f7b9cd9822
--- /dev/null
+++ b/nix/bpf.nix
@@ -0,0 +1,160 @@
+# nix/bpf.nix
+#
+# NixOS module for BPF PMDA (pre-compiled eBPF metrics).
+#
+# This module uses pmdabpf which loads pre-compiled CO-RE (Compile Once, Run Everywhere)
+# eBPF programs. Unlike pmdabcc, no runtime compilation is needed - the eBPF bytecode
+# is compiled at Nix build time and loaded directly by the kernel via libbpf.
+#
+# Advantages over pmdabcc:
+# - Fast startup: No eBPF compilation at runtime
+# - Low memory: No clang/LLVM needed in the VM (~512MB vs ~2GB)
+# - Simpler: Just needs libbpf + BTF-enabled kernel
+#
+# Trade-offs:
+# - Fewer modules available compared to pmdabcc
+# - Missing: tcptop, tcplife (required for some Grafana BCC dashboards)
+#
+# If you need tcptop/tcplife metrics, see bcc.nix which uses pmdabcc with runtime
+# eBPF compilation. 
This requires: +# - 2GB+ VM memory (clang/LLVM are memory-hungry) +# - BTF-enabled kernel (CONFIG_DEBUG_INFO_BTF=y) +# - Extended pmcd timeout (-q 60) for module compilation +# +# Available pmdabpf modules: +# - biolatency: Block device I/O latency histogram +# - runqlat: Scheduler run queue latency histogram +# - netatop: Per-process TCP/UDP network statistics +# - oomkill: OOM killer event tracing +# - execsnoop: New process tracing +# - exitsnoop: Process exit tracing +# - opensnoop: open(2) syscall tracing +# - vfsstat: VFS operation statistics +# - tcpconnlat: TCP connection latency +# - tcpconnect: TCP connection tracing +# - biosnoop: Block I/O tracing +# - fsslower: Slow filesystem operations +# - statsnoop: stat(2) family tracing +# - mountsnoop: mount/umount tracing +# - bashreadline: Bash command tracing +# +{ config, lib, pkgs, ... }: +with lib; +let + constants = import ./constants.nix; + cfg = config.services.pcp; + bpfCfg = cfg.bpf; + + # Generate bpf.conf from enabled modules + bpfConf = pkgs.writeText "bpf.conf" '' + # + # PCP BPF PMDA config - generated by NixOS + # See pmdabpf(1) for module configuration + # + + ${concatStringsSep "\n" (map (mod: '' + [${mod.name}.so] + enabled = ${if mod.enable then "true" else "false"} + ${optionalString (mod.extraConfig != "") mod.extraConfig} + '') bpfCfg.modules)} + ''; + +in { + # ═══════════════════════════════════════════════════════════════════════ + # Options interface + # ═══════════════════════════════════════════════════════════════════════ + + options.services.pcp.bpf = { + enable = mkEnableOption "BPF PMDA for pre-compiled eBPF metrics (requires BTF-enabled kernel)"; + + modules = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Module name (without .so suffix)"; + }; + enable = mkOption { + type = types.bool; + default = true; + description = "Whether this module is enabled"; + }; + extraConfig = mkOption { + type = 
types.str; + default = ""; + description = "Additional config lines for this module"; + }; + }; + }); + default = [ + # Default modules for system monitoring + { name = "biolatency"; } # Block I/O latency histogram + { name = "runqlat"; } # Scheduler run queue latency + { name = "netatop"; } # Per-process network stats + { name = "oomkill"; } # OOM killer events + ]; + description = '' + BPF modules to enable. These are pre-compiled CO-RE eBPF programs + that load quickly without runtime compilation. + ''; + }; + + extraModules = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { type = types.str; }; + enable = mkOption { type = types.bool; default = true; }; + extraConfig = mkOption { type = types.str; default = ""; }; + }; + }); + default = []; + description = "Additional BPF modules beyond the defaults"; + example = literalExpression '' + [ + { name = "execsnoop"; } + { name = "tcpconnlat"; extraConfig = "min_us = 100"; } + ] + ''; + }; + }; + + # ═══════════════════════════════════════════════════════════════════════ + # Implementation + # ═══════════════════════════════════════════════════════════════════════ + + config = mkIf (cfg.enable && bpfCfg.enable) { + # Informational message about BTF requirement + warnings = [ + "BPF PMDA requires BTF-enabled kernel (CONFIG_DEBUG_INFO_BTF=y). 
Verify with: ls /sys/kernel/btf/vmlinux" + ]; + + # Register BPF PMDA with pmcd via the pmdas option + # pmdabpf is a DSO PMDA (shared library loaded by pmcd) + services.pcp.pmdas.bpf = { + domain = constants.pmdaDomains.bpf; + type = "dso"; + init = "bpf_init"; + path = "${cfg.package}/var/lib/pcp/pmdas/bpf/pmda_bpf.so"; + }; + + # Create BPF config directory and files + # Note: pmdabpf looks for bpf.conf in $PCP_PMDAS_DIR/bpf/ (i.e., /var/lib/pcp/pmdas/bpf/) + # We also create a symlink in config/bpf for consistency with other PMDAs + systemd.tmpfiles.rules = [ + # Writable config directory for BPF + "d /var/lib/pcp/config/bpf 0755 pcp pcp -" + + # Symlink the generated config to where pmdabpf looks for it + "L+ /var/lib/pcp/pmdas/bpf/bpf.conf - - - - ${bpfConf}" + + # Also symlink to config dir for consistency + "L+ /var/lib/pcp/config/bpf/bpf.conf - - - - ${bpfConf}" + + # Symlink the pre-compiled module .so files from package + "L+ /var/lib/pcp/pmdas/bpf/modules - - - - ${cfg.package}/var/lib/pcp/pmdas/bpf/modules" + ]; + + # Ensure libbpf is available + environment.systemPackages = [ pkgs.libbpf ]; + }; +} diff --git a/nix/constants.nix b/nix/constants.nix new file mode 100644 index 0000000000..6cda9c0d04 --- /dev/null +++ b/nix/constants.nix @@ -0,0 +1,236 @@ +# nix/constants.nix +# +# Shared constants for PCP MicroVM infrastructure. +# Import this file in microvm.nix, network-setup.nix, test-lib.nix, etc. +# to ensure all configurations stay synchronized. +# +# DESIGN: These are defaults that can be overridden via module options. +# The constants file provides consistency; the NixOS module provides flexibility. +# See services.pcp.ports.*, services.pcp.network.* for user-facing overrides. +# +# PORT ALLOCATION: Each MicroVM variant gets a unique port offset to avoid +# conflicts when testing. See variantPortOffsets below. 
+# +rec { + # ─── User/Group Configuration ───────────────────────────────────────── + # Standard PCP user/group IDs (matches upstream packages) + user = { + name = "pcp"; + uid = 990; + gid = 990; + home = "/var/lib/pcp"; + description = "Performance Co-Pilot"; + }; + + # ─── PCP Directory Paths ────────────────────────────────────────────── + # Mutable paths that override Nix store defaults (read-only). + # Used by both nixos-module.nix and container.nix. + paths = { + logDir = "/var/log/pcp"; + varDir = "/var/lib/pcp"; + tmpDir = "/var/lib/pcp/tmp"; + runDir = "/run/pcp"; + archiveDir = "/var/log/pcp/pmlogger"; + }; + + # Log subdirectories that need to be created + logSubdirs = [ "pmcd" "pmlogger" "pmie" "pmproxy" ]; + + # Symlinks from /var/lib/pcp/* to package store paths + # These provide access to read-only package data + storeSymlinks = [ "pmns" "pmdas" "pmcd" ]; + + # Config symlinks (read-only from package) + configSymlinks = [ "derived" "pmafm" "pmieconf" "pmlogconf" "pmlogredact" "pmlogrewrite" ]; + + # Config directories that need to be writable + configWritableDirs = [ "pmda" "pmie" "pmlogger" ]; + + # ─── Network Configuration ───────────────────────────────────────────── + # These are defaults. For custom deployments, override via module options. + network = { + # TAP networking + bridge = "pcpbr0"; + tap = "pcptap0"; + subnet = "10.177.0.0/24"; + gateway = "10.177.0.1"; + vmIp = "10.177.0.20"; + + # VM MAC address (consistent across all variants) + vmMac = "02:00:00:0a:cb:01"; + }; + + # ─── Port Configuration ──────────────────────────────────────────────── + # Standard PCP ports. These match upstream defaults. 
+ ports = { + pmcd = 44321; + pmproxy = 44322; + nodeExporter = 9100; + prometheus = 9090; + grafana = 3000; + + # Host-side port forwarding (user-mode networking) + # These avoid conflicts with services running on the host + # Note: Base starts at 22122 to avoid conflicts with common services + sshForward = 22122; + prometheusForward = 19090; + grafanaForward = 13000; + }; + + # ─── VM Resources ────────────────────────────────────────────────────── + vm = { + memoryMB = 1024; + vcpus = 4; + }; + + # ─── Security Configuration ──────────────────────────────────────────── + security = { + # systemd-analyze security score thresholds + # These are enforced during testing; adjust if hardening changes scores + networkServiceMaxScore = 5.0; # pmcd, pmproxy must be <= this + internalServiceWarnScore = 7.0; # pmlogger, pmie warn if > this + + # SSH defaults for MicroVM + sshAllowPasswordAuth = false; # Require key-based auth + sshPermitRootLogin = "prohibit-password"; # Keys only + }; + + # ─── PMDA Domain IDs ─────────────────────────────────────────────────── + # These are assigned in PCP source: src/pmdas//domain.h + # Must match the PMNS definitions in libexec/pcp/pmns/root_ + pmdaDomains = { + pmcd = 2; # src/pmdas/pmcd/domain.h + linux = 60; # src/pmdas/linux/domain.h + bpf = 157; # src/pmdas/bpf/domain.h (pre-compiled CO-RE eBPF) + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + # bcc = 149; # src/pmdas/bcc/domain.h (runtime-compiled eBPF) + }; + + # ─── Test Configuration ──────────────────────────────────────────────── + test = { + sshTimeoutSeconds = 5; + sshMaxAttempts = 60; # 60 * 2s = 120s max wait + sshRetryDelaySeconds = 2; + metricParityTolerancePct = 10; + minExpectedMetrics = 1000; # Typical linux PMDA set + buildPollSeconds = 10; # Poll interval for build completion + }; + + # ─── Variant Port Offsets ───────────────────────────────────────────── + # Each MicroVM variant gets a unique port offset to avoid conflicts. 
+ # All forwarded ports (SSH, pmcd, pmproxy, etc.) are shifted by this offset. + # This allows multiple variants to run simultaneously for testing. + # + # Usage: actual_port = base_port + variantPortOffsets. + # + # TAP variants use direct IP access (10.177.0.20) and share offset with + # their user-mode counterpart (only one TAP VM can run at a time anyway). + # + variantPortOffsets = { + base = 0; # pcp-microvm, pcp-microvm-tap + eval = 100; # pcp-microvm-eval, pcp-microvm-eval-tap + grafana = 200; # pcp-microvm-grafana, pcp-microvm-grafana-tap + bpf = 300; # pcp-microvm-bpf + # NOTE: BCC is deprecated - use BPF PMDA instead + # bcc = 400; # pcp-microvm-bcc + }; + + # ─── Serial Console Configuration ──────────────────────────────────── + # Each MicroVM variant gets TCP ports for serial console access. + # This enables debugging early boot issues and network problems. + # + # Two console types: + # - serial (ttyS0): Traditional UART, slow but available immediately + # - virtio (hvc0): High-speed virtio-console, available after driver loads + # + # Port allocation scheme: + # Base port: 24500 (high port, unlikely to conflict) + # Each variant gets 10 ports: + # +0 = serial console (ttyS0) + # +1 = virtio console (hvc0) + # +2-9 = reserved for future use + # + # Connect with: nc localhost + # Or use: socat -,rawer tcp:localhost: + # + console = { + portBase = 24500; + + # Port offsets within each variant's block + serialOffset = 0; # ttyS0 - slow, early boot + virtioOffset = 1; # hvc0 - fast, after drivers load + + # Variant port blocks (10 ports each) + variantBlocks = { + base = 0; # 24500-24509 + eval = 10; # 24510-24519 + grafana = 20; # 24520-24529 + bpf = 30; # 24530-24539 + # NOTE: BCC is deprecated - use BPF PMDA instead + # bcc = 40; # 24540-24549 + }; + }; + + # Helper function to get console ports for a variant + # Usage: (getConsolePorts "base").serial -> 24500 + # (getConsolePorts "eval").virtio -> 24511 + getConsolePorts = variant: { + serial = 
console.portBase + console.variantBlocks.${variant} + console.serialOffset; + virtio = console.portBase + console.variantBlocks.${variant} + console.virtioOffset; + }; + + # ─── Lifecycle Test Configuration ──────────────────────────────────── + # Timeouts and configuration for MicroVM lifecycle testing. + # Each phase has a configurable timeout, with variant-specific overrides. + # + lifecycle = { + # Poll interval for build/wait operations (seconds) + pollInterval = 1; + + # Per-phase timeouts in seconds + timeouts = { + build = 600; # Phase 0: Build VM (10 minutes) + processStart = 5; # Phase 1: Wait for QEMU process to appear + serialReady = 30; # Phase 2: Wait for serial console to be responsive + virtioReady = 45; # Phase 2b: Wait for virtio console (needs drivers) + serviceReady = 60; # Phase 3: Wait for services to be ready + metricsReady = 30; # Phase 4: Wait for metrics to be available + shutdown = 30; # Phase 5: Wait for shutdown command to complete + waitExit = 60; # Phase 6: Wait for process to exit cleanly + }; + + # Variant-specific timeout overrides (in seconds) + variantTimeouts = { + base = {}; + eval = {}; + grafana = { serviceReady = 90; }; # Grafana takes longer to start + bpf = {}; + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + # BCC used runtime eBPF compilation which required longer timeouts + # bcc = { + # serviceReady = 180; # BCC modules compile at pmcd startup + # metricsReady = 120; # BCC metrics appear after compilation + # }; + }; + }; + + # Helper function to get hostname for a variant + # Usage: getHostname "base" -> "pcp-vm" + # getHostname "bpf" -> "pcp-bpf-vm" + getHostname = variant: + if variant == "base" then "pcp-vm" + else "pcp-${variant}-vm"; + + # Helper function to get process name for a variant (same as hostname) + # Used for pgrep matching against QEMU -name argument + getProcessName = variant: getHostname variant; + + # Helper function to get timeout for a specific phase and variant + # Returns 
variant-specific timeout if defined, otherwise default + getTimeout = variant: phase: + let + variantOverrides = lifecycle.variantTimeouts.${variant} or {}; + defaultTimeout = lifecycle.timeouts.${phase}; + in + variantOverrides.${phase} or defaultTimeout; +} diff --git a/nix/container-test/constants.nix b/nix/container-test/constants.nix new file mode 100644 index 0000000000..0b7ef57a26 --- /dev/null +++ b/nix/container-test/constants.nix @@ -0,0 +1,58 @@ +# nix/container-test/constants.nix +# +# Container lifecycle testing configuration. +# Provides constants for OCI container testing phases. +# +{ }: +let + common = import ../test-common/constants.nix { }; +in +rec { + # Re-export from shared constants + inherit (common) ports colors; + + # ─── Container Settings ───────────────────────────────────────────────── + container = { + name = "pcp-test"; + imageName = "pcp"; + imageTag = "latest"; + }; + + # ─── Timeouts (seconds) ───────────────────────────────────────────────── + timeouts = { + build = common.timeouts.build; + load = 60; # Phase 1: Load image into runtime + start = 10; # Phase 2: Start container + ready = common.timeouts.ready; + shutdown = common.timeouts.shutdown; + forceKill = 5; # Phase 7: Force kill timeout + cleanup = common.timeouts.cleanup; + }; + + # ─── Verification Checks ──────────────────────────────────────────────── + checks = { + # Processes to verify inside container + processes = [ "pmcd" ]; + + # Basic metrics to verify via pminfo -h container + metrics = common.metrics.basic ++ [ "pmcd.services" ]; + + # Kernel metrics (require privileged mode for full /proc access) + kernelMetrics = common.metrics.kernel; + + # BPF metrics (require privileged mode + BTF kernel) + bpfMetrics = common.metrics.bpf; + }; + + # ─── Phase Definitions ────────────────────────────────────────────────── + phases = { + "0" = { name = "Build Image"; description = "Build OCI image with nix build"; }; + "1" = { name = "Load Image"; description = "Load image 
into docker/podman"; }; + "2" = { name = "Start Container"; description = "Run container with port mappings"; }; + "3" = { name = "Verify Process"; description = "Check pmcd process is running"; }; + "4" = { name = "Verify Ports"; description = "Check ports 44321/44322 are listening"; }; + "5" = { name = "Verify Metrics"; description = "Query metrics via pminfo -h localhost"; }; + "6" = { name = "Shutdown"; description = "Graceful docker stop with timeout"; }; + "7" = { name = "Cleanup"; description = "Remove container"; }; + }; +} diff --git a/nix/container-test/default.nix b/nix/container-test/default.nix new file mode 100644 index 0000000000..31a36b83a5 --- /dev/null +++ b/nix/container-test/default.nix @@ -0,0 +1,534 @@ +# nix/container-test/default.nix +# +# Entry point for PCP OCI container lifecycle testing. +# Generates lifecycle test scripts for the PCP container image. +# +# Usage in flake.nix: +# containerTest = import ./nix/container-test { inherit pkgs lib pcp; }; +# +# Generated outputs: +# containerTest.packages.pcp-container-test - Full lifecycle test +# containerTest.apps.pcp-container-test - App entry point +# +{ pkgs, lib, pcp, containerInputsHash }: +let + constants = import ./constants.nix { }; + mainConstants = import ../constants.nix; + helpers = import ./lib.nix { inherit pkgs lib; }; + + inherit (helpers) + colorHelpers timingHelpers containerHelpers + portHelpers processHelpers metricHelpers + commonInputs containerInputs; + + # PCP tools for metric verification (use local package, not nixpkgs) + pcpInputs = [ pcp ]; + + # PCP_CONF path for pminfo to find its configuration + pcpConfPath = "${pcp}/share/pcp/etc/pcp.conf"; + + # ─── Full Lifecycle Test Script ───────────────────────────────────────── + # Tests the complete container lifecycle: + # Build -> Load -> Start -> Verify -> Shutdown -> Cleanup + # + mkFullTest = pkgs.writeShellApplication { + name = "pcp-container-test"; + runtimeInputs = commonInputs ++ containerInputs ++ 
pcpInputs; + text = '' + set +e # Don't exit on first failure + + # Set PCP_CONF so pminfo can find its configuration + export PCP_CONF="${pcpConfPath}" + + ${colorHelpers} + ${timingHelpers} + ${containerHelpers} + ${portHelpers} + ${processHelpers} + ${metricHelpers} + + # Configuration + CONTAINER_NAME="${constants.container.name}" + IMAGE="${constants.container.imageName}:${constants.container.imageTag}" + RESULT_LINK="result-container" + + # Port configuration + PMCD_PORT=${toString mainConstants.ports.pmcd} + PMPROXY_PORT=${toString mainConstants.ports.pmproxy} + + # Metrics to verify + METRICS="${lib.concatStringsSep " " constants.checks.metrics}" + KERNEL_METRICS="${lib.concatStringsSep " " constants.checks.kernelMetrics}" + BPF_METRICS="${lib.concatStringsSep " " constants.checks.bpfMetrics}" + + # Timing tracking + declare -A PHASE_TIMES + TOTAL_START=$(time_ms) + + # Results tracking + TOTAL_PASSED=0 + TOTAL_FAILED=0 + TOTAL_SKIPPED=0 + + record_result() { + local phase="$1" + local passed="$2" + local time_ms="$3" + PHASE_TIMES["$phase"]=$time_ms + if [[ "$passed" == "true" ]]; then + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + elif [[ "$passed" == "skip" ]]; then + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + } + + # Cleanup function + cleanup() { + info "Cleaning up..." + if [[ -n "$CONTAINER_RUNTIME" ]]; then + container_remove "$CONTAINER_NAME" + fi + rm -f "$RESULT_LINK" + } + + # Header + bold "========================================" + bold " PCP Container Lifecycle Test" + bold "========================================" + echo "" + + # Detect container runtime + detect_runtime + echo "" + + # ─── Phase 0: Build Image ───────────────────────────────────────────── + phase_header "0" "Build Image" "${toString constants.timeouts.build}" + start_time=$(time_ms) + + # Clean up any existing result link + rm -f "$RESULT_LINK" + + info " Building pcp-container..." 
+ if nix build ".#pcp-container" -o "$RESULT_LINK" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image built" "$elapsed" + record_result "build" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Build failed" "$elapsed" + record_result "build" "false" "$elapsed" + exit 1 + fi + + # ─── Phase 1: Load Image ────────────────────────────────────────────── + phase_header "1" "Load Image" "${toString constants.timeouts.load}" + start_time=$(time_ms) + + # Fast cache check using Nix inputs hash label + EXPECTED_HASH="${containerInputsHash}" + LOADED_HASH="" + + # Check if image exists and get its inputs hash label + if $CONTAINER_RUNTIME image inspect "$IMAGE" &>/dev/null; then + LOADED_HASH=$($CONTAINER_RUNTIME inspect "$IMAGE" --format '{{index .Config.Labels "nix.inputs.hash"}}' 2>/dev/null || echo "") + fi + + # Show cache check status + if [[ -n "$LOADED_HASH" ]]; then + info " Cache: expected=$EXPECTED_HASH loaded=$LOADED_HASH" + else + info " Cache: no existing image label found" + fi + + if [[ -n "$EXPECTED_HASH" && "$EXPECTED_HASH" == "$LOADED_HASH" ]]; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image unchanged, skipping load" "$elapsed" + record_result "load" "true" "$elapsed" + else + info " Loading image into $CONTAINER_RUNTIME..." 
+ if $CONTAINER_RUNTIME load < "$RESULT_LINK" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image loaded" "$elapsed" + record_result "load" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Failed to load image" "$elapsed" + record_result "load" "false" "$elapsed" + cleanup + exit 1 + fi + fi + + # ─── Phase 2: Start Container ───────────────────────────────────────── + phase_header "2" "Start Container" "${toString constants.timeouts.start}" + start_time=$(time_ms) + + # Remove existing container if present + if container_exists "$CONTAINER_NAME"; then + info " Removing existing container..." + container_remove "$CONTAINER_NAME" + sleep 1 + fi + + info " Starting container with port mappings (privileged + root for BPF)..." + if $CONTAINER_RUNTIME run -d \ + --name "$CONTAINER_NAME" \ + --privileged \ + --user root \ + -p "$PMCD_PORT:$PMCD_PORT" \ + -p "$PMPROXY_PORT:$PMPROXY_PORT" \ + "$IMAGE" 2>&1; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Container started" "$elapsed" + record_result "start" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Failed to start container" "$elapsed" + record_result "start" "false" "$elapsed" + cleanup + exit 1 + fi + + # ─── Phase 3: Verify Process ────────────────────────────────────────── + phase_header "3" "Verify Process" "${toString constants.timeouts.ready}" + start_time=$(time_ms) + + # Give pmcd time to start + sleep 2 + + ${lib.concatMapStringsSep "\n" (proc: '' + proc_start=$(time_ms) + if wait_for_process_in_container "${proc}" ${toString constants.timeouts.ready}; then + result_pass "${proc} running" "$(elapsed_ms "$proc_start")" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "${proc} not running" "$(elapsed_ms "$proc_start")" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + '') constants.checks.processes} + + elapsed=$(elapsed_ms "$start_time") + record_result "process" "true" 
"$elapsed" + + # ─── Phase 4: Verify Ports ──────────────────────────────────────────── + phase_header "4" "Verify Ports" "${toString constants.timeouts.ready}" + start_time=$(time_ms) + + port_passed=0 + port_failed=0 + + for port in $PMCD_PORT; do # Only pmcd runs in container + port_start=$(time_ms) + if wait_for_port "$port" ${toString constants.timeouts.ready}; then + result_pass "Port $port listening" "$(elapsed_ms "$port_start")" + port_passed=$((port_passed + 1)) + else + result_fail "Port $port not listening" "$(elapsed_ms "$port_start")" + port_failed=$((port_failed + 1)) + fi + done + + elapsed=$(elapsed_ms "$start_time") + if [[ $port_failed -eq 0 ]]; then + record_result "ports" "true" "$elapsed" + else + record_result "ports" "false" "$elapsed" + fi + + # ─── Phase 5: Verify Metrics ────────────────────────────────────────── + phase_header "5" "Verify Metrics" "${toString constants.timeouts.ready}" + start_time=$(time_ms) + + metric_passed=0 + metric_failed=0 + + for metric in $METRICS; do + met_start=$(time_ms) + if check_metric "$metric"; then + result_pass "$metric" "$(elapsed_ms "$met_start")" + metric_passed=$((metric_passed + 1)) + else + result_fail "$metric not available" "$(elapsed_ms "$met_start")" + metric_failed=$((metric_failed + 1)) + fi + done + + elapsed=$(elapsed_ms "$start_time") + if [[ $metric_failed -eq 0 ]]; then + record_result "metrics" "true" "$elapsed" + else + record_result "metrics" "false" "$elapsed" + fi + + # ─── Phase 5b: Verify Kernel Metrics ───────────────────────────────── + phase_header "5b" "Verify Kernel Metrics" "${toString constants.timeouts.ready}" + start_time=$(time_ms) + + kernel_passed=0 + kernel_failed=0 + + for metric in $KERNEL_METRICS; do + met_start=$(time_ms) + if check_metric "$metric"; then + result_pass "$metric" "$(elapsed_ms "$met_start")" + kernel_passed=$((kernel_passed + 1)) + else + result_fail "$metric not available" "$(elapsed_ms "$met_start")" + kernel_failed=$((kernel_failed + 1)) + fi + 
done + + elapsed=$(elapsed_ms "$start_time") + if [[ $kernel_failed -eq 0 ]]; then + record_result "kernel_metrics" "true" "$elapsed" + else + record_result "kernel_metrics" "false" "$elapsed" + fi + + # ─── Phase 5c: Verify BPF Metrics ──────────────────────────────────── + phase_header "5c" "Verify BPF Metrics" "${toString constants.timeouts.ready}" + start_time=$(time_ms) + + BPF_AVAILABLE=true + + # Check if BPF PMDA is loaded + if ! check_metric "bpf"; then + warn " BPF PMDA not loaded - BPF metrics will be skipped" + BPF_AVAILABLE=false + fi + + if $BPF_AVAILABLE; then + bpf_passed=0 + bpf_failed=0 + + for metric in $BPF_METRICS; do + met_start=$(time_ms) + # BPF histogram metrics need time to populate, retry a few times + retry=0 + max_retries=6 + metric_ok=false + + while [[ $retry -lt $max_retries ]]; do + if pminfo -h "$CONTAINER_IP" -f "$metric" 2>/dev/null | grep -qE '(inst|value)'; then + metric_ok=true + break + fi + sleep 5 + retry=$((retry + 1)) + done + + if $metric_ok; then + result_pass "$metric" "$(elapsed_ms "$met_start")" + bpf_passed=$((bpf_passed + 1)) + else + result_fail "$metric not available" "$(elapsed_ms "$met_start")" + bpf_failed=$((bpf_failed + 1)) + fi + done + + elapsed=$(elapsed_ms "$start_time") + if [[ $bpf_failed -eq 0 ]]; then + record_result "bpf_metrics" "true" "$elapsed" + else + record_result "bpf_metrics" "false" "$elapsed" + fi + else + for metric in $BPF_METRICS; do + result_skip "$metric (BPF unavailable)" + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + done + elapsed=$(elapsed_ms "$start_time") + record_result "bpf_metrics" "skip" "$elapsed" + fi + + # ─── Phase 6: Shutdown ──────────────────────────────────────────────── + phase_header "6" "Shutdown" "${toString constants.timeouts.shutdown}" + start_time=$(time_ms) + + info " Stopping container..." 
+ if container_stop "$CONTAINER_NAME" ${toString constants.timeouts.shutdown}; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Clean shutdown" "$elapsed" + record_result "shutdown" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + warn " Graceful shutdown failed, forcing..." + container_kill "$CONTAINER_NAME" + result_warn "Forced kill" "$elapsed" + record_result "shutdown" "true" "$elapsed" + fi + + # ─── Phase 7: Cleanup ───────────────────────────────────────────────── + phase_header "7" "Cleanup" "${toString constants.timeouts.cleanup}" + start_time=$(time_ms) + + container_remove "$CONTAINER_NAME" + rm -f "$RESULT_LINK" + + elapsed=$(elapsed_ms "$start_time") + result_pass "Container removed" "$elapsed" + record_result "cleanup" "true" "$elapsed" + + # ─── Summary ────────────────────────────────────────────────────────── + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + + echo "" + bold " Timing Summary" + echo " $(printf '─%.0s' {1..37})" + printf " %-20s %10s\n" "Phase" "Time (ms)" + echo " $(printf '─%.0s' {1..37})" + for phase in build load start process ports metrics kernel_metrics bpf_metrics shutdown cleanup; do + if [[ -n "''${PHASE_TIMES[$phase]:-}" ]]; then + printf " %-20s %10s\n" "$phase" "''${PHASE_TIMES[$phase]}" + fi + done + echo " $(printf '─%.0s' {1..37})" + printf " %-20s %10s\n" "TOTAL" "$TOTAL_ELAPSED" + echo " $(printf '─%.0s' {1..37})" + + echo "" + bold "========================================" + if [[ $TOTAL_FAILED -eq 0 ]]; then + if [[ $TOTAL_SKIPPED -gt 0 ]]; then + success " Result: PASSED ($TOTAL_SKIPPED skipped)" + else + success " Result: ALL PHASES PASSED" + fi + success " Total time: $(format_ms "$TOTAL_ELAPSED")" + else + error " Result: $TOTAL_FAILED CHECKS FAILED" + fi + bold "========================================" + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + + # ─── Quick Test Script ────────────────────────────────────────────────── + # A faster test that skips the build phase (assumes image 
already exists) + # + mkQuickTest = pkgs.writeShellApplication { + name = "pcp-container-test-quick"; + runtimeInputs = commonInputs ++ containerInputs ++ pcpInputs; + text = '' + set +e + + # Set PCP_CONF so pminfo can find its configuration + export PCP_CONF="${pcpConfPath}" + + ${colorHelpers} + ${timingHelpers} + ${containerHelpers} + ${portHelpers} + ${processHelpers} + ${metricHelpers} + + # Configuration + CONTAINER_NAME="${constants.container.name}" + IMAGE="${constants.container.imageName}:${constants.container.imageTag}" + + PMCD_PORT=${toString mainConstants.ports.pmcd} + PMPROXY_PORT=${toString mainConstants.ports.pmproxy} + METRICS="${lib.concatStringsSep " " constants.checks.metrics}" + + TOTAL_PASSED=0 + TOTAL_FAILED=0 + + bold "========================================" + bold " PCP Container Quick Test" + bold "========================================" + echo "" + info "Assumes image is already loaded. Use 'pcp-container-test' for full test." + echo "" + + detect_runtime + + # Cleanup existing + if container_exists "$CONTAINER_NAME"; then + container_remove "$CONTAINER_NAME" + sleep 1 + fi + + # Start + info "Starting container..." 
+ $CONTAINER_RUNTIME run -d \ + --name "$CONTAINER_NAME" \ + -p "$PMCD_PORT:$PMCD_PORT" \ + -p "$PMPROXY_PORT:$PMPROXY_PORT" \ + "$IMAGE" + + sleep 3 + + # Verify process + ${lib.concatMapStringsSep "\n" (proc: '' + if check_process_in_container "${proc}"; then + result_pass "${proc} running" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "${proc} not running" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + '') constants.checks.processes} + + # Verify ports + for port in $PMCD_PORT; do # Only pmcd runs in container + if port_is_open "$port"; then + result_pass "Port $port listening" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "Port $port not listening" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + + # Verify metrics + for metric in $METRICS; do + if check_metric "$metric"; then + result_pass "$metric" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "$metric" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + + # Cleanup + container_remove "$CONTAINER_NAME" + + echo "" + if [[ $TOTAL_FAILED -eq 0 ]]; then + success "All $TOTAL_PASSED checks passed" + else + error "$TOTAL_FAILED checks failed" + fi + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + +in +{ + # Packages output for flake.nix + packages = { + pcp-container-test = mkFullTest; + pcp-container-test-quick = mkQuickTest; + }; + + # Apps output for flake.nix + apps = { + pcp-container-test = { + type = "app"; + program = "${mkFullTest}/bin/pcp-container-test"; + }; + pcp-container-test-quick = { + type = "app"; + program = "${mkQuickTest}/bin/pcp-container-test-quick"; + }; + }; +} diff --git a/nix/container-test/lib.nix b/nix/container-test/lib.nix new file mode 100644 index 0000000000..0af7732be6 --- /dev/null +++ b/nix/container-test/lib.nix @@ -0,0 +1,189 @@ +# nix/container-test/lib.nix +# +# Shell helper functions for PCP container lifecycle testing. +# Provides container-specific operations on top of shared helpers. 
+# +{ pkgs, lib }: +let + # Import shared helpers + sharedHelpers = import ../test-common/shell-helpers.nix { }; + sharedInputs = import ../test-common/inputs.nix { inherit pkgs; }; +in +rec { + # Runtime inputs - use shared common + container-specific + commonInputs = sharedInputs.common; + containerInputs = sharedInputs.container; + + # Re-export shared shell helpers + inherit (sharedHelpers) colorHelpers timingHelpers; + + # ─── Container Runtime Helpers ────────────────────────────────────────── + # Auto-detect and use docker or podman + containerHelpers = '' + CONTAINER_RUNTIME="" + CONTAINER_IP="" + + # Detect available container runtime + detect_runtime() { + if command -v docker &>/dev/null && docker info &>/dev/null; then + CONTAINER_RUNTIME="docker" + elif command -v podman &>/dev/null; then + CONTAINER_RUNTIME="podman" + else + error "No container runtime found (docker/podman)" + error "Install docker or podman and ensure the daemon is running" + exit 1 + fi + info "Using container runtime: $CONTAINER_RUNTIME" + } + + # Get container IP address (for direct connection, bypassing port mapping) + get_container_ip() { + local name="$1" + $CONTAINER_RUNTIME inspect "$name" --format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' 2>/dev/null + } + + # Check if container exists (running or stopped) + container_exists() { + local name="$1" + $CONTAINER_RUNTIME ps -a --format '{{.Names}}' 2>/dev/null | grep -q "^''${name}$" + } + + # Check if container is running + container_running() { + local name="$1" + $CONTAINER_RUNTIME ps --format '{{.Names}}' 2>/dev/null | grep -q "^''${name}$" + } + + # Execute command inside container + container_exec() { + $CONTAINER_RUNTIME exec "$CONTAINER_NAME" "$@" + } + + # Remove container (force) + container_remove() { + local name="$1" + $CONTAINER_RUNTIME rm -f "$name" &>/dev/null || true + } + + # Stop container with timeout + container_stop() { + local name="$1" + local timeout="$2" + $CONTAINER_RUNTIME stop -t 
"$timeout" "$name" &>/dev/null + } + + # Kill container + container_kill() { + local name="$1" + $CONTAINER_RUNTIME kill "$name" &>/dev/null || true + } + ''; + + # ─── Port Verification Helpers ────────────────────────────────────────── + portHelpers = '' + # Wait for a TCP port to be listening (uses container IP) + wait_for_port() { + local port="$1" + local timeout="$2" + local elapsed=0 + + # Get container IP if not set + if [[ -z "$CONTAINER_IP" ]]; then + CONTAINER_IP=$(get_container_ip "$CONTAINER_NAME") + fi + + while ! nc -z "$CONTAINER_IP" "$port" 2>/dev/null; do + sleep 1 + elapsed=$((elapsed + 1)) + if [[ $elapsed -ge $timeout ]]; then + return 1 + fi + done + return 0 + } + + # Check if port is listening (uses container IP) + port_is_open() { + local port="$1" + + # Get container IP if not set + if [[ -z "$CONTAINER_IP" ]]; then + CONTAINER_IP=$(get_container_ip "$CONTAINER_NAME") + fi + + nc -z "$CONTAINER_IP" "$port" 2>/dev/null + } + ''; + + # ─── Process Verification Helpers ─────────────────────────────────────── + processHelpers = '' + # Check if a process is running inside the container + # Uses /proc/1/comm since the container runs pmcd as PID 1 + check_process_in_container() { + local proc="$1" + local comm + comm=$(container_exec cat /proc/1/comm 2>/dev/null || true) + [[ "$comm" == "$proc" ]] + } + + # Wait for process to appear in container + wait_for_process_in_container() { + local proc="$1" + local timeout="$2" + local elapsed=0 + + while ! 
check_process_in_container "$proc"; do + sleep 1 + elapsed=$((elapsed + 1)) + if [[ $elapsed -ge $timeout ]]; then + return 1 + fi + done + return 0 + } + ''; + + # ─── Metric Verification Helpers ──────────────────────────────────────── + metricHelpers = '' + # Check if a metric is available via pminfo (uses container IP) + check_metric() { + local metric="$1" + + # Get container IP if not set + if [[ -z "$CONTAINER_IP" ]]; then + CONTAINER_IP=$(get_container_ip "$CONTAINER_NAME") + fi + + pminfo -h "$CONTAINER_IP" -f "$metric" &>/dev/null + } + + # Verify all metrics in a space-separated list + verify_all_metrics() { + local metrics="$1" + local failed=0 + + for m in $metrics; do + if check_metric "$m"; then + result_pass "$m" + else + result_fail "$m" + failed=$((failed + 1)) + fi + done + + return $failed + } + ''; + + # ─── Combined Helpers ─────────────────────────────────────────────────── + # All helpers combined for use in test scripts + allHelpers = lib.concatStringsSep "\n" [ + colorHelpers + timingHelpers + containerHelpers + portHelpers + processHelpers + metricHelpers + ]; +} diff --git a/nix/container.nix b/nix/container.nix new file mode 100644 index 0000000000..c27d29d157 --- /dev/null +++ b/nix/container.nix @@ -0,0 +1,223 @@ +# nix/container.nix +# +# OCI container image with PCP. +# Uses buildLayeredImage for better Docker layer caching. +# +# SECURITY: Container runs as 'pcp' user (UID 990), not root. +# This follows container best practices for reduced blast radius. +# For BPF metrics, run the container with --privileged and as root. 
+# +# Build: nix build .#pcp-container +# Load: docker load < result +# Run: docker run -d -p 44321:44321 -p 44322:44322 --name pcp pcp:latest +# +# For BPF support (requires privileged mode): +# docker run -d --privileged -p 44321:44321 --name pcp pcp:latest +# +# Directory structure mirrors nixos-module.nix: +# - Writable directories for logs, tmp, run +# - Symlinks to Nix store for read-only content (pmns, pmdas, configs) +# - Environment overrides for Nix store paths +# +{ pkgs, pcp }: +let + constants = import ./constants.nix; + inherit (constants) user paths ports logSubdirs storeSymlinks configSymlinks configWritableDirs pmdaDomains; + + # ─── Inputs Hash for Fast Cache Checking ─────────────────────────────────── + # Compute a hash of all inputs that affect the container image. + # This is embedded as a label, allowing instant cache validation + # without needing to extract/decompress the tarball. + # + # The hash changes when any of these change: + # - PCP package (source code, build config) + # - Base packages (bash, coreutils) + # - Container config (this file's logic via pmcdConf, bpfConf store paths) + # + inputsHash = builtins.substring 0 32 (builtins.hashString "sha256" ( + builtins.concatStringsSep ":" [ + pcp.outPath + pkgs.bashInteractive.outPath + pkgs.coreutils.outPath + ] + )); + + # NSS files for user/group resolution inside the container. + # Required because we run as non-root and need user lookup to work. 
+ passwdContents = '' + root:x:0:0:root:/root:/bin/sh + ${user.name}:x:${toString user.uid}:${toString user.gid}:${user.description}:${user.home}:/bin/sh + ''; + + groupContents = '' + root:x:0: + ${user.name}:x:${toString user.gid}: + ''; + + passwd = pkgs.writeTextDir "etc/passwd" passwdContents; + group = pkgs.writeTextDir "etc/group" groupContents; + + # ─── pmcd.conf with BPF PMDA ───────────────────────────────────────────── + # Configure pmcd with linux, pmcd, and bpf PMDAs + pmcdConf = pkgs.writeText "pmcd.conf" '' + # + # Performance Co-Pilot PMDA Configuration + # Generated for PCP container image + # + # Format: name domain_id type init_func path + # + + # ─── Base Platform PMDAs ────────────────────────────────────────────── + # Linux kernel metrics (DSO for performance) + linux ${toString pmdaDomains.linux} dso linux_init ${pcp}/libexec/pcp/pmdas/linux/pmda_linux.so + + # PMCD internal metrics (DSO) + pmcd ${toString pmdaDomains.pmcd} dso pmcd_init ${pcp}/libexec/pcp/pmdas/pmcd/pmda_pmcd.so + + # ─── BPF PMDA ───────────────────────────────────────────────────────── + # CO-RE eBPF metrics (requires privileged mode and BTF kernel) + bpf ${toString pmdaDomains.bpf} dso bpf_init ${pcp}/libexec/pcp/pmdas/bpf/pmda_bpf.so + ''; + + # ─── BPF module configuration ──────────────────────────────────────────── + # Enable runqlat (scheduler latency) and biolatency (disk I/O latency) + bpfConf = pkgs.writeText "bpf.conf" '' + # + # BPF PMDA module configuration + # Generated for PCP container image + # + # Enable impressive demo metrics: + # - runqlat: scheduler run queue latency histogram + # - biolatency: block I/O latency histogram + # + + [runqlat.so] + enabled = true + + [biolatency.so] + enabled = true + ''; + + # PCP environment variables - mirrors nixos-module.nix pcpEnv + # These override the Nix store paths baked into pcp.conf + pcpEnv = [ + "PCP_CONF=${pcp}/share/pcp/etc/pcp.conf" + "PCP_DIR=${pcp}" + # Point to our generated pmcd.conf with BPF PMDA + 
"PCP_PMCDCONF_PATH=/etc/pcp/pmcd/pmcd.conf" + # Mutable runtime paths (Nix store versions are read-only) + "PCP_LOG_DIR=${paths.logDir}" + "PCP_VAR_DIR=${paths.varDir}" + "PCP_TMP_DIR=${paths.tmpDir}" + "PCP_RUN_DIR=${paths.runDir}" + "PCP_ARCHIVE_DIR=${paths.archiveDir}" + # Override hardcoded /bin/pwd path in shell scripts + "PWDCMND=pwd" + "HOME=${user.home}" + ]; + + # Generate mkdir commands for log subdirectories + mkLogDirs = pkgs.lib.concatMapStringsSep "\n" + (dir: "mkdir -p var/log/pcp/${dir}") logSubdirs; + + # Generate symlinks to Nix store paths + mkStoreSymlinks = pkgs.lib.concatMapStringsSep "\n" + (name: "ln -sf ${pcp}/var/lib/pcp/${name} var/lib/pcp/${name}") storeSymlinks; + + # Generate config symlinks + mkConfigSymlinks = pkgs.lib.concatMapStringsSep "\n" + (name: "ln -sf ${pcp}/var/lib/pcp/config/${name} var/lib/pcp/config/${name}") configSymlinks; + + # Generate writable config directories + mkConfigWritableDirs = pkgs.lib.concatMapStringsSep "\n" + (dir: "mkdir -p var/lib/pcp/config/${dir}") configWritableDirs; + + # Generate chown commands for writable directories + chownWritableDirs = pkgs.lib.concatMapStringsSep "\n" + (dir: "chown -R ${toString user.uid}:${toString user.gid} var/lib/pcp/config/${dir}") configWritableDirs; + + # The actual container image + containerImage = pkgs.dockerTools.buildLayeredImage { + name = "pcp"; + tag = "latest"; + + contents = [ + pcp + pkgs.bashInteractive + pkgs.coreutils + passwd + group + ]; + + # Create directory structure - mirrors nixos-module.nix tmpfiles.rules + # Note: extraCommands runs without fakeroot, so no chown here + extraCommands = '' + # Runtime directories (writable) + mkdir -p var/lib/pcp + mkdir -p var/lib/pcp/tmp + mkdir -p var/log/pcp + ${mkLogDirs} + mkdir -p run/pcp + mkdir -p tmp + chmod 1777 tmp + + # Symlinks to read-only Nix store paths (pmns, pmdas, pmcd) + ${mkStoreSymlinks} + + # Config directory - mix of symlinks and writable dirs + mkdir -p var/lib/pcp/config + 
${mkConfigSymlinks} + + # Writable config directories + ${mkConfigWritableDirs} + + # ─── PCP configuration files ───────────────────────────────────────── + # pmcd.conf with BPF PMDA enabled + mkdir -p etc/pcp/pmcd + cp ${pmcdConf} etc/pcp/pmcd/pmcd.conf + + # BPF PMDA configuration (enable runqlat and biolatency modules) + mkdir -p etc/pcp/bpf + cp ${bpfConf} etc/pcp/bpf/bpf.conf + + # Symlink bpf.conf to where the BPF PMDA expects it + mkdir -p var/lib/pcp/pmdas/bpf + ln -sf /etc/pcp/bpf/bpf.conf var/lib/pcp/pmdas/bpf/bpf.conf + ''; + + # fakeRootCommands runs with fakeroot, allowing chown to work + fakeRootCommands = '' + # Set ownership on writable directories + chown -R ${toString user.uid}:${toString user.gid} var/lib/pcp/tmp + ${chownWritableDirs} + chown -R ${toString user.uid}:${toString user.gid} var/log/pcp + chown -R ${toString user.uid}:${toString user.gid} run/pcp + ''; + + config = { + # SECURITY: Run as pcp user, not root + User = user.name; + Env = pcpEnv; + # -f: foreground, -i 0.0.0.0: listen on all interfaces for container networking + Cmd = [ "${pcp}/libexec/pcp/bin/pmcd" "-f" "-i" "0.0.0.0" ]; + ExposedPorts = { + "${toString ports.pmcd}/tcp" = {}; + "${toString ports.pmproxy}/tcp" = {}; + }; + WorkingDir = user.home; + # Label for fast cache checking - avoids tarball extraction + Labels = { + "nix.inputs.hash" = inputsHash; + }; + }; +}; + +in +{ + # The container image derivation + image = containerImage; + + # The inputs hash for fast cache validation + # Test scripts can compare this with the loaded image's label + inherit inputsHash; +} diff --git a/nix/dashboards/pcp-bpf-overview.json b/nix/dashboards/pcp-bpf-overview.json new file mode 100644 index 0000000000..a2b0b8dcbf --- /dev/null +++ b/nix/dashboards/pcp-bpf-overview.json @@ -0,0 +1,318 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": 
false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "panels": [], + "title": "Latency Histograms", + "type": "row" + }, + { + "datasource": { "type": "performancecopilot-vector-datasource", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "hideFrom": { "legend": false, "tooltip": false, "viz": false } + }, + "mappings": [], + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 2, + "options": { + "displayLabels": [], + "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "bpf.runq.latency", + "format": "time_series", + "legendFormat": "$instance", + "refId": "A" + } + ], + "title": "Run Queue Latency Distribution", + "description": "Scheduler run queue latency - time tasks wait before being scheduled", + "type": "piechart" + }, + { + "datasource": { "type": "performancecopilot-vector-datasource", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "hideFrom": { "legend": false, "tooltip": false, "viz": false } + }, + "mappings": [], + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 3, + "options": { + "displayLabels": [], + "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, + "pieType": "pie", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "bpf.disk.all.latency", + "format": "time_series", + "legendFormat": "$instance", + "refId": "A" + } + ], + "title": "Block I/O Latency Distribution", + "description": "Block device I/O latency histogram - time to complete I/O 
operations", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 9 }, + "id": 4, + "panels": [], + "title": "Network I/O (per process)", + "type": "row" + }, + { + "datasource": { "type": "performancecopilot-vector-datasource", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 10 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "rate(bpf.proc.net.tcp.send.bytes)", + "format": "time_series", + "legendFormat": "TCP Send", + "refId": "A" + }, + { + "expr": "rate(bpf.proc.net.tcp.recv.bytes)", + "format": "time_series", + "legendFormat": "TCP Recv", + "refId": "B" + } + ], + "title": "TCP Network Throughput", + "description": "Per-process TCP network bytes sent and received", + "type": "timeseries" + }, + { + "datasource": { "type": "performancecopilot-vector-datasource", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 10 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "expr": "rate(bpf.proc.net.udp.send.bytes)", + "format": "time_series", + "legendFormat": "UDP Send", + "refId": "A" + }, + { + "expr": "rate(bpf.proc.net.udp.recv.bytes)", + "format": "time_series", + "legendFormat": "UDP Recv", + "refId": "B" + } + ], + "title": "UDP Network Throughput", + "description": "Per-process UDP network bytes sent and received", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, + "id": 7, + "panels": [], + "title": "OOM Killer Events", + "type": "row" + }, + { + "datasource": { "type": "performancecopilot-vector-datasource", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 1 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 19 }, + "id": 8, + "options": { + "cellHeight": "sm", + 
"footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true + }, + "targets": [ + { + "expr": "bpf.oomkill.tcomm", + "format": "time_series", + "legendFormat": "Triggered Comm", + "refId": "A" + }, + { + "expr": "bpf.oomkill.tpid", + "format": "time_series", + "legendFormat": "Triggered PID", + "refId": "B" + }, + { + "expr": "bpf.oomkill.fcomm", + "format": "time_series", + "legendFormat": "Killed Comm", + "refId": "C" + }, + { + "expr": "bpf.oomkill.fpid", + "format": "time_series", + "legendFormat": "Killed PID", + "refId": "D" + }, + { + "expr": "bpf.oomkill.pages", + "format": "time_series", + "legendFormat": "Pages", + "refId": "E" + } + ], + "title": "OOM Kill Events", + "description": "Out-of-memory killer events - processes killed due to memory pressure", + "type": "table" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": ["pcp", "bpf", "ebpf"], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "performancecopilot-vector-datasource", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { "from": "now-5m", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "PCP BPF Overview", + "uid": "pcp-bpf-overview", + "version": 1, + "weekStart": "" +} diff --git a/nix/grafana.nix b/nix/grafana.nix new file mode 100644 index 0000000000..dad0376254 --- /dev/null +++ b/nix/grafana.nix @@ -0,0 +1,236 @@ +# nix/grafana.nix +# +# NixOS module for Grafana with PCP dashboards. +# Provides visualization for PCP metrics via the grafana-pcp plugin. +# +# When Prometheus is also enabled, provisions the Node Exporter Full +# dashboard for head-to-head comparison of PCP vs Prometheus metrics. +# +# Usage: +# imports = [ ./grafana.nix ]; +# services.pcp.grafana.enable = true; +# +{ config, lib, pkgs, ... 
}: + +with lib; +let + cfg = config.services.pcp.grafana; + pcpCfg = config.services.pcp; + constants = import ./constants.nix; + + # ─── grafana-pcp plugin ─────────────────────────────────────────────── + # Fetched from Grafana CDN and unpacked. + # Version 5.0.0 is the current stable release (2022-06-30). + # Note: In v5.0.0 the datasource is called "redis", renamed to "valkey" in later versions. + pcpPlugin = pkgs.stdenv.mkDerivation { + pname = "grafana-pcp-plugin"; + version = "5.0.0"; + + src = pkgs.fetchurl { + url = "https://grafana.com/api/plugins/performancecopilot-pcp-app/versions/5.0.0/download"; + sha256 = "0jgdnzzinv2skw7rxkijkgkjw4aal2znqpkn2xczlksh9xymfmvj"; + name = "grafana-pcp-5.0.0.zip"; + }; + + nativeBuildInputs = [ pkgs.unzip ]; + + unpackPhase = '' + runHook preUnpack + unzip $src + runHook postUnpack + ''; + + # No build needed - just install the plugin files + dontBuild = true; + + installPhase = '' + runHook preInstall + mkdir -p $out + # The zip extracts to performancecopilot-pcp-app/ + cp -r performancecopilot-pcp-app/* $out/ + runHook postInstall + ''; + + meta = { + description = "Performance Co-Pilot Grafana Plugin"; + homepage = "https://github.com/performancecopilot/grafana-pcp"; + license = lib.licenses.asl20; + }; + }; + + # ─── Node Exporter Full dashboard ───────────────────────────────────── + # Popular Prometheus dashboard for system metrics. + # Pinned to specific commit for reproducibility. 
+ # Source: https://github.com/rfmoz/grafana-dashboards + # Grafana Dashboard ID: 1860 + nodeExporterDashboard = pkgs.fetchurl { + url = "https://raw.githubusercontent.com/rfmoz/grafana-dashboards/741b1b3878d920439e413c7a7a3ff9cfa8ab2a20/prometheus/node-exporter-full.json"; + sha256 = "1x6r6vrif259zjjzh8m1cdhxr7hnr57ija76vgipyaryh8pyrv33"; + }; + + # Create a directory structure for Prometheus dashboards + prometheusDashboards = pkgs.linkFarm "prometheus-dashboards" [ + { name = "node-exporter-full.json"; path = nodeExporterDashboard; } + ]; + + # ─── Custom PCP dashboards ──────────────────────────────────────────── + # BPF overview dashboard for pmdabpf metrics (bpf.* namespace) + # The plugin's BCC dashboard uses bcc.* metrics which require pmdabcc + customDashboards = pkgs.linkFarm "custom-pcp-dashboards" [ + { name = "pcp-bpf-overview.json"; path = ./dashboards/pcp-bpf-overview.json; } + ]; + +in +{ + # ═══════════════════════════════════════════════════════════════════════ + # Options interface + # ═══════════════════════════════════════════════════════════════════════ + + options.services.pcp.grafana = { + enable = mkEnableOption "Grafana with PCP dashboards"; + + port = mkOption { + type = types.port; + default = constants.ports.grafana; + description = "Port for Grafana web interface."; + }; + + adminPassword = mkOption { + type = types.str; + default = "pcp"; + description = '' + Grafana admin password. + Default is 'pcp' for local development - INSECURE. + Only use for local testing. 
+ ''; + }; + + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Open firewall port for Grafana."; + }; + }; + + # ═══════════════════════════════════════════════════════════════════════ + # Implementation + # ═══════════════════════════════════════════════════════════════════════ + + config = mkIf cfg.enable { + # Require PCP to be enabled (need pmproxy running for the datasource) + assertions = [{ + assertion = pcpCfg.enable; + message = "services.pcp.grafana requires services.pcp.enable = true"; + } { + assertion = pcpCfg.pmproxy.enable; + message = "services.pcp.grafana requires services.pcp.pmproxy.enable = true (for PCP Vector datasource)"; + }]; + + warnings = [ + "Grafana is configured with insecure default password 'pcp'. Only use for local development." + ]; + + services.grafana = { + enable = true; + + settings = { + server = { + http_addr = "0.0.0.0"; # Allow host access via port forward + http_port = cfg.port; + }; + + security = { + admin_user = "admin"; + admin_password = cfg.adminPassword; + }; + + # Disable analytics/phone-home + analytics.reporting_enabled = false; + + # Allow unsigned community plugin + plugins.allow_loading_unsigned_plugins = "performancecopilot-pcp-app"; + + # Login hint for insecure dev mode + "auth.basic".enabled = true; + }; + + # ─── Plugin loading ─────────────────────────────────────────────── + # declarativePlugins expects a list of plugin derivations + declarativePlugins = [ pcpPlugin ]; + + # ─── Provisioning ───────────────────────────────────────────────── + provision = { + enable = true; + + # Datasources + datasources.settings.datasources = [ + # PCP Vector - real-time metrics via pmproxy + { + name = "PCP Vector"; + type = "performancecopilot-vector-datasource"; + access = "proxy"; + url = "http://localhost:${toString constants.ports.pmproxy}"; + isDefault = true; + editable = false; + jsonData = { + hostspec = "localhost"; + }; + } + ] ++ optionals 
(config.services.prometheus.enable or false) [ + # Prometheus - when enabled for comparison + { + name = "Prometheus"; + type = "prometheus"; + access = "proxy"; + url = "http://localhost:${toString constants.ports.prometheus}"; + editable = false; + } + ]; + + # Dashboard providers + dashboards.settings.providers = [ + # PCP Vector dashboards from plugin + { + name = "PCP Vector"; + type = "file"; + folder = "PCP"; + options.path = "${pcpPlugin}/datasources/vector/dashboards"; + disableDeletion = true; + } + # Custom PCP dashboards (BPF overview for pmdabpf metrics) + { + name = "PCP Custom"; + type = "file"; + folder = "PCP"; + options.path = customDashboards; + disableDeletion = true; + } + ] ++ optionals (config.services.prometheus.enable or false) [ + # Node Exporter Full dashboard for Prometheus comparison + { + name = "Prometheus"; + type = "file"; + folder = "Prometheus"; + options.path = prometheusDashboards; + disableDeletion = true; + } + ]; + }; + }; + + # ─── Firewall ───────────────────────────────────────────────────────── + networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ cfg.port ]; + + # ─── Security hardening ─────────────────────────────────────────────── + # Additional hardening beyond NixOS defaults + systemd.services.grafana.serviceConfig = { + # DynamicUser provides additional isolation + # Note: NixOS grafana module may already set some of these + ProtectKernelTunables = true; + ProtectControlGroups = true; + RestrictNamespaces = true; + RestrictRealtime = true; + LockPersonality = true; + }; + }; +} diff --git a/nix/k8s-test/constants.nix b/nix/k8s-test/constants.nix new file mode 100644 index 0000000000..271a45a380 --- /dev/null +++ b/nix/k8s-test/constants.nix @@ -0,0 +1,74 @@ +# nix/k8s-test/constants.nix +# +# Kubernetes DaemonSet testing configuration. +# Provides constants for Minikube-based PCP testing with BPF metrics. 
+# +{ }: +let + common = import ../test-common/constants.nix { }; +in +rec { + # Re-export from shared constants + inherit (common) ports user colors; + + # ─── Kubernetes Settings ─────────────────────────────────────────────── + k8s = { + namespace = "pcp-test"; + daemonSetName = "pcp"; + imageName = "pcp"; + imageTag = "latest"; + }; + + # ─── Timeouts (seconds) ──────────────────────────────────────────────── + timeouts = { + prerequisites = 30; # Phase 0: Check minikube/kubectl + build = common.timeouts.build; + load = 120; # Phase 2: Load image into minikube (longer for first load) + deploy = 30; # Phase 3: Apply DaemonSet manifest + podsReady = 120; # Phase 4: Wait for pods to be ready + verify = common.timeouts.ready; + metrics = 60; # Phase 7: Verify kernel metrics + bpfMetrics = 90; # Phase 8: BPF metrics need time to collect data + cleanup = common.timeouts.shutdown; + }; + + # ─── Minikube Recommended Settings ───────────────────────────────────── + minikube = { + driver = "docker"; # Docker driver - most reliable, works everywhere + cpus = 4; # More CPUs for parallel workloads + memory = 8192; # 8GB RAM for comfortable operation + diskSize = "20g"; # 20GB disk + }; + + # ─── Verification Checks ─────────────────────────────────────────────── + checks = { + # Processes to verify inside pods + processes = [ "pmcd" ]; + + # Basic metrics (always available) + basicMetrics = common.metrics.basic; + + # Kernel metrics (require host /proc access) + kernelMetrics = common.metrics.kernel; + + # BPF metrics (require privileged + BTF kernel) + bpfMetrics = common.metrics.bpf; + + # Ports to verify + ports = [ 44321 ]; + }; + + # ─── Phase Definitions ───────────────────────────────────────────────── + phases = { + "0" = { name = "Prerequisites"; description = "Check minikube and kubectl"; }; + "1" = { name = "Build Image"; description = "Build OCI image with nix build"; }; + "2" = { name = "Load Image"; description = "Load image into minikube"; }; + "3" = { name 
= "Deploy DaemonSet"; description = "Apply privileged DaemonSet manifest"; }; + "4" = { name = "Wait Pods Ready"; description = "Wait for all DaemonSet pods"; }; + "5" = { name = "Verify Process"; description = "Check pmcd running in each pod"; }; + "6" = { name = "Verify Ports"; description = "Check port 44321 in each pod"; }; + "7" = { name = "Verify Kernel Metrics"; description = "Query kernel.all.load, mem.physmem"; }; + "8" = { name = "Verify BPF Metrics"; description = "Query bpf.runq.latency, bpf.disk.all.latency"; }; + "9" = { name = "Cleanup"; description = "Delete namespace and resources"; }; + }; +} diff --git a/nix/k8s-test/default.nix b/nix/k8s-test/default.nix new file mode 100644 index 0000000000..5bcfcdd01b --- /dev/null +++ b/nix/k8s-test/default.nix @@ -0,0 +1,646 @@ +# nix/k8s-test/default.nix +# +# Entry point for PCP Kubernetes DaemonSet lifecycle testing. +# Generates test scripts for deploying PCP as a privileged DaemonSet +# in minikube with full node monitoring including BPF metrics. 
+# +# Usage in flake.nix: +# k8sTest = import ./nix/k8s-test { inherit pkgs lib pcp; }; +# +# Generated outputs: +# k8sTest.packages.pcp-k8s-test - Full lifecycle test +# k8sTest.packages.pcp-k8s-test-quick - Quick test (skip build) +# k8sTest.apps.pcp-k8s-test - App entry point +# k8sTest.apps.pcp-k8s-test-quick - Quick app entry point +# +{ pkgs, lib, pcp, containerInputsHash }: +let + constants = import ./constants.nix { }; + mainConstants = import ../constants.nix; + helpers = import ./lib.nix { inherit pkgs lib; }; + manifests = import ./manifests.nix { inherit pkgs lib; }; + + inherit (helpers) + colorHelpers timingHelpers k8sHelpers metricHelpers + commonInputs k8sInputs; + + # PCP tools for metric verification (use local package) + pcpInputs = [ pcp ]; + + # PCP_CONF path for pminfo + pcpConfPath = "${pcp}/share/pcp/etc/pcp.conf"; + + # ─── Full Lifecycle Test Script ──────────────────────────────────────── + # Tests the complete Kubernetes deployment lifecycle: + # Prerequisites -> Build -> Load -> Deploy -> Verify (including BPF) -> Cleanup + # + mkFullTest = pkgs.writeShellApplication { + name = "pcp-k8s-test"; + runtimeInputs = commonInputs ++ k8sInputs ++ pcpInputs; + text = '' + set +e # Don't exit on first failure + + # Set PCP_CONF so pminfo can find its configuration + export PCP_CONF="${pcpConfPath}" + + ${colorHelpers} + ${timingHelpers} + ${k8sHelpers} + ${metricHelpers} + + # Configuration + NAMESPACE="${constants.k8s.namespace}" + DAEMONSET_NAME="${constants.k8s.daemonSetName}" + RESULT_LINK="result-container" + MANIFEST_FILE="${manifests.manifestFile}" + + # Metrics to verify (exported for potential external use) + export KERNEL_METRICS="${lib.concatStringsSep " " constants.checks.kernelMetrics}" + export BPF_METRICS="${lib.concatStringsSep " " constants.checks.bpfMetrics}" + + # Ports to verify + PORTS_TO_CHECK="${lib.concatStringsSep " " (map toString constants.checks.ports)}" + + # Timing tracking + declare -A PHASE_TIMES + 
TOTAL_START=$(time_ms) + + # Results tracking + TOTAL_PASSED=0 + TOTAL_FAILED=0 + TOTAL_SKIPPED=0 + BPF_AVAILABLE=true + + record_result() { + local phase="$1" + local passed="$2" + local time_ms="$3" + PHASE_TIMES["$phase"]=$time_ms + if [[ "$passed" == "true" ]]; then + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + elif [[ "$passed" == "skip" ]]; then + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + } + + # Cleanup function + cleanup() { + info "Cleaning up..." + cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup} || true + rm -f "$RESULT_LINK" + } + + # Header + bold "========================================" + bold " PCP Kubernetes DaemonSet Test" + bold "========================================" + echo "" + + # ─── Phase 0: Prerequisites ──────────────────────────────────────────── + phase_header "0" "Prerequisites" "${toString constants.timeouts.prerequisites}" + start_time=$(time_ms) + + # Check minikube + if minikube_running; then + NODE_COUNT=$(get_node_count) + elapsed=$(elapsed_ms "$start_time") + result_pass "minikube running ($NODE_COUNT node(s))" "$elapsed" + record_result "prerequisites" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "minikube not running" "$elapsed" + error "" + error "Please start minikube first. 
Options:" + error "" + error " # Recommended: Use helper with optimal settings (KVM2, 4 CPUs, 8GB RAM):" + error " nix run .#pcp-minikube-start" + error "" + error " # Or start manually:" + error " nix shell nixpkgs#minikube -c minikube start --driver=kvm2 --cpus=4 --memory=8192" + error "" + error " # For multi-node testing:" + error " nix shell nixpkgs#minikube -c minikube start --driver=kvm2 --cpus=4 --memory=8192 --nodes=2" + record_result "prerequisites" "false" "$elapsed" + exit 1 + fi + + # Check kubectl + if command -v kubectl &>/dev/null; then + result_pass "kubectl available" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "kubectl not found" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + exit 1 + fi + + echo "" + info "Using minikube cluster with $NODE_COUNT node(s)" + + # Quick connectivity check before slow operations + info "Verifying minikube connectivity..." + if ! timeout 10 kubectl cluster-info &>/dev/null; then + error "" + error "Kubernetes API not responding (10s timeout)." + error "The minikube cluster may be unresponsive or have stale port mappings." + error "" + error "Try:" + error " minikube update-context # Fix stale port mappings" + error " minikube stop && minikube start # Restart cluster" + error " minikube delete && minikube start # Full reset" + exit 1 + fi + result_pass "Kubernetes API responsive" + + # Switch to minikube's docker daemon + info "Connecting to minikube's docker daemon..." + docker_env_start=$(time_ms) + eval "$(minikube docker-env)" + docker_env_elapsed=$(elapsed_ms "$docker_env_start") + info " Connected (''${docker_env_elapsed}ms)" + PHASE_TIMES["docker_env"]=$docker_env_elapsed + + # ─── Phase 1: Build Image ────────────────────────────────────────────── + phase_header "1" "Build Image" "${toString constants.timeouts.build}" + start_time=$(time_ms) + + # Clean up any existing result link + rm -f "$RESULT_LINK" + + info " Building pcp-container..." 
+ if nix build ".#pcp-container" -o "$RESULT_LINK" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image built" "$elapsed" + record_result "build" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Build failed" "$elapsed" + record_result "build" "false" "$elapsed" + exit 1 + fi + + # ─── Phase 2: Load Image ─────────────────────────────────────────────── + phase_header "2" "Load Image" "${toString constants.timeouts.load}" + start_time=$(time_ms) + + # Fast cache check using Nix inputs hash label (docker-env already set above) + EXPECTED_HASH="${containerInputsHash}" + LOADED_HASH="" + + # Check if image exists and get its inputs hash label + if docker image inspect "pcp:latest" &>/dev/null; then + LOADED_HASH=$(docker inspect "pcp:latest" --format '{{index .Config.Labels "nix.inputs.hash"}}' 2>/dev/null || echo "") + fi + + # Show cache check status + if [[ -n "$LOADED_HASH" ]]; then + info " Cache: expected=$EXPECTED_HASH loaded=$LOADED_HASH" + else + info " Cache: no existing image label found" + fi + + if [[ -n "$EXPECTED_HASH" && "$EXPECTED_HASH" == "$LOADED_HASH" ]]; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image unchanged, skipping load" "$elapsed" + record_result "load" "true" "$elapsed" + else + info " Loading image into minikube's docker..." 
+ if docker load < "$RESULT_LINK" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Image loaded into minikube" "$elapsed" + record_result "load" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Failed to load image" "$elapsed" + record_result "load" "false" "$elapsed" + cleanup + exit 1 + fi + fi + + # ─── Phase 3: Deploy DaemonSet ───────────────────────────────────────── + phase_header "3" "Deploy DaemonSet" "${toString constants.timeouts.deploy}" + start_time=$(time_ms) + + # Clean up any existing deployment + if kubectl get namespace "$NAMESPACE" &>/dev/null; then + info " Removing existing deployment..." + cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup} + sleep 2 + fi + + info " Applying DaemonSet manifest..." + if kubectl apply -f "$MANIFEST_FILE" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "DaemonSet deployed" "$elapsed" + record_result "deploy" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Failed to deploy DaemonSet" "$elapsed" + record_result "deploy" "false" "$elapsed" + cleanup + exit 1 + fi + + # ─── Phase 4: Wait Pods Ready ────────────────────────────────────────── + phase_header "4" "Wait Pods Ready" "${toString constants.timeouts.podsReady}" + start_time=$(time_ms) + + info " Waiting for DaemonSet pods to be ready..." 
+ if READY_COUNT=$(wait_daemonset_ready "$NAMESPACE" "$DAEMONSET_NAME" ${toString constants.timeouts.podsReady}); then + elapsed=$(elapsed_ms "$start_time") + result_pass "$READY_COUNT/$NODE_COUNT pods ready" "$elapsed" + record_result "pods_ready" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Pods not ready in time" "$elapsed" + record_result "pods_ready" "false" "$elapsed" + # Show pod status for debugging + kubectl get pods -n "$NAMESPACE" -o wide + kubectl describe pods -n "$NAMESPACE" | tail -50 + cleanup + exit 1 + fi + + # Get list of pods for verification + PODS=$(get_daemonset_pods "$NAMESPACE" "$DAEMONSET_NAME") + info " Pods: $PODS" + + # ─── Phase 5: Verify Process ─────────────────────────────────────────── + phase_header "5" "Verify Process" "${toString constants.timeouts.verify}" + start_time=$(time_ms) + + for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + proc_start=$(time_ms) + if check_process_in_pod "$NAMESPACE" "$pod" "pmcd"; then + result_pass "pmcd running on $node ($pod)" "$(elapsed_ms "$proc_start")" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "pmcd not running on $node ($pod)" "$(elapsed_ms "$proc_start")" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + + elapsed=$(elapsed_ms "$start_time") + record_result "process" "true" "$elapsed" + + # ─── Phase 6: Verify Ports ───────────────────────────────────────────── + phase_header "6" "Verify Ports" "${toString constants.timeouts.verify}" + start_time=$(time_ms) + + for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + for port in $PORTS_TO_CHECK; do + port_start=$(time_ms) + if check_port_in_pod "$NAMESPACE" "$pod" "$port"; then + result_pass "Port $port on $node ($pod)" "$(elapsed_ms "$port_start")" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "Port $port not listening on $node ($pod)" "$(elapsed_ms "$port_start")" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + done + + elapsed=$(elapsed_ms 
"$start_time") + record_result "ports" "true" "$elapsed" + + # ─── Phase 7: Verify Kernel Metrics ──────────────────────────────────── + phase_header "7" "Verify Kernel Metrics" "${toString constants.timeouts.metrics}" + start_time=$(time_ms) + + for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + info " Node: $node" + + for metric in $KERNEL_METRICS; do + met_start=$(time_ms) + if check_metric_in_pod "$NAMESPACE" "$pod" "$metric"; then + value=$(get_metric_value "$NAMESPACE" "$pod" "$metric" | head -1) + result_pass "$metric = $value" "$(elapsed_ms "$met_start")" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "$metric not available" "$(elapsed_ms "$met_start")" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + done + + elapsed=$(elapsed_ms "$start_time") + record_result "kernel_metrics" "true" "$elapsed" + + # ─── Phase 8: Verify BPF Metrics ─────────────────────────────────────── + phase_header "8" "Verify BPF Metrics" "${toString constants.timeouts.bpfMetrics}" + start_time=$(time_ms) + + # First check if BTF is available (required for BPF PMDA) + first_pod=$(echo "$PODS" | awk '{print $1}') + if ! check_btf_available "$NAMESPACE" "$first_pod"; then + warn " BTF not available - BPF metrics will be skipped" + warn " (Minikube kernel may not have CONFIG_DEBUG_INFO_BTF=y)" + BPF_AVAILABLE=false + fi + + # Also check if BPF PMDA is loaded + if $BPF_AVAILABLE && ! 
check_metric_in_pod "$NAMESPACE" "$first_pod" "bpf"; then + warn " BPF PMDA not loaded - BPF metrics will be skipped" + warn " (Container may need BPF PMDA installation)" + BPF_AVAILABLE=false + fi + + if $BPF_AVAILABLE; then + for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + info " Node: $node" + + for metric in $BPF_METRICS; do + met_start=$(time_ms) + # BPF histogram metrics need retries as they populate over time + if bucket_count=$(check_bpf_metric "$NAMESPACE" "$pod" "$metric" 6); then + result_pass "$metric (histogram with $bucket_count buckets)" "$(elapsed_ms "$met_start")" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "$metric not available" "$(elapsed_ms "$met_start")" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + done + else + for metric in $BPF_METRICS; do + result_skip "$metric (BPF unavailable)" + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + done + fi + + elapsed=$(elapsed_ms "$start_time") + record_result "bpf_metrics" "true" "$elapsed" + + # ─── Phase 9: Cleanup ────────────────────────────────────────────────── + phase_header "9" "Cleanup" "${toString constants.timeouts.cleanup}" + start_time=$(time_ms) + + info " Deleting namespace..." 
+ if cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup}; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Namespace deleted" "$elapsed" + record_result "cleanup" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_warn "Cleanup timed out" "$elapsed" + record_result "cleanup" "true" "$elapsed" + fi + + rm -f "$RESULT_LINK" + + # ─── Summary ─────────────────────────────────────────────────────────── + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + + echo "" + bold " Timing Summary" + echo " $(printf '─%.0s' {1..37})" + printf " %-20s %10s\n" "Phase" "Time (ms)" + echo " $(printf '─%.0s' {1..37})" + for phase in prerequisites docker_env build load deploy pods_ready process ports kernel_metrics bpf_metrics cleanup; do + if [[ -n "''${PHASE_TIMES[$phase]:-}" ]]; then + printf " %-20s %10s\n" "$phase" "''${PHASE_TIMES[$phase]}" + fi + done + echo " $(printf '─%.0s' {1..37})" + printf " %-20s %10s\n" "TOTAL" "$TOTAL_ELAPSED" + echo " $(printf '─%.0s' {1..37})" + + echo "" + bold "========================================" + if [[ $TOTAL_FAILED -eq 0 ]]; then + if [[ $TOTAL_SKIPPED -gt 0 ]]; then + success " Result: PASSED ($TOTAL_SKIPPED skipped)" + else + success " Result: ALL PHASES PASSED" + fi + success " Total time: $(format_ms "$TOTAL_ELAPSED")" + else + error " Result: $TOTAL_FAILED CHECKS FAILED" + fi + bold "========================================" + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + + # ─── Quick Test Script ───────────────────────────────────────────────── + # A faster test that skips the build phase (assumes image already loaded) + # + mkQuickTest = pkgs.writeShellApplication { + name = "pcp-k8s-test-quick"; + runtimeInputs = commonInputs ++ k8sInputs ++ pcpInputs; + text = '' + set +e + + # Set PCP_CONF so pminfo can find its configuration + export PCP_CONF="${pcpConfPath}" + + ${colorHelpers} + ${timingHelpers} + ${k8sHelpers} + ${metricHelpers} + + # Configuration + NAMESPACE="${constants.k8s.namespace}" 
+ DAEMONSET_NAME="${constants.k8s.daemonSetName}" + MANIFEST_FILE="${manifests.manifestFile}" + + export KERNEL_METRICS="${lib.concatStringsSep " " constants.checks.kernelMetrics}" + + TOTAL_PASSED=0 + TOTAL_FAILED=0 + TOTAL_START=$(time_ms) + + bold "========================================" + bold " PCP Kubernetes Quick Test" + bold "========================================" + echo "" + info "Assumes image is already loaded. Use 'pcp-k8s-test' for full test." + echo "" + + # Check minikube + if ! minikube_running; then + error "minikube not running. Please start it first:" + error "" + error " # If minikube is installed on your system:" + error " minikube start" + error "" + error " # Or use nix to run minikube:" + error " nix shell nixpkgs#minikube -c minikube start" + exit 1 + fi + + NODE_COUNT=$(get_node_count) + info "Using minikube cluster with $NODE_COUNT node(s)" + echo "" + + # Clean up existing deployment + if kubectl get namespace "$NAMESPACE" &>/dev/null; then + info "Removing existing deployment..." + cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup} + sleep 2 + fi + + # Deploy + info "Deploying DaemonSet..." + kubectl apply -f "$MANIFEST_FILE" + + # Wait for pods + info "Waiting for pods to be ready..." + if ! READY_COUNT=$(wait_daemonset_ready "$NAMESPACE" "$DAEMONSET_NAME" ${toString constants.timeouts.podsReady}); then + error "Pods not ready in time" + kubectl get pods -n "$NAMESPACE" -o wide + cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup} + exit 1 + fi + success "$READY_COUNT pods ready" + echo "" + + PODS=$(get_daemonset_pods "$NAMESPACE" "$DAEMONSET_NAME") + + # Verify process + info "Verifying processes..." 
+ for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + if check_process_in_pod "$NAMESPACE" "$pod" "pmcd"; then + result_pass "pmcd on $node" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "pmcd on $node" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + + # Verify kernel metrics + echo "" + info "Verifying kernel metrics..." + for pod in $PODS; do + node=$(get_pod_node "$NAMESPACE" "$pod") + for metric in $KERNEL_METRICS; do + if check_metric_in_pod "$NAMESPACE" "$pod" "$metric"; then + result_pass "$metric on $node" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + result_fail "$metric on $node" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + done + + # Cleanup + echo "" + info "Cleaning up..." + cleanup_namespace "$NAMESPACE" ${toString constants.timeouts.cleanup} + + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + + echo "" + if [[ $TOTAL_FAILED -eq 0 ]]; then + success "All $TOTAL_PASSED checks passed ($(format_ms "$TOTAL_ELAPSED"))" + else + error "$TOTAL_FAILED checks failed" + fi + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + + # ─── Minikube Start Helper ─────────────────────────────────────────────── + # Starts minikube with optimal settings for PCP testing + # + mkMinikubeStart = pkgs.writeShellApplication { + name = "pcp-minikube-start"; + runtimeInputs = k8sInputs; + text = '' + ${colorHelpers} + + bold "========================================" + bold " PCP Minikube Setup" + bold "========================================" + echo "" + + DRIVER="${constants.minikube.driver}" + CPUS="${toString constants.minikube.cpus}" + MEMORY="${toString constants.minikube.memory}" + DISK="${constants.minikube.diskSize}" + + # Check if minikube is already running + if minikube status --format='{{.Host}}' 2>/dev/null | grep -q "Running"; then + warn "Minikube is already running." 
+ echo "" + info "Current configuration:" + minikube config view 2>/dev/null || true + echo "" + info "To recreate with optimal settings, run:" + echo " minikube delete" + echo " nix run .#pcp-minikube-start" + exit 0 + fi + + info "Starting minikube with settings for PCP testing:" + echo " Driver: $DRIVER" + echo " CPUs: $CPUS" + echo " Memory: $MEMORY MB" + echo " Disk: $DISK" + echo "" + info "Tip: For better I/O performance, use KVM2 driver:" + echo " minikube start --driver=kvm2 --cpus=$CPUS --memory=$MEMORY" + echo " (requires libvirtd running with default network configured)" + echo "" + + info "Starting minikube..." + if minikube start \ + --driver="$DRIVER" \ + --cpus="$CPUS" \ + --memory="$MEMORY" \ + --disk-size="$DISK"; then + echo "" + success "Minikube started successfully!" + echo "" + info "Run the PCP Kubernetes test:" + echo " nix run .#pcp-k8s-test" + else + error "Failed to start minikube" + exit 1 + fi + ''; + }; + +in +{ + # Packages output for flake.nix + packages = { + pcp-k8s-test = mkFullTest; + pcp-k8s-test-quick = mkQuickTest; + pcp-minikube-start = mkMinikubeStart; + }; + + # Apps output for flake.nix + apps = { + pcp-k8s-test = { + type = "app"; + program = "${mkFullTest}/bin/pcp-k8s-test"; + }; + pcp-k8s-test-quick = { + type = "app"; + program = "${mkQuickTest}/bin/pcp-k8s-test-quick"; + }; + pcp-minikube-start = { + type = "app"; + program = "${mkMinikubeStart}/bin/pcp-minikube-start"; + }; + }; +} diff --git a/nix/k8s-test/lib.nix b/nix/k8s-test/lib.nix new file mode 100644 index 0000000000..6f6fe5d6bc --- /dev/null +++ b/nix/k8s-test/lib.nix @@ -0,0 +1,281 @@ +# nix/k8s-test/lib.nix +# +# Shell helper functions for PCP Kubernetes DaemonSet testing. +# Provides k8s-specific operations on top of shared helpers. 
+# +{ pkgs, lib }: +let + # Import shared helpers + sharedHelpers = import ../test-common/shell-helpers.nix { }; + sharedInputs = import ../test-common/inputs.nix { inherit pkgs; }; +in +rec { + # Runtime inputs - use shared common + k8s-specific + commonInputs = sharedInputs.common; + k8sInputs = sharedInputs.k8s; + + # Re-export shared shell helpers + inherit (sharedHelpers) colorHelpers timingHelpers; + + # ─── Kubernetes Helpers ──────────────────────────────────────────────── + k8sHelpers = '' + # Check if minikube is running + # Uses timeout to avoid hanging, with fallback to kubectl check + minikube_running() { + # First try minikube status with a short timeout + if timeout 5 minikube status --format='{{.Host}}' 2>/dev/null | grep -q "Running"; then + return 0 + fi + + # Fallback: check if kubectl can reach the minikube context + if kubectl config current-context 2>/dev/null | grep -q "minikube"; then + if timeout 5 kubectl cluster-info &>/dev/null; then + return 0 + fi + fi + + return 1 + } + + # Get number of nodes in the cluster + get_node_count() { + kubectl get nodes --no-headers 2>/dev/null | wc -l + } + + # Load image into minikube + load_image_to_minikube() { + local image_path="$1" + minikube image load "$image_path" + } + + # Wait for DaemonSet to be ready + # Returns 0 when desiredNumberScheduled == numberReady + wait_daemonset_ready() { + local namespace="$1" + local name="$2" + local timeout="$3" + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + local status + status=$(kubectl get daemonset "$name" -n "$namespace" -o json 2>/dev/null) + if [[ -n "$status" ]]; then + local desired ready + desired=$(echo "$status" | jq -r '.status.desiredNumberScheduled // 0') + ready=$(echo "$status" | jq -r '.status.numberReady // 0') + + if [[ "$desired" -gt 0 && "$desired" == "$ready" ]]; then + echo "$ready" + return 0 + fi + fi + + sleep 2 + elapsed=$((elapsed + 2)) + done + + return 1 + } + + # Get all pods for a DaemonSet + get_daemonset_pods() 
{ + local namespace="$1" + local name="$2" + kubectl get pods -n "$namespace" -l "app=$name" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null + } + + # Get pod's node name + get_pod_node() { + local namespace="$1" + local pod="$2" + kubectl get pod "$pod" -n "$namespace" -o jsonpath='{.spec.nodeName}' 2>/dev/null + } + + # Execute command in a pod + kubectl_exec() { + local namespace="$1" + local pod="$2" + shift 2 + kubectl exec -n "$namespace" "$pod" -- "$@" 2>/dev/null + } + + # Check if pmcd process is running in pod + check_process_in_pod() { + local namespace="$1" + local pod="$2" + local proc="$3" + + if [[ "$proc" == "pmcd" ]]; then + # For pmcd, verify it's responding to requests + if kubectl_exec "$namespace" "$pod" pminfo -f pmcd.version &>/dev/null; then + return 0 + fi + fi + + # Try pgrep if available + if kubectl_exec "$namespace" "$pod" pgrep -x "$proc" &>/dev/null; then + return 0 + fi + + # Fallback: check /proc/1/comm + local comm + comm=$(kubectl_exec "$namespace" "$pod" cat /proc/1/comm 2>/dev/null || true) + [[ "$comm" == "$proc" ]] + } + + # Check if port is listening in pod + check_port_in_pod() { + local namespace="$1" + local pod="$2" + local port="$3" + + if [[ "$port" == "44321" ]]; then + # For pmcd port, verify by querying pmcd + if kubectl_exec "$namespace" "$pod" pminfo -f pmcd.version &>/dev/null; then + return 0 + fi + fi + + # Try ss if available + if kubectl_exec "$namespace" "$pod" ss -tln 2>/dev/null | grep -q ":$port "; then + return 0 + fi + + # Try netstat if available + if kubectl_exec "$namespace" "$pod" netstat -tln 2>/dev/null | grep -q ":$port "; then + return 0 + fi + + return 1 + } + + # Create namespace if it doesn't exist + ensure_namespace() { + local namespace="$1" + if ! 
kubectl get namespace "$namespace" &>/dev/null; then + kubectl create namespace "$namespace" + fi + } + + # Get image ID from a docker tarball (nix-built container) + get_tarball_image_id() { + local tarball="$1" + tar -xOf "$tarball" manifest.json 2>/dev/null | jq -r '.[0].Config' | sed 's/\.json$//' || echo "" + } + + # Get current image ID from docker + get_docker_image_id() { + local image="$1" + docker images --no-trunc -q "$image" 2>/dev/null | head -1 | sed 's/sha256://' || echo "" + } + + # Check if image needs to be loaded (compares hashes) + image_needs_loading() { + local tarball="$1" + local image="$2" + + local tarball_id docker_id + tarball_id=$(get_tarball_image_id "$tarball") + docker_id=$(get_docker_image_id "$image") + + # If we can't get the tarball ID, always load + [[ -z "$tarball_id" ]] && return 0 + + # If docker doesn't have the image, need to load + [[ -z "$docker_id" ]] && return 0 + + # Compare IDs + [[ "$tarball_id" != "$docker_id" ]] + } + + # Delete namespace and wait for cleanup + cleanup_namespace() { + local namespace="$1" + local timeout="$2" + local elapsed=0 + + if kubectl get namespace "$namespace" &>/dev/null; then + kubectl delete namespace "$namespace" --wait=false &>/dev/null || true + + # Wait for namespace to be deleted + while kubectl get namespace "$namespace" &>/dev/null; do + sleep 2 + elapsed=$((elapsed + 2)) + if [[ $elapsed -ge $timeout ]]; then + warn "Namespace deletion timed out" + return 1 + fi + done + fi + return 0 + } + ''; + + # ─── Metric Verification Helpers ─────────────────────────────────────── + metricHelpers = '' + # Check if a metric is available and has actual values via pminfo inside the pod + check_metric_in_pod() { + local namespace="$1" + local pod="$2" + local metric="$3" + local output + output=$(kubectl_exec "$namespace" "$pod" pminfo -f "$metric" 2>/dev/null) + # Check that we got output AND it contains a value (not just the metric name) + [[ -n "$output" ]] && echo "$output" | grep -qE 
'(value|inst)' + } + + # Check BPF metric with retry (histograms need time to populate) + check_bpf_metric() { + local namespace="$1" + local pod="$2" + local metric="$3" + local max_retries="$4" + local retry=0 + + while [[ $retry -lt $max_retries ]]; do + local output + output=$(kubectl_exec "$namespace" "$pod" pminfo -f "$metric" 2>/dev/null || true) + + # Check if we got histogram data (contains "inst" or actual values) + if [[ -n "$output" ]] && echo "$output" | grep -qE '(inst|value)'; then + # Count buckets/instances + local count + count=$(echo "$output" | grep -c 'inst' || echo "0") + if [[ "$count" -gt 0 ]]; then + echo "$count" + return 0 + fi + fi + + sleep 5 + retry=$((retry + 1)) + done + + return 1 + } + + # Check if BTF is available (required for BPF PMDA) + check_btf_available() { + local namespace="$1" + local pod="$2" + kubectl_exec "$namespace" "$pod" test -f /sys/kernel/btf/vmlinux 2>/dev/null + } + + # Get metric value (for display) + get_metric_value() { + local namespace="$1" + local pod="$2" + local metric="$3" + kubectl_exec "$namespace" "$pod" pminfo -f "$metric" 2>/dev/null | grep -E '(value|inst)' | head -3 + } + ''; + + # ─── Combined Helpers ────────────────────────────────────────────────── + # All helpers combined for use in test scripts + allHelpers = lib.concatStringsSep "\n" [ + colorHelpers + timingHelpers + k8sHelpers + metricHelpers + ]; +} diff --git a/nix/k8s-test/manifests.nix b/nix/k8s-test/manifests.nix new file mode 100644 index 0000000000..56b3c8e001 --- /dev/null +++ b/nix/k8s-test/manifests.nix @@ -0,0 +1,122 @@ +# nix/k8s-test/manifests.nix +# +# Generates Kubernetes manifests for PCP DaemonSet deployment. +# Creates privileged DaemonSet with full node monitoring capabilities. 
+# +{ pkgs, lib }: +let + constants = import ./constants.nix { }; + mainConstants = import ../constants.nix; + + # ─── DaemonSet Manifest ──────────────────────────────────────────────── + # Privileged DaemonSet for full node monitoring including BPF metrics + daemonSetManifest = '' + apiVersion: v1 + kind: Namespace + metadata: + name: ${constants.k8s.namespace} + --- + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: ${constants.k8s.daemonSetName} + namespace: ${constants.k8s.namespace} + labels: + app: ${constants.k8s.daemonSetName} + spec: + selector: + matchLabels: + app: ${constants.k8s.daemonSetName} + template: + metadata: + labels: + app: ${constants.k8s.daemonSetName} + spec: + # Required for seeing all node processes + hostPID: true + + containers: + - name: pcp + image: ${constants.k8s.imageName}:${constants.k8s.imageTag} + imagePullPolicy: Never + + # Privileged for BPF and full /proc access + # Run as root to override container's default pcp user + securityContext: + privileged: true + runAsUser: 0 + + ports: + - containerPort: ${toString mainConstants.ports.pmcd} + name: pmcd + - containerPort: ${toString mainConstants.ports.pmproxy} + name: pmproxy + + env: + # Tell PCP where host filesystem is mounted + - name: HOST_MOUNT + value: "/host" + - name: PCP_SYSFS_DIR + value: "/host/sys" + + volumeMounts: + # Host root filesystem (read-only) + - name: host-root + mountPath: /host + readOnly: true + # Required for BPF + - name: sys-kernel-debug + mountPath: /sys/kernel/debug + # Host /proc for process metrics + - name: host-proc + mountPath: /host/proc + readOnly: true + # Host /sys for system metrics + - name: host-sys + mountPath: /host/sys + readOnly: true + + readinessProbe: + tcpSocket: + port: pmcd + initialDelaySeconds: 10 + periodSeconds: 5 + + resources: + limits: + memory: "512Mi" + cpu: "500m" + requests: + memory: "256Mi" + cpu: "100m" + + volumes: + - name: host-root + hostPath: + path: / + - name: sys-kernel-debug + hostPath: + 
path: /sys/kernel/debug + - name: host-proc + hostPath: + path: /proc + - name: host-sys + hostPath: + path: /sys + + # Run on all nodes including control plane + tolerations: + - operator: Exists + ''; + +in +{ + # The full DaemonSet manifest as a string + manifest = daemonSetManifest; + + # Write manifest to a file for kubectl apply + manifestFile = pkgs.writeText "pcp-daemonset.yaml" daemonSetManifest; + + # Helper to get the manifest path + getManifestPath = "${pkgs.writeText "pcp-daemonset.yaml" daemonSetManifest}"; +} diff --git a/nix/lifecycle/constants.nix b/nix/lifecycle/constants.nix new file mode 100644 index 0000000000..d284906ef8 --- /dev/null +++ b/nix/lifecycle/constants.nix @@ -0,0 +1,158 @@ +# nix/lifecycle/constants.nix +# +# Lifecycle testing configuration for PCP MicroVMs. +# Extends the main constants.nix with lifecycle-specific values. +# +# This file provides: +# - Per-variant service and metric check configurations +# - Phase descriptions and expected outputs +# - Color codes and formatting for terminal output +# +{ }: +let + # Import main constants for shared values + mainConstants = import ../constants.nix; +in +rec { + # Re-export relevant main constants + inherit (mainConstants) + network ports console variantPortOffsets + getConsolePorts getHostname getProcessName getTimeout + lifecycle; + + # ─── Variant Definitions ────────────────────────────────────────────── + # Each variant specifies which services and metrics to verify. 
+ # + variants = { + base = { + description = "Base PCP (pmcd, pmlogger, pmproxy)"; + services = [ "pmcd" "pmproxy" "pmlogger" ]; + metrics = [ "kernel.all.load" "kernel.all.cpu.user" "mem.physmem" ]; + httpChecks = [ + { name = "pmproxy"; port = ports.pmproxy; path = "/pmapi/1/metrics?target=kernel.all.load"; } + ]; + }; + + eval = { + description = "Evaluation (+ node_exporter, below, pmie-test)"; + services = [ "pmcd" "pmproxy" "prometheus-node-exporter" "pmie-test" "stress-ng-test" ]; + metrics = [ "kernel.all.load" "kernel.all.cpu.user" "mem.physmem" ]; + httpChecks = [ + { name = "pmproxy"; port = ports.pmproxy; path = "/pmapi/1/metrics?target=kernel.all.load"; } + { name = "node_exporter"; port = ports.nodeExporter; path = "/metrics"; } + ]; + }; + + grafana = { + description = "Grafana (+ Prometheus + BPF dashboards)"; + services = [ "pmcd" "pmproxy" "prometheus-node-exporter" "grafana" "prometheus" ]; + metrics = [ + "kernel.all.load" "kernel.all.cpu.user" "mem.physmem" + # BPF PMDA metrics (grafana variant has enableBpf=true for BPF dashboards) + "bpf.runq.latency" "bpf.disk.all.latency" + ]; + httpChecks = [ + { name = "pmproxy"; port = ports.pmproxy; path = "/pmapi/1/metrics?target=kernel.all.load"; } + { name = "node_exporter"; port = ports.nodeExporter; path = "/metrics"; } + { name = "grafana"; port = ports.grafana; path = "/api/health"; } + { name = "prometheus"; port = ports.prometheus; path = "/-/ready"; } + ]; + }; + + bpf = { + description = "BPF PMDA (pre-compiled eBPF)"; + services = [ "pmcd" "pmproxy" "prometheus-node-exporter" ]; + metrics = [ + "kernel.all.load" "kernel.all.cpu.user" "mem.physmem" + # BPF PMDA metrics (runqlat -> bpf.runq.latency, biolatency -> bpf.disk.all.latency) + "bpf.runq.latency" "bpf.disk.all.latency" + ]; + httpChecks = [ + { name = "pmproxy"; port = ports.pmproxy; path = "/pmapi/1/metrics?target=kernel.all.load"; } + { name = "node_exporter"; port = ports.nodeExporter; path = "/metrics"; } + ]; + }; + + # NOTE: 
BCC is deprecated - use BPF PMDA instead. + # BCC used runtime eBPF compilation which is slower and less reliable + # than the pre-compiled BPF PMDA CO-RE approach. + # bcc = { + # description = "BCC PMDA (runtime eBPF compilation)"; + # services = [ "pmcd" "pmproxy" "prometheus-node-exporter" ]; + # metrics = [ + # "kernel.all.load" "kernel.all.cpu.user" "mem.physmem" + # ]; + # bccMetrics = [ "bcc.runqlat" "bcc.biolatency" ]; + # httpChecks = [ + # { name = "pmproxy"; port = ports.pmproxy; path = "/pmapi/1/metrics?target=kernel.all.load"; } + # { name = "node_exporter"; port = ports.nodeExporter; path = "/metrics"; } + # ]; + # }; + }; + + # ─── Phase Definitions ──────────────────────────────────────────────── + # Human-readable descriptions for each lifecycle phase. + # + phases = { + "0" = { name = "Build VM"; description = "Build the MicroVM derivation"; }; + "1" = { name = "Start VM"; description = "Start QEMU process and verify it's running"; }; + "2" = { name = "Serial Console"; description = "Verify serial console (ttyS0) is responsive"; }; + "2b" = { name = "Virtio Console"; description = "Verify virtio console (hvc0) is responsive"; }; + "3" = { name = "Verify Services"; description = "Check PCP and related services are active"; }; + "4" = { name = "Verify Metrics"; description = "Check PCP metrics are available"; }; + "5" = { name = "Shutdown"; description = "Send shutdown command via console"; }; + "6" = { name = "Wait Exit"; description = "Wait for VM process to exit cleanly"; }; + }; + + # ─── Terminal Formatting ────────────────────────────────────────────── + # ANSI color codes for terminal output. + # + colors = { + reset = "\\033[0m"; + bold = "\\033[1m"; + red = "\\033[31m"; + green = "\\033[32m"; + yellow = "\\033[33m"; + blue = "\\033[34m"; + cyan = "\\033[36m"; + }; + + # ─── Expect Script Configuration ────────────────────────────────────── + # Configuration for expect scripts that interact with serial/virtio consoles. 
+ # + expect = { + # Login prompt patterns + loginPrompt = "login:"; + + # Shell prompt patterns (after login) + # Matches: root@pcp-vm:~# or root@pcp-eval-vm:~# etc. + shellPromptPattern = "root@pcp-.*-?vm:.*#"; + + # Boot completion marker (systemd target reached) + bootCompletePattern = "Reached target.*Multi-User System|Welcome to NixOS"; + + # Default username and password for debug mode VMs + username = "root"; + password = "pcp"; + + # Timeout for expect operations (seconds) + defaultTimeout = 30; + + # Time to wait between sending characters (milliseconds) + sendDelay = 50; + }; + + # ─── Shutdown Commands ──────────────────────────────────────────────── + # Commands used to gracefully shut down the VM. + # + shutdown = { + # Primary shutdown command + command = "poweroff"; + + # Alternative if poweroff hangs + alternative = "systemctl poweroff --force"; + + # Pattern indicating shutdown has begun + shutdownPattern = "System is going down|Powering off|Reached target.*Shutdown"; + }; +} diff --git a/nix/lifecycle/default.nix b/nix/lifecycle/default.nix new file mode 100644 index 0000000000..576e5a8d5c --- /dev/null +++ b/nix/lifecycle/default.nix @@ -0,0 +1,680 @@ +# nix/lifecycle/default.nix +# +# Entry point for PCP MicroVM lifecycle testing. +# Generates lifecycle test scripts for all MicroVM variants. +# +# Usage in flake.nix: +# lifecycle = import ./nix/lifecycle { inherit pkgs lib; }; +# +# Generated outputs: +# lifecycle.scripts.. - Individual phase scripts +# lifecycle.tests. 
<variant> - Full lifecycle test for variant
+# lifecycle.tests.all - Test all variants sequentially
+#
+{ pkgs, lib }:
+let
+  constants = import ./constants.nix { };
+  mainConstants = import ../constants.nix;
+  lifecycleLib = import ./lib.nix { inherit pkgs lib; };
+  pcpChecks = import ./pcp-checks.nix { inherit pkgs lib; };
+
+  inherit (lifecycleLib) colorHelpers timingHelpers processHelpers consoleHelpers;
+  inherit (lifecycleLib) commonInputs sshInputs;
+
+  # All variant names (user-mode networking)
+  # NOTE: BCC is deprecated and no longer supported. Use BPF PMDA instead.
+  # BCC used runtime eBPF compilation which is slower and less reliable than
+  # the pre-compiled BPF PMDA CO-RE approach.
+  variantNames = [ "base" "eval" "grafana" "bpf" ];
+
+  # TAP networking variants (require host network setup first)
+  # These use direct IP access instead of port forwarding.
+  # Only one TAP VM can run at a time (they share the same IP).
+  tapVariantNames = [ "grafana-tap" ];
+
+  # All variant names including TAP variants
+  allVariantNames = variantNames ++ tapVariantNames;
+
+  # Parse a variant name to get base variant and networking type
+  # "grafana-tap" -> { base = "grafana"; networking = "tap"; }
+  # "grafana" -> { base = "grafana"; networking = "user"; }
+  parseVariant = name:
+    if lib.hasSuffix "-tap" name then {
+      base = lib.removeSuffix "-tap" name;
+      networking = "tap";
+    } else {
+      base = name;
+      networking = "user";
+    };
+
+  # Generate all phase scripts for a variant
+  mkVariantScripts = variant: {
+    check-process = lifecycleLib.mkCheckProcessScript { inherit variant; };
+    check-serial = lifecycleLib.mkCheckSerialScript { inherit variant; };
+    check-virtio = lifecycleLib.mkCheckVirtioScript { inherit variant; };
+    verify-services = pcpChecks.mkVerifyServicesScript { inherit variant; };
+    verify-metrics = pcpChecks.mkVerifyMetricsScript { inherit variant; };
+    verify-http = pcpChecks.mkVerifyHttpScript { inherit variant; };
+    verify-full =
pcpChecks.mkFullVerificationScript { inherit variant; }; + shutdown = lifecycleLib.mkShutdownScript { inherit variant; }; + wait-exit = lifecycleLib.mkWaitExitScript { inherit variant; }; + force-kill = lifecycleLib.mkForceKillScript { inherit variant; }; + status = lifecycleLib.mkStatusScript { inherit variant; }; + }; + + # Generate a full lifecycle test for a variant + mkFullTest = variant: + let + # Parse variant name to get base variant and networking type + parsed = parseVariant variant; + baseVariant = parsed.base; + isTap = parsed.networking == "tap"; + + # Use base variant for configuration lookups + variantConfig = constants.variants.${baseVariant}; + hostname = mainConstants.getHostname baseVariant; + consolePorts = mainConstants.getConsolePorts baseVariant; + portOffset = mainConstants.variantPortOffsets.${baseVariant}; + + # TAP uses direct IP and standard SSH port; user-mode uses localhost with forwarding + sshHost = if isTap then mainConstants.network.vmIp else "localhost"; + sshPort = if isTap then 22 else mainConstants.ports.sshForward + portOffset; + + # Get timeouts (use base variant) + buildTimeout = mainConstants.getTimeout baseVariant "build"; + processTimeout = mainConstants.getTimeout baseVariant "processStart"; + serialTimeout = mainConstants.getTimeout baseVariant "serialReady"; + virtioTimeout = mainConstants.getTimeout baseVariant "virtioReady"; + serviceTimeout = mainConstants.getTimeout baseVariant "serviceReady"; + metricTimeout = mainConstants.getTimeout baseVariant "metricsReady"; + shutdownTimeout = mainConstants.getTimeout baseVariant "shutdown"; + exitTimeout = mainConstants.getTimeout baseVariant "waitExit"; + + # Package name in flake (TAP variants have -tap suffix) + packageName = + if baseVariant == "base" then + if isTap then "pcp-microvm-tap" else "pcp-microvm" + else + if isTap then "pcp-microvm-${baseVariant}-tap" else "pcp-microvm-${baseVariant}"; + + # SSH options + sshOpts = lib.concatStringsSep " " [ + "-o" 
"StrictHostKeyChecking=no" + "-o" "UserKnownHostsFile=/dev/null" + "-o" "ConnectTimeout=5" + "-o" "LogLevel=ERROR" + "-o" "PubkeyAuthentication=no" + ]; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-full-test-${variant}"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl pkgs.nix ]; + text = '' + set +e # Don't exit on first failure + + ${colorHelpers} + ${timingHelpers} + ${processHelpers} + ${consoleHelpers} + + # SSH helpers + ssh_cmd() { + local host="$1" + local port="$2" + shift 2 + sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" "$@" 2>/dev/null + } + + check_service() { + local host="$1" + local port="$2" + local service="$3" + ssh_cmd "$host" "$port" "systemctl is-active $service" 2>/dev/null | grep -q "^active$" + } + + # Wait for a service to become active (handles Type=oneshot services) + # Some services like pmlogger use Type=oneshot and take time to start + wait_for_service() { + local host="$1" + local port="$2" + local service="$3" + local timeout="$4" + local elapsed=0 + while [[ $elapsed -lt $timeout ]]; do + local status + status=$(ssh_cmd "$host" "$port" "systemctl is-active $service" 2>/dev/null || echo "unknown") + case "$status" in + active) + return 0 + ;; + activating|inactive|unknown) + # Service is starting, waiting to start, or we couldn't get status + # Keep waiting + sleep 1 + elapsed=$((elapsed + 1)) + ;; + failed) + # Service explicitly failed + return 1 + ;; + *) + # Other states (deactivating, etc.) 
- keep waiting + sleep 1 + elapsed=$((elapsed + 1)) + ;; + esac + done + return 1 + } + + check_metric() { + local host="$1" + local port="$2" + local metric="$3" + ssh_cmd "$host" "$port" "pminfo -f $metric 2>/dev/null | grep -q value" + } + + wait_for_ssh() { + local host="$1" + local port="$2" + local timeout="$3" + local elapsed=0 + while [[ $elapsed -lt $timeout ]]; do + if sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" true 2>/dev/null; then + return 0 + fi + sleep 1 + elapsed=$((elapsed + 1)) + done + return 1 + } + + # Configuration + VARIANT="${variant}" + HOSTNAME="${hostname}" + PACKAGE_NAME="${packageName}" + SERIAL_PORT=${toString consolePorts.serial} + VIRTIO_PORT=${toString consolePorts.virtio} + SSH_HOST="${sshHost}" + SSH_PORT=${toString sshPort} + IS_TAP="${if isTap then "true" else "false"}" + RESULT_LINK="result-lifecycle-$VARIANT" + + # Timing tracking + declare -A PHASE_TIMES + TOTAL_START=$(time_ms) + + # Results tracking + TOTAL_PASSED=0 + TOTAL_FAILED=0 + + record_result() { + local phase="$1" + local passed="$2" + local time_ms="$3" + PHASE_TIMES["$phase"]=$time_ms + if [[ "$passed" == "true" ]]; then + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + } + + # Header + bold "========================================" + bold " PCP MicroVM Full Lifecycle Test ($VARIANT)" + bold "========================================" + echo "" + info "Description: ${variantConfig.description}" + info "Hostname: $HOSTNAME" + info "SSH: $SSH_HOST:$SSH_PORT" + if [[ "$IS_TAP" == "true" ]]; then + warn "TAP networking - requires host network setup (nix run .#pcp-network-setup)" + fi + echo "" + + # ─── Phase 0: Build VM ───────────────────────────────────────────── + phase_header "0" "Build VM" "${toString buildTimeout}" + start_time=$(time_ms) + + # Clean up any existing result link + rm -f "$RESULT_LINK" + + info " Building $PACKAGE_NAME..." 
+ if nix build ".#$PACKAGE_NAME" -o "$RESULT_LINK" 2>&1 | while read -r line; do + echo " $line" + done; then + elapsed=$(elapsed_ms "$start_time") + result_pass "VM built" "$elapsed" + record_result "build" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Build failed" "$elapsed" + record_result "build" "false" "$elapsed" + exit 1 + fi + + # ─── Phase 1: Start VM ───────────────────────────────────────────── + phase_header "1" "Start VM" "${toString processTimeout}" + start_time=$(time_ms) + + # Kill any existing VM with this hostname + if vm_is_running "$HOSTNAME"; then + warn " Killing existing VM..." + kill_vm "$HOSTNAME" + sleep 2 + fi + + info " Starting $RESULT_LINK/bin/microvm-run..." + "$RESULT_LINK/bin/microvm-run" & + _bg_pid=$! # Background PID (VM spawns its own QEMU process) + + if wait_for_process "$HOSTNAME" "${toString processTimeout}"; then + elapsed=$(elapsed_ms "$start_time") + qemu_pid=$(vm_pid "$HOSTNAME") + result_pass "VM process running (PID: $qemu_pid, launcher: $_bg_pid)" "$elapsed" + record_result "start" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "VM process not found" "$elapsed" + record_result "start" "false" "$elapsed" + rm -f "$RESULT_LINK" + exit 1 + fi + + # ─── Phase 2: Check Serial Console ───────────────────────────────── + phase_header "2" "Check Serial Console" "${toString serialTimeout}" + start_time=$(time_ms) + + if wait_for_console "$SERIAL_PORT" "${toString serialTimeout}"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Serial console available (port $SERIAL_PORT)" "$elapsed" + record_result "serial" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Serial console not available" "$elapsed" + record_result "serial" "false" "$elapsed" + fi + + # ─── Phase 2b: Check Virtio Console ──────────────────────────────── + phase_header "2b" "Check Virtio Console" "${toString virtioTimeout}" + start_time=$(time_ms) + + if 
wait_for_console "$VIRTIO_PORT" "${toString virtioTimeout}"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Virtio console available (port $VIRTIO_PORT)" "$elapsed" + record_result "virtio" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Virtio console not available" "$elapsed" + record_result "virtio" "false" "$elapsed" + fi + + # ─── Phase 3: Verify PCP Services ────────────────────────────────── + phase_header "3" "Verify PCP Services" "${toString serviceTimeout}" + start_time=$(time_ms) + + info " Waiting for SSH..." + if ! wait_for_ssh "$SSH_HOST" "$SSH_PORT" "${toString serviceTimeout}"; then + elapsed=$(elapsed_ms "$start_time") + result_fail "SSH not available" "$elapsed" + record_result "services" "false" "$elapsed" + else + ssh_elapsed=$(elapsed_ms "$start_time") + info " SSH connected (''${ssh_elapsed}ms)" + + service_passed=0 + service_failed=0 + + # Services that use Type=oneshot and need extra wait time + SLOW_SERVICES="pmlogger pmie" + + ${lib.concatMapStringsSep "\n" (service: '' + svc_start=$(time_ms) + # Check if this is a slow-starting service + if [[ " $SLOW_SERVICES " == *" ${service} "* ]]; then + # Wait up to 60s for Type=oneshot services + if wait_for_service "$SSH_HOST" "$SSH_PORT" "${service}" 60; then + result_pass "${service} active" "$(elapsed_ms "$svc_start")" + service_passed=$((service_passed + 1)) + else + result_fail "${service} not active" "$(elapsed_ms "$svc_start")" + service_failed=$((service_failed + 1)) + fi + else + # Quick check for normal services + if check_service "$SSH_HOST" "$SSH_PORT" "${service}"; then + result_pass "${service} active" "$(elapsed_ms "$svc_start")" + service_passed=$((service_passed + 1)) + else + result_fail "${service} not active" "$(elapsed_ms "$svc_start")" + service_failed=$((service_failed + 1)) + fi + fi + '') variantConfig.services} + + elapsed=$(elapsed_ms "$start_time") + if [[ $service_failed -eq 0 ]]; then + record_result "services" "true" 
"$elapsed" + else + record_result "services" "false" "$elapsed" + fi + fi + + # ─── Phase 4: Verify PCP Metrics ─────────────────────────────────── + phase_header "4" "Verify PCP Metrics" "${toString metricTimeout}" + start_time=$(time_ms) + + metric_passed=0 + metric_failed=0 + + ${lib.concatMapStringsSep "\n" (metric: '' + met_start=$(time_ms) + if check_metric "$SSH_HOST" "$SSH_PORT" "${metric}"; then + result_pass "${metric}" "$(elapsed_ms "$met_start")" + metric_passed=$((metric_passed + 1)) + else + result_fail "${metric} not available" "$(elapsed_ms "$met_start")" + metric_failed=$((metric_failed + 1)) + fi + '') variantConfig.metrics} + + elapsed=$(elapsed_ms "$start_time") + if [[ $metric_failed -eq 0 ]]; then + record_result "metrics" "true" "$elapsed" + else + record_result "metrics" "false" "$elapsed" + fi + + # ─── Phase 5: Shutdown ───────────────────────────────────────────── + phase_header "5" "Shutdown" "${toString shutdownTimeout}" + start_time=$(time_ms) + + info " Sending shutdown command..." + if ssh_cmd "$SSH_HOST" "$SSH_PORT" "poweroff" 2>/dev/null; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Shutdown command sent" "$elapsed" + record_result "shutdown" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + warn " SSH shutdown failed, killing process..." 
+ kill_vm "$HOSTNAME" + result_pass "VM process killed" "$elapsed" + record_result "shutdown" "true" "$elapsed" + fi + + # ─── Phase 6: Wait for Exit ──────────────────────────────────────── + phase_header "6" "Wait for Exit" "${toString exitTimeout}" + start_time=$(time_ms) + + if wait_for_exit "$HOSTNAME" "${toString exitTimeout}"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "VM exited cleanly" "$elapsed" + record_result "exit" "true" "$elapsed" + else + elapsed=$(elapsed_ms "$start_time") + result_fail "VM did not exit, forcing kill" "$elapsed" + kill_vm "$HOSTNAME" + record_result "exit" "false" "$elapsed" + fi + + # ─── Cleanup ─────────────────────────────────────────────────────── + rm -f "$RESULT_LINK" + + # ─── Summary ─────────────────────────────────────────────────────── + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + + echo "" + bold " Timing Summary" + echo " $(printf '─%.0s' {1..37})" + printf " %-25s %10s\n" "Phase" "Time (ms)" + echo " $(printf '─%.0s' {1..37})" + for phase in build start serial virtio services metrics shutdown exit; do + if [[ -n "''${PHASE_TIMES[$phase]:-}" ]]; then + printf " %-25s %10s\n" "$phase" "''${PHASE_TIMES[$phase]}" + fi + done + echo " $(printf '─%.0s' {1..37})" + printf " %-25s %10s\n" "TOTAL" "$TOTAL_ELAPSED" + echo " $(printf '─%.0s' {1..37})" + + echo "" + bold "========================================" + if [[ $TOTAL_FAILED -eq 0 ]]; then + success " Result: ALL PHASES PASSED" + success " Total time: $(format_ms "$TOTAL_ELAPSED")" + else + error " Result: $TOTAL_FAILED PHASES FAILED" + fi + bold "========================================" + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + + # Generate test-all script + mkTestAll = pkgs.writeShellApplication { + name = "pcp-lifecycle-test-all"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl pkgs.nix ]; + text = '' + set +e + + ${colorHelpers} + ${timingHelpers} + + bold "========================================" + bold " PCP MicroVM Lifecycle Test 
Suite" + bold "========================================" + echo "" + + VARIANTS="${lib.concatStringsSep " " allVariantNames}" + SKIP_VARIANTS="" + ONLY_VARIANT="" + + # Parse arguments + while [[ $# -gt 0 ]]; do + case "$1" in + --skip=*) + SKIP_VARIANTS="''${1#--skip=}" + shift + ;; + --only=*) + ONLY_VARIANT="''${1#--only=}" + shift + ;; + --help|-h) + echo "Usage: pcp-lifecycle-test-all [OPTIONS]" + echo "" + echo "Options:" + echo " --skip=VARIANT Skip specified variant (comma-separated)" + echo " --only=VARIANT Test only specified variant" + echo " --help, -h Show this help" + echo "" + echo "User-mode variants: ${lib.concatStringsSep " " variantNames}" + echo "TAP variants: ${lib.concatStringsSep " " tapVariantNames}" + echo "" + echo "TAP variants require host network setup first:" + echo " nix run .#pcp-network-setup" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done + + # Results tracking + declare -A RESULTS + declare -A DURATIONS + TOTAL_PASSED=0 + TOTAL_FAILED=0 + TOTAL_SKIPPED=0 + + TOTAL_START=$(time_ms) + + # Check if TAP network is available (for TAP variants) + TAP_AVAILABLE="false" + if ip link show ${mainConstants.network.bridge} >/dev/null 2>&1; then + TAP_AVAILABLE="true" + info "TAP network available (bridge: ${mainConstants.network.bridge})" + else + warn "TAP network not available - TAP variants will be skipped" + warn "Run 'nix run .#pcp-network-setup' to enable TAP testing" + fi + echo "" + + for variant in $VARIANTS; do + # Check if should skip + if [[ -n "$ONLY_VARIANT" ]] && [[ "$variant" != "$ONLY_VARIANT" ]]; then + RESULTS[$variant]="SKIPPED" + DURATIONS[$variant]=0 + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + continue + fi + + if [[ "$SKIP_VARIANTS" == *"$variant"* ]]; then + RESULTS[$variant]="SKIPPED" + DURATIONS[$variant]=0 + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + continue + fi + + # Skip TAP variants if TAP network is not available + if [[ "$variant" == *"-tap" ]] && [[ "$TAP_AVAILABLE" != "true" ]]; then + 
RESULTS[$variant]="SKIPPED (no TAP)" + DURATIONS[$variant]=0 + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + continue + fi + + echo "" + bold "════════════════════════════════════════" + bold " Testing: $variant" + bold "════════════════════════════════════════" + + variant_start=$(time_ms) + + # Run the full test for this variant + test_script="pcp-lifecycle-full-test-$variant" + if command -v "$test_script" >/dev/null 2>&1; then + if "$test_script"; then + RESULTS[$variant]="PASSED" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + RESULTS[$variant]="FAILED" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + else + # Try running via nix run + if nix run ".#$test_script" 2>/dev/null; then + RESULTS[$variant]="PASSED" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + else + RESULTS[$variant]="FAILED" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + fi + + DURATIONS[$variant]=$(elapsed_ms "$variant_start") + done + + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + + # Summary + echo "" + bold "========================================" + bold " Test Suite Summary" + bold "========================================" + echo "" + + printf "%-12s %-15s %12s\n" "Variant" "Result" "Duration" + printf "%-12s %-15s %12s\n" "───────" "──────" "────────" + + for variant in $VARIANTS; do + result="''${RESULTS[$variant]:-UNKNOWN}" + duration="''${DURATIONS[$variant]:-0}" + + if [[ "$result" == "PASSED" ]]; then + printf "%-12s \033[32m%-15s\033[0m %12s\n" "$variant" "$result" "$(format_ms "$duration")" + elif [[ "$result" == "FAILED" ]]; then + printf "%-12s \033[31m%-15s\033[0m %12s\n" "$variant" "$result" "$(format_ms "$duration")" + else + printf "%-12s \033[33m%-15s\033[0m %12s\n" "$variant" "$result" "-" + fi + done + + echo "" + echo "────────────────────────────────────────" + echo "Total: $TOTAL_PASSED passed, $TOTAL_FAILED failed, $TOTAL_SKIPPED skipped" + echo "Total time: $(format_ms "$TOTAL_ELAPSED")" + echo "────────────────────────────────────────" + + [[ $TOTAL_FAILED -eq 0 ]] + ''; + }; + + # Generate 
all scripts for all variants (user-mode only, TAP shares scripts) + lifecycleByVariant = lib.genAttrs variantNames (variant: mkVariantScripts variant); + + # Generate full tests for all variants (including TAP) + testsByVariant = lib.genAttrs allVariantNames (variant: mkFullTest variant); + +in +{ + # Individual phase scripts by variant + # Usage: lifecycle.scripts.base.check-process + scripts = lifecycleByVariant; + + # Full lifecycle tests by variant + # Usage: lifecycle.tests.base + tests = testsByVariant // { + all = mkTestAll; + }; + + # Flattened package set for flake.nix packages output + # Usage: packages.pcp-lifecycle-full-test-base + packages = + let + # Flatten scripts: pcp-lifecycle-- + flattenedScripts = lib.foldl' (acc: variant: + acc // (lib.mapAttrs' (phase: script: + lib.nameValuePair "pcp-lifecycle-${phase}-${variant}" script + ) lifecycleByVariant.${variant}) + ) {} variantNames; + + # Full tests: pcp-lifecycle-full-test- + fullTests = lib.mapAttrs' (variant: test: + lib.nameValuePair "pcp-lifecycle-full-test-${variant}" test + ) testsByVariant; + in + flattenedScripts // fullTests // { + pcp-lifecycle-test-all = mkTestAll; + }; + + # Apps output for flake.nix + apps = lib.mapAttrs (name: pkg: { + type = "app"; + program = "${pkg}/bin/${name}"; + }) ( + let + # Full tests for all variants (including TAP) + fullTestApps = lib.foldl' (acc: variant: + acc // { + "pcp-lifecycle-full-test-${variant}" = testsByVariant.${variant}; + } + ) {} allVariantNames; + + # Status and force-kill for user-mode variants only (TAP shares base scripts) + utilityApps = lib.foldl' (acc: variant: + acc // { + "pcp-lifecycle-status-${variant}" = lifecycleByVariant.${variant}.status; + "pcp-lifecycle-force-kill-${variant}" = lifecycleByVariant.${variant}.force-kill; + } + ) {} variantNames; + in + fullTestApps // utilityApps // { + pcp-lifecycle-test-all = mkTestAll; + } + ); +} diff --git a/nix/lifecycle/lib.nix b/nix/lifecycle/lib.nix new file mode 100644 index 
0000000000..1040a89ddf --- /dev/null +++ b/nix/lifecycle/lib.nix @@ -0,0 +1,545 @@ +# nix/lifecycle/lib.nix +# +# Script generators for PCP MicroVM lifecycle testing. +# Provides functions to generate bash scripts for each lifecycle phase. +# +# Adapted from xdp2's lifecycle testing framework with PCP-specific +# verification phases and variant handling. +# +{ pkgs, lib }: +let + constants = import ./constants.nix { }; + mainConstants = import ../constants.nix; + + # Common runtime inputs for all lifecycle scripts + commonInputs = with pkgs; [ + coreutils + gnugrep + gnused + gawk + procps + netcat-gnu + socat + expect + bc + util-linux # for kill, etc. + ]; + + # SSH-related inputs (for service verification via SSH fallback) + sshInputs = with pkgs; [ + openssh + sshpass + ]; + + # ANSI color helpers (shell functions) + colorHelpers = '' + # ANSI color codes + _reset='\033[0m' + _bold='\033[1m' + _red='\033[31m' + _green='\033[32m' + _yellow='\033[33m' + _blue='\033[34m' + _cyan='\033[36m' + + # Color output functions + info() { echo -e "''${_cyan}$*''${_reset}"; } + success() { echo -e "''${_green}$*''${_reset}"; } + warn() { echo -e "''${_yellow}$*''${_reset}"; } + error() { echo -e "''${_red}$*''${_reset}"; } + bold() { echo -e "''${_bold}$*''${_reset}"; } + + # Phase header + phase_header() { + local phase="$1" + local name="$2" + local timeout="$3" + echo "" + echo -e "''${_bold}--- Phase $phase: $name (timeout: ''${timeout}s) ---''${_reset}" + } + + # Pass/fail result with timing + result_pass() { + local msg="$1" + local time_ms="$2" + echo -e " ''${_green}PASS''${_reset}: $msg (''${time_ms}ms)" + } + + result_fail() { + local msg="$1" + local time_ms="$2" + echo -e " ''${_red}FAIL''${_reset}: $msg (''${time_ms}ms)" + } + ''; + + # Timing helpers + timingHelpers = '' + # Get current time in milliseconds + time_ms() { + echo $(($(date +%s%N) / 1000000)) + } + + # Calculate elapsed time in milliseconds + elapsed_ms() { + local start="$1" + local now + 
now=$(time_ms) + echo $((now - start)) + } + + # Convert milliseconds to human-readable format + format_ms() { + local ms="$1" + if [[ $ms -lt 1000 ]]; then + echo "''${ms}ms" + elif [[ $ms -lt 60000 ]]; then + echo "$((ms / 1000)).$((ms % 1000 / 100))s" + else + local mins=$((ms / 60000)) + local secs=$(((ms % 60000) / 1000)) + echo "''${mins}m''${secs}s" + fi + } + ''; + + # Process detection helpers + processHelpers = '' + # Check if VM process is running by hostname pattern + # Returns 0 if running, 1 if not + vm_is_running() { + local hostname="$1" + pgrep -f "process=$hostname" >/dev/null 2>&1 + } + + # Get VM process PID + vm_pid() { + local hostname="$1" + pgrep -f "process=$hostname" 2>/dev/null | head -1 + } + + # Wait for VM process to start + wait_for_process() { + local hostname="$1" + local timeout="$2" + local poll_interval="${toString constants.lifecycle.pollInterval}" + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + if vm_is_running "$hostname"; then + return 0 + fi + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + return 1 + } + + # Wait for VM process to exit + wait_for_exit() { + local hostname="$1" + local timeout="$2" + local poll_interval="${toString constants.lifecycle.pollInterval}" + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + if ! 
vm_is_running "$hostname"; then + return 0 + fi + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + return 1 + } + + # Force kill VM process + kill_vm() { + local hostname="$1" + local pid + pid=$(vm_pid "$hostname") + if [[ -n "$pid" ]]; then + kill "$pid" 2>/dev/null || true + sleep 1 + if vm_is_running "$hostname"; then + kill -9 "$pid" 2>/dev/null || true + fi + fi + } + ''; + + # Console connection helpers + consoleHelpers = '' + # Check if a TCP port is listening + port_is_open() { + local host="$1" + local port="$2" + nc -z "$host" "$port" 2>/dev/null + } + + # Wait for console port to be available + wait_for_console() { + local port="$1" + local timeout="$2" + local poll_interval="${toString constants.lifecycle.pollInterval}" + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + if port_is_open "127.0.0.1" "$port"; then + return 0 + fi + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + return 1 + } + + # Send command to console via expect and capture output + # Usage: console_cmd + console_cmd() { + local port="$1" + local cmd="$2" + local expect_pattern="$3" + local timeout="''${4:-30}" + + expect -c " + set timeout $timeout + spawn socat -,rawer tcp:127.0.0.1:$port + sleep 0.5 + send \"\\r\" + expect { + -re \"$expect_pattern\" { + send \"$cmd\\r\" + expect -re \"$expect_pattern\" + exit 0 + } + timeout { + exit 1 + } + } + " 2>/dev/null + } + + # Login to console and run a command + console_login_cmd() { + local port="$1" + local username="$2" + local password="$3" + local cmd="$4" + local timeout="''${5:-60}" + + expect -c " + set timeout $timeout + spawn socat -,rawer tcp:127.0.0.1:$port + sleep 0.5 + send \"\\r\" + expect { + \"login:\" { + send \"$username\\r\" + expect \"Password:\" + send \"$password\\r\" + expect -re \"$username@.*:.*[#\$]\" + send \"$cmd\\r\" + expect -re \"$username@.*:.*[#\$]\" + exit 0 + } + -re \"$username@.*:.*[#\$]\" { + # Already logged in + send \"$cmd\\r\" + expect -re 
\"$username@.*:.*[#\$]\" + exit 0 + } + timeout { + exit 1 + } + } + " 2>/dev/null + } + ''; + +in +{ + inherit constants mainConstants commonInputs sshInputs; + inherit colorHelpers timingHelpers processHelpers consoleHelpers; + + # Generate a polling script that waits for a condition + # condition is a shell command that returns 0 when ready + mkPollingScript = { name, condition, timeout, pollInterval ? 1, description ? "" }: + '' + ${timingHelpers} + + poll_until() { + local timeout="$1" + local poll_interval="$2" + local start_time + start_time=$(time_ms) + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + if ${condition}; then + echo "$(elapsed_ms "$start_time")" + return 0 + fi + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + echo "$(elapsed_ms "$start_time")" + return 1 + } + + poll_until "${toString timeout}" "${toString pollInterval}" + ''; + + # Generate a script that checks VM process status + mkCheckProcessScript = { variant }: + let + hostname = mainConstants.getHostname variant; + timeout = mainConstants.getTimeout variant "processStart"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-1-check-process-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${timingHelpers} + ${processHelpers} + + HOSTNAME="${hostname}" + TIMEOUT=${toString timeout} + + phase_header "1" "Check Process" "$TIMEOUT" + + start_time=$(time_ms) + if wait_for_process "$HOSTNAME" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + pid=$(vm_pid "$HOSTNAME") + result_pass "VM process '$HOSTNAME' running (PID: $pid)" "$elapsed" + exit 0 + else + elapsed=$(elapsed_ms "$start_time") + result_fail "VM process '$HOSTNAME' not found" "$elapsed" + exit 1 + fi + ''; + }; + + # Generate a script that checks serial console availability + mkCheckSerialScript = { variant }: + let + consolePorts = mainConstants.getConsolePorts variant; + timeout = mainConstants.getTimeout variant "serialReady"; + in + 
pkgs.writeShellApplication { + name = "pcp-lifecycle-2-check-serial-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${timingHelpers} + ${consoleHelpers} + + SERIAL_PORT=${toString consolePorts.serial} + TIMEOUT=${toString timeout} + + phase_header "2" "Check Serial Console" "$TIMEOUT" + + start_time=$(time_ms) + if wait_for_console "$SERIAL_PORT" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Serial console available on port $SERIAL_PORT" "$elapsed" + exit 0 + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Serial console not available on port $SERIAL_PORT" "$elapsed" + exit 1 + fi + ''; + }; + + # Generate a script that checks virtio console availability + mkCheckVirtioScript = { variant }: + let + consolePorts = mainConstants.getConsolePorts variant; + timeout = mainConstants.getTimeout variant "virtioReady"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-2b-check-virtio-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${timingHelpers} + ${consoleHelpers} + + VIRTIO_PORT=${toString consolePorts.virtio} + TIMEOUT=${toString timeout} + + phase_header "2b" "Check Virtio Console" "$TIMEOUT" + + start_time=$(time_ms) + if wait_for_console "$VIRTIO_PORT" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Virtio console available on port $VIRTIO_PORT" "$elapsed" + exit 0 + else + elapsed=$(elapsed_ms "$start_time") + result_fail "Virtio console not available on port $VIRTIO_PORT" "$elapsed" + exit 1 + fi + ''; + }; + + # Generate a shutdown script + mkShutdownScript = { variant }: + let + consolePorts = mainConstants.getConsolePorts variant; + hostname = mainConstants.getHostname variant; + timeout = mainConstants.getTimeout variant "shutdown"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-5-shutdown-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${timingHelpers} + ${consoleHelpers} + ${processHelpers} + + 
VIRTIO_PORT=${toString consolePorts.virtio} + HOSTNAME="${hostname}" + TIMEOUT=${toString timeout} + + phase_header "5" "Shutdown" "$TIMEOUT" + + start_time=$(time_ms) + + # Try to send shutdown command via virtio console + if port_is_open "127.0.0.1" "$VIRTIO_PORT"; then + info " Sending shutdown command via virtio console..." + if console_login_cmd "$VIRTIO_PORT" "root" "pcp" "poweroff" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "Shutdown command sent" "$elapsed" + exit 0 + fi + fi + + # Fallback: kill the process + warn " Console shutdown failed, killing process..." + kill_vm "$HOSTNAME" + elapsed=$(elapsed_ms "$start_time") + result_pass "VM process killed" "$elapsed" + ''; + }; + + # Generate a wait-for-exit script + mkWaitExitScript = { variant }: + let + hostname = mainConstants.getHostname variant; + timeout = mainConstants.getTimeout variant "waitExit"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-6-wait-exit-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${timingHelpers} + ${processHelpers} + + HOSTNAME="${hostname}" + TIMEOUT=${toString timeout} + + phase_header "6" "Wait for Exit" "$TIMEOUT" + + start_time=$(time_ms) + if wait_for_exit "$HOSTNAME" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "VM exited cleanly" "$elapsed" + exit 0 + else + elapsed=$(elapsed_ms "$start_time") + result_fail "VM did not exit within timeout, forcing kill" "$elapsed" + kill_vm "$HOSTNAME" + exit 1 + fi + ''; + }; + + # Generate a force-kill script + mkForceKillScript = { variant }: + let + hostname = mainConstants.getHostname variant; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-force-kill-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${processHelpers} + + HOSTNAME="${hostname}" + + info "Force killing VM: $HOSTNAME" + + if vm_is_running "$HOSTNAME"; then + kill_vm "$HOSTNAME" + if vm_is_running "$HOSTNAME"; then + error "Failed to kill 
VM process" + exit 1 + else + success "VM process killed" + fi + else + info "VM process not running" + fi + ''; + }; + + # Generate a status script + mkStatusScript = { variant }: + let + hostname = mainConstants.getHostname variant; + consolePorts = mainConstants.getConsolePorts variant; + portOffset = mainConstants.variantPortOffsets.${variant}; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-status-${variant}"; + runtimeInputs = commonInputs; + text = '' + ${colorHelpers} + ${processHelpers} + ${consoleHelpers} + + HOSTNAME="${hostname}" + SERIAL_PORT=${toString consolePorts.serial} + VIRTIO_PORT=${toString consolePorts.virtio} + SSH_PORT=$((${toString mainConstants.ports.sshForward} + ${toString portOffset})) + + bold "MicroVM Status: ${variant}" + echo "" + + # Process status + if vm_is_running "$HOSTNAME"; then + pid=$(vm_pid "$HOSTNAME") + success " Process: Running (PID: $pid)" + else + error " Process: Not running" + fi + + # Console status + if port_is_open "127.0.0.1" "$SERIAL_PORT"; then + success " Serial Console: Available (port $SERIAL_PORT)" + else + warn " Serial Console: Not available" + fi + + if port_is_open "127.0.0.1" "$VIRTIO_PORT"; then + success " Virtio Console: Available (port $VIRTIO_PORT)" + else + warn " Virtio Console: Not available" + fi + + # SSH status + if port_is_open "127.0.0.1" "$SSH_PORT"; then + success " SSH: Available (port $SSH_PORT)" + else + warn " SSH: Not available (port $SSH_PORT)" + fi + ''; + }; +} diff --git a/nix/lifecycle/pcp-checks.nix b/nix/lifecycle/pcp-checks.nix new file mode 100644 index 0000000000..37ddef8090 --- /dev/null +++ b/nix/lifecycle/pcp-checks.nix @@ -0,0 +1,394 @@ +# nix/lifecycle/pcp-checks.nix +# +# PCP-specific verification functions for MicroVM lifecycle testing. +# Provides service checks, metric verification, and HTTP endpoint testing. 
+# +{ pkgs, lib }: +let + constants = import ./constants.nix { }; + mainConstants = import ../constants.nix; + lifecycleLib = import ./lib.nix { inherit pkgs lib; }; + + inherit (lifecycleLib) colorHelpers timingHelpers processHelpers consoleHelpers; + inherit (lifecycleLib) commonInputs sshInputs; + + # SSH options for connecting to debug VMs + sshOpts = lib.concatStringsSep " " [ + "-o" "StrictHostKeyChecking=no" + "-o" "UserKnownHostsFile=/dev/null" + "-o" "ConnectTimeout=5" + "-o" "LogLevel=ERROR" + "-o" "PubkeyAuthentication=no" + ]; + + # Service checking helpers (via SSH) + sshHelpers = '' + # Run a command via SSH + ssh_cmd() { + local host="$1" + local port="$2" + shift 2 + sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" "$@" 2>/dev/null + } + + # Check if a systemd service is active + check_service() { + local host="$1" + local port="$2" + local service="$3" + ssh_cmd "$host" "$port" "systemctl is-active $service" | grep -q "^active$" + } + + # Check if a port is listening (inside VM) + check_port() { + local host="$1" + local port="$2" + local target_port="$3" + ssh_cmd "$host" "$port" "ss -tlnp | grep -q :$target_port" + } + + # Run pminfo and check for output + check_metric() { + local host="$1" + local port="$2" + local metric="$3" + ssh_cmd "$host" "$port" "pminfo -f $metric 2>/dev/null | grep -q value" + } + + # Check HTTP endpoint (inside VM) + check_http() { + local host="$1" + local port="$2" + local target_port="$3" + local path="$4" + ssh_cmd "$host" "$port" "curl -sf http://localhost:$target_port$path >/dev/null" + } + + # Wait for SSH to be available + wait_for_ssh() { + local host="$1" + local port="$2" + local timeout="$3" + local poll_interval=1 + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + if sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" true 2>/dev/null; then + return 0 + fi + sleep "$poll_interval" + elapsed=$((elapsed + poll_interval)) + done + return 1 + } + ''; + +in +{ + # Generate a script that 
verifies PCP services are running + mkVerifyServicesScript = { variant }: + let + variantConfig = constants.variants.${variant}; + portOffset = mainConstants.variantPortOffsets.${variant}; + sshPort = mainConstants.ports.sshForward + portOffset; + timeout = mainConstants.getTimeout variant "serviceReady"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-3-verify-services-${variant}"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl ]; + text = '' + set +e # Don't exit on first failure + + ${colorHelpers} + ${timingHelpers} + ${sshHelpers} + + HOST="localhost" + SSH_PORT=${toString sshPort} + TIMEOUT=${toString timeout} + SERVICES="${lib.concatStringsSep " " variantConfig.services}" + + phase_header "3" "Verify PCP Services" "$TIMEOUT" + + # Wait for SSH first + info " Waiting for SSH connectivity..." + start_time=$(time_ms) + if ! wait_for_ssh "$HOST" "$SSH_PORT" "$TIMEOUT"; then + elapsed=$(elapsed_ms "$start_time") + result_fail "SSH not available within timeout" "$elapsed" + exit 1 + fi + ssh_elapsed=$(elapsed_ms "$start_time") + info " SSH connected in ''${ssh_elapsed}ms" + + # Check each service + passed=0 + failed=0 + + for service in $SERVICES; do + start_time=$(time_ms) + if check_service "$HOST" "$SSH_PORT" "$service"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "$service active" "$elapsed" + passed=$((passed + 1)) + else + elapsed=$(elapsed_ms "$start_time") + result_fail "$service not active" "$elapsed" + failed=$((failed + 1)) + fi + done + + echo "" + if [[ $failed -eq 0 ]]; then + success " All $passed services verified" + exit 0 + else + error " $failed of $((passed + failed)) services failed" + exit 1 + fi + ''; + }; + + # Generate a script that verifies PCP metrics are available + mkVerifyMetricsScript = { variant }: + let + variantConfig = constants.variants.${variant}; + portOffset = mainConstants.variantPortOffsets.${variant}; + sshPort = mainConstants.ports.sshForward + portOffset; + timeout = 
mainConstants.getTimeout variant "metricsReady"; + metrics = variantConfig.metrics; + bccMetrics = variantConfig.bccMetrics or []; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-4-verify-metrics-${variant}"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl ]; + text = '' + set +e # Don't exit on first failure + + ${colorHelpers} + ${timingHelpers} + ${sshHelpers} + + HOST="localhost" + SSH_PORT=${toString sshPort} + TIMEOUT=${toString timeout} + METRICS="${lib.concatStringsSep " " metrics}" + ${lib.optionalString (bccMetrics != []) ''BCC_METRICS="${lib.concatStringsSep " " bccMetrics}"''} + + phase_header "4" "Verify PCP Metrics" "$TIMEOUT" + + # Wait for SSH (should already be available from phase 3) + if ! wait_for_ssh "$HOST" "$SSH_PORT" 10; then + result_fail "SSH not available" "0" + exit 1 + fi + + passed=0 + failed=0 + warned=0 + + # Check standard metrics + for metric in $METRICS; do + start_time=$(time_ms) + if check_metric "$HOST" "$SSH_PORT" "$metric"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "$metric returns data" "$elapsed" + passed=$((passed + 1)) + else + elapsed=$(elapsed_ms "$start_time") + result_fail "$metric not available" "$elapsed" + failed=$((failed + 1)) + fi + done + + ${lib.optionalString (bccMetrics != []) '' + # Check BCC metrics (may take longer, warn instead of fail) + info " Checking BCC metrics (may still be compiling)..." 
+ for metric in $BCC_METRICS; do + start_time=$(time_ms) + if check_metric "$HOST" "$SSH_PORT" "$metric"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "$metric returns data" "$elapsed" + passed=$((passed + 1)) + else + elapsed=$(elapsed_ms "$start_time") + echo -e " \033[33mWARN\033[0m: $metric not yet available (''${elapsed}ms)" + warned=$((warned + 1)) + fi + done + ''} + + echo "" + if [[ $failed -eq 0 ]]; then + if [[ $warned -gt 0 ]]; then + warn " $passed metrics verified, $warned still compiling" + else + success " All $passed metrics verified" + fi + exit 0 + else + error " $failed metrics failed, $passed passed" + exit 1 + fi + ''; + }; + + # Generate a script that verifies HTTP endpoints + mkVerifyHttpScript = { variant }: + let + variantConfig = constants.variants.${variant}; + portOffset = mainConstants.variantPortOffsets.${variant}; + sshPort = mainConstants.ports.sshForward + portOffset; + httpChecks = variantConfig.httpChecks; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-verify-http-${variant}"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl ]; + text = '' + set +e # Don't exit on first failure + + ${colorHelpers} + ${timingHelpers} + ${sshHelpers} + + HOST="localhost" + SSH_PORT=${toString sshPort} + + bold "HTTP Endpoint Verification: ${variant}" + echo "" + + if ! 
wait_for_ssh "$HOST" "$SSH_PORT" 10; then + error "SSH not available" + exit 1 + fi + + passed=0 + failed=0 + + ${lib.concatMapStringsSep "\n" (check: '' + start_time=$(time_ms) + if check_http "$HOST" "$SSH_PORT" "${toString check.port}" "${check.path}"; then + elapsed=$(elapsed_ms "$start_time") + result_pass "${check.name} HTTP endpoint" "$elapsed" + passed=$((passed + 1)) + else + elapsed=$(elapsed_ms "$start_time") + result_fail "${check.name} HTTP endpoint (port ${toString check.port}${check.path})" "$elapsed" + failed=$((failed + 1)) + fi + '') httpChecks} + + echo "" + if [[ $failed -eq 0 ]]; then + success "All $passed HTTP endpoints verified" + exit 0 + else + error "$failed HTTP endpoints failed" + exit 1 + fi + ''; + }; + + # Generate a comprehensive service and metric check + mkFullVerificationScript = { variant }: + let + variantConfig = constants.variants.${variant}; + portOffset = mainConstants.variantPortOffsets.${variant}; + sshPort = mainConstants.ports.sshForward + portOffset; + serviceTimeout = mainConstants.getTimeout variant "serviceReady"; + metricTimeout = mainConstants.getTimeout variant "metricsReady"; + in + pkgs.writeShellApplication { + name = "pcp-lifecycle-verify-full-${variant}"; + runtimeInputs = commonInputs ++ sshInputs ++ [ pkgs.curl ]; + text = '' + set +e + + ${colorHelpers} + ${timingHelpers} + ${sshHelpers} + + HOST="localhost" + SSH_PORT=${toString sshPort} + SERVICE_TIMEOUT=${toString serviceTimeout} + METRIC_TIMEOUT=${toString metricTimeout} + + bold "========================================" + bold " Full Verification: ${variant}" + bold "========================================" + echo "" + info "Description: ${variantConfig.description}" + info "SSH Port: $SSH_PORT" + echo "" + + total_start=$(time_ms) + total_passed=0 + total_failed=0 + + # Wait for SSH + info "Waiting for SSH (timeout: ''${SERVICE_TIMEOUT}s)..." + ssh_start=$(time_ms) + if ! 
wait_for_ssh "$HOST" "$SSH_PORT" "$SERVICE_TIMEOUT"; then + result_fail "SSH connectivity" "$(elapsed_ms "$ssh_start")" + exit 1 + fi + result_pass "SSH connectivity" "$(elapsed_ms "$ssh_start")" + total_passed=$((total_passed + 1)) + + # Service checks + echo "" + bold "--- Service Checks ---" + ${lib.concatMapStringsSep "\n" (service: '' + start_time=$(time_ms) + if check_service "$HOST" "$SSH_PORT" "${service}"; then + result_pass "Service: ${service}" "$(elapsed_ms "$start_time")" + total_passed=$((total_passed + 1)) + else + result_fail "Service: ${service}" "$(elapsed_ms "$start_time")" + total_failed=$((total_failed + 1)) + fi + '') variantConfig.services} + + # Metric checks + echo "" + bold "--- Metric Checks ---" + ${lib.concatMapStringsSep "\n" (metric: '' + start_time=$(time_ms) + if check_metric "$HOST" "$SSH_PORT" "${metric}"; then + result_pass "Metric: ${metric}" "$(elapsed_ms "$start_time")" + total_passed=$((total_passed + 1)) + else + result_fail "Metric: ${metric}" "$(elapsed_ms "$start_time")" + total_failed=$((total_failed + 1)) + fi + '') variantConfig.metrics} + + # HTTP endpoint checks + echo "" + bold "--- HTTP Endpoint Checks ---" + ${lib.concatMapStringsSep "\n" (check: '' + start_time=$(time_ms) + if check_http "$HOST" "$SSH_PORT" "${toString check.port}" "${check.path}"; then + result_pass "HTTP: ${check.name}" "$(elapsed_ms "$start_time")" + total_passed=$((total_passed + 1)) + else + result_fail "HTTP: ${check.name}" "$(elapsed_ms "$start_time")" + total_failed=$((total_failed + 1)) + fi + '') variantConfig.httpChecks} + + # Summary + total_elapsed=$(elapsed_ms "$total_start") + echo "" + bold "========================================" + if [[ $total_failed -eq 0 ]]; then + success " Result: ALL PASSED ($total_passed checks in $(format_ms "$total_elapsed"))" + else + error " Result: FAILED ($total_failed of $((total_passed + total_failed)) checks)" + fi + bold "========================================" + + [[ $total_failed -eq 0 ]] + 
''; + }; +} diff --git a/nix/lifecycle/scripts/vm-debug.exp b/nix/lifecycle/scripts/vm-debug.exp new file mode 100644 index 0000000000..b3065463f3 --- /dev/null +++ b/nix/lifecycle/scripts/vm-debug.exp @@ -0,0 +1,60 @@ +#!/usr/bin/expect -f +# +# vm-debug.exp - Run diagnostic commands on MicroVM via console +# +# Usage: vm-debug.exp +# +# Connects to the console and runs a series of diagnostic commands +# to help debug MicroVM issues. Outputs all results to stdout. +# +# Environment variables: +# VM_USERNAME - Login username (default: root) +# VM_PASSWORD - Login password (default: pcp) +# EXPECT_DEBUG - Enable expect debug mode +# + +source [file dirname [info script]]/vm-lib.exp + +if {$argc < 2} { + puts "Usage: vm-debug.exp " + exit 1 +} + +set port [lindex $argv 0] +set timeout [lindex $argv 1] + +vmlib::init +vmlib::connect $port +vmlib::login + +puts "\n==========================================" +puts " MicroVM Diagnostic Output" +puts "==========================================\n" + +# Diagnostic commands +set commands { + "echo '=== System Info ==='" + "uname -a" + "cat /etc/os-release | head -5" + "echo '=== Network ==='" + "ip addr show | grep -E 'inet |state '" + "ss -tlnp | head -10" + "echo '=== PCP Status ==='" + "systemctl is-active pmcd pmproxy pmlogger 2>/dev/null || true" + "pminfo --version 2>/dev/null || echo 'pminfo not available'" + "echo '=== Journal Errors ==='" + "journalctl -p err --no-pager -n 5 --no-hostname 2>/dev/null || true" + "echo '=== Memory ==='" + "free -h" + "echo '=== Disk ==='" + "df -h / /nix/store 2>/dev/null | head -5" +} + +foreach cmd $commands { + if {![vmlib::run_cmd $cmd]} { + puts "WARNING: Command timed out: $cmd" + } +} + +puts "\n==========================================\n" +exit 0 diff --git a/nix/lifecycle/scripts/vm-expect.exp b/nix/lifecycle/scripts/vm-expect.exp new file mode 100644 index 0000000000..e2c95e7217 --- /dev/null +++ b/nix/lifecycle/scripts/vm-expect.exp @@ -0,0 +1,41 @@ +#!/usr/bin/expect -f 
+# +# vm-expect.exp - Connect to MicroVM serial/virtio console and wait for prompt +# +# Usage: vm-expect.exp [command] +# +# Connects to the specified TCP port (serial or virtio console), +# waits for a shell prompt, optionally runs a command, and exits. +# +# Environment variables: +# VM_USERNAME - Login username (default: root) +# VM_PASSWORD - Login password (default: pcp) +# EXPECT_DEBUG - Enable expect debug mode +# + +source [file dirname [info script]]/vm-lib.exp + +if {$argc < 2} { + puts "Usage: vm-expect.exp \[command\]" + exit 1 +} + +set port [lindex $argv 0] +set timeout [lindex $argv 1] +set command "" +if {$argc >= 3} { + set command [lindex $argv 2] +} + +vmlib::init +vmlib::connect $port +vmlib::login + +if {$command ne ""} { + if {![vmlib::run_cmd $command]} { + exit 1 + } +} + +puts "SUCCESS: Console session established" +exit 0 diff --git a/nix/lifecycle/scripts/vm-lib.exp b/nix/lifecycle/scripts/vm-lib.exp new file mode 100644 index 0000000000..14147cbb88 --- /dev/null +++ b/nix/lifecycle/scripts/vm-lib.exp @@ -0,0 +1,172 @@ +#!/usr/bin/expect -f +# +# vm-lib.exp - Shared library for MicroVM console scripts +# +# Source this file from other scripts: +# source [file dirname [info script]]/vm-lib.exp +# +# Environment variables: +# VM_USERNAME - Login username (default: root) +# VM_PASSWORD - Login password (default: pcp) +# EXPECT_DEBUG - Enable expect debug mode (exp_internal 1) +# + +namespace eval vmlib { + # Configuration + variable prompt_pattern "root@pcp-.*:.*#" + variable wake_delay 0.5 + variable debug 0 + + # Initialize library - call this at script start + proc init {} { + variable debug + if {[info exists ::env(EXPECT_DEBUG)] && $::env(EXPECT_DEBUG) ne ""} { + set debug 1 + exp_internal 1 + } + } + + # Safe credential retrieval from environment + proc get_credentials {} { + set username "root" + set password "pcp" + if {[info exists ::env(VM_USERNAME)] && $::env(VM_USERNAME) ne ""} { + set username $::env(VM_USERNAME) + } + if 
{[info exists ::env(VM_PASSWORD)] && $::env(VM_PASSWORD) ne ""} { +            set password $::env(VM_PASSWORD) +        } +        return [list $username $password] +    } + +    # Connect to console via socat +    proc connect {port} { +        spawn socat -,rawer tcp:127.0.0.1:$port +        expect { +            "Connection refused" { +                puts "ERROR: Connection refused on port $port" +                exit 1 +            } +            timeout { +                puts "ERROR: Connection timeout on port $port" +                exit 1 +            } +            -re "." { +                # Got some output, continue +            } +        } +    } + +    # Wake console and handle login sequence +    proc login {} { +        variable prompt_pattern +        variable wake_delay + +        lassign [get_credentials] username password + +        # Send newline to wake console +        send "\r" +        sleep $wake_delay + +        expect { +            "login:" { +                send "$username\r" +                expect "Password:" +                send "$password\r" +                expect { +                    -re $prompt_pattern { +                        # Logged in successfully +                    } +                    "Login incorrect" { +                        puts "ERROR: Login failed" +                        exit 1 +                    } +                    timeout { +                        puts "ERROR: Login timeout" +                        exit 1 +                    } +                } +            } +            -re $prompt_pattern { +                # Already logged in +            } +            "Welcome to NixOS" { +                # System just booted, wait for login prompt +                expect "login:" +                send "$username\r" +                expect "Password:" +                send "$password\r" +                expect -re $prompt_pattern +            } +            timeout { +                puts "ERROR: No prompt received within timeout" +                exit 1 +            } +        } +    } + +    # Run a command and wait for prompt +    # Returns 1 on success, 0 on timeout +    proc run_cmd {cmd} { +        variable prompt_pattern +        send "$cmd\r" +        expect { +            -re $prompt_pattern { +                return 1 +            } +            timeout { +                puts "ERROR: Command timeout: $cmd" +                return 0 +            } +        } +    } + +    # Run command and check output against patterns +    # service_patterns is a dict: pattern -> {action incr_var} +    # Returns the matched pattern or "timeout" +    proc check_service {service} { +        variable prompt_pattern +        send "systemctl is-active $service\r" +        expect { +            "inactive\r" { +                expect -re $prompt_pattern +                return "inactive" +            } +            "active\r" { +                expect -re $prompt_pattern +                return "active" +            } +            "failed\r" { +                expect -re
$prompt_pattern + return "failed" + } + "activating\r" { + expect -re $prompt_pattern + return "activating" + } + timeout { + return "timeout" + } + } + } + + # Run pminfo check for metrics + # Returns "ok", "no_pmcd", or "timeout" + proc check_pminfo {} { + variable prompt_pattern + send "pminfo -f kernel.all.load 2>&1 | head -3\r" + expect { + "value" { + expect -re $prompt_pattern + return "ok" + } + "No PMCD" { + expect -re $prompt_pattern + return "no_pmcd" + } + timeout { + return "timeout" + } + } + } +} diff --git a/nix/lifecycle/scripts/vm-verify-pcp.exp b/nix/lifecycle/scripts/vm-verify-pcp.exp new file mode 100644 index 0000000000..07e193d71e --- /dev/null +++ b/nix/lifecycle/scripts/vm-verify-pcp.exp @@ -0,0 +1,88 @@ +#!/usr/bin/expect -f +# +# vm-verify-pcp.exp - Verify PCP services are running via console +# +# Usage: vm-verify-pcp.exp +# +# Connects to the console and checks if the specified services are active. +# Returns exit code 0 if all services are active, 1 otherwise. 
+# +# Example: +# vm-verify-pcp.exp 24500 30 pmcd pmproxy pmlogger +# +# Environment variables: +# VM_USERNAME - Login username (default: root) +# VM_PASSWORD - Login password (default: pcp) +# EXPECT_DEBUG - Enable expect debug mode +# + +source [file dirname [info script]]/vm-lib.exp + +if {$argc < 3} { + puts "Usage: vm-verify-pcp.exp " + puts "Example: vm-verify-pcp.exp 24500 30 pmcd pmproxy" + exit 1 +} + +set port [lindex $argv 0] +set timeout [lindex $argv 1] +set services [lrange $argv 2 end] + +vmlib::init +vmlib::connect $port +vmlib::login + +set passed 0 +set failed 0 + +# Check each service +foreach service $services { + set result [vmlib::check_service $service] + switch $result { + "active" { + puts "PASS: $service is active" + incr passed + } + "inactive" { + puts "FAIL: $service is inactive" + incr failed + } + "failed" { + puts "FAIL: $service has failed" + incr failed + } + "activating" { + puts "WARN: $service is still activating" + incr passed + } + "timeout" { + puts "FAIL: $service check timed out" + incr failed + } + } +} + +# Check pminfo metrics +set result [vmlib::check_pminfo] +switch $result { + "ok" { + puts "PASS: pminfo returns metrics" + incr passed + } + "no_pmcd" { + puts "FAIL: pmcd not responding" + incr failed + } + "timeout" { + puts "FAIL: pminfo timed out" + incr failed + } +} + +puts "" +puts "Results: $passed passed, $failed failed" + +if {$failed > 0} { + exit 1 +} +exit 0 diff --git a/nix/microvm-scripts.nix b/nix/microvm-scripts.nix new file mode 100644 index 0000000000..e5cb9e020d --- /dev/null +++ b/nix/microvm-scripts.nix @@ -0,0 +1,145 @@ +# nix/microvm-scripts.nix +# +# Helper scripts for managing PCP MicroVMs. +# These provide simple Unix-idiomatic ways to check, stop, and connect to VMs. 
+# +# Port offsets per variant (see constants.variantPortOffsets): +# base: 0 -> SSH port 22022 +# eval: 100 -> SSH port 22122 +# grafana: 200 -> SSH port 22222 +# bpf: 300 -> SSH port 22322 +# bcc: 400 -> SSH port 22422 +# +{ pkgs }: +let + constants = import ./constants.nix; + baseSshPort = constants.ports.sshForward; + + # Pattern to identify our MicroVMs in process list + # Hostnames: pcp-vm, pcp-eval-vm, pcp-grafana-vm, pcp-bpf-vm, pcp-bcc-vm + vmPattern = "microvm@pcp-(vm|eval-vm|grafana-vm|bpf-vm|bcc-vm)"; + +in { + # ─── Check Script ─────────────────────────────────────────────────────────── + # Lists running PCP MicroVM processes and shows a count. + # + # Usage: nix run .#pcp-vm-check + # + check = pkgs.writeShellApplication { + name = "pcp-vm-check"; + runtimeInputs = with pkgs; [ procps ]; + text = '' + echo "=== PCP MicroVM Processes ===" + echo + + # Use pgrep -af to show full command line (-f matches full cmdline) + if pgrep -af '${vmPattern}'; then + echo + echo "=== Count ===" + pgrep -cf '${vmPattern}' + else + echo "(none running)" + echo + echo "=== Count ===" + echo "0" + fi + ''; + }; + + # ─── Stop Script ──────────────────────────────────────────────────────────── + # Kills all running PCP MicroVM processes. + # + # Usage: nix run .#pcp-vm-stop + # + stop = pkgs.writeShellApplication { + name = "pcp-vm-stop"; + runtimeInputs = with pkgs; [ procps ]; + text = '' + echo "=== Stopping PCP MicroVMs ===" + + # Check if any are running + if ! pgrep -f '${vmPattern}' > /dev/null; then + echo "No PCP MicroVMs running." + exit 0 + fi + + echo "Found processes:" + pgrep -af '${vmPattern}' + + echo + echo "Sending SIGTERM..." + pkill -f '${vmPattern}' || true + + sleep 1 + + # Check if any survived + if pgrep -f '${vmPattern}' > /dev/null; then + echo "Processes still running, sending SIGKILL..." + pkill -9 -f '${vmPattern}' || true + fi + + echo "Done." 
+ ''; + }; + + # ─── SSH Script ───────────────────────────────────────────────────────────── + # Connects to the MicroVM as root via SSH. + # Uses password auth (debug mode only - password is "pcp"). + # + # Usage: + # nix run .#pcp-vm-ssh # Connect to base variant (port 22022) + # nix run .#pcp-vm-ssh -- --variant=eval # Connect to eval variant (port 22122) + # nix run .#pcp-vm-ssh -- -p 22222 # Connect to specific port + # + ssh = pkgs.writeShellApplication { + name = "pcp-vm-ssh"; + runtimeInputs = with pkgs; [ openssh sshpass ]; + text = '' + # Disable SSH agent to avoid keyring popups + unset SSH_AUTH_SOCK + + # Default port (base variant) + PORT=${toString baseSshPort} + + # Parse arguments + PASSTHROUGH_ARGS=() + while [[ $# -gt 0 ]]; do + case "$1" in + --variant=*) + VARIANT="''${1#--variant=}" + case "$VARIANT" in + base) PORT=$((${toString baseSshPort} + ${toString constants.variantPortOffsets.base})) ;; + eval) PORT=$((${toString baseSshPort} + ${toString constants.variantPortOffsets.eval})) ;; + grafana) PORT=$((${toString baseSshPort} + ${toString constants.variantPortOffsets.grafana})) ;; + bpf) PORT=$((${toString baseSshPort} + ${toString constants.variantPortOffsets.bpf})) ;; + bcc) PORT=$((${toString baseSshPort} + ${toString constants.variantPortOffsets.bcc})) ;; + *) + echo "Unknown variant: $VARIANT" + echo "Valid variants: base, eval, grafana, bpf, bcc" + exit 1 + ;; + esac + shift + ;; + -p) + # User specified port directly, use it + PORT="$2" + shift 2 + ;; + *) + PASSTHROUGH_ARGS+=("$1") + shift + ;; + esac + done + + # Connect with password "pcp" (debug mode) + exec sshpass -p pcp ssh \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + -p "$PORT" \ + root@localhost "''${PASSTHROUGH_ARGS[@]}" + ''; + }; +} diff --git a/nix/microvm.nix b/nix/microvm.nix new file mode 100644 index 0000000000..abddb6af81 --- /dev/null +++ b/nix/microvm.nix @@ -0,0 +1,412 @@ +# nix/microvm.nix +# +# Parametric MicroVM 
generator for PCP. +# This single module handles all MicroVM configurations via parameters. +# +# Parameters: +# networking - "user" (port forwarding) or "tap" (direct network) +# debugMode - Enable password SSH (default: true for interactive testing) +# enablePmlogger - Enable archive logging (default: true) +# enableEvalTools - Enable node_exporter + below for comparison (default: false) +# enablePmieTest - Enable stress-ng workload + pmie rules (default: false) +# enableGrafana - Enable Grafana + Prometheus for visual monitoring (default: false) +# enableBpf - Enable pre-compiled BPF PMDA (default: false) +# enableBcc - Enable runtime BCC PMDA, requires 2GB (default: false) +# +# Helper scripts (see nix/microvm-scripts.nix): +# nix run .#pcp-vm-check - List running PCP MicroVMs and show count +# nix run .#pcp-vm-stop - Stop all running PCP MicroVMs +# nix run .#pcp-vm-ssh - SSH into the VM as root (debug mode only) +# +# Security notes: +# - debugMode is enabled by default for interactive testing convenience +# - For production/CI use, set debugMode = false and configure SSH keys +# +# Returns the microvm.declaredRunner - a script that starts the VM. +# +{ + pkgs, + lib, + pcp, + microvm, + nixosModule, + nixpkgs, + system, + networking ? "user", # "user" or "tap" + debugMode ? true, # Enable password auth (default: true for interactive testing) + enablePmlogger ? true, # Archive logging + enableEvalTools ? false, # node_exporter, below + enablePmieTest ? false, # stress-ng + pmie rules + enableGrafana ? false, # Grafana + Prometheus + enableBpf ? false, # Pre-compiled CO-RE eBPF (lightweight) + enableBcc ? false, # Runtime eBPF compilation (heavyweight, 2GB) + portOffset ? 0, # Port offset for all forwarded ports (see constants.variantPortOffsets) + variant ? 
"base", # Variant name for console port allocation (base, eval, grafana, bpf, bcc) +}: +let + constants = import ./constants.nix; + useTap = networking == "tap"; + + # Get serial console ports for this variant + consolePorts = constants.getConsolePorts variant; + + # BCC overlay: change KERNEL_MODULES_DIR from /run/booted-system/... to /lib/modules + # This is required because /run/booted-system is a symlink to read-only Nix store, + # but we need to bind mount kernel dev headers at this location. + # See bcc.nix for the bind mount configuration. + bccOverlay = final: prev: { + bcc = prev.bcc.overrideAttrs (old: { + cmakeFlags = builtins.map (flag: + if builtins.match ".*BCC_KERNEL_MODULES_DIR.*" flag != null + then "-DBCC_KERNEL_MODULES_DIR=/lib/modules" + else flag + ) old.cmakeFlags; + }); + }; + + # Dynamic hostname based on enabled features + # Priority: bcc > grafana > bpf > eval > base + # Note: grafana comes before bpf because grafana variant enables bpf for dashboards + # but should still be identified as grafana-vm for lifecycle testing + hostname = + if enableBcc then "pcp-bcc-vm" + else if enableGrafana then "pcp-grafana-vm" + else if enableBpf then "pcp-bpf-vm" + else if enableEvalTools || enablePmieTest then "pcp-eval-vm" + else "pcp-vm"; + + # Dynamic memory: 2GB+ for BCC (clang/LLVM), 1GB otherwise + # Use 2049 instead of 2048 to avoid QEMU hang with exactly 2GB + # See: https://github.com/microvm-nix/microvm.nix/issues/171 + memoryMB = if enableBcc then 2049 else constants.vm.memoryMB; + + # Build a NixOS system with MicroVM support + vmConfig = nixpkgs.lib.nixosSystem { + inherit system; + + # Pass PCP package into modules via specialArgs + specialArgs = { inherit pcp; }; + + modules = [ + # Import MicroVM NixOS module + microvm.nixosModules.microvm + + # Import our PCP NixOS module + nixosModule + + # Import pmie testing module (stress-ng workload + pmie rules) + ./pmie-test.nix + + # Import Grafana module (provides services.pcp.grafana option) + 
./grafana.nix + + # Import BPF module (provides services.pcp.bpf option) + ./bpf.nix + + # Import BCC module (provides services.pcp.bcc option) + ./bcc.nix + + # PCP service configuration + ({ pcp, ... }: { + services.pcp = { + enable = true; + package = pcp; + preset = "custom"; + pmlogger.enable = enablePmlogger; + pmie.enable = false; + pmproxy.enable = true; + # For TAP networking, restrict to bridge subnet + allowedNetworks = lib.optionals useTap [ + constants.network.subnet + "127.0.0.0/8" + "::1/128" + ]; + }; + }) + + # MicroVM and system configuration + ({ config, pkgs, ... }: { + system.stateVersion = "26.05"; + + nixpkgs.hostPlatform = system; + + # Apply BCC overlay when BCC is enabled + # This changes BCC's KERNEL_MODULES_DIR to /lib/modules (see bccOverlay above) + nixpkgs.overlays = lib.optionals enableBcc [ bccOverlay ]; + + microvm = { + hypervisor = "qemu"; + mem = memoryMB; + vcpu = constants.vm.vcpus; + + # Share host's nix store read-only + shares = [{ + tag = "ro-store"; + source = "/nix/store"; + mountPoint = "/nix/.ro-store"; + proto = "9p"; + }]; + + # Network interface configuration + interfaces = if useTap then [{ + type = "tap"; + id = constants.network.tap; + mac = constants.network.vmMac; + }] else [{ + type = "user"; + id = "eth0"; + mac = constants.network.vmMac; + }]; + + # Port forwarding for user-mode networking (additive based on features) + # All host ports are shifted by portOffset to allow multiple VMs to coexist + forwardPorts = lib.optionals (!useTap) ( + # Base ports: pmcd, pmproxy, ssh + [ + { from = "host"; host.port = constants.ports.pmcd + portOffset; guest.port = constants.ports.pmcd; } + { from = "host"; host.port = constants.ports.pmproxy + portOffset; guest.port = constants.ports.pmproxy; } + { from = "host"; host.port = constants.ports.sshForward + portOffset; guest.port = 22; } + ] + # Eval tools: node_exporter + ++ lib.optionals enableEvalTools [ + { from = "host"; host.port = constants.ports.nodeExporter + 
portOffset; guest.port = constants.ports.nodeExporter; } + ] + # Grafana: Grafana + Prometheus + ++ lib.optionals enableGrafana [ + { from = "host"; host.port = constants.ports.grafanaForward + portOffset; guest.port = constants.ports.grafana; } + { from = "host"; host.port = constants.ports.prometheusForward + portOffset; guest.port = constants.ports.prometheus; } + ] + ); + + # ─── Serial Console Configuration ───────────────────────────────── + # Two consoles for debugging boot issues and network problems: + # - ttyS0 (serial): Slow but available immediately at boot + # - hvc0 (virtio): Fast, available after virtio drivers load + # + # Connect with: nc localhost + # Or for raw mode: socat -,rawer tcp:localhost: + # + qemu = { + # Disable default serial console (we configure our own TCP-accessible ones) + serialConsole = false; + + extraArgs = [ + # VM identification (for ps/pgrep matching) + "-name" "${hostname},process=${hostname}" + + # Serial console on TCP (ttyS0) - slow but early boot access + "-serial" "tcp:127.0.0.1:${toString consolePorts.serial},server,nowait" + + # Virtio console (hvc0) - high-speed, requires drivers + "-device" "virtio-serial-pci" + "-chardev" "socket,id=virtcon,port=${toString consolePorts.virtio},host=127.0.0.1,server=on,wait=off" + "-device" "virtconsole,chardev=virtcon" + ]; + }; + }; + + # Console output configuration - send to both serial and virtio + boot.kernelParams = [ + "console=ttyS0,115200" # Serial first (early boot messages) + "console=hvc0" # Virtio second (becomes primary after driver loads) + ]; + + networking.hostName = hostname; + + # Static IP for TAP networking + # Use systemd-networkd for reliable interface matching (enp* covers PCI ethernet) + systemd.network = lib.mkIf useTap { + enable = true; + networks."10-tap" = { + matchConfig.Name = "enp*"; + networkConfig = { + Address = "${constants.network.vmIp}/24"; + Gateway = constants.network.gateway; + DHCP = "no"; + }; + }; + }; + # Disable dhcpcd to avoid 
conflicts with systemd-networkd + networking.useDHCP = lib.mkIf useTap false; + + # SSH Configuration + # Default: password auth enabled for interactive testing convenience + services.openssh = { + enable = true; + settings = { + PasswordAuthentication = false; + PermitRootLogin = "prohibit-password"; + KbdInteractiveAuthentication = false; + }; + }; + + # Create a pcp-admin user for SSH key access + users.users.pcp-admin = { + isNormalUser = true; + extraGroups = [ "wheel" ]; + openssh.authorizedKeys.keys = [ + # Users should add their keys here or via module option + ]; + }; + + # Allow passwordless sudo for pcp-admin + security.sudo.wheelNeedsPassword = false; + }) + + # ─── pmie Testing Module ─────────────────────────────────────────── + # Enables stress-ng workload and pmie rules for testing the inference engine. + (lib.mkIf enablePmieTest { + services.pcp.pmieTest.enable = true; + }) + + # ─── Evaluation Tools ────────────────────────────────────────────── + # node_exporter and below for metric comparison + ({ pkgs, ... }: lib.mkIf enableEvalTools { + # Prometheus node_exporter for comparison with PCP metrics + services.prometheus.exporters.node = { + enable = true; + port = constants.ports.nodeExporter; + enabledCollectors = [ + "cpu" "diskstats" "filesystem" "loadavg" + "meminfo" "netdev" "netstat" "stat" "time" "vmstat" + ]; + disabledCollectors = [ "textfile" ]; + }; + + # below - Meta's time-traveling resource monitor + environment.systemPackages = [ pkgs.below ]; + services.below.enable = true; + + # Open firewall for node_exporter + networking.firewall.allowedTCPPorts = [ constants.ports.nodeExporter ]; + }) + + # ─── Grafana Module ───────────────────────────────────────────────── + # Enables Grafana with PCP Vector dashboards for visual monitoring. 
+ (lib.mkIf enableGrafana { + # Enable Grafana with PCP dashboards + services.pcp.grafana.enable = true; + + # Prometheus server for comparison with PCP + services.prometheus = { + enable = true; + port = constants.ports.prometheus; + + # Minimal retention for eval VM + retentionTime = "1d"; + + # Scrape node_exporter (if enabled) + scrapeConfigs = lib.optionals enableEvalTools [{ + job_name = "node"; + static_configs = [{ + targets = [ "localhost:${toString constants.ports.nodeExporter}" ]; + }]; + scrape_interval = "15s"; + }]; + }; + + # Open firewall for Prometheus + networking.firewall.allowedTCPPorts = [ constants.ports.prometheus ]; + }) + + # ─── BPF PMDA (Pre-compiled eBPF) ──────────────────────────────── + # Pre-compiled CO-RE eBPF: fast startup, low memory. + # Metrics: bpf.runqlat, bpf.biolatency, bpf.netatop, bpf.oomkill + (lib.mkIf enableBpf { + services.pcp.bpf = { + enable = true; + }; + + # BPF PMDA requires BTF for CO-RE relocation + boot.kernelPatches = [ + { + name = "btf-for-bpf"; + patch = null; + structuredExtraConfig = with lib.kernel; { + DEBUG_INFO_BTF = yes; + }; + } + ]; + }) + + # ─── BCC PMDA (Runtime-compiled eBPF) ─────────────────────────── + # Runtime eBPF compilation: slow startup (~30-60s), 2GB memory. + # Required for: tcptop, tcplife metrics (Grafana eBPF/BCC Overview dashboard) + (lib.mkIf enableBcc { + services.pcp.bcc = { + enable = true; + moduleFailureFatal = false; # Continue if some modules fail to compile + }; + + boot.kernelPatches = [ + { + name = "btf-for-bcc"; + patch = null; + structuredExtraConfig = with lib.kernel; { + DEBUG_INFO_BTF = yes; + }; + } + ]; + }) + + # ─── Debug Mode Module ──────────────────────────────────────────── + # Enables password auth for quick local testing. + (lib.mkIf debugMode { + warnings = [ + "PCP MicroVM is running in DEBUG MODE with insecure SSH settings!" 
+ ]; + + services.openssh.settings = { + PasswordAuthentication = lib.mkForce true; + PermitRootLogin = lib.mkForce "yes"; + KbdInteractiveAuthentication = lib.mkForce true; + }; + + users.users.root.password = "pcp"; + + environment.etc."motd".text = '' + ${lib.optionalString enableBcc '' + ╔═══════════════════════════════════════════════════════════════╗ + ║ PCP MicroVM with BCC PMDA (runtime eBPF compilation) ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ BCC modules take 30-60s to compile at pmcd startup. ║ + ║ Check status: journalctl -u pmcd -f ║ + ╚═══════════════════════════════════════════════════════════════╝ + ''}${lib.optionalString enableBpf '' + ╔═══════════════════════════════════════════════════════════════╗ + ║ PCP MicroVM with BPF PMDA (pre-compiled eBPF) ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ Query eBPF metrics: pminfo -f bpf.runqlat.latency ║ + ╚═══════════════════════════════════════════════════════════════╝ + ''}${lib.optionalString enableGrafana '' + ╔═══════════════════════════════════════════════════════════════╗ + ║ PCP MicroVM with Grafana ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ Grafana: http://localhost:${toString constants.ports.grafanaForward} (admin/pcp) ║ + ║ Prometheus: http://localhost:${toString constants.ports.prometheusForward} ║ + ╚═══════════════════════════════════════════════════════════════╝ + ''}${lib.optionalString (enableEvalTools && !enableGrafana && !enableBpf && !enableBcc) '' + ╔═══════════════════════════════════════════════════════════════╗ + ║ PCP Evaluation MicroVM ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ PCP: pminfo -f kernel.all.load ║ + ║ node_exporter: curl localhost:9100/metrics ║ + ║ below: below live ║ + ╚═══════════════════════════════════════════════════════════════╝ + ''}${lib.optionalString (!enableEvalTools && !enableGrafana && !enableBpf && !enableBcc) '' + 
╔═══════════════════════════════════════════════════════════════╗ + ║ PCP Base MicroVM ║ + ╠═══════════════════════════════════════════════════════════════╣ + ║ Query metrics: pminfo -f kernel.all.load ║ + ║ pmlogger archives: /var/log/pcp/pmlogger ║ + ╚═══════════════════════════════════════════════════════════════╝ + ''} + WARNING: DEBUG MODE - Password authentication is enabled. + Do NOT use in production. + ''; + }) + ]; + }; +in +# Return the runner script that starts this MicroVM +vmConfig.config.microvm.declaredRunner diff --git a/nix/network-setup.nix b/nix/network-setup.nix new file mode 100644 index 0000000000..01155fd1bc --- /dev/null +++ b/nix/network-setup.nix @@ -0,0 +1,206 @@ +# nix/network-setup.nix +# +# TAP/bridge/vhost-net setup and teardown scripts. +# All network parameters come from constants.nix. +# +# Usage (setup and teardown require sudo): +# nix run .#pcp-check-host # Verify host environment +# sudo nix run .#pcp-network-setup # Create bridge + TAP + NAT +# sudo nix run .#pcp-network-teardown # Remove bridge + TAP + NAT +# +{ pkgs }: +let + constants = import ./constants.nix; + inherit (constants.network) bridge tap subnet gateway vmIp; +in +{ + # Host environment check + # Verify the host has necessary kernel modules and devices before setup. 
+ check = pkgs.writeShellApplication { + name = "pcp-check-host"; + runtimeInputs = with pkgs; [ kmod coreutils ]; + text = '' + echo "=== PCP MicroVM Host Environment Check ===" + errors=0 + + # Check for TUN device + if [[ -c /dev/net/tun ]]; then + echo "OK /dev/net/tun exists" + else + echo "FAIL /dev/net/tun not found" + echo " Run: sudo modprobe tun" + errors=$((errors + 1)) + fi + + # Check for vhost-net module/device + if lsmod | grep -q vhost_net; then + echo "OK vhost_net module loaded" + elif [[ -c /dev/vhost-net ]]; then + echo "OK /dev/vhost-net exists" + else + echo "FAIL vhost_net not available" + echo " Run: sudo modprobe vhost_net" + errors=$((errors + 1)) + fi + + # Check for bridge module + if lsmod | grep -q bridge; then + echo "OK bridge module loaded" + else + echo "INFO bridge module not loaded (will be loaded during setup)" + fi + + # Check sudo access + if sudo -n true 2>/dev/null; then + echo "OK sudo access available" + else + echo "FAIL sudo access required for network setup" + errors=$((errors + 1)) + fi + + if [[ $errors -gt 0 ]]; then + echo "" + echo "Host environment check failed with $errors error(s)" + exit 1 + else + echo "" + echo "Host environment ready for TAP networking" + fi + ''; + }; + + # Network setup + # Create bridge, TAP device, and NAT rules for VM networking. 
+ setup = pkgs.writeShellApplication { + name = "pcp-network-setup"; + runtimeInputs = with pkgs; [ iproute2 kmod nftables acl ]; + text = '' + echo "=== PCP MicroVM Network Setup ===" + + # Check we're running as root (via sudo) + if [[ $EUID -ne 0 ]]; then + echo "ERROR: Run with sudo: sudo nix run .#pcp-network-setup" + exit 1 + fi + + # Determine the actual user (not root when running via sudo) + REAL_USER="''${SUDO_USER:-$USER}" + if [[ "$REAL_USER" == "root" ]]; then + echo "ERROR: Run this script via 'sudo nix run .#pcp-network-setup' as a regular user" + echo " The script needs to know which user should have TAP device access" + exit 1 + fi + echo "Setting up network for user: $REAL_USER" + + # Load required kernel modules + modprobe tun + modprobe vhost_net + modprobe bridge + + # Create bridge + if ! ip link show ${bridge} &>/dev/null; then + echo "Creating bridge ${bridge}..." + ip link add ${bridge} type bridge + ip addr add ${gateway}/24 dev ${bridge} + ip link set ${bridge} up + else + echo "Bridge ${bridge} already exists" + fi + + # Create TAP device with multi_queue for vhost-net + # Recreate if it exists but with wrong owner + if ip link show ${tap} &>/dev/null; then + echo "Removing existing TAP device ${tap}..." + ip link del ${tap} + fi + echo "Creating TAP device ${tap} for user $REAL_USER..." 
+ ip tuntap add dev ${tap} mode tap multi_queue user "$REAL_USER" + ip link set ${tap} master ${bridge} + ip link set ${tap} up + + # Enable vhost-net access (secure method: ACL, fallback: group) + # SECURITY: We avoid chmod 666 (world-writable) as it's a red flag + if [[ -c /dev/vhost-net ]]; then + if command -v setfacl &>/dev/null; then + # Preferred: ACL-based per-user access + setfacl -m "u:$REAL_USER:rw" /dev/vhost-net + echo "vhost-net enabled (ACL for $REAL_USER)" + elif getent group kvm &>/dev/null; then + # Fallback: group-based access (user must be in kvm group) + chgrp kvm /dev/vhost-net + chmod 660 /dev/vhost-net + echo "vhost-net enabled (kvm group)" + else + echo "WARNING: Cannot set vhost-net permissions securely" + echo " Option 1: Install acl package and rerun setup" + echo " Option 2: Add $REAL_USER to 'kvm' group and rerun setup" + echo " vhost acceleration may not work" + fi + fi + + # NAT for VM internet access + echo "Configuring NAT..." + nft add table inet pcp-nat 2>/dev/null || true + nft flush table inet pcp-nat 2>/dev/null || true + nft -f - </dev/null + + echo "" + echo "Network ready. MicroVM will be accessible at:" + echo " pmcd: ${vmIp}:${toString constants.ports.pmcd}" + echo " pmproxy: ${vmIp}:${toString constants.ports.pmproxy}" + echo " node_exporter: ${vmIp}:${toString constants.ports.nodeExporter}" + echo " SSH: ssh root@${vmIp}" + ''; + }; + + # Network teardown + # Remove bridge, TAP device, and NAT rules. 
+ # Run with: sudo nix run .#pcp-network-teardown + teardown = pkgs.writeShellApplication { + name = "pcp-network-teardown"; + runtimeInputs = with pkgs; [ iproute2 nftables ]; + text = '' + echo "=== PCP MicroVM Network Teardown ===" + + # Check we're running as root + if [[ $EUID -ne 0 ]]; then + echo "ERROR: Run with sudo: sudo nix run .#pcp-network-teardown" + exit 1 + fi + + # Remove TAP device + if ip link show ${tap} &>/dev/null; then + ip link del ${tap} + echo "Removed TAP device ${tap}" + fi + + # Remove bridge + if ip link show ${bridge} &>/dev/null; then + ip link set ${bridge} down + ip link del ${bridge} + echo "Removed bridge ${bridge}" + fi + + # Remove NAT rules + nft delete table inet pcp-nat 2>/dev/null && \ + echo "Removed NAT rules" || true + + echo "Network teardown complete" + ''; + }; +} diff --git a/nix/nixos-module.nix b/nix/nixos-module.nix new file mode 100644 index 0000000000..88af2a63e5 --- /dev/null +++ b/nix/nixos-module.nix @@ -0,0 +1,471 @@ +# nix/nixos-module.nix +# +# NixOS module for Performance Co-Pilot. +# Mirrors a standard RHEL/Debian standalone PCP deployment. +# +# Design notes: +# - Uses systemd.packages to pull in upstream unit files from the PCP build +# - Applies hardening overlays rather than rewriting units from scratch +# - This preserves upstream ExecStartPre, EnvironmentFile, and other settings +# +# Cache optimization: +# - The `pcp` argument should be passed via specialArgs from the flake +# - This ensures all derivations use the same PCP package for cache reuse +# - See microvm.nix line 88: specialArgs = { inherit pcp; }; +# +{ config, pkgs, lib, pcp ? null, ... 
}: +with lib; +let + cfg = config.services.pcp; + constants = import ./constants.nix; + + pcpConf = "${cfg.package}/share/pcp/etc/pcp.conf"; + pcpDir = "${cfg.package}/share/pcp"; + + # ─── PMDA Configuration ───────────────────────────────────────────────── + # Generate pmcd.conf entries for additional PMDAs + pmdaEntries = concatStringsSep "\n" (mapAttrsToList (name: pmda: + if pmda.type == "dso" then + "${name}\t${toString pmda.domain}\tdso\t${pmda.init}\t${pmda.path}" + else + "${name}\t${toString pmda.domain}\t${pmda.type}\t${pmda.flags}\t${pmda.command}" + ) cfg.pmdas); + + # Generate pmcd.conf with base PMDAs + additional ones + # On Linux, the base is: linux (60) + pmcd (2), both as DSO + pmcdConf = pkgs.writeText "pmcd.conf" '' + # + # Performance Co-Pilot PMDA Configuration + # Generated by NixOS - do not edit directly + # + # Format: name domain_id type init_func/flags path/command + # + + # ─── Base Platform PMDAs ────────────────────────────────────────────── + # Linux kernel metrics (DSO for performance) + linux ${toString constants.pmdaDomains.linux} dso linux_init ${cfg.package}/var/lib/pcp/pmdas/linux/pmda_linux.so + + # PMCD internal metrics (DSO) + pmcd ${toString constants.pmdaDomains.pmcd} dso pmcd_init ${cfg.package}/var/lib/pcp/pmdas/pmcd/pmda_pmcd.so + + ${optionalString (pmdaEntries != "") '' + # ─── Additional PMDAs ───────────────────────────────────────────────── + ${pmdaEntries} + ''} + ''; + + # Environment variables for PCP services + # Override PCP_*_DIR variables to use mutable system paths instead of + # immutable Nix store paths (required for pmlogger, pmie, pmcd, etc.) 
+ pcpEnv = { + PCP_CONF = pcpConf; + PCP_DIR = pcpDir; + # Point to generated pmcd.conf (overrides default location) + PCP_PMCDCONF_PATH = "/etc/pcp/pmcd/pmcd.conf"; + # Mutable runtime paths (from constants.nix - Nix store versions are read-only) + PCP_LOG_DIR = constants.paths.logDir; + PCP_VAR_DIR = constants.paths.varDir; + PCP_TMP_DIR = constants.paths.tmpDir; + PCP_RUN_DIR = constants.paths.runDir; + # pmlogger archive directory (must be writable) + PCP_ARCHIVE_DIR = constants.paths.archiveDir; + # Override hardcoded /bin/pwd path in shell scripts (see shell-portable-pwd.patch) + PWDCMND = "pwd"; + }; + + # ─── Shared systemd hardening ──────────────────────────────────────── + # Applied to network-facing PCP services (pmcd, pmproxy). + # Note: Some hardening is relaxed because pmcd needs to: + # - Access /proc for metric collection + # - Use various system calls for performance monitoring + baseHardening = { + PrivateTmp = true; + ProtectHome = true; + ProtectControlGroups = true; + ProtectKernelModules = true; + NoNewPrivileges = true; + RestrictRealtime = true; + LockPersonality = true; + }; + + # pmcd hardening: optionally add capabilities for PMDAs that need elevated access + # - relaxedPermissions: DAC and ptrace for pmdaproc + # - BCC PMDA: BPF syscall access for eBPF programs + hasBccPmda = cfg.pmdas ? 
bcc; + pmcdHardening = recursiveUpdate baseHardening ( + optionalAttrs (cfg.relaxedPermissions || hasBccPmda) ({ + CapabilityBoundingSet = [] + ++ optionals cfg.relaxedPermissions [ + "CAP_DAC_READ_SEARCH" # For reading /proc of other users' processes + "CAP_SYS_PTRACE" # For pmdaproc process inspection + ] + ++ optionals hasBccPmda [ + "CAP_SYS_ADMIN" # BPF syscall (kernel < 5.8) + "CAP_BPF" # BPF syscall (kernel >= 5.8) + "CAP_PERFMON" # Perf events (kernel >= 5.8) + "CAP_SYS_PTRACE" # Read /proc of other processes + "CAP_DAC_READ_SEARCH" # Access kernel symbols + ]; + } // optionalAttrs hasBccPmda { + AmbientCapabilities = [ + "CAP_SYS_ADMIN" + "CAP_BPF" + "CAP_PERFMON" + "CAP_SYS_PTRACE" + "CAP_DAC_READ_SEARCH" + ]; + # BCC PMDA needs to disable NoNewPrivileges to use ambient capabilities + NoNewPrivileges = false; + }) + ); + + pmproxyHardening = baseHardening; + + # IP filtering when allowedNetworks is specified + ipFiltering = optionalAttrs (cfg.allowedNetworks != []) { + IPAddressAllow = cfg.allowedNetworks; + IPAddressDeny = "any"; + }; + +in +{ + # ═══════════════════════════════════════════════════════════════════════ + # Options interface + # ═══════════════════════════════════════════════════════════════════════ + + options.services.pcp = { + enable = mkEnableOption "Performance Co-Pilot monitoring services"; + + package = mkOption { + type = types.package; + default = if pcp != null then pcp + else throw "PCP package not found. Pass 'pcp' via specialArgs or set services.pcp.package explicitly."; + defaultText = lib.literalExpression "pcp (from specialArgs)"; + description = "The PCP package to use. 
Passed via specialArgs from flake.nix for cache consistency."; + }; + + preset = mkOption { + type = types.enum [ "standalone" "minimal" "custom" ]; + default = "standalone"; + description = '' + Deployment preset: + - standalone: Full PCP suite (pmcd + pmlogger + pmie + pmproxy) + - minimal: Only pmcd (for remote collection targets) + - custom: Manually configure each component + ''; + }; + + pmlogger.enable = mkOption { + type = types.bool; + default = cfg.preset == "standalone"; + defaultText = literalExpression ''config.services.pcp.preset == "standalone"''; + description = "Enable pmlogger (archive logger)."; + }; + + pmie.enable = mkOption { + type = types.bool; + default = cfg.preset == "standalone"; + defaultText = literalExpression ''config.services.pcp.preset == "standalone"''; + description = "Enable pmie (inference engine)."; + }; + + pmproxy.enable = mkOption { + type = types.bool; + default = cfg.preset == "standalone"; + defaultText = literalExpression ''config.services.pcp.preset == "standalone"''; + description = "Enable pmproxy (REST API gateway)."; + }; + + allowedNetworks = mkOption { + type = types.listOf types.str; + default = []; + example = [ "10.177.0.0/24" "127.0.0.0/8" ]; + description = "IP ranges allowed to connect to pmcd/pmproxy."; + }; + + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Automatically open firewall ports for enabled services."; + }; + + relaxedPermissions = mkOption { + type = types.bool; + default = false; + description = '' + Enable additional capabilities for pmcd to collect more metrics. + When true, grants CAP_DAC_READ_SEARCH and CAP_SYS_PTRACE for + pmdaproc and other introspective PMDAs. + When false (default), pmcd runs with minimal privileges. 
+ ''; + }; + + pmdas = mkOption { + type = types.attrsOf (types.submodule { + options = { + domain = mkOption { + type = types.int; + description = "PMDA domain number (unique identifier, 0-511)"; + }; + type = mkOption { + type = types.enum [ "dso" "pipe" "socket" ]; + default = "pipe"; + description = "PMDA type: dso (shared library), pipe (spawned process), socket (daemon)"; + }; + flags = mkOption { + type = types.str; + default = "binary"; + description = "PMDA flags (e.g., 'binary', 'binary notready')"; + }; + command = mkOption { + type = types.str; + description = "For pipe/socket PMDAs: command to execute"; + }; + init = mkOption { + type = types.nullOr types.str; + default = null; + description = "For DSO PMDAs: init function name"; + }; + path = mkOption { + type = types.nullOr types.str; + default = null; + description = "For DSO PMDAs: path to shared library"; + }; + }; + }); + default = {}; + description = '' + Additional PMDAs to register with pmcd. + These are added to pmcd.conf alongside the base platform PMDAs. + ''; + example = literalExpression '' + { + bcc = { + domain = 149; + type = "pipe"; + flags = "binary notready"; + command = "pmpython /path/to/pmdabcc.python -d 149"; + }; + } + ''; + }; + }; + + # ═══════════════════════════════════════════════════════════════════════ + # Implementation + # ═══════════════════════════════════════════════════════════════════════ + + config = mkIf cfg.enable { + environment.systemPackages = [ cfg.package ]; + + # Note: We define our own service units rather than using upstream units + # because NixOS requires specific service configurations for proper systemd + # integration (RuntimeDirectory, StateDirectory, etc.) 
+ + users.users.pcp = { + isSystemUser = true; + group = "pcp"; + description = "Performance Co-Pilot daemon user"; + }; + users.groups.pcp = {}; + + environment.variables = pcpEnv; + + systemd.tmpfiles.rules = [ + # Runtime directories + "d /var/lib/pcp 0755 pcp pcp -" + "d /var/lib/pcp/tmp 0775 pcp pcp -" + "d /var/log/pcp 0755 pcp pcp -" + "d /var/log/pcp/pmcd 0755 root root -" + "d /var/log/pcp/pmlogger 0775 pcp pcp -" + "d /var/log/pcp/pmie 0775 pcp pcp -" + "d /var/log/pcp/pmproxy 0775 pcp pcp -" + "d /run/pcp 0775 pcp pcp -" + + # Symlinks to read-only store paths (PMNS, PMDAs) + "L+ /var/lib/pcp/pmns - - - - ${cfg.package}/var/lib/pcp/pmns" + "L+ /var/lib/pcp/pmdas - - - - ${cfg.package}/var/lib/pcp/pmdas" + "L+ /var/lib/pcp/pmcd - - - - ${cfg.package}/var/lib/pcp/pmcd" + + # Config directory structure - mix of read-only symlinks and writable dirs + # Note: parent /var/lib/pcp is owned by pcp, so config must also be pcp-owned + # to avoid "unsafe path transition" errors from systemd-tmpfiles + "d /var/lib/pcp/config 0755 pcp pcp -" + "L+ /var/lib/pcp/config/derived - - - - ${cfg.package}/var/lib/pcp/config/derived" + "L+ /var/lib/pcp/config/pmafm - - - - ${cfg.package}/var/lib/pcp/config/pmafm" + "L+ /var/lib/pcp/config/pmieconf - - - - ${cfg.package}/var/lib/pcp/config/pmieconf" + "L+ /var/lib/pcp/config/pmlogconf - - - - ${cfg.package}/var/lib/pcp/config/pmlogconf" + "L+ /var/lib/pcp/config/pmlogredact - - - - ${cfg.package}/var/lib/pcp/config/pmlogredact" + "L+ /var/lib/pcp/config/pmlogrewrite - - - - ${cfg.package}/var/lib/pcp/config/pmlogrewrite" + # Writable directories for runtime-generated configs (pmlogconf creates config.default here) + "d /var/lib/pcp/config/pmda 0775 pcp pcp -" + "d /var/lib/pcp/config/pmie 0775 pcp pcp -" + "d /var/lib/pcp/config/pmlogger 0775 pcp pcp -" + # Static pmlogger configs as symlinks + "L+ /var/lib/pcp/config/pmlogger/config.pmstat - - - - ${cfg.package}/var/lib/pcp/config/pmlogger/config.pmstat" + "L+ 
/var/lib/pcp/config/pmlogger/options.pmstat - - - - ${cfg.package}/var/lib/pcp/config/pmlogger/options.pmstat" + + # FHS-compatible symlinks for users familiar with Fedora/RHEL + # These make standard paths work: cat /etc/pcp.conf, source /etc/pcp.env + "L+ /etc/pcp.conf - - - - ${cfg.package}/share/pcp/etc/pcp.conf" + "L+ /etc/pcp.env - - - - ${cfg.package}/share/pcp/etc/pcp.env" + + # Create /etc/pcp directory structure (not a symlink, so we can have writable pmcd.conf) + "d /etc/pcp 0755 root root -" + "d /etc/pcp/pmcd 0755 root root -" + # Copy generated pmcd.conf (needs to be a real file, not symlink, for pmcd to modify) + "C /etc/pcp/pmcd/pmcd.conf 0644 root root - ${pmcdConf}" + # Symlink other config dirs from package + "L+ /etc/pcp/labels - - - - ${cfg.package}/share/pcp/etc/pcp/labels" + "L+ /etc/pcp/pmie - - - - ${cfg.package}/share/pcp/etc/pcp/pmie" + "L+ /etc/pcp/pmlogger - - - - ${cfg.package}/share/pcp/etc/pcp/pmlogger" + "L+ /etc/pcp/pmproxy - - - - ${cfg.package}/share/pcp/etc/pcp/pmproxy" + "L+ /etc/pcp/derived - - - - ${cfg.package}/share/pcp/etc/pcp/derived" + ]; + + # ─── pmcd service overlay ────────────────────────────────────────── + systemd.services.pmcd = { + description = "Performance Co-Pilot Collector Daemon"; + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + wants = [ "network.target" ]; + environment = pcpEnv; + serviceConfig = { + # When BCC PMDA is enabled, use Type=simple because pmcd doesn't write + # its PID file until all PMDAs are initialized, but BCC compilation + # takes 2-3 minutes. With Type=forking, systemd expects the PID file + # immediately after the fork completes, causing restart loops. + Type = if hasBccPmda then "simple" else "forking"; + # When BCC PMDA is enabled, increase PMDA negotiation timeout (-q) + # since eBPF compilation takes several seconds even with BTF enabled. 
+ # -q = initial negotiation timeout (default 3s) + # -t = response timeout (default 5s) + # -f = run in foreground (required for Type=simple) + ExecStart = "${cfg.package}/libexec/pcp/bin/pmcd" + + optionalString hasBccPmda " -f -q 60 -t 30"; + Restart = "on-failure"; + RuntimeDirectory = "pcp"; + StateDirectory = "pcp"; + # Note: Don't use LogsDirectory here - it creates dirs as root:root + # since pmcd has no User=. tmpfiles.rules creates /var/log/pcp/* with pcp:pcp. + } // optionalAttrs (!hasBccPmda) { + # Only use PIDFile for non-BCC variants (where pmcd forks quickly) + PIDFile = "/run/pcp/pmcd.pid"; + } // optionalAttrs hasBccPmda { + # BCC PMDA compiles eBPF programs at startup which can take 2-3 minutes. + TimeoutStartSec = "300"; # 5 minutes for BCC eBPF compilation + } // pmcdHardening // ipFiltering; + }; + + # ─── pmlogger service overlay ────────────────────────────────────── + # pmlogger_check is a shell script that: + # 1. Reads the control file to find pmlogger specifications + # 2. Generates config.default using pmlogconf if needed + # 3. Starts pmlogger processes as background daemons + # 4. Exits after starting all pmloggers + # + # We use Type=oneshot because pmlogger_check starts pmloggers and exits. + # RemainAfterExit=yes makes systemd consider the service "active" after exit. + # The actual pmlogger processes run independently as daemons. 
+ systemd.services.pmlogger = mkIf cfg.pmlogger.enable { + description = "Performance Co-Pilot Archive Logger"; + wantedBy = [ "multi-user.target" ]; + after = [ "pmcd.service" ]; + bindsTo = [ "pmcd.service" ]; + environment = pcpEnv; + path = with pkgs; [ coreutils gawk gnused gnugrep hostname findutils util-linux procps cfg.package ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + # Start pmloggers based on control file + # -V: verbose output for debugging in journal + ExecStart = "${cfg.package}/libexec/pcp/bin/pmlogger_check -V"; + # Stop all pmloggers managed by this control file + # -s: stop mode (sets STOP_PMLOGGER=true) + ExecStop = "${cfg.package}/libexec/pcp/bin/pmlogger_check -s -V"; + User = "pcp"; + Group = "pcp"; + StateDirectory = "pcp"; + LogsDirectory = "pcp/pmlogger"; + # Give pmlogger_check time to generate config.default and start pmloggers + TimeoutStartSec = "120"; + }; + }; + + # ─── pmie service overlay ────────────────────────────────────────── + # pmie_check is a shell script that needs awk, sed, grep, etc. 
+ systemd.services.pmie = mkIf cfg.pmie.enable { + description = "Performance Co-Pilot Inference Engine"; + wantedBy = [ "multi-user.target" ]; + after = [ "pmcd.service" ]; + bindsTo = [ "pmcd.service" ]; + environment = pcpEnv; + path = with pkgs; [ coreutils gawk gnused gnugrep hostname findutils util-linux procps cfg.package ]; + serviceConfig = { + Type = "forking"; + ExecStart = "${cfg.package}/libexec/pcp/bin/pmie_check"; + Restart = "on-failure"; + User = "pcp"; + Group = "pcp"; + StateDirectory = "pcp"; + LogsDirectory = "pcp/pmie"; + }; + }; + + # ─── pmproxy service overlay ─────────────────────────────────────── + systemd.services.pmproxy = mkIf cfg.pmproxy.enable { + description = "Performance Co-Pilot REST API Proxy"; + wantedBy = [ "multi-user.target" ]; + after = [ "pmcd.service" ]; + bindsTo = [ "pmcd.service" ]; + environment = pcpEnv; + serviceConfig = { + Type = "forking"; + ExecStart = "${cfg.package}/libexec/pcp/bin/pmproxy"; + PIDFile = "/run/pcp/pmproxy.pid"; + Restart = "on-failure"; + } // pmproxyHardening // ipFiltering; + }; + + # ─── Maintenance timers ──────────────────────────────────────────── + systemd.timers.pmlogger-daily = mkIf cfg.pmlogger.enable { + wantedBy = [ "timers.target" ]; + timerConfig = { + OnCalendar = "*-*-* 00:10:00"; + Persistent = true; + }; + }; + systemd.services.pmlogger-daily = mkIf cfg.pmlogger.enable { + description = "PCP Logger Daily Maintenance"; + environment = pcpEnv; + path = with pkgs; [ coreutils gawk gnused gnugrep hostname findutils util-linux procps cfg.package ]; + serviceConfig = { + Type = "oneshot"; + ExecStart = "${cfg.package}/libexec/pcp/bin/pmlogger_daily"; + }; + }; + + systemd.timers.pmie-daily = mkIf cfg.pmie.enable { + wantedBy = [ "timers.target" ]; + timerConfig = { + OnCalendar = "*-*-* 00:08:00"; + Persistent = true; + }; + }; + systemd.services.pmie-daily = mkIf cfg.pmie.enable { + description = "PCP Inference Engine Daily Maintenance"; + environment = pcpEnv; + path = with pkgs; 
[ coreutils gawk gnused gnugrep hostname findutils util-linux procps cfg.package ]; + serviceConfig = { + Type = "oneshot"; + ExecStart = "${cfg.package}/libexec/pcp/bin/pmie_daily"; + }; + }; + + # Dynamic firewall based on enabled services + networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall ( + [ constants.ports.pmcd ] ++ + optional cfg.pmproxy.enable constants.ports.pmproxy + ); + }; +} diff --git a/nix/package.nix b/nix/package.nix new file mode 100644 index 0000000000..ffc05bbf02 --- /dev/null +++ b/nix/package.nix @@ -0,0 +1,487 @@ +# nix/package.nix +# +# PCP package derivation. +# Version is automatically derived from VERSION.pcp (the configure script's +# source of truth), eliminating manual maintenance when PCP is released. +# +# CACHE OPTIMIZATION: The `src` parameter should be `self` from flake.nix. +# This ensures stable derivation hashes because: +# 1. `self` in a flake references the git-tracked content +# 2. For clean trees, the hash is based on the git commit +# 3. For dirty trees, the hash is based on the filtered content +# 4. Using `./..` directly would cause hash changes on every file modification +# +# The source filtering (cleanSourceWith) further improves cache hits by +# excluding build artifacts, editor files, and other non-essential content. +# +{ pkgs, src }: +let + lib = pkgs.lib; + + # ─── Version Parsing ─────────────────────────────────────────────────── + # Parse version from VERSION.pcp with explicit error handling. 
+ # VERSION.pcp format: + # PACKAGE_MAJOR=7 + # PACKAGE_MINOR=1 + # PACKAGE_REVISION=1 + # PACKAGE_BUILD=1 + # + versionFile = builtins.readFile (src + "/VERSION.pcp"); + + parseField = field: + let + # Match pattern: field=digits (handles multi-line file) + # The regex needs to work across the whole file content + lines = lib.splitString "\n" versionFile; + matchingLines = builtins.filter (line: + builtins.match "^${field}=([0-9]+).*" line != null + ) lines; + matchedLine = if matchingLines == [] then null else builtins.head matchingLines; + match = if matchedLine == null then null + else builtins.match "^${field}=([0-9]+).*" matchedLine; + in + if match == null then + throw '' + Failed to parse ${field} from VERSION.pcp. + Expected format: ${field}= + File contents: + ${versionFile} + '' + else + builtins.head match; + + major = parseField "PACKAGE_MAJOR"; + minor = parseField "PACKAGE_MINOR"; + revision = parseField "PACKAGE_REVISION"; + + # Validate parsed version components + version = let + v = "${major}.${minor}.${revision}"; + in + assert lib.assertMsg (major != "") "PACKAGE_MAJOR is empty"; + assert lib.assertMsg (minor != "") "PACKAGE_MINOR is empty"; + assert lib.assertMsg (revision != "") "PACKAGE_REVISION is empty"; + v; + + # ─── Feature Flags ───────────────────────────────────────────────────── + # All enabled features are open source. Proprietary integrations are excluded + # via configureFlags (mongodb, mysql, nutcracker) or postInstall (mssql). 
+ # + withSystemd = pkgs.stdenv.isLinux; # systemd service management + withPfm = pkgs.stdenv.isLinux; # hardware performance counters (libpfm) + withBpf = pkgs.stdenv.isLinux; # eBPF tracing (bcc, bpf, bpftrace PMDAs) + withSnmp = true; # SNMP network monitoring + withPythonHttp = true; # Python HTTP client (requests) + withPerlHttp = true; # Perl HTTP client (LWP) + + # ─── Source Filtering ────────────────────────────────────────────────── + # Use cleanSourceWith to exclude build outputs and non-essential files + # from the Nix store. This prevents accidental inclusion of result symlinks, + # test-results/, etc. which would cause unnecessary rebuilds. + # + # IMPORTANT: Only exclude top-level result* symlinks, not files like result.c + # or result.o which are legitimate source/build files. + # + # NOTE: We use the `src` parameter (passed from flake.nix as `self`) to + # ensure stable derivation hashes. This enables proper cache reuse + # between builds of the main package, microvms, and containers. 
+ # + srcPath = toString src; + cleanedSrc = lib.cleanSourceWith { + inherit src; + filter = path: type: + let + baseName = baseNameOf path; + parentDir = dirOf path; + isTopLevel = parentDir == srcPath; + # Patterns to exclude + isExcluded = + # Build output symlinks (only at top level, and only exact matches or result-*) + (isTopLevel && (baseName == "result" || lib.hasPrefix "result-" baseName)) || + baseName == "test-results" || + # Editor/IDE artifacts + baseName == ".vscode" || + baseName == ".idea" || + lib.hasSuffix ".swp" baseName || + lib.hasSuffix ".swo" baseName || + # Nix build artifacts + baseName == ".direnv" || + # Baseline metrics (Phase 0) + baseName == "baseline-metrics.txt" || + baseName == "baseline-check.txt" || + baseName == "phase1-metrics.txt"; + in + !isExcluded; + }; + +in +pkgs.stdenv.mkDerivation rec { + pname = "pcp"; + inherit version; + src = cleanedSrc; + + outputs = [ + "out" + "man" + "doc" + ]; + + nativeBuildInputs = with pkgs; [ + autoconf + automake + pkg-config + bison + flex + which + perl + python3 + python3.pkgs.setuptools + makeWrapper + binutils + ] ++ lib.optionals withBpf [ + llvmPackages.clang + llvmPackages.llvm + ]; + + buildInputs = with pkgs; [ + zlib + ncurses + readline + openssl + libuv + cyrus_sasl + inih + xz + python3 + perl + rrdtool + ] ++ lib.optionals pkgs.stdenv.isLinux [ + avahi + lvm2 + ] ++ lib.optionals withSystemd [ + systemd + ] ++ lib.optionals withPfm [ + libpfm + ] ++ lib.optionals withBpf [ + libbpf + bcc + elfutils + ] ++ lib.optionals withSnmp [ + net-snmp + ] ++ lib.optionals withPythonHttp [ + python3.pkgs.requests + ] ++ lib.optionals withPerlHttp [ + perlPackages.JSON + perlPackages.LWPUserAgent + ]; + + configureFlags = lib.concatLists [ + + [ + "--prefix=${placeholder "out"}" + "--sysconfdir=${placeholder "out"}/etc" + "--localstatedir=${placeholder "out"}/var" + "--with-rcdir=${placeholder "out"}/etc/init.d" + "--with-tmpdir=/tmp" + "--with-logdir=${placeholder "out"}/var/log/pcp" + 
"--with-rundir=/run/pcp" + ] + + [ + "--with-user=pcp" + "--with-group=pcp" + ] + + [ + "--with-make=make" + "--with-tar=tar" + "--with-python3=${lib.getExe pkgs.python3}" + ] + + [ + "--with-perl=yes" + "--with-threads=yes" + ] + + [ + "--with-secure-sockets=yes" + "--with-transparent-decompression=yes" + ] + + (if pkgs.stdenv.isLinux then [ "--with-discovery=yes" ] else [ "--with-discovery=no" ]) + + [ + "--with-dstat-symlink=no" + "--with-pmdamongodb=no" + "--with-pmdamysql=no" + "--with-pmdanutcracker=no" + "--with-qt=no" + "--with-infiniband=no" + "--with-selinux=no" + ] + + (if withSystemd then [ "--with-systemd=yes" ] else [ "--with-systemd=no" ]) + (if withPfm then [ "--with-perfevent=yes" ] else [ "--with-perfevent=no" ]) + ( + if withBpf then + [ + "--with-pmdabcc=yes" + "--with-pmdabpf=yes" + "--with-pmdabpftrace=yes" + ] + else + [ + "--with-pmdabcc=no" + "--with-pmdabpf=no" + "--with-pmdabpftrace=no" + ] + ) + + (if pkgs.stdenv.isLinux then [ "--with-devmapper=yes" ] else [ "--with-devmapper=no" ]) + + (if withSnmp then [ "--with-pmdasnmp=yes" ] else [ "--with-pmdasnmp=no" ]) + ]; + + + patches = [ + ./patches/gnumakefile-nix-fixes.patch + ./patches/python-libpcp-nix.patch + # Fix Python wrapper bug: pmReconnectContext() after pmRegisterDerived() causes + # IPC table corruption when registering multiple derived metrics. The reconnect + # calls __dmclosecontext()/__dmopencontext() which corrupts the IPC version table, + # causing subsequent pmGetPMNSLocation() to fail with PM_ERR_NOPMNS. 
+ # Symptoms: "pcp dstat" fails after first few metrics with "PMNS not accessible" + ./patches/python-pmapi-no-reconnect.patch + # Use portable pwd fallback instead of hardcoded /bin/pwd which doesn't exist on NixOS + ./patches/shell-portable-pwd.patch + ]; + + postPatch = '' + # Fix shebangs (can't be done as static patch - needs Nix store paths) + patchShebangs src build configure scripts man + + # Fix build scripts that use /var/tmp (not available in Nix sandbox) + # Substitute with TMPDIR which Nix sets up correctly + for f in src/pmdas/bind2/mk.rewrite \ + src/pmdas/jbd2/mk.rewrite \ + src/pmdas/linux/mk.rewrite \ + src/pmdas/linux_proc/mk.rewrite; do + if [ -f "$f" ]; then + substituteInPlace "$f" --replace '/var/tmp' "$TMPDIR" + fi + done + ''; + + preConfigure = '' + # Ensure AR is in PATH and exported for configure script + export AR="${pkgs.stdenv.cc.bintools.bintools}/bin/ar" + ''; + + hardeningDisable = lib.optionals withBpf [ "zerocallusedregs" ]; + + BPF_CFLAGS = lib.optionalString withBpf "-fno-stack-protector -Wno-error=unused-command-line-argument"; + CLANG = lib.optionalString withBpf (lib.getExe pkgs.llvmPackages.clang); + + # Set AR explicitly so configure can find it (configure checks $AR first) + AR = "${pkgs.stdenv.cc.bintools.bintools}/bin/ar"; + + SYSTEMD_SYSTEMUNITDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/systemd/system"; + SYSTEMD_TMPFILESDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/tmpfiles.d"; + SYSTEMD_SYSUSERSDIR = lib.optionalString withSystemd "${placeholder "out"}/lib/sysusers.d"; + + postInstall = '' + # Build the combined PMNS root file + # The individual root_* files exist but pmcd needs a combined 'root' file + # Use pmnsmerge to combine all the root_* files into one + ( + cd $out/var/lib/pcp/pmns + export PCP_DIR=$out + export PCP_CONF=$out/etc/pcp.conf + . 
$out/etc/pcp.env + + # Merge all the root_* files into the combined root file + # Order matters: root_root first (base), then others + $out/libexec/pcp/bin/pmnsmerge -a \ + $out/libexec/pcp/pmns/root_root \ + $out/libexec/pcp/pmns/root_pmcd \ + $out/libexec/pcp/pmns/root_linux \ + $out/libexec/pcp/pmns/root_proc \ + $out/libexec/pcp/pmns/root_xfs \ + $out/libexec/pcp/pmns/root_jbd2 \ + $out/libexec/pcp/pmns/root_kvm \ + $out/libexec/pcp/pmns/root_mmv \ + $out/libexec/pcp/pmns/root_bpf \ + $out/libexec/pcp/pmns/root_pmproxy \ + root + ) + + # Remove runtime state directories + rm -rf $out/var/{run,log} $out/var/lib/pcp/tmp || true + + # Remove derived metric configs for proprietary software + # - mssql.conf: Microsoft SQL Server (proprietary), also has novalue() syntax errors + rm -f $out/var/lib/pcp/config/derived/mssql.conf || true + + # Move vendor config to share + if [ -d "$out/etc" ]; then + mkdir -p $out/share/pcp/etc + mv $out/etc/* $out/share/pcp/etc/ + rmdir $out/etc || true + + # Fix paths in pcp.conf to point to new locations + substituteInPlace $out/share/pcp/etc/pcp.conf \ + --replace-quiet "$out/etc/pcp" "$out/share/pcp/etc/pcp" \ + --replace-quiet "$out/etc/sysconfig" "$out/share/pcp/etc/sysconfig" \ + --replace-quiet "PCP_ETC_DIR=$out/etc" "PCP_ETC_DIR=$out/share/pcp/etc" + + # Fix symlinks that pointed to /etc/pcp/... 
+ find $out/var/lib/pcp -type l | while read link; do + target=$(readlink "$link") + if [[ "$target" == *"/etc/pcp/"* ]]; then + suffix="''${target#*/etc/pcp/}" + rm "$link" + ln -sf "$out/share/pcp/etc/pcp/$suffix" "$link" + fi + done + fi + + # Fix broken symlinks with double /nix/store prefix + # These occur when the build system prepends a path to an already-absolute path + for broken_link in "$out/share/pcp/etc/pcp/pm"{search/pmsearch,series/pmseries}.conf; do + [[ -L "$broken_link" ]] && rm "$broken_link" && \ + ln -sf "$out/share/pcp/etc/pcp/pmproxy/pmproxy.conf" "$broken_link" + done + + # Fix pmcd/rc.local symlink (points to libexec/pcp/services/local) + if [[ -L "$out/share/pcp/etc/pcp/pmcd/rc.local" ]]; then + rm "$out/share/pcp/etc/pcp/pmcd/rc.local" + ln -sf "$out/libexec/pcp/services/local" "$out/share/pcp/etc/pcp/pmcd/rc.local" + fi + + # Fix proc.conf novalue() syntax errors (upstream bug): + # 1. Remove spaces after commas in novalue() parameters + # 2. Remove invalid 'indom=' parameter (novalue() only accepts type, semantics, units) + # Must run after /etc is moved to share/pcp/etc + for procconf in $out/share/pcp/etc/pcp/derived/proc.conf $out/var/lib/pcp/config/derived/proc.conf; do + if [ -f "$procconf" ]; then + sed -i -e 's/novalue(type=u64, semantics=counter, units=Kbyte, indom=157\.2)/novalue(type=u64,semantics=counter,units=Kbyte)/g' \ + -e 's/novalue(type=u64,semantics=counter,units=Kbyte,indom=157\.2)/novalue(type=u64,semantics=counter,units=Kbyte)/g' "$procconf" + fi + done + + # Create .py symlinks for Python PMDA utility files + # PCP's Python PMDA framework expects pmdautil.py but ships pmdautil.python + # On NixOS, the /nix/store is read-only, so we create symlinks at build time + # instead of letting the runtime code fail trying to create them + for pmda_dir in $out/libexec/pcp/pmdas/*/; do + for pyfile in "$pmda_dir"*.python; do + if [ -f "$pyfile" ]; then + base=$(basename "$pyfile" .python) + pylink="$pmda_dir$base.py" + if [ ! 
-e "$pylink" ]; then + ln -s "$(basename "$pyfile")" "$pylink" + fi + fi + done + done + + # Also create symlinks for all Python files recursively in libexec/pcp/pmdas + # This covers BCC modules in modules/ subdirectory which pmdabcc.python + # tries to load with .py extension. On NixOS /nix/store is read-only, + # so we create all .py symlinks at build time. + find $out/libexec/pcp/pmdas -name "*.python" -type f 2>/dev/null | while read pyfile; do + base=$(basename "$pyfile" .python) + pylink="$(dirname "$pyfile")/$base.py" + if [ ! -e "$pylink" ]; then + ln -s "$(basename "$pyfile")" "$pylink" + fi + done + + # Also create .py symlinks for the symlinks in var/lib/pcp/pmdas/ + # pmdabcc.python walks PCP_PMDASADM_DIR (var/lib/pcp/pmdas/bcc) and looks + # for .py files, but that directory contains symlinks to libexec. + # We create .py -> .python symlinks here so pmdabcc can find them. + find $out/var/lib/pcp/pmdas -name "*.python" -type l 2>/dev/null | while read pyfile; do + base=$(basename "$pyfile" .python) + pylink="$(dirname "$pyfile")/$base.py" + if [ ! -e "$pylink" ]; then + ln -s "$(basename "$pyfile")" "$pylink" + fi + done + + # Move man pages to $man output + if [ -d "$out/share/man" ]; then + mkdir -p $man/share + mv $out/share/man $man/share/ + fi + + # Move documentation to $doc output + for docdir in $out/share/doc/pcp*; do + if [ -d "$docdir" ]; then + mkdir -p $doc/share/doc + mv "$docdir" $doc/share/doc/ + fi + done + ''; + + # Wrap Python scripts with correct environment for NixOS + # + # Each environment variable solves a specific NixOS compatibility issue: + # + # PCP_DIR: libpcp's config.c searches for pcp.conf in this order: + # 1. $PCP_CONF (if set) + # 2. $PCP_DIR/etc/pcp.conf (if $PCP_DIR set) + # 3. /etc/pcp.conf (fallback) + # On NixOS, pcp.conf lives in the store, so we must set PCP_DIR. 
+ # This enables pmGetConfig() to return correct paths like PCP_SYSCONF_DIR, + # which pcp-dstat needs to find its derived metric configs at $PCP_SYSCONF_DIR/dstat. + # + # PYTHONPATH: Standard path for PCP's Python modules in site-packages. + # + # LD_LIBRARY_PATH: Python's ctypes.util.find_library() uses ldconfig to + # locate shared libraries. On NixOS, ldconfig doesn't know about /nix/store + # paths, so find_library("pcp") returns None. Our python-libpcp-nix.patch + # adds a fallback that searches LD_LIBRARY_PATH when find_library fails. + # Without this, CDLL(None) loads the Python executable itself, causing + # "undefined symbol: pmGetChildren" errors. + # + postFixup = '' + # Wrap Python scripts in libexec/pcp/bin + for script in $out/libexec/pcp/bin/pcp-*; do + if head -1 "$script" 2>/dev/null | grep -q python; then + wrapProgram "$script" \ + --set PCP_DIR "$out/share/pcp" \ + --prefix PYTHONPATH : "$out/lib/${pkgs.python3.libPrefix}/site-packages" \ + --prefix LD_LIBRARY_PATH : "$out/lib" + fi + done + + # Wrap pmpython binary - it executes Python but doesn't set PYTHONPATH + # This is needed for pmrep, pmiostat, and other tools that use #!/usr/bin/env pmpython + if [ -f "$out/bin/pmpython" ]; then + wrapProgram "$out/bin/pmpython" \ + --set PCP_DIR "$out/share/pcp" \ + --prefix PYTHONPATH : "$out/lib/${pkgs.python3.libPrefix}/site-packages" \ + --prefix LD_LIBRARY_PATH : "$out/lib" + fi + + # Also wrap the main pcp command (Python-based dispatcher) + if [ -f "$out/bin/pcp" ]; then + wrapProgram "$out/bin/pcp" \ + --set PCP_DIR "$out/share/pcp" \ + --prefix PYTHONPATH : "$out/lib/${pkgs.python3.libPrefix}/site-packages" \ + --prefix LD_LIBRARY_PATH : "$out/lib" + fi + ''; + + doCheck = false; + enableParallelBuilding = true; + + meta = with lib; { + description = "Performance Co-Pilot - system performance monitoring toolkit"; + homepage = "https://pcp.io"; + license = licenses.gpl2Plus; + platforms = platforms.linux ++ platforms.darwin; + mainProgram = 
"pminfo"; + }; +} diff --git a/nix/patches/perl-install-path-fix.patch b/nix/patches/perl-install-path-fix.patch new file mode 100644 index 0000000000..ef4f15a8b7 --- /dev/null +++ b/nix/patches/perl-install-path-fix.patch @@ -0,0 +1,56 @@ +From: Nix Packaging +Subject: [PATCH] build: fix Perl install path for absolute prefixes + +When PERL_INSTALL_BASE is an absolute path (e.g., /nix/store/xxx) and +DIST_ROOT is empty, the path concatenation "$$DIST_ROOT/$(PERL_INSTALL_BASE)" +creates "//nix/store/xxx" (double slash), causing find commands to fail. + +Since PERL_INSTALL_BASE is derived from --prefix and always starts with /, +remove the redundant / in the concatenation. This affects both the Gentoo +and default (RPM-based) packaging code paths in PERL_GET_FILELIST. + +--- + src/include/builddefs.in | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/src/include/builddefs.in ++++ b/src/include/builddefs.in +@@ -642,9 +642,9 @@ PERL_GET_FILELIST = \ + if [ -s $(2) ]; then rm -f pack.list; \ + else echo "Arrgh ... 
no files to include in package via $(2), see pack.list"; exit 1; \ + fi; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name perllocal.pod -exec rm -f '{}' ';' ; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name \*.bs -exec rm -f '{}' ';' ; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name $(3).so -exec chmod 755 '{}' ';' ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name perllocal.pod -exec rm -f '{}' ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name \*.bs -exec rm -f '{}' ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name $(3).so -exec chmod 755 '{}' ';' + else + ifeq "$(PACKAGE_DISTRIBUTION)" "freebsd" + # FreeBSD Perl packaging is a broken mystery at this point in time +@@ -661,14 +661,14 @@ endif + PERL_GET_FILELIST = \ + $(PERLMAKE) -f Makefile $(1) DESTDIR="$$DIST_ROOT"; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name .packlist -exec mv '{}' $(2) ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name .packlist -exec mv '{}' $(2) ';' ; \ + if [ -s $(2) ] ; then \ + $(MANPAGE_SUFFIX) \ + if [ "$(HAVE_MANPAGES)" = "false" ] ; then \ + sed -e '/.*man[1-9].*/d' -e '/.*3pm.*/d' $(2) >$(2).tmp; \ + mv $(2).tmp $(2); \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name "*3pm*" -exec rm -rf '{}' ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name "*3pm*" -exec rm -rf '{}' ';' ; \ + fi ;\ + sed -n -e '/\.bs$$/d' -e 's/\.[0-9]pm$$/&'"$$_sfx/" -e "s@^$$DIST_ROOT@@p" $(2) >$(2).tmp; \ + mv $(2).tmp $(2); \ + else echo "Arrgh ... 
no files to include in package via $(2)"; exit 1; \ + fi; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name perllocal.pod -exec rm -f '{}' ';' ; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name \*.bs -exec rm -f '{}' ';' ; \ +- find "$$DIST_ROOT/$(PERL_INSTALL_BASE)" -name $(3).so -exec chmod 755 '{}' ';' ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name perllocal.pod -exec rm -f '{}' ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name \*.bs -exec rm -f '{}' ';' ; \ ++ find "$$DIST_ROOT$(PERL_INSTALL_BASE)" -name $(3).so -exec chmod 755 '{}' ';' + endif + endif + endif diff --git a/nix/patches/python-libpcp-nix.patch b/nix/patches/python-libpcp-nix.patch new file mode 100644 index 0000000000..130037d476 --- /dev/null +++ b/nix/patches/python-libpcp-nix.patch @@ -0,0 +1,118 @@ +diff --git a/src/python/pcp/mmv.py b/src/python/pcp/mmv.py +index 57bb7e2292..f2a4b6a73b 100644 +--- a/src/python/pcp/mmv.py ++++ b/src/python/pcp/mmv.py +@@ -78,7 +78,22 @@ from ctypes import Structure, POINTER + from ctypes import c_int, c_uint, c_long, c_char, c_char_p, c_double, c_void_p + + # Performance Co-Pilot MMV library (C) +-LIBPCP_MMV = ctypes.CDLL(ctypes.util.find_library("pcp_mmv")) ++def _find_libpcp_mmv(): ++ """Find libpcp_mmv.so, with fallback for NixOS.""" ++ lib = ctypes.util.find_library("pcp_mmv") ++ if lib: ++ return ctypes.CDLL(lib) ++ import os ++ ld_path = os.environ.get("LD_LIBRARY_PATH", "") ++ for path in ld_path.split(":"): ++ if path: ++ for libname in ("libpcp_mmv.so", "libpcp_mmv.so.1"): ++ candidate = os.path.join(path, libname) ++ if os.path.exists(candidate): ++ return ctypes.CDLL(candidate) ++ return ctypes.CDLL("libpcp_mmv.so") ++ ++LIBPCP_MMV = _find_libpcp_mmv() + + ############################################################################## + # +diff --git a/src/python/pcp/pmapi.py b/src/python/pcp/pmapi.py +index 0d98a12121..9a13d8fd04 100644 +--- a/src/python/pcp/pmapi.py ++++ b/src/python/pcp/pmapi.py +@@ -116,7 +116,28 @@ from ctypes.util 
import find_library + # + # dynamic library loads + # +-LIBPCP = CDLL(find_library("pcp")) ++def _find_libpcp(): ++ """Find libpcp.so, with fallback for NixOS where find_library uses ldconfig.""" ++ # Try standard find_library first (works on FHS systems) ++ lib = find_library("pcp") ++ if lib: ++ return CDLL(lib) ++ ++ # Fallback: search LD_LIBRARY_PATH (for NixOS and similar) ++ # Prefer libpcp.so (symlink to current version) over versioned names ++ ld_path = os.environ.get("LD_LIBRARY_PATH", "") ++ for path in ld_path.split(":"): ++ if not path: ++ continue ++ for libname in ("libpcp.so", "libpcp.so.4", "libpcp.so.3"): ++ candidate = os.path.join(path, libname) ++ if os.path.exists(candidate): ++ return CDLL(candidate) ++ ++ # Last resort: try loading by name (might work if rpath is set) ++ return CDLL("libpcp.so") ++ ++LIBPCP = _find_libpcp() + libc_name = "c" if sys.platform != "win32" else "msvcrt" + LIBC = CDLL(find_library(libc_name)) + +diff --git a/src/python/pcp/pmda.py b/src/python/pcp/pmda.py +index 70b5a23a83..43748275eb 100644 +--- a/src/python/pcp/pmda.py ++++ b/src/python/pcp/pmda.py +@@ -33,7 +33,22 @@ from ctypes import CDLL, c_int, c_long, c_char_p, c_void_p + from ctypes import addressof, byref, POINTER, Structure + + ## Performance Co-Pilot PMDA library (C) +-LIBPCP_PMDA = CDLL(find_library("pcp_pmda")) ++def _find_libpcp_pmda(): ++ """Find libpcp_pmda.so, with fallback for NixOS.""" ++ lib = find_library("pcp_pmda") ++ if lib: ++ return CDLL(lib) ++ import os ++ ld_path = os.environ.get("LD_LIBRARY_PATH", "") ++ for path in ld_path.split(":"): ++ if path: ++ for libname in ("libpcp_pmda.so", "libpcp_pmda.so.3"): ++ candidate = os.path.join(path, libname) ++ if os.path.exists(candidate): ++ return CDLL(candidate) ++ return CDLL("libpcp_pmda.so") ++ ++LIBPCP_PMDA = _find_libpcp_pmda() + + + ### +diff --git a/src/python/pcp/pmi.py b/src/python/pcp/pmi.py +index 7b884c6e50..c9037b3aa3 100644 +--- a/src/python/pcp/pmi.py ++++ 
b/src/python/pcp/pmi.py +@@ -57,7 +57,22 @@ from datetime import datetime, timedelta, tzinfo + from math import modf + + # Performance Co-Pilot PMI library (C) +-LIBPCP_IMPORT = CDLL(find_library("pcp_import")) ++def _find_libpcp_import(): ++ """Find libpcp_import.so, with fallback for NixOS.""" ++ lib = find_library("pcp_import") ++ if lib: ++ return CDLL(lib) ++ import os ++ ld_path = os.environ.get("LD_LIBRARY_PATH", "") ++ for path in ld_path.split(":"): ++ if path: ++ for libname in ("libpcp_import.so", "libpcp_import.so.1"): ++ candidate = os.path.join(path, libname) ++ if os.path.exists(candidate): ++ return CDLL(candidate) ++ return CDLL("libpcp_import.so") ++ ++LIBPCP_IMPORT = _find_libpcp_import() + + ## + # PMI Log Import Services diff --git a/nix/patches/python-pmapi-no-reconnect.patch b/nix/patches/python-pmapi-no-reconnect.patch new file mode 100644 index 0000000000..3fb47db63f --- /dev/null +++ b/nix/patches/python-pmapi-no-reconnect.patch @@ -0,0 +1,28 @@ +--- a/src/python/pcp/pmapi.py ++++ b/src/python/pcp/pmapi.py +@@ -1715,9 +1715,9 @@ class pmContext(object): + text = str(errmsg.value.decode()) + LIBC.free(errmsg) + raise pmErr(c_api.PM_ERR_CONV, text) +- status = LIBPCP.pmReconnectContext(self.ctx) +- if status < 0: +- raise pmErr(status) ++ # Note: pmReconnectContext removed here - calling it after each derived metric ++ # registration corrupts the IPC version table when connected to pmcd, causing ++ # subsequent PMNS lookups to fail with PM_ERR_NOPMNS. See NixOS MicroVM testing. 
+ + def pmLoadDerivedConfig(self, fname): + """PMAPI - Register derived metric names and definitions from a file +@@ -1726,9 +1726,9 @@ class pmContext(object): + if not isinstance(fname, bytes): + fname = fname.encode('utf-8') + status = count = LIBPCP.pmLoadDerivedConfig(fname) +- if status < 0: +- raise pmErr(status) +- status = LIBPCP.pmReconnectContext(self.ctx) ++ # Note: pmReconnectContext removed here - calling it after loading derived ++ # configs corrupts the IPC version table when connected to pmcd, causing ++ # subsequent PMNS lookups to fail with PM_ERR_NOPMNS. See NixOS MicroVM testing. + if status < 0: + raise pmErr(status) + return count diff --git a/nix/patches/shell-portable-pwd.patch b/nix/patches/shell-portable-pwd.patch new file mode 100644 index 0000000000..ff568a97fa --- /dev/null +++ b/nix/patches/shell-portable-pwd.patch @@ -0,0 +1,138 @@ +From: Nix Packaging +Subject: [PATCH] build: use portable pwd detection with environment override + +On systems like NixOS, /bin/pwd does not exist. This patch makes two +improvements: + +1. Check if PWDCMND is already set in the environment before searching, + allowing users/systems to override the pwd command path. + +2. Use 'pwd' (shell builtin) as the fallback instead of '/bin/pwd'. + The shell builtin is POSIX-compliant and supports -P on all modern + shells (bash, dash, ksh, zsh). 
+ +Affected scripts: +- src/pmlogger/pmlogger_check.sh +- src/pmlogger/pmlogger_daily.sh +- src/pmlogger/pmlogger_janitor.sh +- src/pmlogger/utilproc.sh +- src/pmie/pmie_check.sh +- src/pmie/pmie_daily.sh + +--- + src/pmie/pmie_check.sh | 4 +++- + src/pmie/pmie_daily.sh | 4 +++- + src/pmlogger/pmlogger_check.sh | 4 +++- + src/pmlogger/pmlogger_daily.sh | 4 +++- + src/pmlogger/pmlogger_janitor.sh | 4 +++- + src/pmlogger/utilproc.sh | 3 ++- + 6 files changed, 17 insertions(+), 6 deletions(-) + +--- a/src/pmie/pmie_check.sh ++++ b/src/pmie/pmie_check.sh +@@ -96,12 +96,14 @@ umask 022 + + # determine path for pwd command to override shell built-in +-PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' ++# Honor PWDCMND from environment if already set ++[ -z "$PWDCMND" ] && PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' + BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +-[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd ++# Fallback to shell builtin (portable, supports -P on POSIX shells) ++[ -z "$PWDCMND" ] && PWDCMND=pwd + eval $PWDCMND -P >/dev/null 2>&1 + [ $? -eq 0 ] && PWDCMND="$PWDCMND -P" + here=`$PWDCMND` +--- a/src/pmie/pmie_daily.sh ++++ b/src/pmie/pmie_daily.sh +@@ -106,12 +106,14 @@ myname="$PCP_BINADM_DIR/pmie_daily" + + # determine path for pwd command to override shell built-in +-PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' ++# Honor PWDCMND from environment if already set ++[ -z "$PWDCMND" ] && PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' + BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +-[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd ++# Fallback to shell builtin (portable, supports -P on POSIX shells) ++[ -z "$PWDCMND" ] && PWDCMND=pwd + eval $PWDCMND -P >/dev/null 2>&1 + [ $? 
-eq 0 ] && PWDCMND="$PWDCMND -P" + here=`$PWDCMND` +--- a/src/pmlogger/pmlogger_check.sh ++++ b/src/pmlogger/pmlogger_check.sh +@@ -86,12 +86,14 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 + + # determine path for pwd command to override shell built-in +-PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' ++# Honor PWDCMND from environment if already set ++[ -z "$PWDCMND" ] && PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' + BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +-[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd ++# Fallback to shell builtin (portable, supports -P on POSIX shells) ++[ -z "$PWDCMND" ] && PWDCMND=pwd + eval $PWDCMND -P >/dev/null 2>&1 + [ $? -eq 0 ] && PWDCMND="$PWDCMND -P" + here=`$PWDCMND` +--- a/src/pmlogger/pmlogger_daily.sh ++++ b/src/pmlogger/pmlogger_daily.sh +@@ -336,12 +336,14 @@ myname="$PCP_BINADM_DIR/pmlogger_daily" + + # determine path for pwd command to override shell built-in +-PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' ++# Honor PWDCMND from environment if already set ++[ -z "$PWDCMND" ] && PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' + BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +-[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd ++# Fallback to shell builtin (portable, supports -P on POSIX shells) ++[ -z "$PWDCMND" ] && PWDCMND=pwd + eval $PWDCMND -P >/dev/null 2>&1 + [ $? 
-eq 0 ] && PWDCMND="$PWDCMND -P" + here=`$PWDCMND` +--- a/src/pmlogger/pmlogger_janitor.sh ++++ b/src/pmlogger/pmlogger_janitor.sh +@@ -103,12 +103,14 @@ myname="$PCP_BINADM_DIR/pmlogger_janitor" + + # determine path for pwd command to override shell built-in +-PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' ++# Honor PWDCMND from environment if already set ++[ -z "$PWDCMND" ] && PWDCMND=`which pwd 2>/dev/null | $PCP_AWK_PROG ' + BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +-[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd ++# Fallback to shell builtin (portable, supports -P on POSIX shells) ++[ -z "$PWDCMND" ] && PWDCMND=pwd + eval $PWDCMND -P >/dev/null 2>&1 + [ $? -eq 0 ] && PWDCMND="$PWDCMND -P" + here=`$PWDCMND` +--- a/src/pmlogger/utilproc.sh ++++ b/src/pmlogger/utilproc.sh +@@ -251,7 +251,8 @@ BEGIN { i = 0 } + / not in / { i = 1 } + / aliased to / { i = 1 } + { if ( i == 0 ) print } + '` +- [ -z "$_PWDCMD" ] && _PWDCMD=/bin/pwd ++ # Fallback to shell builtin (portable, supports -P on POSIX shells) ++ [ -z "$_PWDCMD" ] && _PWDCMD=pwd + eval $_PWDCMD -P >/dev/null 2>&1 + [ $? -eq 0 ] && _PWDCMD="$_PWDCMD -P" + fi diff --git a/nix/pmie-test.nix b/nix/pmie-test.nix new file mode 100644 index 0000000000..a5bdf1476b --- /dev/null +++ b/nix/pmie-test.nix @@ -0,0 +1,186 @@ +# nix/pmie-test.nix +# +# pmie testing module with synthetic workload. +# Creates predictable CPU spikes that a dedicated pmie instance detects. +# +# This module provides: +# 1. stress-ng-test service: Periodic CPU load (20s on, 10s off) +# 2. pmie-test service: Dedicated pmie instance monitoring for the workload +# 3. Alert logging to /var/log/pcp/pmie/alerts.log +# +# Usage: +# imports = [ ./pmie-test.nix ]; +# services.pcp.pmieTest.enable = true; +# +{ config, pkgs, lib, ... 
}: + +with lib; +let + cfg = config.services.pcp.pmieTest; + pcpCfg = config.services.pcp; + + # Stress workload script - creates periodic CPU load for pmie to detect + stressScript = pkgs.writeShellApplication { + name = "stress-ng-test"; + runtimeInputs = [ pkgs.stress-ng pkgs.coreutils pkgs.util-linux ]; + text = '' + STRESS_DURATION=''${STRESS_DURATION:-20} + IDLE_DURATION=''${IDLE_DURATION:-10} + CPU_WORKERS=''${CPU_WORKERS:-2} + + echo "stress-ng-test: ''${STRESS_DURATION}s stress, ''${IDLE_DURATION}s idle, ''${CPU_WORKERS} workers" + + while true; do + logger -t stress-ng-test "Starting stress cycle" + stress-ng --cpu "$CPU_WORKERS" --timeout "''${STRESS_DURATION}s" --quiet || true + logger -t stress-ng-test "Idle period" + sleep "$IDLE_DURATION" + done + ''; + }; + + # Helper scripts for pmie actions (pmie shell action needs simple commands) + # Use absolute paths since pmie doesn't have PATH set + alertScript = pkgs.writeShellScript "pmie-alert" '' + ${pkgs.coreutils}/bin/echo "$(${pkgs.coreutils}/bin/date -Iseconds) [ALERT] CPU elevated" >> /var/log/pcp/pmie/alerts.log + ''; + + heartbeatScript = pkgs.writeShellScript "pmie-heartbeat" '' + ${pkgs.coreutils}/bin/touch /var/log/pcp/pmie/heartbeat + ''; + + # pmie rules for detecting the stress-ng workload + # Note: pmie shell actions need simple command paths, not complex shell invocations + pmieRules = pkgs.writeText "pmie-test.rules" '' + // + // pmie test rules - detect stress-ng-test workload + // Deployed by nix/pmie-test.nix + // + + delta = 5 sec; + + // Rule 1: Detect elevated CPU (>10% nice time) and log + // Note: stress-ng runs at Nice=19 with CPUQuota=50%, so CPU nice is ~0.5 + // With 4 CPUs, threshold of 0.10*4=0.4 should trigger during stress + cpu_elevated = + kernel.all.cpu.nice > 0.10 * hinv.ncpu + -> shell "${alertScript}"; + + // Rule 2: Heartbeat to confirm pmie is evaluating + heartbeat = + hinv.ncpu >= 1 + -> shell "${heartbeatScript}"; + ''; + + # PCP environment variables (same as 
main module) + pcpConf = "${pcpCfg.package}/share/pcp/etc/pcp.conf"; + pcpDir = "${pcpCfg.package}/share/pcp"; + pcpEnv = { + PCP_CONF = pcpConf; + PCP_DIR = pcpDir; + PCP_LOG_DIR = "/var/log/pcp"; + PCP_VAR_DIR = "/var/lib/pcp"; + PCP_TMP_DIR = "/var/lib/pcp/tmp"; + PCP_RUN_DIR = "/run/pcp"; + }; + +in +{ + options.services.pcp.pmieTest = { + enable = mkEnableOption "pmie testing with stress-ng workload"; + + stressDuration = mkOption { + type = types.int; + default = 20; + description = "Seconds to run stress-ng."; + }; + + idleDuration = mkOption { + type = types.int; + default = 10; + description = "Seconds to idle between stress cycles."; + }; + + cpuWorkers = mkOption { + type = types.int; + default = 2; + description = "Number of CPU stress workers."; + }; + }; + + config = mkIf cfg.enable { + # Require PCP to be enabled (need pmcd running) + assertions = [{ + assertion = pcpCfg.enable; + message = "services.pcp.pmieTest requires services.pcp.enable = true"; + }]; + + # Install stress-ng + environment.systemPackages = [ pkgs.stress-ng ]; + + # Create the stress workload service + systemd.services.stress-ng-test = { + description = "Synthetic CPU workload for pmie testing"; + wantedBy = [ "multi-user.target" ]; + after = [ "pmcd.service" ]; + wants = [ "pmcd.service" ]; + + environment = { + STRESS_DURATION = toString cfg.stressDuration; + IDLE_DURATION = toString cfg.idleDuration; + CPU_WORKERS = toString cfg.cpuWorkers; + }; + + serviceConfig = { + Type = "simple"; + ExecStart = "${stressScript}/bin/stress-ng-test"; + Restart = "always"; + RestartSec = "5s"; + + # Low priority - won't interfere with other services + Nice = 19; + IOSchedulingClass = "idle"; + + # Limit resource usage + CPUQuota = "50%"; + MemoryMax = "128M"; + + # Security + NoNewPrivileges = true; + ProtectHome = true; + PrivateTmp = true; + }; + }; + + # Dedicated pmie service for testing (runs our custom rules) + # This is separate from the main pmie service to avoid conflicts + 
systemd.services.pmie-test = { + description = "Performance Co-Pilot Inference Engine (Test Rules)"; + wantedBy = [ "multi-user.target" ]; + after = [ "pmcd.service" "stress-ng-test.service" ]; + bindsTo = [ "pmcd.service" ]; + + environment = pcpEnv; + + serviceConfig = { + Type = "simple"; + # Run pmie directly with our rules file + # -f = run in foreground (for systemd) + # -c = config file + # -l = log file + ExecStart = "${pcpCfg.package}/bin/pmie -f -c ${pmieRules} -l /var/log/pcp/pmie/pmie-test.log"; + Restart = "on-failure"; + RestartSec = "5s"; + User = "pcp"; + Group = "pcp"; + }; + }; + + # Add pmie directories and alert log file + # Note: nixos-module.nix also creates /var/log/pcp/pmie via tmpfiles + systemd.tmpfiles.rules = [ + "d /var/log/pcp/pmie 0755 pcp pcp -" + "f /var/log/pcp/pmie/alerts.log 0644 pcp pcp -" + ]; + }; +} diff --git a/nix/shell.nix b/nix/shell.nix new file mode 100644 index 0000000000..066b74ea5e --- /dev/null +++ b/nix/shell.nix @@ -0,0 +1,32 @@ +# nix/shell.nix +# +# Development shell for PCP. +# Provides build dependencies plus debugging tools. +# +{ pkgs, pcp }: +let + lib = pkgs.lib; +in +pkgs.mkShell { + inputsFrom = [ pcp ]; + packages = with pkgs; [ + gdb + jp2a + ] ++ lib.optionals pkgs.stdenv.isLinux [ + valgrind + # K8s testing tools + minikube + kubectl + docker + ] ++ lib.optionals pkgs.stdenv.isDarwin [ + lldb + ]; + shellHook = '' + if [[ -f ./images/pcpicon-light.png ]]; then + jp2a --colors ./images/pcpicon-light.png 2>/dev/null || true + fi + echo "PCP Development Shell" + echo "Run './configure --help' to see build options" + echo "Otherwise use 'nix build' to build the package" + ''; +} diff --git a/nix/test-all/default.nix b/nix/test-all/default.nix new file mode 100644 index 0000000000..55f29e22c1 --- /dev/null +++ b/nix/test-all/default.nix @@ -0,0 +1,195 @@ +# nix/test-all/default.nix +# +# Runs all PCP tests sequentially (container-test, k8s-test, microvm-tests). +# Reports overall pass/fail status. 
+# +# Usage in flake.nix: +# testAll = import ./nix/test-all { inherit pkgs lib containerTest k8sTest; }; +# +# Generated outputs: +# testAll.packages.pcp-test-all - Sequential test runner +# testAll.apps.pcp-test-all - App entry point +# +{ pkgs, lib, containerTest, k8sTest }: +let + # Get the test executables + containerTestBin = "${containerTest.packages.pcp-container-test}/bin/pcp-container-test"; + k8sTestBin = "${k8sTest.packages.pcp-k8s-test}/bin/pcp-k8s-test"; + minikubeStartBin = "${k8sTest.packages.pcp-minikube-start}/bin/pcp-minikube-start"; + + # Import microvm test runner + microvmTestAll = import ../tests/test-all-microvms.nix { inherit pkgs lib; }; + microvmTestBin = "${microvmTestAll}/bin/pcp-test-all-microvms"; + + mkTestAll = pkgs.writeShellApplication { + name = "pcp-test-all"; + runtimeInputs = with pkgs; [ minikube kubectl bc ]; + text = '' + set +e # Don't exit on first failure + + # Colors + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + CYAN='\033[0;36m' + BOLD='\033[1m' + NC='\033[0m' + + # Timing + time_ms() { date +%s%3N; } + elapsed_ms() { echo $(( $(time_ms) - $1 )); } + format_time() { + local ms=$1 + if [[ $ms -lt 1000 ]]; then + echo "''${ms}ms" + elif [[ $ms -lt 60000 ]]; then + echo "$(echo "scale=1; $ms/1000" | bc)s" + else + local mins=$((ms / 60000)) + local secs=$(( (ms % 60000) / 1000 )) + echo "''${mins}m''${secs}s" + fi + } + + # Check if minikube is running + minikube_running() { + if timeout 5 minikube status --format='{{.Host}}' 2>/dev/null | grep -q "Running"; then + return 0 + fi + return 1 + } + + echo "" + echo -e "''${BOLD}╔══════════════════════════════════════════════════════════════╗''${NC}" + echo -e "''${BOLD}║ PCP Test Suite - All Tests ║''${NC}" + echo -e "''${BOLD}╚══════════════════════════════════════════════════════════════╝''${NC}" + echo "" + + TOTAL_START=$(time_ms) + TESTS_PASSED=0 + TESTS_FAILED=0 + declare -a FAILED_TESTS=() + + # ─── Prerequisites: Ensure minikube 
is running ──────────────────────────── + echo -e "''${CYAN}Checking prerequisites...''${NC}" + if ! minikube_running; then + echo -e "''${YELLOW}Minikube not running, starting it...''${NC}" + echo "" + if "${minikubeStartBin}"; then + echo "" + echo -e "''${GREEN}Minikube started successfully''${NC}" + else + echo "" + echo -e "''${RED}Failed to start minikube''${NC}" + echo -e "''${RED}K8s tests will fail. Continuing anyway...''${NC}" + fi + else + echo -e "''${GREEN}Minikube is running''${NC}" + fi + echo "" + + # ─── Test 1: Container Test ───────────────────────────────────────────── + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo -e "''${BOLD}[1/3] Container Test''${NC}" + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo "" + + TEST_START=$(time_ms) + if "${containerTestBin}"; then + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${GREEN}✓ Container test passed ($(format_time "$TEST_ELAPSED"))''${NC}" + TESTS_PASSED=$((TESTS_PASSED + 1)) + else + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${RED}✗ Container test failed ($(format_time "$TEST_ELAPSED"))''${NC}" + TESTS_FAILED=$((TESTS_FAILED + 1)) + FAILED_TESTS+=("container-test") + fi + echo "" + + # ─── Test 2: Kubernetes Test ──────────────────────────────────────────── + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo -e "''${BOLD}[2/3] Kubernetes Test''${NC}" + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo "" + + TEST_START=$(time_ms) + if "${k8sTestBin}"; then + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${GREEN}✓ Kubernetes test passed ($(format_time "$TEST_ELAPSED"))''${NC}" + TESTS_PASSED=$((TESTS_PASSED + 1)) + else + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${RED}✗ Kubernetes test failed ($(format_time 
"$TEST_ELAPSED"))''${NC}" + TESTS_FAILED=$((TESTS_FAILED + 1)) + FAILED_TESTS+=("k8s-test") + fi + echo "" + + # ─── Test 3: MicroVM Tests ────────────────────────────────────────────── + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo -e "''${BOLD}[3/3] MicroVM Tests (all variants)''${NC}" + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo "" + + TEST_START=$(time_ms) + if "${microvmTestBin}" --skip-tap; then + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${GREEN}✓ MicroVM tests passed ($(format_time "$TEST_ELAPSED"))''${NC}" + TESTS_PASSED=$((TESTS_PASSED + 1)) + else + TEST_ELAPSED=$(elapsed_ms "$TEST_START") + echo "" + echo -e "''${RED}✗ MicroVM tests failed ($(format_time "$TEST_ELAPSED"))''${NC}" + TESTS_FAILED=$((TESTS_FAILED + 1)) + FAILED_TESTS+=("microvm-tests") + fi + echo "" + + # ─── Summary ──────────────────────────────────────────────────────────── + TOTAL_ELAPSED=$(elapsed_ms "$TOTAL_START") + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo -e "''${BOLD}Summary''${NC}" + echo -e "''${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━''${NC}" + echo "" + echo " Tests passed: $TESTS_PASSED" + echo " Tests failed: $TESTS_FAILED" + echo " Total time: $(format_time "$TOTAL_ELAPSED")" + echo "" + + if [[ $TESTS_FAILED -eq 0 ]]; then + echo -e "''${GREEN}''${BOLD}All tests passed!''${NC}" + echo "" + exit 0 + else + echo -e "''${RED}''${BOLD}Failed tests:''${NC}" + for test in "''${FAILED_TESTS[@]}"; do + echo -e " ''${RED}• $test''${NC}" + done + echo "" + exit 1 + fi + ''; + }; + +in +{ + # Packages output for flake.nix + packages = { + pcp-test-all = mkTestAll; + }; + + # Apps output for flake.nix + apps = { + pcp-test-all = { + type = "app"; + program = "${mkTestAll}/bin/pcp-test-all"; + }; + }; +} diff --git a/nix/test-common/constants.nix 
b/nix/test-common/constants.nix new file mode 100644 index 0000000000..d1db13cf7f --- /dev/null +++ b/nix/test-common/constants.nix @@ -0,0 +1,61 @@ +# nix/test-common/constants.nix +# +# Shared constants for PCP test scripts. +# Provides common values used by container and k8s tests. +# +# Usage: +# common = import ../test-common/constants.nix { }; +# inherit (common) colors metrics; +# +{ }: +let + mainConstants = import ../constants.nix; +in +rec { + # Re-export from main constants + inherit (mainConstants) ports user; + + # ─── Terminal Formatting ──────────────────────────────────────────────── + # ANSI escape codes for terminal colors (used in constants, not shell code) + colors = { + reset = "\\033[0m"; + bold = "\\033[1m"; + red = "\\033[31m"; + green = "\\033[32m"; + yellow = "\\033[33m"; + blue = "\\033[34m"; + cyan = "\\033[36m"; + }; + + # ─── Common Metrics ───────────────────────────────────────────────────── + # Metrics used by both container and k8s tests + metrics = { + # Basic PCP metrics (always available) + basic = [ + "pmcd.version" + "pmcd.numagents" + ]; + + # Kernel metrics (require host /proc access) + kernel = [ + "kernel.all.load" + "kernel.all.cpu.user" + "mem.physmem" + ]; + + # BPF metrics (require privileged mode + BTF kernel) + bpf = [ + "bpf.runq.latency" + "bpf.disk.all.latency" + ]; + }; + + # ─── Common Timeouts ──────────────────────────────────────────────────── + # Default timeouts shared across tests (can be overridden) + timeouts = { + build = 300; # Build container image + ready = 30; # Wait for services/ports/metrics + shutdown = 30; # Graceful shutdown + cleanup = 5; # Cleanup resources + }; +} diff --git a/nix/test-common/inputs.nix b/nix/test-common/inputs.nix new file mode 100644 index 0000000000..9e353ea3a4 --- /dev/null +++ b/nix/test-common/inputs.nix @@ -0,0 +1,43 @@ +# nix/test-common/inputs.nix +# +# Shared runtime inputs for PCP test scripts. +# Provides common packages needed by container and k8s tests. 
+# +# Usage: +# inputs = import ../test-common/inputs.nix { inherit pkgs; }; +# runtimeInputs = inputs.common ++ inputs.container; +# +{ pkgs }: +rec { + # Common runtime inputs for all test scripts + common = with pkgs; [ + coreutils + gnugrep + gnused + gawk + procps + netcat-gnu + bc + util-linux + nix + ]; + + # Additional inputs for container tests (docker/podman) + container = with pkgs; [ + docker + ]; + + # Additional inputs for Kubernetes tests + k8s = with pkgs; [ + jq + kubectl + minikube + docker + ]; + + # All container test inputs combined + containerAll = common ++ container; + + # All k8s test inputs combined + k8sAll = common ++ k8s; +} diff --git a/nix/test-common/shell-helpers.nix b/nix/test-common/shell-helpers.nix new file mode 100644 index 0000000000..0f9ed9f4f6 --- /dev/null +++ b/nix/test-common/shell-helpers.nix @@ -0,0 +1,120 @@ +# nix/test-common/shell-helpers.nix +# +# Shared shell helper functions for PCP test scripts. +# Provides color output, timing, and result formatting. +# +# Usage: +# helpers = import ../test-common/shell-helpers.nix { }; +# text = '' +# ${helpers.colorHelpers} +# ${helpers.timingHelpers} +# ... +# ''; +# +{ }: +rec { + # ─── Color Helpers ────────────────────────────────────────────────────── + # ANSI color codes and output functions for terminal formatting. 
+ colorHelpers = '' + # ANSI color codes + _reset='\033[0m' + _bold='\033[1m' + _red='\033[31m' + _green='\033[32m' + _yellow='\033[33m' + _blue='\033[34m' + _cyan='\033[36m' + + # Color output functions + info() { echo -e "''${_cyan}$*''${_reset}"; } + success() { echo -e "''${_green}$*''${_reset}"; } + warn() { echo -e "''${_yellow}$*''${_reset}"; } + error() { echo -e "''${_red}$*''${_reset}"; } + bold() { echo -e "''${_bold}$*''${_reset}"; } + + # Phase header + phase_header() { + local phase="$1" + local name="$2" + local timeout="$3" + echo "" + echo -e "''${_bold}--- Phase $phase: $name (timeout: ''${timeout}s) ---''${_reset}" + } + + # Pass/fail/warn/skip result with optional timing + result_pass() { + local msg="$1" + local time_ms="''${2:-}" + if [[ -n "$time_ms" ]]; then + echo -e " ''${_green}PASS''${_reset}: $msg (''${time_ms}ms)" + else + echo -e " ''${_green}PASS''${_reset}: $msg" + fi + } + + result_fail() { + local msg="$1" + local time_ms="''${2:-}" + if [[ -n "$time_ms" ]]; then + echo -e " ''${_red}FAIL''${_reset}: $msg (''${time_ms}ms)" + else + echo -e " ''${_red}FAIL''${_reset}: $msg" + fi + } + + result_warn() { + local msg="$1" + local time_ms="''${2:-}" + if [[ -n "$time_ms" ]]; then + echo -e " ''${_yellow}WARN''${_reset}: $msg (''${time_ms}ms)" + else + echo -e " ''${_yellow}WARN''${_reset}: $msg" + fi + } + + result_skip() { + local msg="$1" + local time_ms="''${2:-}" + if [[ -n "$time_ms" ]]; then + echo -e " ''${_yellow}SKIP''${_reset}: $msg (''${time_ms}ms)" + else + echo -e " ''${_yellow}SKIP''${_reset}: $msg" + fi + } + ''; + + # ─── Timing Helpers ───────────────────────────────────────────────────── + # Millisecond timing for phase durations. 
+ timingHelpers = '' + # Get current time in milliseconds + time_ms() { + echo $(($(date +%s%N) / 1000000)) + } + + # Calculate elapsed time in milliseconds + elapsed_ms() { + local start="$1" + local now + now=$(time_ms) + echo $((now - start)) + } + + # Convert milliseconds to human-readable format + format_ms() { + local ms="$1" + if [[ $ms -lt 1000 ]]; then + echo "''${ms}ms" + elif [[ $ms -lt 60000 ]]; then + echo "$((ms / 1000)).$((ms % 1000 / 100))s" + else + local mins=$((ms / 60000)) + local secs=$(((ms % 60000) / 1000)) + echo "''${mins}m''${secs}s" + fi + } + ''; + + # ─── Combined Helpers ─────────────────────────────────────────────────── + # All common helpers combined. + allHelpers = colorHelpers + "\n" + timingHelpers; +} diff --git a/nix/test-lib.nix b/nix/test-lib.nix new file mode 100644 index 0000000000..1b811dc0db --- /dev/null +++ b/nix/test-lib.nix @@ -0,0 +1,493 @@ +# nix/test-lib.nix +# +# Shared test functions for MicroVM validation. +# All timeouts, thresholds, and ports come from constants.nix. +# +# This library provides shell script fragments that can be composed +# into test scripts. Each function is a string containing bash code. +# +{ pkgs, lib }: +let + constants = import ./constants.nix; + + sshOpts = [ + "-o" "StrictHostKeyChecking=no" + "-o" "UserKnownHostsFile=/dev/null" + "-o" "ConnectTimeout=${toString constants.test.sshTimeoutSeconds}" + "-o" "LogLevel=ERROR" + "-o" "PubkeyAuthentication=no" + ]; + sshOptsStr = lib.concatStringsSep " " sshOpts; + + # SSH command with sshpass for debug VMs (password: pcp) + sshPassCmd = "sshpass -p pcp ssh"; +in +{ + inherit sshOpts sshOptsStr constants; + + # SSH connectivity + # Polling loop instead of fixed sleep. Respects constants.test.sshMaxAttempts. 
+ waitForSsh = '' + wait_for_ssh() { + local host="$1" port="$2" + local max="${toString constants.test.sshMaxAttempts}" + local delay="${toString constants.test.sshRetryDelaySeconds}" + local attempt=0 + + echo "Waiting for SSH on $host:$port (max $max attempts)..." + while ! sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" true 2>/dev/null; do + attempt=$((attempt + 1)) + if [[ $attempt -ge $max ]]; then + echo "FAIL: SSH not available after $max attempts" + return 1 + fi + echo " attempt $attempt/$max..." + sleep "$delay" + done + echo "SSH connected to $host:$port" + } + ''; + + # Service readiness + # Wait for a service to be active AND its port to be listening. + waitForService = '' + wait_for_service() { + local host="$1" port="$2" service="$3" target_port="$4" + local max=30 + local attempt=0 + + echo -n " Waiting for $service on port $target_port... " + while true; do + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemctl is-active $service >/dev/null 2>&1 && ss -tlnp | grep -q :$target_port" 2>/dev/null; then + echo "ready" + return 0 + fi + + attempt=$((attempt + 1)) + if [[ $attempt -ge $max ]]; then + echo "timeout" + return 1 + fi + sleep 2 + done + } + ''; + + # Basic check runner + runCheck = '' + run_check() { + local desc="$1" host="$2" port="$3" + shift 3 + local cmd="$*" + echo -n " CHECK: $desc ... 
" + local output + if output=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" "$cmd" 2>&1); then + echo "OK" + if [[ -n "$output" ]]; then + echo "$output" | head -5 | while IFS= read -r line; do echo " $line"; done + fi + return 0 + else + echo "FAIL" + echo " command: $cmd" + echo " output: $output" + return 1 + fi + } + ''; + + # Service check + checkService = '' + check_service() { + run_check "service $3 is active" "$1" "$2" "systemctl is-active $3" + } + ''; + + # Port check + checkPort = '' + check_port() { + run_check "port $3 is listening" "$1" "$2" "ss -tlnp | grep -q :$3" + } + ''; + + # Metric check + checkMetric = '' + check_metric() { + run_check "metric $3" "$1" "$2" "pminfo -f $3 2>/dev/null | head -3" + } + ''; + + # Journal error checking + checkJournal = '' + check_journal() { + local host="$1" port="$2" service="$3" + echo -n " CHECK: no errors in $service journal ... " + local errors + errors=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "journalctl -u $service --no-pager -p err -q --no-hostname 2>/dev/null" 2>&1) + if [[ -z "$errors" ]]; then + echo "OK (clean)" + return 0 + else + local count + count=$(echo "$errors" | wc -l) + echo "FAIL ($count error line(s))" + while IFS= read -r line; do + echo " $line" + done <<< "$errors" + return 1 + fi + } + ''; + + # TUI smoke test + checkTui = '' + check_tui() { + local host="$1" port="$2" desc="$3" + shift 3 + local cmd="$*" + echo -n " CHECK: $desc ... " + local output exit_code + output=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "timeout 10 $cmd 2>&1" 2>&1) && exit_code=$? || exit_code=$? 
+ # 124 = timeout (acceptable), 0 = clean, others = failure + if [[ $exit_code -eq 0 || $exit_code -eq 124 ]]; then + echo "OK (exit $exit_code)" + return 0 + else + echo "FAIL (exit $exit_code)" + echo " command: $cmd" + echo "$output" | tail -5 | while IFS= read -r line; do echo " $line"; done + return 1 + fi + } + ''; + + # Security analysis using systemd-analyze security + checkSecurity = '' + check_security() { + local host="$1" port="$2" service="$3" network_facing="''${4:-false}" + local max_score="${toString constants.security.networkServiceMaxScore}" + local warn_score="${toString constants.security.internalServiceWarnScore}" + + echo -n " CHECK: systemd security $service ... " + local output score_line score level + + output=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemd-analyze security $service 2>&1" 2>&1) + + score_line=$(echo "$output" | grep -E "^→ Overall exposure level" | tail -1) + + if [[ -z "$score_line" ]]; then + echo "FAIL (could not parse security output)" + return 1 + fi + + score=$(echo "$score_line" | grep -oE '[0-9]+\.[0-9]+' | head -1) + level=$(echo "$score_line" | awk '{print $NF}') + + if [[ "$network_facing" == "true" ]]; then + if (( $(echo "$score > $max_score" | bc -l) )); then + echo "FAIL (score $score $level, must be <= $max_score)" + return 1 + else + echo "OK (score $score $level)" + return 0 + fi + else + if (( $(echo "$score > $warn_score" | bc -l) )); then + echo "WARN (score $score $level, consider hardening)" + return 0 + else + echo "OK (score $score $level)" + return 0 + fi + fi + } + ''; + + # PMNS verification + checkPmns = '' + check_pmns() { + local host="$1" port="$2" + local min_metrics="${toString constants.test.minExpectedMetrics}" + + echo -n " CHECK: PMNS root loaded ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "pminfo >/dev/null 2>&1"; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: metric count >= $min_metrics ... 
" + local count + count=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" "pminfo 2>/dev/null | wc -l") + if [[ "$count" -ge "$min_metrics" ]]; then + echo "OK ($count metrics)" + else + echo "WARN (only $count metrics)" + fi + + return 0 + } + ''; + + # pmie testing check + # Verifies pmie-test and stress-ng-test services are running and generating alerts + checkPmieTest = '' + check_pmie_test() { + local host="$1" port="$2" + local wait_time="$3" # Seconds to wait for alerts + + echo -n " CHECK: service pmie-test is active ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemctl is-active pmie-test" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: service stress-ng-test is active ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemctl is-active stress-ng-test" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo " Waiting ''${wait_time}s for pmie to detect stress cycles..." + sleep "$wait_time" + + echo -n " CHECK: pmie alerts.log has content ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "test -s /var/log/pcp/pmie/alerts.log" 2>/dev/null; then + echo "OK" + sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "tail -3 /var/log/pcp/pmie/alerts.log" 2>/dev/null | while IFS= read -r line; do + echo " $line" + done + else + echo "FAIL (empty or missing)" + return 1 + fi + + echo -n " CHECK: pmie heartbeat file exists ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "test -f /var/log/pcp/pmie/heartbeat" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: pmie detected CPU elevation ... 
" + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "grep -q 'ALERT.*CPU elevated' /var/log/pcp/pmie/alerts.log" 2>/dev/null; then + echo "OK" + else + echo "WARN (no CPU alerts yet)" + fi + + return 0 + } + ''; + + # Metric parity check (PCP vs node_exporter) + checkMetricParity = '' + check_metric_parity() { + local host="$1" port="$2" pcp_metric="$3" prom_query="$4" + local tolerance="${toString constants.test.metricParityTolerancePct}" + + echo -n " CHECK: parity $pcp_metric vs $prom_query ... " + + local pcp_val prom_val + pcp_val=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "pmval -s 1 -f 4 $pcp_metric 2>/dev/null | tail -1 | awk '{print \$NF}'" 2>&1) + prom_val=$(sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf localhost:${toString constants.ports.nodeExporter}/metrics | grep -E '^$prom_query ' | head -1 | awk '{print \$2}'" 2>&1) + + if ! [[ "$pcp_val" =~ ^[0-9.]+$ ]] || ! [[ "$prom_val" =~ ^[0-9.]+$ ]]; then + echo "SKIP (non-numeric values)" + return 0 + fi + + local pct + if (( $(echo "$prom_val != 0" | bc -l) )); then + pct=$(echo "scale=2; 100 * ($pcp_val - $prom_val) / $prom_val" | bc -l) + pct=''${pct#-} + else + pct="0" + fi + + if (( $(echo "$pct <= $tolerance" | bc -l) )); then + echo "OK (diff $pct%)" + return 0 + else + echo "WARN (diff $pct% > $tolerance%)" + return 0 + fi + } + ''; + + # ─── Grafana Tests ──────────────────────────────────────────────────────── + # Tests for Grafana + Prometheus comparison stack + + # Check Grafana service and HTTP endpoint + checkGrafana = '' + check_grafana() { + local host="$1" port="$2" + + echo -n " CHECK: service grafana is active ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemctl is-active grafana" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Grafana HTTP responds on port ${toString constants.ports.grafana} ... 
" + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf http://localhost:${toString constants.ports.grafana}/api/health" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Grafana login works (admin/pcp) ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/org" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + return 0 + } + ''; + + # Check Prometheus service and HTTP endpoint + checkPrometheus = '' + check_prometheus() { + local host="$1" port="$2" + + echo -n " CHECK: service prometheus is active ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "systemctl is-active prometheus" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Prometheus HTTP responds on port ${toString constants.ports.prometheus} ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf http://localhost:${toString constants.ports.prometheus}/-/ready" >/dev/null 2>&1; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Prometheus query API works ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf 'http://localhost:${toString constants.ports.prometheus}/api/v1/query?query=up' | grep -q success" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + return 0 + } + ''; + + # Check Grafana datasources are provisioned + checkGrafanaDatasources = '' + check_grafana_datasources() { + local host="$1" port="$2" + + echo -n " CHECK: PCP Vector datasource provisioned ... 
" + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/datasources | grep -q 'PCP Vector'" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Prometheus datasource provisioned ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/datasources | grep -q 'Prometheus'" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + return 0 + } + ''; + + # Check Grafana dashboards are loaded + checkGrafanaDashboards = '' + check_grafana_dashboards() { + local host="$1" port="$2" + + echo -n " CHECK: PCP dashboards folder exists ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/search?folderIds=0 | grep -q 'PCP'" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Host Overview dashboard loaded ... " + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/search | grep -q 'Host Overview'" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + echo -n " CHECK: Node Exporter dashboard loaded ... 
" + if sshpass -p pcp ssh ${sshOptsStr} -p "$port" "root@$host" \ + "curl -sf -u admin:pcp http://localhost:${toString constants.ports.grafana}/api/search | grep -qi 'node'" 2>/dev/null; then + echo "OK" + else + echo "FAIL" + return 1 + fi + + return 0 + } + ''; + + # Full Grafana test suite + checkGrafanaFull = '' + check_grafana_full() { + local host="$1" port="$2" + + echo "=== Grafana Tests ===" + check_grafana "$host" "$port" || return 1 + check_prometheus "$host" "$port" || return 1 + check_grafana_datasources "$host" "$port" || return 1 + check_grafana_dashboards "$host" "$port" || return 1 + + # Security checks + check_security "$host" "$port" "grafana.service" true || return 1 + check_security "$host" "$port" "prometheus.service" true || return 1 + + return 0 + } + ''; +} diff --git a/nix/tests/microvm-test.nix b/nix/tests/microvm-test.nix new file mode 100644 index 0000000000..a48e86a82f --- /dev/null +++ b/nix/tests/microvm-test.nix @@ -0,0 +1,137 @@ +# nix/tests/microvm-test.nix +# +# Test script builder for MicroVM variants. +# Uses constants.nix for all configuration. 
+# +# Usage: +# nix run .#pcp-test-base-user # Test base VM with user networking +# nix run .#pcp-test-base-tap # Test base VM with TAP networking +# nix run .#pcp-test-eval-user # Test eval VM with user networking +# nix run .#pcp-test-eval-tap # Test eval VM with TAP networking +# +{ pkgs, lib, variant, host, sshPort }: +let + testLib = import ../test-lib.nix { inherit pkgs lib; }; + constants = testLib.constants; + + isEval = lib.hasPrefix "eval" variant; + isTap = lib.hasSuffix "tap" variant; +in +pkgs.writeShellApplication { + name = "pcp-test-${variant}"; + runtimeInputs = with pkgs; [ openssh sshpass curl bc coreutils ]; + text = '' + echo "========================================================" + echo " PCP MicroVM Test: ${variant}" + echo " Host: ${host} SSH Port: ${toString sshPort}" + echo "========================================================" + echo "" + + # Create output directory + output_dir="test-results/${variant}" + mkdir -p "$output_dir" + + passed=0 + failed=0 + + check() { + if "$@"; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + } + + ${testLib.waitForSsh} + ${testLib.waitForService} + ${testLib.runCheck} + ${testLib.checkService} + ${testLib.checkPort} + ${testLib.checkMetric} + ${testLib.checkJournal} + ${testLib.checkTui} + ${testLib.checkSecurity} + ${testLib.checkPmns} + ${lib.optionalString isEval testLib.checkMetricParity} + ${lib.optionalString isEval testLib.checkPmieTest} + + # Phase 1: SSH connectivity + echo "" + echo "Phase 1: SSH connectivity" + wait_for_ssh "${host}" "${toString sshPort}" + + # Phase 2: Service status + echo "" + echo "Phase 2: Service status" + wait_for_service "${host}" "${toString sshPort}" "pmcd" "${toString constants.ports.pmcd}" + check check_service "${host}" "${toString sshPort}" "pmcd" + check check_service "${host}" "${toString sshPort}" "pmproxy" + check check_port "${host}" "${toString sshPort}" "${toString constants.ports.pmcd}" + check check_port "${host}" "${toString 
sshPort}" "${toString constants.ports.pmproxy}" + ${lib.optionalString isEval '' + check check_service "${host}" "${toString sshPort}" "prometheus-node-exporter" + check check_port "${host}" "${toString sshPort}" "${toString constants.ports.nodeExporter}" + ''} + + # Phase 3: PCP metrics + echo "" + echo "Phase 3: PCP metrics" + check check_pmns "${host}" "${toString sshPort}" + check check_metric "${host}" "${toString sshPort}" "kernel.all.load" + check check_metric "${host}" "${toString sshPort}" "kernel.all.cpu.user" + check check_metric "${host}" "${toString sshPort}" "mem.physmem" + + # Phase 4: HTTP endpoints + echo "" + echo "Phase 4: HTTP endpoints" + check run_check "pmproxy REST API" "${host}" "${toString sshPort}" \ + "curl -sf http://localhost:${toString constants.ports.pmproxy}/pmapi/1/metrics?target=kernel.all.load | head -1" + ${lib.optionalString isEval '' + check run_check "node_exporter metrics" "${host}" "${toString sshPort}" \ + "curl -sf http://localhost:${toString constants.ports.nodeExporter}/metrics | grep -q node_cpu_seconds_total" + ''} + + # Phase 5: Journal health + echo "" + echo "Phase 5: Journal health" + check check_journal "${host}" "${toString sshPort}" "pmcd" + check check_journal "${host}" "${toString sshPort}" "pmproxy" + + # Phase 6: PCP TUI smoke test + echo "" + echo "Phase 6: PCP TUI smoke test" + check check_tui "${host}" "${toString sshPort}" "pcp-dstat outputs" \ + "pcp dstat --nocolor 1 2" + + ${lib.optionalString isEval '' + # Phase 7: Metric parity (eval only) + echo "" + echo "Phase 7: Metric parity" + check check_metric_parity "${host}" "${toString sshPort}" \ + "kernel.all.load" "node_load1" + + # Phase 8: pmie testing (eval only) + echo "" + echo "Phase 8: pmie testing" + check check_pmie_test "${host}" "${toString sshPort}" 45 + ''} + + # Summary + echo "" + echo "========================================================" + echo " Results: $passed passed, $failed failed" + echo 
"========================================================" + + # Save results + { + echo "variant=${variant}" + echo "passed=$passed" + echo "failed=$failed" + echo "timestamp=$(date -Iseconds)" + } > "$output_dir/results.txt" + + if [[ $failed -gt 0 ]]; then + exit 1 + fi + ''; +} diff --git a/nix/tests/test-all-microvms.nix b/nix/tests/test-all-microvms.nix new file mode 100644 index 0000000000..32808f27f6 --- /dev/null +++ b/nix/tests/test-all-microvms.nix @@ -0,0 +1,624 @@ +# nix/tests/test-all-microvms.nix +# +# Comprehensive test runner for all PCP MicroVM variants. +# +# Features: +# - Polling-based build checking (10s intervals) for slow machine support +# - Sequential builds to leverage Nix caching of shared PCP package +# - Variant-specific port offsets to avoid conflicts +# - Continue on failure, report all results at end +# +# Usage: +# nix run .#pcp-test-all-microvms +# nix run .#pcp-test-all-microvms -- --skip-tap # Skip TAP variants +# nix run .#pcp-test-all-microvms -- --only=eval # Test only eval variant +# +{ pkgs, lib }: +let + constants = import ../constants.nix; + testLib = import ../test-lib.nix { inherit pkgs lib; }; + + # Variant definitions with their properties + variants = { + base = { + name = "pcp-microvm"; + offset = constants.variantPortOffsets.base; + checks = [ "pmcd" "pmproxy" "pmlogger" ]; + description = "Base PCP (pmcd, pmlogger, pmproxy)"; + }; + base-tap = { + name = "pcp-microvm-tap"; + offset = constants.variantPortOffsets.base; + tap = true; + checks = [ "pmcd" "pmproxy" "pmlogger" ]; + description = "Base PCP with TAP networking"; + }; + eval = { + name = "pcp-microvm-eval"; + offset = constants.variantPortOffsets.eval; + checks = [ "pmcd" "pmproxy" "node_exporter" ]; + description = "Eval (+ node_exporter, below, pmie-test)"; + }; + eval-tap = { + name = "pcp-microvm-eval-tap"; + offset = constants.variantPortOffsets.eval; + tap = true; + checks = [ "pmcd" "pmproxy" "node_exporter" ]; + description = "Eval with TAP 
networking"; + }; + grafana = { + name = "pcp-microvm-grafana"; + offset = constants.variantPortOffsets.grafana; + checks = [ "pmcd" "pmproxy" "node_exporter" "grafana" "prometheus" ]; + description = "Grafana (+ Prometheus dashboards)"; + }; + grafana-tap = { + name = "pcp-microvm-grafana-tap"; + offset = constants.variantPortOffsets.grafana; + tap = true; + checks = [ "pmcd" "pmproxy" "node_exporter" "grafana" "prometheus" ]; + description = "Grafana with TAP networking"; + }; + bpf = { + name = "pcp-microvm-bpf"; + offset = constants.variantPortOffsets.bpf; + checks = [ "pmcd" "pmproxy" "node_exporter" "bpf" ]; + description = "BPF PMDA (pre-compiled eBPF)"; + }; + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + }; + + sshOpts = lib.concatStringsSep " " testLib.sshOpts; + +in +pkgs.writeShellApplication { + name = "pcp-test-all-microvms"; + runtimeInputs = with pkgs; [ + openssh + sshpass + curl + coreutils + gnugrep + procps + nix + ]; + text = '' + set +e # Don't exit on error - we want to continue and report all results + + # ─── Configuration ───────────────────────────────────────────────────── + POLL_INTERVAL=${toString constants.test.buildPollSeconds} + SSH_MAX_ATTEMPTS=${toString constants.test.sshMaxAttempts} + SSH_RETRY_DELAY=${toString constants.test.sshRetryDelaySeconds} + BASE_SSH_PORT=${toString constants.ports.sshForward} + TAP_VM_IP="${constants.network.vmIp}" + + # Service warmup: wait for services to fully start after SSH connects + # pmlogger takes ~9s, Grafana HTTP takes ~17s after service activation + SERVICE_WARMUP_SECONDS=15 + + # HTTP check retry settings (Grafana/Prometheus may need extra time) + HTTP_CHECK_RETRIES=3 + HTTP_CHECK_DELAY=5 + + # Guest-side ports (used in checks via SSH) + GUEST_PMCD_PORT=${toString constants.ports.pmcd} + GUEST_PMPROXY_PORT=${toString constants.ports.pmproxy} + GUEST_NODE_EXPORTER_PORT=${toString constants.ports.nodeExporter} + GUEST_GRAFANA_PORT=${toString constants.ports.grafana} + 
GUEST_PROMETHEUS_PORT=${toString constants.ports.prometheus} + + # Results tracking + declare -A RESULTS + declare -A DURATIONS + TOTAL_PASSED=0 + TOTAL_FAILED=0 + TOTAL_SKIPPED=0 + + # CLI options + SKIP_TAP=false + ONLY_VARIANT="" + + # ─── Argument Parsing ────────────────────────────────────────────────── + while [[ $# -gt 0 ]]; do + case "$1" in + --skip-tap) + SKIP_TAP=true + shift + ;; + --only=*) + ONLY_VARIANT="''${1#--only=}" + shift + ;; + --help|-h) + echo "Usage: pcp-test-all-microvms [OPTIONS]" + echo "" + echo "Options:" + echo " --skip-tap Skip TAP networking variants" + echo " --only=VARIANT Test only specified variant (base, eval, grafana, bpf)" + echo " --help, -h Show this help" + echo "" + echo "Variants:" + echo " base, base-tap, eval, eval-tap, grafana, grafana-tap, bpf" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + done + + # ─── Helper Functions ────────────────────────────────────────────────── + + log() { + echo "[$(date '+%H:%M:%S')] $*" + } + + log_section() { + echo "" + echo "════════════════════════════════════════════════════════════════════" + echo " $*" + echo "════════════════════════════════════════════════════════════════════" + } + + # Poll for build completion + wait_for_build() { + local variant="$1" + local result_link="$2" + local start_time + start_time=$(date +%s) + + log "Building $variant..." + + # Start build in background + nix build ".#$variant" -o "$result_link" 2>&1 & + local build_pid=$! + + # Poll for completion + while kill -0 "$build_pid" 2>/dev/null; do + local elapsed=$(( $(date +%s) - start_time )) + log " still building... (''${elapsed}s elapsed)" + sleep "$POLL_INTERVAL" + done + + # Check if build succeeded + wait "$build_pid" + local exit_code=$? 
+ + local duration=$(( $(date +%s) - start_time )) + if [[ $exit_code -eq 0 ]] && [[ -L "$result_link" ]]; then + log "Build complete (''${duration}s)" + return 0 + else + log "Build FAILED (exit code: $exit_code)" + return 1 + fi + } + + # Wait for SSH to become available + wait_for_ssh() { + local host="$1" + local port="$2" + local max_attempts="''${3:-$SSH_MAX_ATTEMPTS}" + local attempt=0 + + log "Waiting for SSH on $host:$port..." + + while ! sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" true 2>/dev/null; do + attempt=$((attempt + 1)) + if [[ $attempt -ge $max_attempts ]]; then + log "SSH not available after $max_attempts attempts" + return 1 + fi + echo -n "." + sleep "$SSH_RETRY_DELAY" + done + echo "" + log "SSH connected" + return 0 + } + + # Run a check command via SSH + run_ssh_check() { + local host="$1" + local port="$2" + local desc="$3" + shift 3 + local cmd="$*" + + echo -n " CHECK: $desc ... " + if sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" "$cmd" >/dev/null 2>&1; then + echo "OK" + return 0 + else + echo "FAIL" + return 1 + fi + } + + # Check if a service is running + check_service() { + local host="$1" + local port="$2" + local service="$3" + run_ssh_check "$host" "$port" "service $service active" "systemctl is-active $service" + } + + # Check if a port is listening + check_port() { + local host="$1" + local port="$2" + local target_port="$3" + run_ssh_check "$host" "$port" "port $target_port listening" "ss -tlnp | grep -q :$target_port" + } + + # Check if pminfo returns metrics + check_pminfo() { + local host="$1" + local port="$2" + run_ssh_check "$host" "$port" "pminfo kernel.all.load" "pminfo -f kernel.all.load" + } + + # Check Grafana HTTP endpoint (with retry - Grafana needs extra startup time) + check_grafana_http() { + local host="$1" + local port="$2" + local attempt=0 + + echo -n " CHECK: Grafana HTTP on port $GUEST_GRAFANA_PORT ... 
" + while [[ $attempt -lt $HTTP_CHECK_RETRIES ]]; do + if sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" \ + "curl -sf http://localhost:$GUEST_GRAFANA_PORT/api/health" >/dev/null 2>&1; then + echo "OK" + return 0 + fi + attempt=$((attempt + 1)) + if [[ $attempt -lt $HTTP_CHECK_RETRIES ]]; then + echo -n "(retry $attempt) " + sleep "$HTTP_CHECK_DELAY" + fi + done + echo "FAIL" + return 1 + } + + # Check Prometheus HTTP endpoint (with retry) + check_prometheus_http() { + local host="$1" + local port="$2" + local attempt=0 + + echo -n " CHECK: Prometheus HTTP on port $GUEST_PROMETHEUS_PORT ... " + while [[ $attempt -lt $HTTP_CHECK_RETRIES ]]; do + if sshpass -p pcp ssh ${sshOpts} -p "$port" "root@$host" \ + "curl -sf http://localhost:$GUEST_PROMETHEUS_PORT/-/ready" >/dev/null 2>&1; then + echo "OK" + return 0 + fi + attempt=$((attempt + 1)) + if [[ $attempt -lt $HTTP_CHECK_RETRIES ]]; then + echo -n "(retry $attempt) " + sleep "$HTTP_CHECK_DELAY" + fi + done + echo "FAIL" + return 1 + } + + # Check BPF metrics + check_bpf_metrics() { + local host="$1" + local port="$2" + run_ssh_check "$host" "$port" "BPF metrics available" \ + "pminfo bpf 2>/dev/null | grep -q bpf" + } + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + + # Stop all PCP MicroVMs + stop_all_vms() { + log "Stopping any running PCP MicroVMs..." 
+ pkill -f 'microvm@pcp-(vm|eval-vm|grafana-vm|bpf-vm|bcc-vm)' 2>/dev/null || true + sleep 2 + # Force kill if still running + pkill -9 -f 'microvm@pcp-(vm|eval-vm|grafana-vm|bpf-vm|bcc-vm)' 2>/dev/null || true + } + + # ─── Test Runner for a Single Variant ────────────────────────────────── + + test_variant() { + local key="$1" + local name="$2" + local offset="$3" + local is_tap="$4" + local checks="$5" + local extra_timeout="''${6:-0}" + local description="$7" + + local start_time + start_time=$(date +%s) + + log_section "Testing: $name" + log "Description: $description" + log "Port offset: $offset" + + # Calculate ports (offset applied to host-side forwarded ports) + local ssh_port=$((BASE_SSH_PORT + offset)) + # Note: Guest-side ports are fixed (from constants), host-side are offset + # These are logged for debugging but checks use the constant guest ports + + # For TAP, use direct IP + local ssh_host="localhost" + if [[ "$is_tap" == "true" ]]; then + ssh_host="$TAP_VM_IP" + ssh_port=22 + fi + + local result_link="result-test-$key" + local checks_passed=0 + local checks_failed=0 + + # Phase 1: Build + log "" + log "Phase 1: Build" + if ! wait_for_build "$name" "$result_link"; then + RESULTS[$key]="BUILD_FAILED" + DURATIONS[$key]=$(( $(date +%s) - start_time )) + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + return 1 + fi + + # Phase 2: Start VM + log "" + log "Phase 2: Start VM" + log "Starting $result_link/bin/microvm-run..." + "$result_link/bin/microvm-run" & + local vm_pid=$! + sleep 3 # Give VM a moment to initialize + + # Check VM process is running + if ! 
kill -0 "$vm_pid" 2>/dev/null; then + log "VM process died immediately" + RESULTS[$key]="VM_START_FAILED" + DURATIONS[$key]=$(( $(date +%s) - start_time )) + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + return 1 + fi + + # Phase 3: Wait for SSH + log "" + log "Phase 3: SSH connectivity" + local max_attempts=$SSH_MAX_ATTEMPTS + if [[ $extra_timeout -gt 0 ]]; then + max_attempts=$((max_attempts + extra_timeout / SSH_RETRY_DELAY)) + log "(Extended timeout for this variant: +''${extra_timeout}s)" + fi + + if ! wait_for_ssh "$ssh_host" "$ssh_port" "$max_attempts"; then + log "SSH connectivity failed" + stop_all_vms + rm -f "$result_link" + RESULTS[$key]="SSH_FAILED" + DURATIONS[$key]=$(( $(date +%s) - start_time )) + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + return 1 + fi + + # Wait for services to fully start (pmlogger takes ~9s, Grafana ~17s) + # BCC variants need extra time for eBPF compilation (30-60s) + local warmup_time=$SERVICE_WARMUP_SECONDS + if [[ $extra_timeout -gt 0 ]]; then + warmup_time=$((SERVICE_WARMUP_SECONDS + extra_timeout)) + log "Waiting ''${warmup_time}s for services (includes BCC compilation time)..." + else + log "Waiting ''${warmup_time}s for services to start..." 
+ fi + sleep "$warmup_time" + + # Phase 4: Run checks + log "" + log "Phase 4: Service checks" + + # Always check pmcd and basic metrics + if check_service "$ssh_host" "$ssh_port" "pmcd"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + + if check_port "$ssh_host" "$ssh_port" "$GUEST_PMCD_PORT"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + + if check_pminfo "$ssh_host" "$ssh_port"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + + # Check pmproxy if in checks list + if [[ "$checks" == *"pmproxy"* ]]; then + if check_service "$ssh_host" "$ssh_port" "pmproxy"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + if check_port "$ssh_host" "$ssh_port" "$GUEST_PMPROXY_PORT"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + fi + + # Check pmlogger if in checks list + if [[ "$checks" == *"pmlogger"* ]]; then + if check_service "$ssh_host" "$ssh_port" "pmlogger"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + fi + + # Check node_exporter if in checks list + if [[ "$checks" == *"node_exporter"* ]]; then + if check_service "$ssh_host" "$ssh_port" "prometheus-node-exporter"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + if check_port "$ssh_host" "$ssh_port" "$GUEST_NODE_EXPORTER_PORT"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + fi + + # Check Grafana if in checks list + if [[ "$checks" == *"grafana"* ]]; then + if check_service "$ssh_host" "$ssh_port" "grafana"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + if check_grafana_http "$ssh_host" "$ssh_port"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 
1)) + fi + fi + + # Check Prometheus if in checks list + if [[ "$checks" == *"prometheus"* ]]; then + if check_service "$ssh_host" "$ssh_port" "prometheus"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + if check_prometheus_http "$ssh_host" "$ssh_port"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + fi + + # Check BPF if in checks list + if [[ "$checks" == *"bpf"* ]]; then + if check_bpf_metrics "$ssh_host" "$ssh_port"; then + checks_passed=$((checks_passed + 1)) + else + checks_failed=$((checks_failed + 1)) + fi + fi + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + + # Phase 5: Cleanup + log "" + log "Phase 5: Cleanup" + stop_all_vms + rm -f "$result_link" + + # Record result + local duration=$(( $(date +%s) - start_time )) + DURATIONS[$key]=$duration + + if [[ $checks_failed -eq 0 ]]; then + RESULTS[$key]="PASSED ($checks_passed checks)" + TOTAL_PASSED=$((TOTAL_PASSED + 1)) + log "Result: PASSED ($checks_passed checks in ''${duration}s)" + return 0 + else + RESULTS[$key]="FAILED ($checks_failed/$((checks_passed + checks_failed)) failed)" + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + log "Result: FAILED ($checks_failed checks failed in ''${duration}s)" + return 1 + fi + } + + # ─── Main ────────────────────────────────────────────────────────────── + + log_section "PCP MicroVM Test Suite" + log "Starting comprehensive MicroVM tests" + log "Build poll interval: ''${POLL_INTERVAL}s" + log "Skip TAP variants: $SKIP_TAP" + [[ -n "$ONLY_VARIANT" ]] && log "Only testing: $ONLY_VARIANT" + + # Ensure clean state + stop_all_vms + + # Define test order (NOTE: BCC is deprecated - use BPF PMDA instead) + VARIANT_ORDER=(base base-tap eval eval-tap grafana grafana-tap bpf) + + for key in "''${VARIANT_ORDER[@]}"; do + # Skip if --only specified and doesn't match + if [[ -n "$ONLY_VARIANT" ]] && [[ "$key" != "$ONLY_VARIANT" ]] && [[ "$key" != "$ONLY_VARIANT-tap" ]]; then 
+ continue + fi + + # Skip TAP variants if requested + if [[ "$SKIP_TAP" == "true" ]] && [[ "$key" == *"-tap" ]]; then + RESULTS[$key]="SKIPPED" + DURATIONS[$key]=0 + TOTAL_SKIPPED=$((TOTAL_SKIPPED + 1)) + continue + fi + + # Get variant properties + case "$key" in + base) + test_variant "$key" "pcp-microvm" "${toString constants.variantPortOffsets.base}" "false" \ + "pmcd pmproxy pmlogger" "0" "Base PCP (pmcd, pmlogger, pmproxy)" + ;; + base-tap) + test_variant "$key" "pcp-microvm-tap" "${toString constants.variantPortOffsets.base}" "true" \ + "pmcd pmproxy pmlogger" "0" "Base PCP with TAP networking" + ;; + eval) + test_variant "$key" "pcp-microvm-eval" "${toString constants.variantPortOffsets.eval}" "false" \ + "pmcd pmproxy node_exporter" "0" "Eval (+ node_exporter, below, pmie-test)" + ;; + eval-tap) + test_variant "$key" "pcp-microvm-eval-tap" "${toString constants.variantPortOffsets.eval}" "true" \ + "pmcd pmproxy node_exporter" "0" "Eval with TAP networking" + ;; + grafana) + test_variant "$key" "pcp-microvm-grafana" "${toString constants.variantPortOffsets.grafana}" "false" \ + "pmcd pmproxy node_exporter grafana prometheus" "0" "Grafana (+ Prometheus dashboards)" + ;; + grafana-tap) + test_variant "$key" "pcp-microvm-grafana-tap" "${toString constants.variantPortOffsets.grafana}" "true" \ + "pmcd pmproxy node_exporter grafana prometheus" "0" "Grafana with TAP networking" + ;; + bpf) + test_variant "$key" "pcp-microvm-bpf" "${toString constants.variantPortOffsets.bpf}" "false" \ + "pmcd pmproxy node_exporter bpf" "0" "BPF PMDA (pre-compiled eBPF)" + ;; + # NOTE: BCC is deprecated - use BPF PMDA instead (CO-RE eBPF) + esac + done + + # ─── Summary ─────────────────────────────────────────────────────────── + + log_section "Test Summary" + + echo "" + printf "%-15s %-40s %10s\n" "VARIANT" "RESULT" "DURATION" + printf "%-15s %-40s %10s\n" "───────" "──────" "────────" + + for key in "''${VARIANT_ORDER[@]}"; do + if [[ -n "''${RESULTS[$key]:-}" ]]; then + 
printf "%-15s %-40s %10s\n" "$key" "''${RESULTS[$key]}" "''${DURATIONS[$key]}s" + fi + done + + echo "" + echo "────────────────────────────────────────────────────────────────────" + echo "Total: $TOTAL_PASSED passed, $TOTAL_FAILED failed, $TOTAL_SKIPPED skipped" + echo "────────────────────────────────────────────────────────────────────" + + if [[ $TOTAL_FAILED -gt 0 ]]; then + exit 1 + fi + exit 0 + ''; +} diff --git a/nix/variants.nix b/nix/variants.nix new file mode 100644 index 0000000000..17f7f30425 --- /dev/null +++ b/nix/variants.nix @@ -0,0 +1,90 @@ +# nix/variants.nix +# +# Centralized MicroVM variant definitions for PCP. +# +# This file defines all MicroVM variants in one place, eliminating duplication +# in flake.nix. Each variant specifies its configuration options and whether +# it supports TAP networking. +# +# USAGE: +# variants = import ./nix/variants.nix { inherit constants; }; +# variants.definitions.grafana.config # -> { enableGrafana = true; ... } +# variants.mkPackageName "grafana" "tap" # -> "pcp-microvm-grafana-tap" +# +# ADDING A NEW VARIANT: +# 1. Add entry to definitions below +# 2. Add port offset in constants.nix (variantPortOffsets) +# 3. Add console block in constants.nix (console.variantBlocks) +# 4. 
Run: nix flake show (verify new packages appear) +# +{ constants }: +rec { + # ─── Variant Definitions ──────────────────────────────────────────────── + # Each variant specifies: + # - description: Human-readable description for documentation + # - config: Options passed to mkMicroVM (merged with defaults) + # - supportsTap: Whether to generate a TAP networking variant + # + definitions = { + base = { + description = "Base PCP (pmcd, pmlogger, pmproxy)"; + config = {}; # Uses defaults + supportsTap = true; + }; + + eval = { + description = "Evaluation (+ node_exporter, pmie-test)"; + config = { + enablePmlogger = false; + enableEvalTools = true; + enablePmieTest = true; + }; + supportsTap = true; + }; + + grafana = { + description = "Grafana (+ Prometheus + BPF dashboards)"; + config = { + enablePmlogger = false; + enableEvalTools = true; + enablePmieTest = true; + enableGrafana = true; + enableBpf = true; + }; + supportsTap = true; + }; + + bpf = { + description = "BPF PMDA (pre-compiled eBPF)"; + config = { + enablePmlogger = false; + enableEvalTools = true; + enablePmieTest = true; + enableBpf = true; + }; + supportsTap = false; + }; + }; + + # ─── Helper Functions ─────────────────────────────────────────────────── + + # Generate package name for a variant and networking mode + # mkPackageName "base" "user" -> "pcp-microvm" + # mkPackageName "base" "tap" -> "pcp-microvm-tap" + # mkPackageName "grafana" "user" -> "pcp-microvm-grafana" + # mkPackageName "grafana" "tap" -> "pcp-microvm-grafana-tap" + mkPackageName = variant: networking: + let + base = if variant == "base" then "pcp-microvm" else "pcp-microvm-${variant}"; + in + if networking == "tap" then "${base}-tap" else base; + + # Generate test app name for a variant and networking mode + # mkTestAppName "base" "user" -> "pcp-test-base-user" + # mkTestAppName "grafana" "tap" -> "pcp-test-grafana-tap" + mkTestAppName = variant: networking: + "pcp-test-${variant}-${networking}"; + + # List of all variant names 
+ variantNames = builtins.attrNames definitions; +} diff --git a/nix/vm-test.nix b/nix/vm-test.nix index 750ecaf28e..33909d8718 100644 --- a/nix/vm-test.nix +++ b/nix/vm-test.nix @@ -1,95 +1,52 @@ -# NixOS VM test for Performance Co-Pilot (PCP) +# nix/vm-test.nix +# +# NixOS VM integration test for PCP. +# Uses the shared NixOS module for service configuration. # # This test verifies: -# - PCP package builds and installs correctly -# - pmcd daemon can start and listen on port 44321 -# - Basic metrics can be queried via pminfo +# - PCP services start correctly via systemd +# - pmcd, pmlogger, pmie, pmproxy all function +# - Basic metrics can be queried +# - Archives are created by pmlogger # # Run via flake: nix flake check # Or standalone: nix build .#checks.x86_64-linux.vm-test -{ - pkgs, - pcp, -}: - +# +{ pkgs, pcp }: +let + constants = import ./constants.nix; + nixosModule = import ./nixos-module.nix; +in pkgs.testers.nixosTest { name = "pcp-vm-test"; - nodes.machine = - { pkgs, ... }: - { - environment.systemPackages = [ pcp ]; - - # Create pcp user/group required by pmcd - users.users.pcp = { - isSystemUser = true; - group = "pcp"; - description = "Performance Co-Pilot daemon user"; - }; - users.groups.pcp = { }; - - # Create required runtime directories - systemd.tmpfiles.rules = [ - "d /var/lib/pcp 0755 pcp pcp -" - "d /var/log/pcp 0755 pcp pcp -" - "d /run/pcp 0755 pcp pcp -" - ]; + nodes.machine = { ... 
}: { + imports = [ nixosModule ]; + services.pcp = { + enable = true; + package = pcp; + preset = "custom"; # Use custom to control which services are enabled + pmlogger.enable = false; # Requires additional configuration + pmie.enable = false; # Requires additional configuration + pmproxy.enable = true; }; + }; testScript = '' machine.wait_for_unit("multi-user.target") - # Verify the package is installed and pminfo works - machine.succeed("pminfo --version") - - # Find the actual pcp package path by resolving pminfo symlink - # pminfo is at /nix/store/xxx-pcp-7.0.5/bin/pminfo - # We need to get the package root to find libexec/pcp/bin/pmcd - pminfo_real = machine.succeed("realpath $(which pminfo)").strip() - print(f"pminfo real path: {pminfo_real}") - - # Get package root (two levels up from bin/pminfo) - pkg_root = machine.succeed(f"dirname $(dirname {pminfo_real})").strip() - pmcd_path = f"{pkg_root}/libexec/pcp/bin/pmcd" - print(f"pmcd expected at: {pmcd_path}") - - # Verify pmcd exists - machine.succeed(f"test -x {pmcd_path}") + # Wait for core PCP services + machine.wait_for_unit("pmcd.service") + machine.wait_for_unit("pmproxy.service") - # PCP_CONF points to the main configuration file - # After our postInstall, config is at share/pcp/etc/pcp.conf - pcp_conf = f"{pkg_root}/share/pcp/etc/pcp.conf" + # Wait for ports to be listening + machine.wait_for_open_port(${toString constants.ports.pmcd}) + machine.wait_for_open_port(${toString constants.ports.pmproxy}) - # Start pmcd daemon in background with PCP_CONF set - machine.succeed(f"PCP_CONF={pcp_conf} setsid {pmcd_path} -f > /tmp/pmcd.log 2>&1 &") - - # Give pmcd a moment to start or fail - import time - time.sleep(3) - - # Debug: check if pmcd is running and show log - print("=== Checking pmcd process ===") - ps_out = machine.succeed("ps aux | grep pmcd || true") - print(ps_out) - - print("=== pmcd log contents ===") - log_out = machine.succeed("cat /tmp/pmcd.log 2>/dev/null || echo 'No log file'") - 
print(log_out) - - print("=== Checking listening ports ===") - ports_out = machine.succeed("ss -tlnp | grep -E '44321|pmcd' || echo 'No pmcd ports found'") - print(ports_out) - - # Wait for pmcd to start listening on its default port - machine.wait_for_open_port(44321, timeout=30) - - # Query basic kernel metrics to verify pmcd is working - machine.succeed(f"PCP_CONF={pcp_conf} pminfo -f kernel.all.load") - - # Additional verification: check pmcd is responding - machine.succeed(f"PCP_CONF={pcp_conf} pminfo -h localhost kernel.all.cpu.user") + # Basic metric queries + machine.succeed("pminfo -f kernel.all.load") + machine.succeed("pminfo -h localhost kernel.all.cpu.user") print("=== PCP VM test passed! ===") ''; } -