From ae1e482e05220fcf52d5d25f0c076cbcbc8a01f2 Mon Sep 17 00:00:00 2001
From: testvalue <wgordon@redhat.com>
Date: Wed, 18 Mar 2026 17:54:30 -0400
Subject: [PATCH 1/2] feat(vm): adds async VM integration with discovery and
 validation

---
 pyproject.toml               |   2 +-
 src/mac2nix/cli.py           | 108 +++-
 src/mac2nix/vm/__init__.py   |  30 ++
 src/mac2nix/vm/_utils.py     | 170 ++++++
 src/mac2nix/vm/comparator.py | 272 ++++++++++
 src/mac2nix/vm/discovery.py  | 261 ++++++++++
 src/mac2nix/vm/manager.py    | 296 +++++++++++
 src/mac2nix/vm/validator.py  | 396 ++++++++++++++
 tests/test_cli_vm.py         | 271 ++++++++++
 tests/vm/__init__.py         |   0
 tests/vm/test_comparator.py  | 667 ++++++++++++++++++++++++
 tests/vm/test_discovery.py   | 953 ++++++++++++++++++++++++++++++++++
 tests/vm/test_manager.py     | 974 +++++++++++++++++++++++++++++++++++
 tests/vm/test_validator.py   | 653 +++++++++++++++++++++++
 tests/vm/test_vm_utils.py    | 480 +++++++++++++++++
 15 files changed, 5528 insertions(+), 5 deletions(-)
 create mode 100644 src/mac2nix/vm/__init__.py
 create mode 100644 src/mac2nix/vm/_utils.py
 create mode 100644 src/mac2nix/vm/comparator.py
 create mode 100644 src/mac2nix/vm/discovery.py
 create mode 100644 src/mac2nix/vm/manager.py
 create mode 100644 src/mac2nix/vm/validator.py
 create mode 100644 tests/test_cli_vm.py
 create mode 100644 tests/vm/__init__.py
 create mode 100644 tests/vm/test_comparator.py
 create mode 100644 tests/vm/test_discovery.py
 create mode 100644 tests/vm/test_manager.py
 create mode 100644 tests/vm/test_validator.py
 create mode 100644 tests/vm/test_vm_utils.py

diff --git a/pyproject.toml b/pyproject.toml
index 51b5ce6..df05a00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,7 @@ ignore = [
 ]
 
 [tool.ruff.lint.per-file-ignores]
-"**/tests/**/*.py" = ["S101", "S105", "S108", "SLF001"]  # assert + test data + private access OK in tests
+"**/tests/**/*.py" = ["S101", "S105", "S106", "S107", "S108", "SLF001", "ARG001"]  # assert + test data + credentials + private access + unused mock args OK in tests
 
 [tool.ruff.lint.isort]
 known-first-party = ["mac2nix"]
diff --git a/src/mac2nix/cli.py b/src/mac2nix/cli.py
index 7276f22..9dd64ba 100644
--- a/src/mac2nix/cli.py
+++ b/src/mac2nix/cli.py
@@ -10,8 +10,12 @@
 from rich.console import Console
 from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
 
+from mac2nix.models.system_state import SystemState
 from mac2nix.orchestrator import run_scan
 from mac2nix.scanners import get_all_scanners
+from mac2nix.vm.discovery import DiscoveryRunner
+from mac2nix.vm.manager import TartVMManager
+from mac2nix.vm.validator import Validator
 
 
 @click.group()
@@ -105,9 +109,60 @@ def generate() -> None:
 
 
 @main.command()
-def validate() -> None:
+@click.option(
+    "--flake-path",
+    required=True,
+    type=click.Path(exists=True, file_okay=False, path_type=Path),
+    help="Path to the nix-darwin flake directory.",
+)
+@click.option(
+    "--scan-file",
+    required=True,
+    type=click.Path(exists=True, dir_okay=False, path_type=Path),
+    help="Source SystemState JSON produced by 'mac2nix scan'.",
+)
+@click.option("--base-vm", default="base-macos", show_default=True, help="Base Tart VM name.")
+@click.option("--vm-user", default="admin", show_default=True, help="SSH username inside the VM.")
+@click.option("--vm-password", default="admin", show_default=True, help="SSH password inside the VM.")
+def validate(
+    flake_path: Path,
+    scan_file: Path,
+    base_vm: str,
+    vm_user: str,
+    vm_password: str,
+) -> None:
     """Validate generated configuration in a Tart VM."""
-    click.echo("validate: not yet implemented")
+    if not TartVMManager.is_available():
+        raise click.ClickException("tart CLI not found — install tart to use 'validate'.")
+
+    try:
+        source_state = SystemState.from_json(scan_file)
+    except Exception as exc:
+        raise click.ClickException(f"Failed to load scan file: {exc}") from exc
+
+    async def _run() -> None:
+        async with TartVMManager(base_vm, vm_user, vm_password) as vm:
+            result = await Validator(vm).validate(flake_path, source_state)
+
+        if result.errors:
+            click.echo("Validation errors:", err=True)
+            for error in result.errors:
+                click.echo(f"  {error}", err=True)
+
+        if result.fidelity:
+            click.echo(f"Overall fidelity: {result.fidelity.overall_score:.1%}")
+            for domain, ds in sorted(result.fidelity.domain_scores.items()):
+                click.echo(f"  {domain}: {ds.score:.1%} ({ds.matching_fields}/{ds.total_fields})")
+
+        if not result.success:
+            raise click.ClickException("Validation failed.")
+
+    try:
+        asyncio.run(_run())
+    except click.ClickException:
+        raise
+    except Exception as exc:
+        raise click.ClickException(str(exc)) from exc
 
 
 @main.command()
@@ -117,6 +172,51 @@ def diff() -> None:
 
 
 @main.command()
-def discover() -> None:
+@click.option("--package", required=True, help="Package name to install and discover.")
+@click.option(
+    "--type",
+    "package_type",
+    default="brew",
+    show_default=True,
+    type=click.Choice(["brew", "cask"]),
+    help="Package manager type.",
+)
+@click.option("--base-vm", default="base-macos", show_default=True, help="Base Tart VM name.")
+@click.option("--vm-user", default="admin", show_default=True, help="SSH username inside the VM.")
+@click.option("--vm-password", default="admin", show_default=True, help="SSH password inside the VM.")
+@click.option(
+    "--output",
+    "-o",
+    type=click.Path(path_type=Path),
+    default=None,
+    metavar="FILE",
+    help="Write JSON result to FILE instead of stdout.",
+)
+def discover(  # noqa: PLR0913
+    package: str,
+    package_type: str,
+    base_vm: str,
+    vm_user: str,
+    vm_password: str,
+    output: Path | None,
+) -> None:
     """Discover app config paths by installing in a Tart VM."""
-    click.echo("discover: not yet implemented")
+    if not TartVMManager.is_available():
+        raise click.ClickException("tart CLI not found — install tart to use 'discover'.")
+
+    async def _run() -> str:
+        async with TartVMManager(base_vm, vm_user, vm_password) as vm:
+            result = await DiscoveryRunner(vm).discover(package, package_type)
+        return result.model_dump_json(indent=2)
+
+    try:
+        json_output = asyncio.run(_run())
+    except Exception as exc:
+        raise click.ClickException(str(exc)) from exc
+
+    if output is not None:
+        output.parent.mkdir(parents=True, exist_ok=True)
+        output.write_text(json_output)
+        click.echo(f"Discovery result written to {output}", err=True)
+    else:
+        click.echo(json_output)
diff --git a/src/mac2nix/vm/__init__.py b/src/mac2nix/vm/__init__.py
new file mode 100644
index 0000000..72c9d2a
--- /dev/null
+++ b/src/mac2nix/vm/__init__.py
@@ -0,0 +1,30 @@
+"""VM integration package for mac2nix."""
+
+from mac2nix.vm._utils import VMConnectionError, VMError, VMTimeoutError
+from mac2nix.vm.comparator import FileSystemComparator
+from mac2nix.vm.discovery import DiscoveryResult, DiscoveryRunner
+from mac2nix.vm.manager import TartVMManager
+from mac2nix.vm.validator import (
+    DomainScore,
+    FidelityReport,
+    Mismatch,
+    ValidationResult,
+    Validator,
+    compute_fidelity,
+)
+
+__all__ = [
+    "DiscoveryResult",
+    "DiscoveryRunner",
+    "DomainScore",
+    "FidelityReport",
+    "FileSystemComparator",
+    "Mismatch",
+    "TartVMManager",
+    "VMConnectionError",
+    "VMError",
+    "VMTimeoutError",
+    "ValidationResult",
+    "Validator",
+    "compute_fidelity",
+]
diff --git a/src/mac2nix/vm/_utils.py b/src/mac2nix/vm/_utils.py
new file mode 100644
index 0000000..576be4f
--- /dev/null
+++ b/src/mac2nix/vm/_utils.py
@@ -0,0 +1,170 @@
+"""Async VM utilities — subprocess execution, SSH, and exception types."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import logging
+import os
+import shutil
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Exception hierarchy
+# ---------------------------------------------------------------------------
+
+
+class VMError(Exception):
+    """Base exception for VM integration errors."""
+
+
+class VMConnectionError(VMError):
+    """Raised when an SSH/network connection to a VM cannot be established."""
+
+
+class VMTimeoutError(VMError):
+    """Raised when a VM operation exceeds its timeout."""
+
+
+# ---------------------------------------------------------------------------
+# Tool availability
+# ---------------------------------------------------------------------------
+
+
+def is_tart_available() -> bool:
+    """Return True if the tart CLI is on PATH."""
+    return shutil.which("tart") is not None
+
+
+def is_sshpass_available() -> bool:
+    """Return True if sshpass is on PATH."""
+    return shutil.which("sshpass") is not None
+
+
+# ---------------------------------------------------------------------------
+# Async subprocess helpers
+# ---------------------------------------------------------------------------
+
+
+async def async_run_command(
+    cmd: list[str],
+    *,
+    timeout: int = 30,
+    env: dict[str, str] | None = None,
+) -> tuple[int, str, str]:
+    """Run a subprocess command asynchronously.
+
+    Validates that the executable exists before running. Never uses shell=True.
+
+    Args:
+        cmd: Command as an argument list. cmd[0] must be the executable name.
+        timeout: Maximum seconds to wait for the process. Defaults to 30.
+        env: Extra environment variables merged into the current environment.
+
+    Returns:
+        Tuple of (returncode, stdout, stderr).
+
+    Raises:
+        VMError: If the executable is not found on PATH.
+        VMTimeoutError: If the process exceeds *timeout* seconds.
+    """
+    executable = cmd[0]
+    if shutil.which(executable) is None:
+        logger.warning("Executable not found: %s", executable)
+        raise VMError(f"Executable not found: {executable}")
+
+    logger.debug("Running async command: %s", cmd)
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+            env={**os.environ, **env} if env else None,
+        )
+        try:
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(proc.communicate(), timeout=timeout)
+        except TimeoutError:
+            proc.kill()
+            with contextlib.suppress(Exception):  # Best-effort reap — process may already be gone
+                await proc.communicate()
+            logger.warning("Command timed out after %ds: %s", timeout, cmd)
+            raise VMTimeoutError(f"Command timed out after {timeout}s: {cmd}") from None
+
+        returncode = proc.returncode if proc.returncode is not None else -1
+        return returncode, stdout_bytes.decode(errors="replace"), stderr_bytes.decode(errors="replace")
+
+    except (VMError, VMTimeoutError):
+        raise
+    except FileNotFoundError:
+        logger.warning("Executable not found during execution: %s", executable)
+        raise VMError(f"Executable not found during execution: {executable}") from None
+    except OSError as exc:
+        logger.warning("OS error running command %s: %s", cmd, exc)
+        raise VMError(f"OS error running {cmd}: {exc}") from exc
+
+
+async def async_ssh_exec(
+    ip: str,
+    user: str,
+    password: str,
+    cmd: list[str],
+    *,
+    timeout: int = 30,
+) -> tuple[bool, str, str]:
+    """Execute a command on a remote VM via SSH using sshpass.
+
+    Builds the argument list with sshpass + ssh options. For remote commands
+    involving pipes or redirects, wrap in ``['bash', '-c', 'pipeline']``.
+
+    Args:
+        ip: IP address or hostname of the VM.
+        user: SSH username.
+        password: SSH password (passed to sshpass, never via shell).
+        cmd: Remote command as an argument list.
+        timeout: Maximum seconds to wait. Defaults to 30.
+
+    Returns:
+        Tuple of (success, stdout, stderr). ``success`` is True when returncode == 0.
+
+    Raises:
+        VMConnectionError: If sshpass is not available.
+        VMTimeoutError: If the SSH command exceeds *timeout* seconds.
+        VMError: For other subprocess failures.
+    """
+    if not is_sshpass_available():
+        raise VMConnectionError("sshpass is not available — cannot perform SSH exec")
+
+    # Use sshpass -e (reads password from SSHPASS env var) to avoid exposing
+    # the password in process listings (ps aux shows argv, not environment).
+    ssh_cmd = [
+        "sshpass",
+        "-e",
+        "ssh",
+        "-o",
+        "StrictHostKeyChecking=no",
+        "-o",
+        "UserKnownHostsFile=/dev/null",
+        "-o",
+        "LogLevel=ERROR",
+        "-o",
+        f"ConnectTimeout={max(timeout // 2, 5)}",
+        f"{user}@{ip}",
+        "--",
+        *cmd,
+    ]
+
+    logger.debug("Running SSH exec on %s@%s: %s", user, ip, cmd)
+    try:
+        returncode, stdout, stderr = await async_run_command(ssh_cmd, timeout=timeout, env={"SSHPASS": password})
+    except VMTimeoutError:
+        raise
+    except VMError as exc:
+        raise VMConnectionError(f"SSH connection to {user}@{ip} failed: {exc}") from exc
+
+    success = returncode == 0
+    if not success:
+        logger.debug("SSH exec on %s@%s returned %d: %s", user, ip, returncode, stderr.strip())
+
+    return success, stdout, stderr
diff --git a/src/mac2nix/vm/comparator.py b/src/mac2nix/vm/comparator.py
new file mode 100644
index 0000000..3c19ea7
--- /dev/null
+++ b/src/mac2nix/vm/comparator.py
@@ -0,0 +1,272 @@
+"""FileSystemComparator — VM filesystem snapshot and diff engine."""
+
+from __future__ import annotations
+
+import logging
+import re
+import shlex
+from datetime import datetime
+
+from mac2nix.vm._utils import VMError
+from mac2nix.vm.manager import TartVMManager
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Noise filter patterns (10 categories, adapted from macbook-config-mirror)
+# ---------------------------------------------------------------------------
+
+_FILTER_PATTERNS: dict[str, list[str]] = {
+    # Apple system services that commonly create metadata
+    "apple_services": [
+        r".*com\.apple\..*",
+    ],
+    # System directories with frequently changing metadata
+    "system_directories": [
+        r"/private/var/(spool|run|sntpd|sntp|select|rpc|protected|networkd|netboot|msgs|mail|ma|logs|log|folders|db|empty|dirs_cleaner)/.*",
+        r"/private/tmp/.*",
+        r"/private/etc/cups/.*",
+        r"/private/var/tmp/.*",
+        r"/Volumes/.*_Cryptex/\.fseventsd/.*",
+    ],
+    # Library directories that contain system-managed data
+    "library_system_data": [
+        (
+            r".*/Library/(Saved Application State|DataDeliveryServices|DoNotDisturb"
+            r"|HomeKit|PPM|Passes|Suggestions|UnifiedAssetFramework|TRM"
+            r"|MemoryMaintenance|MediaRemote|Mail|Logs|Keychains|HTTPStorages|Audio"
+            r"|Caches|Receipts|Bluetooth|CoreAnalytics|OSAnalytics|Trial|Assistant"
+            r"|Messages|Photos|Biome|Metadata|IdentityServices|AppleMediaServices"
+            r"|DuetExpertCenter|IntelligencePlatform|News)/.*"
+        ),
+        r".*/Library/Application Support/(AddressBook|CallHistoryDB|FaceTime|CrashReporter|DiskImages).*",
+        r".*/Library/Daemon Containers/.*/Data/ActionTranscript/System/local/.*",
+    ],
+    # Development and documentation files from Homebrew
+    "homebrew_noise": [
+        r"/opt/homebrew/\.git.*",
+        r"/opt/homebrew/Cellar/go/.*",
+        r"/opt/homebrew(/Cellar/[^/]+/[^/]+)?/(share|include)/.*",
+        r"/opt/homebrew/Library/Homebrew/(vendor|test|unpack_strategy|rubocops|sorbet|utils)/.*",
+        r"/opt/homebrew/Library/Homebrew/.*\.rb$",
+        r"/opt/homebrew/Library/Taps/.*/\.git/.*",
+    ],
+    # Application framework internals (not app-specific)
+    "app_internals": [
+        r"/Applications/.*/Contents/Frameworks/.*",
+        r"/Applications/.*/Contents/_CodeSignature/.*",
+        r"/Applications/.*/Contents/Resources/.*",
+    ],
+    # Database temporary files
+    "database_files": [
+        r".*\.[^.]*(db|sqlite(3)?)-(shm|wal)$",
+        r".*\.db\..*\.migrated$",
+    ],
+    # Cache directories
+    "caches": [
+        r".*/Caches/.*",
+        r".*/\.?cache/.*",  # matches both /cache/ and /.cache/
+        r".*/\.Spotlight-V100/.*",
+    ],
+    # Common temporary and system files
+    "temp_and_system": [
+        r".*/\.DS_Store$",
+        r".*\.swp$",
+        r".*\.tmp$",
+        r".*\.git/.*",
+        r".*\.localized$",
+    ],
+    # Test artifacts from the scanning process
+    "test_artifacts": [
+        r"/Users/.*/before\.txt$",
+        r"/Users/.*/after\.txt$",
+    ],
+    # Files that always change between snapshots
+    "always_modified": [
+        r"/private/var/root/Library/Preferences/SubmitDiagInfo\.plist",
+        r"/private/var/root/Library/ContainerManager/System/boot\.txt",
+        r"/Library/Updates/ProductMetadata\.plist",
+        r"/Library/Preferences/SystemConfiguration/OSThermalStatus\.plist",
+        r"/Library/Preferences/SystemConfiguration/NetworkInterfaces\.plist",
+        r"/Library/Printers/InstalledPrinters\.plist",
+        r"/opt/homebrew/var/homebrew/locks/update",
+        r"/opt/homebrew/etc/paths",
+        r"/Users/.*/Library/Shortcuts/ToolKit/Tools-prod.*\.sqlite",
+        r"/Users/.*/Library/Sharing/AutoUnlock/pairing-records\.plist",
+        r"/Users/.*/Library/Sharing/AirDropHashDB/AirDropHashDB\.airdrop_dbv4",
+        r"/Users/.*/Library/Safari/PasswordBreachStore\.plist",
+        r"/Users/.*/Library/Preferences/pbs\.plist",
+        r"/Users/.*/Library/Preferences/no_backup/commcenter_runtime_storage\.plist",
+        r"/Users/.*/Library/Preferences/diagnostics_agent\.plist",
+        r"/Users/.*/Library/PersonalizationPortrait/Topics/ScoreCache",
+        r"/Users/.*/Library/ContainerManager/boot\.txt",
+        r"/Users/.*/.zshenv",
+        r"/Users/.*/.zsh_history",
+        r"/Users/.*/Library/Preferences/ContextStoreAgent\.plist",
+        r"/Users/.*/Library/Preferences/\.GlobalPreferences\.plist",
+    ],
+}
+
+# Compile once at import time.
+_COMPILED_PATTERNS: list[re.Pattern[str]] = []
+for _category, _pats in _FILTER_PATTERNS.items():
+    for _pat in _pats:
+        try:
+            _COMPILED_PATTERNS.append(re.compile(_pat))
+        except re.error:
+            logger.warning("Invalid noise filter regex (skipped): %r", _pat)
+
+
+# ---------------------------------------------------------------------------
+# Default exclude directories for find commands
+# ---------------------------------------------------------------------------
+
+_DEFAULT_EXCLUDE_DIRS: list[str] = ["Spotlight", "Caches", "AssetsV2", "Pictures"]
+
+
+class FileSystemComparator:
+    """Async filesystem snapshot and diff engine for VM-based package discovery.
+
+    Takes before/after filesystem snapshots via remote ``find`` commands through
+    :class:`~mac2nix.vm.manager.TartVMManager`, computes created/deleted/modified
+    file lists, and applies noise filters.
+
+    :param vm: Active :class:`TartVMManager` with a running clone.
+    :param scan_root: Remote root to scan (default ``'/System/Volumes/Data'``).
+    :param exclude_dirs: Directory names to prune during find (default list
+        includes Spotlight, Caches, AssetsV2, Pictures).
+    """
+
+    def __init__(
+        self,
+        vm: TartVMManager,
+        scan_root: str = "/System/Volumes/Data",
+        exclude_dirs: list[str] | None = None,
+    ) -> None:
+        self._vm = vm
+        self._scan_root = scan_root
+        self._exclude_dirs: list[str] = exclude_dirs if exclude_dirs is not None else list(_DEFAULT_EXCLUDE_DIRS)
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _build_find_pipeline(self, save_path: str) -> str:
+        """Return a shell pipeline string that snapshots the filesystem to *save_path*."""
+        # Note: exclude_dirs is interpolated unquoted — safe for _DEFAULT_EXCLUDE_DIRS,
+        # but callers overriding it must pass trusted names only. The double quotes
+        # protect * from shell expansion while letting find interpret it as a glob.
+        prune_parts = " -or ".join(f'-path "*/{d}"' for d in self._exclude_dirs)
+        prune_clause = f'\\( {prune_parts} -or -name ".localized" \\) -prune -or -print'
+        quoted_root = shlex.quote(self._scan_root)
+        quoted_save = shlex.quote(save_path)
+        # Use awk substr to strip the scan_root prefix — no regex, no injection risk.
+        prefix_len = len(self._scan_root) + 1  # +1: awk substr is 1-based; the leading "/" is kept
+        return (
+            f"sudo find {quoted_root} {prune_clause} 2>/dev/null"
+            f" | awk '{{print substr($0, {prefix_len})}}'"
+            f" | sort > {quoted_save}"
+        )
+
+    # ------------------------------------------------------------------
+    # Public async API
+    # ------------------------------------------------------------------
+
+    async def snapshot(self, save_path: str) -> None:
+        """Take a sorted filesystem snapshot on the VM and save it to *save_path*.
+
+        Runs a remote ``find | awk | sort > path`` pipeline via bash.
+        Raises :exc:`VMError` if the command fails.
+        """
+        pipeline = self._build_find_pipeline(save_path)
+        logger.debug("Taking snapshot → %s", save_path)
+        success, _out, err = await self._vm.exec_command(["bash", "-c", pipeline], timeout=45)
+        if not success:
+            raise VMError(f"snapshot to {save_path!r} failed: {err.strip()}")
+        logger.debug("Snapshot complete: %s", save_path)
+
+    async def get_created_files(self, before_path: str, after_path: str) -> list[str]:
+        """Return files that appear in *after_path* but not in *before_path*.
+
+        Uses remote ``comm -13`` (lines unique to the second file).
+        """
+        cmd = f"comm -13 {shlex.quote(before_path)} {shlex.quote(after_path)}"
+        success, out, err = await self._vm.exec_command(["bash", "-c", cmd], timeout=45)
+        if not success:
+            logger.warning("get_created_files failed: %s", err.strip())
+        return self.filter_noise(out.strip().splitlines())
+
+    async def get_deleted_files(self, before_path: str, after_path: str) -> list[str]:
+        """Return files that appear in *before_path* but not in *after_path*.
+
+        Uses remote ``comm -23`` (lines unique to the first file).
+        """
+        cmd = f"comm -23 {shlex.quote(before_path)} {shlex.quote(after_path)}"
+        success, out, err = await self._vm.exec_command(["bash", "-c", cmd], timeout=45)
+        if not success:
+            logger.warning("get_deleted_files failed: %s", err.strip())
+        return self.filter_noise(out.strip().splitlines())
+
+    async def get_modified_files(
+        self,
+        since: datetime,
+        scan_root: str | None = None,
+    ) -> list[str]:
+        """Return files modified after *since* that existed before *since*.
+
+        Uses remote ``find -newermt`` + ``stat`` + ``awk`` to exclude newly
+        created files (birth time >= cutoff). Results are noise-filtered.
+
+        :param since: Datetime threshold (files modified after this instant).
+        :param scan_root: Override the instance scan root for this call.
+        """
+        root = scan_root if scan_root is not None else self._scan_root
+        # Note: exclude_dirs is interpolated unquoted (trusted names only) — see _build_find_pipeline.
+        prune_parts = " -or ".join(f'-path "*/{d}"' for d in self._exclude_dirs)
+        prune_clause = f'\\( {prune_parts} -or -name ".localized" \\) -prune -or'
+        ts_iso = since.replace(microsecond=0).isoformat()
+        cutoff = int(since.timestamp())
+        quoted_root = shlex.quote(root)
+        prefix_len = len(root) + 1
+
+        pipeline = (
+            f"sudo find {quoted_root} {prune_clause}"
+            f" -type f -newermt {shlex.quote(ts_iso)}"
+            f" -exec stat -f '%B %N' {{}} + 2>/dev/null"
+            f" | awk -v cutoff={cutoff} '$1 < cutoff {{ print substr($0, index($0, $2)) }}'"
+            f" | awk '{{print substr($0, {prefix_len})}}'"
+            f" | sort"
+        )
+
+        logger.debug("Finding modified files since %s in %s", ts_iso, root)
+        success, out, err = await self._vm.exec_command(["bash", "-c", pipeline], timeout=45)
+        if not success:
+            logger.warning("get_modified_files failed: %s", err.strip())
+        return self.filter_noise(out.strip().splitlines())
+
+    # ------------------------------------------------------------------
+    # Noise filtering
+    # ------------------------------------------------------------------
+
+    def filter_noise(self, files: list[str]) -> list[str]:
+        """Filter *files* against all compiled noise patterns.
+
+        Empty strings are discarded. Each remaining path is tested against the
+        module-level compiled patterns; the first match causes the path to be
+        excluded.
+
+        Returns the filtered list.
+        """
+        result: list[str] = []
+        for raw in files:
+            stripped = raw.strip()
+            if not stripped:
+                continue
+            if not any(p.search(stripped) for p in _COMPILED_PATTERNS):
+                result.append(stripped)
+
+        filtered_count = len(files) - len(result)
+        if filtered_count > 0:
+            logger.debug("Noise filter: removed %d of %d paths", filtered_count, len(files))
+
+        return result
diff --git a/src/mac2nix/vm/discovery.py b/src/mac2nix/vm/discovery.py
new file mode 100644
index 0000000..08eefdc
--- /dev/null
+++ b/src/mac2nix/vm/discovery.py
@@ -0,0 +1,261 @@
+"""DiscoveryRunner — app config path discovery via VM install diffing."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import shlex
+import uuid
+from datetime import UTC, datetime
+
+from pydantic import BaseModel
+
+from mac2nix.vm._utils import VMError
+from mac2nix.vm.comparator import FileSystemComparator
+from mac2nix.vm.manager import TartVMManager
+
+logger = logging.getLogger(__name__)
+
+# Remote snapshot paths inside the VM.
+_REMOTE_BEFORE = "/tmp/mac2nix-before.txt"  # noqa: S108
+_REMOTE_AFTER = "/tmp/mac2nix-after.txt"  # noqa: S108
+
+# Directories searched for new executables/app bundles.
+_EXEC_SEARCH_DIRS = (
+    "/Applications",
+    "/System/Applications",
+    "/opt/homebrew/bin",
+    "/usr/local/bin",
+)
+
+# Binary options tried to trigger config file creation.
+_BINARY_PROBE_OPTIONS = ("--version", "--help", "-v")
+
+
+# ---------------------------------------------------------------------------
+# Model
+# ---------------------------------------------------------------------------
+
+
+class DiscoveryResult(BaseModel):
+    package: str
+    package_type: str
+    created_files: list[str]
+    modified_files: list[str]
+    deleted_files: list[str]
+    executables_found: dict[str, list[str]]
+
+
+# ---------------------------------------------------------------------------
+# Runner
+# ---------------------------------------------------------------------------
+
+
+class DiscoveryRunner:
+    """Orchestrates VM clone + filesystem diff to discover app config paths.
+
+    Workflow per :meth:`discover` call:
+    1. Clone the base VM and start it.
+    2. Pre-installation filesystem snapshot.
+    3. Install the package via ``brew install [--cask]``.
+    4. Settle (5 s).
+    5. Find newly created executables and app bundles.
+    6. Execute them to trigger config file creation, then wait 10 s.
+    7. Post-installation filesystem snapshot.
+    8. Diff created/deleted/modified files; apply noise filters.
+    9. Cleanup the clone (always, even on error).
+
+    :param vm: :class:`TartVMManager` configured with the base VM name and
+        SSH credentials. The manager must NOT have an active clone yet —
+        :meth:`discover` creates a fresh clone per run.
+    """
+
+    def __init__(self, vm: TartVMManager) -> None:
+        self._vm = vm
+
+    async def discover(
+        self,
+        package: str,
+        package_type: str = "brew",
+    ) -> DiscoveryResult:
+        """Run the full discovery workflow for *package*.
+
+        Clone/start failures re-raise as :exc:`VMError`. Install failures and
+        snapshot failures are logged and return a :class:`DiscoveryResult` with
+        empty file lists — indistinguishable from a run where nothing changed.
+        Cleanup always runs.
+        """
+        clone_name = f"mac2nix-discover-{package.replace('/', '-')}-{uuid.uuid4().hex[:8]}"
+        comparator = FileSystemComparator(self._vm)
+
+        try:
+            # Step 1: Clone and start — failure here is fatal (raises VMError).
+            await self._vm.clone(clone_name)
+            await self._vm.start()
+
+            # Step 2: Pre-installation snapshot.
+            try:
+                await comparator.snapshot(_REMOTE_BEFORE)
+            except VMError as exc:
+                logger.error("Pre-snapshot failed for %r: %s", package, exc)
+                return self._empty_result(package, package_type)
+
+            # Step 3: Install the package.
+            install_ok = await self._install_package(package, package_type)
+            if not install_ok:
+                return self._empty_result(package, package_type)
+
+            # Step 4: Settle.
+            await asyncio.sleep(5)
+
+            # Step 5 & 6: Find and execute new executables.
+            install_timestamp = datetime.now(UTC)
+            executables = await self._find_new_executables(package)
+            await self._execute_found(executables)
+
+            # Step 7: Post-installation snapshot.
+            try:
+                await comparator.snapshot(_REMOTE_AFTER)
+            except VMError as exc:
+                logger.error("Post-snapshot failed for %r: %s", package, exc)
+                return self._empty_result(package, package_type, executables)
+
+            # Step 8: Diff.
+            created = await comparator.get_created_files(_REMOTE_BEFORE, _REMOTE_AFTER)
+            deleted = await comparator.get_deleted_files(_REMOTE_BEFORE, _REMOTE_AFTER)
+            modified = await comparator.get_modified_files(install_timestamp)
+
+            return DiscoveryResult(
+                package=package,
+                package_type=package_type,
+                created_files=created,
+                modified_files=modified,
+                deleted_files=deleted,
+                executables_found=executables,
+            )
+
+        finally:
+            # Step 9: Always cleanup.
+            await self._vm.cleanup()
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _empty_result(
+        package: str,
+        package_type: str,
+        executables_found: dict[str, list[str]] | None = None,
+    ) -> DiscoveryResult:
+        return DiscoveryResult(
+            package=package,
+            package_type=package_type,
+            created_files=[],
+            modified_files=[],
+            deleted_files=[],
+            executables_found=executables_found or {},
+        )
+
+    async def _install_package(self, package: str, package_type: str) -> bool:
+        """Run ``brew install`` for *package* inside the VM and report whether it succeeded."""
+        # The package name is shell-quoted because the command line is handed to ``bash -c``.
+        flags = "--cask " if package_type == "cask" else ""
+        command = f"brew install {flags}{shlex.quote(package)}"
+        logger.debug("Installing %r via: %s", package, command)
+        # The remote command is given up to 1800 seconds to finish.
+        installed, _stdout, stderr = await self._vm.exec_command(["bash", "-c", command], timeout=1800)
+        if not installed:
+            logger.warning("Installation failed for %r: %s", package, stderr.strip())
+        return installed
+
+    async def _find_new_executables(self, package: str) -> dict[str, list[str]]:
+        """List executables in the well-known search dirs that the before-snapshot does not contain.
+
+        Returns ``{"apps": [...], "binaries": [...]}``; both lists stay empty if find or comm fails.
+        """
+        apps: list[str] = []
+        binaries: list[str] = []
+        found: dict[str, list[str]] = {"apps": apps, "binaries": binaries}
+        listing = f"/tmp/mac2nix-exec-{package.replace('/', '-')}.txt"  # noqa: S108
+
+        # Sorted listing of .app bundles plus executable files/symlinks, written to a remote temp file.
+        find_pipeline = (
+            f"find {' '.join(_EXEC_SEARCH_DIRS)}"
+            r" \( -name '*.app' -type d -o \( -type f -o -type l \) -perm +111 \)"
+            " -maxdepth 2 2>/dev/null"
+            f" | sort > {shlex.quote(listing)}"
+        )
+        ok, _out, err = await self._vm.exec_command(["bash", "-c", find_pipeline], timeout=30)
+        if not ok:
+            logger.warning("Failed to find executables for %r: %s", package, err.strip())
+            return found
+
+        # ``comm -13`` keeps only the lines unique to the second file, i.e. the new entries.
+        diff_cmd = f"comm -13 {shlex.quote(_REMOTE_BEFORE)} {shlex.quote(listing)}"
+        ok, out, err = await self._vm.exec_command(["bash", "-c", diff_cmd], timeout=15)
+
+        # Remove the temp listing whatever the outcome; the result of ``rm`` is ignored.
+        await self._vm.exec_command(["rm", "-f", listing], timeout=5)
+        if not ok:
+            logger.warning("Failed to compare executable lists for %r: %s", package, err.strip())
+            return found
+
+        for line in out.strip().splitlines():
+            entry = line.strip()
+            if not entry:
+                continue
+            bucket = apps if entry.endswith(".app") else binaries
+            bucket.append(entry)
+
+        logger.debug(
+            "Found %d app(s) and %d binary/binaries for %r",
+            len(apps),
+            len(binaries),
+            package,
+        )
+        return found
+
+    async def _execute_found(self, executables: dict[str, list[str]]) -> None:
+        """Run discovered apps and binaries inside the VM so they can write their config files."""
+        if not executables["apps"] and not executables["binaries"]:
+            return
+
+        # Launch each .app bundle in the background via its CFBundleExecutable.
+        for app_path in executables["apps"]:
+            q_app = shlex.quote(app_path)
+            ok, out, err = await self._vm.exec_command(
+                [
+                    "bash",
+                    "-c",
+                    # Best-effort xattr clear, then read the bundle's executable name from Info.plist.
+                    f"sudo xattr -rc {q_app} >/dev/null 2>&1 || true;"
+                    f" defaults read {shlex.quote(app_path + '/Contents/Info.plist')} CFBundleExecutable",
+                ],
+            )
+            executable_name = out.strip() if ok else ""
+            if not executable_name:  # an empty name would try to run the MacOS directory itself
+                logger.debug("Could not identify executable in %s: %s", app_path, err.strip())
+                continue
+            launch_cmd = f"{shlex.quote(app_path + '/Contents/MacOS/' + executable_name)} >/dev/null 2>&1 & sleep 0.1"
+            await self._vm.exec_command(["bash", "-c", launch_cmd], timeout=2)
+            logger.debug("Launched app bundle: %s", app_path)
+
+        # Probe binaries with every option in _BINARY_PROBE_OPTIONS to trigger config writes.
+        if executables["binaries"]:
+            probe_parts = []
+            for binary in executables["binaries"]:
+                q_bin = shlex.quote(binary)
+                for option in _BINARY_PROBE_OPTIONS:
+                    probe_parts.append(
+                        f"sudo xattr -c {q_bin} 2>/dev/null; timeout 5 {q_bin} {option} >/dev/null 2>&1;"
+                    )
+            probe_parts.append("true")  # Always succeed
+            probe_cmd = " ".join(probe_parts)
+            # Each (binary, option) probe may use its full 5s, so budget for all of them plus slack.
+            timeout = len(executables["binaries"]) * len(_BINARY_PROBE_OPTIONS) * 5 + 3
+            await self._vm.exec_command(["bash", "-c", probe_cmd], timeout=timeout)
+
+        # Allow executables time to write their config files.
+        logger.debug("Waiting 10s for executables to write config files")
+        await asyncio.sleep(10)
diff --git a/src/mac2nix/vm/manager.py b/src/mac2nix/vm/manager.py
new file mode 100644
index 0000000..19ed7e6
--- /dev/null
+++ b/src/mac2nix/vm/manager.py
@@ -0,0 +1,296 @@
+"""TartVMManager — full async VM lifecycle with clone, start, exec, and cleanup."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import logging
+import shutil
+
+from mac2nix.vm._utils import (
+    VMConnectionError,
+    VMError,
+    VMTimeoutError,
+    async_run_command,
+    async_ssh_exec,
+)
+
+logger = logging.getLogger(__name__)
+
+# Lowercase substrings of SSH stderr that mark a transient disconnect worth retrying once.
+_SSH_DISCONNECT_PATTERNS = (
+    "received disconnect",
+    "connection closed",
+    "connection reset by peer",
+    "broken pipe",
+    "connection refused",
+    "no route to host",
+)
+
+
+class TartVMManager:
+    """Async lifecycle manager for a single Tart VM clone.
+
+    Typical usage (leaving the ``async with`` block runs :meth:`cleanup`)::
+
+        async with TartVMManager("sequoia-base") as vm:
+            await vm.clone("sequoia-test-001")
+            await vm.start()
+            ok, out, err = await vm.exec_command(["sw_vers"])
+
+    :param base_vm: Name of the base/template VM to clone.
+    :param vm_user: SSH username inside the VM (default ``'admin'``).
+    :param vm_password: SSH password inside the VM (default ``'admin'``).
+    """
+
+    def __init__(
+        self,
+        base_vm: str,
+        vm_user: str = "admin",
+        vm_password: str = "admin",  # noqa: S107 — well-known tart VM default credential
+    ) -> None:
+        self._base_vm = base_vm
+        self._vm_user = vm_user
+        self._vm_password = vm_password
+        self._current_clone: str | None = None  # set by clone(), cleared by delete()
+        self._vm_process: asyncio.subprocess.Process | None = None  # background ``tart run``
+        self._cached_ip: str | None = None  # filled by get_ip(), reset by cleanup()
+
+    # ------------------------------------------------------------------
+    # Async context manager
+    # ------------------------------------------------------------------
+
+    async def __aenter__(self) -> TartVMManager:
+        return self  # nothing is acquired here; clone() and start() are explicit calls
+
+    async def __aexit__(self, *_: object) -> None:
+        await self.cleanup()  # runs on every exit; exceptions from the body are not suppressed
+
+    # ------------------------------------------------------------------
+    # Public properties
+    # ------------------------------------------------------------------
+
+    @property
+    def vm_user(self) -> str:
+        """SSH username for the VM."""
+        return self._vm_user
+
+    @property
+    def vm_password(self) -> str:
+        """SSH password for the VM."""
+        return self._vm_password
+
+    # ------------------------------------------------------------------
+    # Availability
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def is_available() -> bool:
+        """Return True if the tart CLI is on PATH."""
+        return shutil.which("tart") is not None
+
+    def _require_tart(self) -> None:
+        if not self.is_available():  # guard used by every method that shells out to tart
+            raise VMError("tart CLI is not available — install tart to manage VMs")
+
+    def _require_clone(self) -> str:
+        if self._current_clone is None:  # no successful clone() yet, or delete() already ran
+            raise VMError("No active VM clone — call clone() first")
+        return self._current_clone
+
+    # ------------------------------------------------------------------
+    # Lifecycle methods
+    # ------------------------------------------------------------------
+
+    async def clone(self, clone_name: str) -> None:
+        """Clone *base_vm* into *clone_name* and remember it as the active clone.
+
+        Raises :exc:`VMError` if tart is unavailable or ``tart clone`` exits non-zero.
+        """
+        self._require_tart()
+        logger.debug("Cloning %r → %r", self._base_vm, clone_name)
+        returncode, _stdout, stderr = await async_run_command(["tart", "clone", self._base_vm, clone_name])
+        if returncode != 0:
+            raise VMError(f"tart clone {self._base_vm!r} → {clone_name!r} failed (exit {returncode}): {stderr.strip()}")
+        self._current_clone = clone_name
+        logger.debug("Clone created: %r", clone_name)
+
+    async def start(self) -> None:
+        """Launch the clone in headless mode and wait until SSH is ready.
+
+        Runs ``tart run --no-graphics <clone>`` in the background, then awaits :meth:`wait_ready`.
+        Raises :exc:`VMError` if tart or the clone is missing; errors from :meth:`wait_ready` propagate.
+        """
+        self._require_tart()
+        clone = self._require_clone()
+        logger.debug("Starting VM %r in headless mode", clone)
+        # Output is never read while the VM runs; discard it so a full pipe cannot stall tart.
+        self._vm_process = await asyncio.create_subprocess_exec(
+            "tart",
+            "run",
+            "--no-graphics",
+            clone,
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        logger.debug("VM %r process started (pid=%d)", clone, self._vm_process.pid)
+        await self.wait_ready()
+
+    async def wait_ready(self, max_attempts: int = 10) -> None:
+        """Poll until the VM has an IP and accepts SSH connections.
+
+        Sleeps 5 seconds between attempts. Raises :exc:`VMError` if the ``tart run`` process
+        has exited, or :exc:`VMTimeoutError` once *max_attempts* checks have all failed.
+        """
+        clone = self._require_clone()
+        logger.debug("Waiting for VM %r to be ready (%d attempts)", clone, max_attempts)
+
+        for attempt in range(max_attempts):
+            # Fail fast if the background tart process already exited.
+            if self._vm_process is not None and self._vm_process.returncode is not None:
+                raise VMError(f"VM process for {clone!r} exited unexpectedly (code {self._vm_process.returncode})")
+
+            logger.debug("Readiness check %d/%d for %r", attempt + 1, max_attempts, clone)
+
+            ip = await self.get_ip()
+            if ip:
+                logger.debug("VM %r has IP %s — testing SSH", clone, ip)
+                try:
+                    success, out, err = await async_ssh_exec(
+                        ip, self._vm_user, self._vm_password, ["whoami"], timeout=10
+                    )
+                    if success and self._vm_user in out:
+                        logger.debug("VM %r is ready at %s", clone, ip)
+                        return
+                    logger.debug("SSH not yet ready for %r: %r", clone, err.strip())
+                except (VMConnectionError, VMError):
+                    logger.debug("SSH attempt %d failed for %r", attempt + 1, clone)
+            else:
+                logger.debug("VM %r has no IP yet", clone)
+
+            if attempt < max_attempts - 1:  # no pointless sleep after the final attempt
+                await asyncio.sleep(5)
+
+        raise VMTimeoutError(f"VM {clone!r} did not become ready within {max_attempts * 5}s")
+
+    async def get_ip(self) -> str | None:
+        """Return the IP of the current clone, or None if unavailable.
+
+        Caches the first non-empty answer until :meth:`cleanup` resets it.
+        """
+        if self._cached_ip is not None:
+            return self._cached_ip
+
+        self._require_tart()
+        clone = self._require_clone()
+        try:
+            returncode, stdout, stderr = await async_run_command(["tart", "ip", clone], timeout=5)
+        except (VMError, VMTimeoutError):
+            logger.debug("tart ip %r raised — VM may not be running", clone)
+            return None
+
+        if returncode != 0:
+            logger.debug("tart ip %r returned %d: %s", clone, returncode, stderr.strip())
+            return None
+
+        ip = stdout.strip() or None  # blank output means "no address yet"
+        if ip is not None:
+            self._cached_ip = ip
+        return ip
+
+    async def exec_command(
+        self,
+        cmd: list[str],
+        *,
+        timeout: int = 30,
+    ) -> tuple[bool, str, str]:
+        """Execute *cmd* inside the VM via SSH.
+
+        Detects transient SSH disconnects and retries once with ``timeout * 2``.
+
+        Returns:
+            Tuple of (success, stdout, stderr).
+        """
+        clone = self._require_clone()
+        ip = await self.get_ip()
+        if not ip:
+            logger.error("Could not get IP for VM %r — exec_command aborted", clone)
+            return False, "", "Could not get VM IP address"
+
+        success, out, err = await self._ssh_exec_raw(ip, cmd, timeout=timeout)
+
+        # Detect transient disconnect and retry once; the retry's result is returned either way.
+        if not success and self._is_disconnect(err):
+            logger.info("SSH disconnect detected for %r — retrying once (timeout=%ds)", clone, timeout * 2)
+            success, out, err = await self._ssh_exec_raw(ip, cmd, timeout=timeout * 2)
+            if not success:
+                logger.warning("Retry also failed for %r: %s", clone, err.strip())
+
+        return success, out, err
+
+    async def _ssh_exec_raw(self, ip: str, cmd: list[str], *, timeout: int) -> tuple[bool, str, str]:
+        """Thin wrapper around async_ssh_exec that converts exceptions to (False, '', err)."""
+        try:
+            return await async_ssh_exec(ip, self._vm_user, self._vm_password, cmd, timeout=timeout)
+        except VMTimeoutError as exc:
+            logger.error("SSH exec timed out after %ds: %s", timeout, exc)
+            return False, "", f"SSH command timed out after {timeout}s"
+        except (VMConnectionError, VMError) as exc:
+            logger.error("SSH exec failed: %s", exc)
+            return False, "", str(exc)
+
+    @staticmethod
+    def _is_disconnect(stderr: str) -> bool:
+        """Return True if *stderr* contains a known SSH disconnect pattern (case-insensitive)."""
+        lower = stderr.lower()
+        return any(pattern in lower for pattern in _SSH_DISCONNECT_PATTERNS)
+
+    async def stop(self) -> None:
+        """Stop the current clone. Raises :exc:`VMError` on failure."""
+        self._require_tart()
+        clone = self._require_clone()
+        logger.debug("Stopping VM %r", clone)
+        returncode, _stdout, stderr = await async_run_command(["tart", "stop", clone])
+        if returncode != 0:
+            raise VMError(f"tart stop {clone!r} failed (exit {returncode}): {stderr.strip()}")
+
+    async def delete(self) -> None:
+        """Delete the current clone. Raises :exc:`VMError` on failure."""
+        self._require_tart()
+        clone = self._require_clone()
+        logger.debug("Deleting VM %r", clone)
+        returncode, _stdout, stderr = await async_run_command(["tart", "delete", clone])
+        if returncode != 0:
+            raise VMError(f"tart delete {clone!r} failed (exit {returncode}): {stderr.strip()}")
+        self._current_clone = None  # forgotten only after tart confirmed the deletion
+
+    async def cleanup(self) -> None:
+        """Stop and delete the current clone, logging and ignoring stop/delete errors.
+
+        Also terminates the background ``tart run`` process if still alive.
+        Safe to call even if no clone exists.
+        """
+        if self._vm_process is not None:
+            try:
+                self._vm_process.kill()
+            except (ProcessLookupError, OSError):
+                pass
+            finally:
+                with contextlib.suppress(Exception):
+                    await self._vm_process.communicate()  # reap the child process
+                self._vm_process = None
+
+        self._cached_ip = None
+
+        if self._current_clone is None:
+            return
+
+        clone = self._current_clone
+        try:
+            await self.stop()
+        except Exception:
+            logger.debug("cleanup: stop %r raised (ignored)", clone, exc_info=True)
+        try:
+            await self.delete()
+        except Exception:
+            logger.debug("cleanup: delete %r raised (ignored)", clone, exc_info=True)
diff --git a/src/mac2nix/vm/validator.py b/src/mac2nix/vm/validator.py
new file mode 100644
index 0000000..258c749
--- /dev/null
+++ b/src/mac2nix/vm/validator.py
@@ -0,0 +1,396 @@
+"""Validator — nix-darwin config validation with fidelity scoring."""
+
+from __future__ import annotations
+
+import logging
+import tempfile
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel
+
+from mac2nix.models.system_state import SystemState
+from mac2nix.vm._utils import VMError, async_run_command
+from mac2nix.vm.manager import TartVMManager
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Models (co-located per architect decision)
+# ---------------------------------------------------------------------------
+
+
+class Mismatch(BaseModel):
+    domain: str  # top-level SystemState domain the mismatch belongs to
+    field: str  # mismatch description as produced by _score_domain (usually a dot-path)
+    source_value: Any  # compute_fidelity currently always stores None here
+    target_value: Any  # compute_fidelity currently always stores None here
+
+
+class DomainScore(BaseModel):
+    domain: str  # name of the scored domain (nested models are scored as "parent.field")
+    score: float  # fraction of compared fields that match, as computed by _score_domain
+    total_fields: int  # number of non-None source fields that were counted
+    matching_fields: int  # how many of those fields compared equal in the target
+    mismatches: list[str]  # descriptions (mostly dot-paths) of the fields that differ
+
+
+class FidelityReport(BaseModel):
+    overall_score: float  # domain scores averaged with total_fields as weights (see compute_fidelity)
+    domain_scores: dict[str, DomainScore]  # keyed by SystemState domain field name
+    mismatches: list[Mismatch]  # one entry per mismatch description across all scored domains
+
+
+class ValidationResult(BaseModel):
+    success: bool  # True only when every step of Validator.validate completed
+    fidelity: FidelityReport | None  # None whenever success is False
+    build_output: str  # combined stdout+stderr of the nix-darwin switch step
+    errors: list[str]  # message of the step that failed; empty on success
+
+
+# ---------------------------------------------------------------------------
+# Fidelity comparison (pure function, no I/O)
+# ---------------------------------------------------------------------------
+
+# SystemState fields that are metadata, not scanner domains — excluded from comparison.
+_META_FIELDS = frozenset({"hostname", "scan_timestamp", "macos_version", "architecture"})
+
+# Every remaining SystemState field is treated as a scanner domain by compute_fidelity.
+_DOMAIN_FIELDS: list[str] = [f for f in SystemState.model_fields if f not in _META_FIELDS]
+
+
+def _compare_values(source: Any, target: Any) -> bool:
+    """Return True when *source* and *target* match, ignoring element order for lists."""
+    if not (isinstance(source, list) and isinstance(target, list)):
+        return source == target
+    try:
+        # Hashable items: compare as sets, so order and duplicates are both ignored.
+        return set(source) == set(target)
+    except TypeError:
+        # Unhashable items: compare the sorted string forms of the elements instead.
+        return sorted(map(str, source)) == sorted(map(str, target))
+
+
+def _score_domain(domain: str, source_obj: BaseModel, target_obj: BaseModel | None) -> DomainScore:
+    """Score how many non-None fields of *source_obj* are reproduced by *target_obj*."""
+    if target_obj is None:  # NOTE(review): nested models count once here but are expanded below
+        return DomainScore(
+            domain=domain,
+            score=0.0,
+            total_fields=sum(1 for f in type(source_obj).model_fields if getattr(source_obj, f) is not None),
+            matching_fields=0,
+            mismatches=[f"{domain}: entire domain missing in target"],
+        )
+
+    total = 0
+    matching = 0
+    mismatches: list[str] = []
+
+    for field_name in type(source_obj).model_fields:
+        src_val = getattr(source_obj, field_name)
+        tgt_val = getattr(target_obj, field_name, None)
+
+        if src_val is None:
+            # Source has no data for this field — skip (not a mismatch).
+            continue
+
+        total += 1
+
+        if isinstance(src_val, BaseModel):
+            # Recurse into nested models; a target value that is not a model is scored as missing.
+            sub = _score_domain(f"{domain}.{field_name}", src_val, tgt_val if isinstance(tgt_val, BaseModel) else None)
+            matching += sub.matching_fields
+            total = total - 1 + sub.total_fields  # replace 1 field with sub-field count
+            mismatches.extend(sub.mismatches)
+        elif _compare_values(src_val, tgt_val):
+            matching += 1
+        else:
+            mismatches.append(f"{domain}.{field_name}")  # dot-path of the differing field
+
+    score = (matching / total) if total > 0 else 1.0  # nothing to compare counts as a perfect match
+    return DomainScore(
+        domain=domain,
+        score=round(score, 4),
+        total_fields=total,
+        matching_fields=matching,
+        mismatches=mismatches,
+    )
+
+
+def compute_fidelity(source: SystemState, target: SystemState) -> FidelityReport:
+    """Score how faithfully *target* reproduces *source*, domain by domain.
+
+    Domains that are None in *source* are ignored. Each remaining domain is
+    scored by :func:`_score_domain`, so lists are compared without regard to
+    order, nested Pydantic models are walked recursively, and a domain that is
+    missing from *target* scores 0.0.
+
+    The overall score is the average of the domain scores weighted by each
+    domain's ``total_fields``; it is 1.0 when nothing was compared.
+    """
+    scores: dict[str, DomainScore] = {}
+    mismatches: list[Mismatch] = []
+
+    for name in _DOMAIN_FIELDS:
+        source_domain = getattr(source, name)
+        if source_domain is None:
+            continue  # Nothing to reproduce for this domain.
+
+        domain_score = _score_domain(name, source_domain, getattr(target, name, None))
+        scores[name] = domain_score
+
+        # Descriptions are stored whole (e.g. "domain.field" or "domain.sub.field");
+        # values are left as None because nested paths make getattr lookups fragile.
+        mismatches.extend(
+            Mismatch(
+                domain=name,
+                field=description,
+                source_value=None,
+                target_value=None,
+            )
+            for description in domain_score.mismatches
+        )
+
+    # Weight every domain score by the number of fields it covers.
+    total_weight = sum(score.total_fields for score in scores.values())
+    if total_weight > 0:
+        weighted = sum(score.score * score.total_fields for score in scores.values())
+        overall = weighted / total_weight
+    else:
+        overall = 1.0
+
+    return FidelityReport(
+        overall_score=round(overall, 4),
+        domain_scores=scores,
+        mismatches=mismatches,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Validator
+# ---------------------------------------------------------------------------
+
+
+class Validator:
+    """Validates a generated nix-darwin config by deploying it into a Tart VM.
+
+    Steps performed by :meth:`validate`:
+    1. Copy the flake directory into the VM via SCP.
+    2. Install Nix in the VM with the Determinate Systems installer.
+    3. Run ``nix run nix-darwin -- switch --flake .`` in the copied flake.
+    4. Run a mac2nix scan in the VM via ``nix run`` and SCP the result back.
+    5. Compare the VM's :class:`SystemState` against the source using
+       :func:`compute_fidelity`.
+
+    :param vm: Active :class:`TartVMManager` with a running clone.
+    """
+
+    # Remote paths inside the VM.
+    _REMOTE_FLAKE_DIR = "/tmp/mac2nix-flake"  # noqa: S108
+    _REMOTE_SCAN_PATH = "/tmp/mac2nix-state.json"  # noqa: S108
+
+    def __init__(self, vm: TartVMManager) -> None:
+        self._vm = vm
+
+    async def validate(self, flake_path: Path, source_state: SystemState) -> ValidationResult:
+        """Run the full validation pipeline.
+
+        Returns a successful result with fidelity scores, or ``success=False`` with the
+        failing step in ``errors``; the pipeline stops at the first :exc:`VMError`.
+        """
+        errors: list[str] = []
+        build_output = ""
+
+        try:
+            await self._copy_flake_to_vm(flake_path)
+        except VMError as exc:
+            errors.append(f"copy_flake failed: {exc}")
+            return ValidationResult(success=False, fidelity=None, build_output="", errors=errors)
+
+        try:
+            await self._bootstrap_nix_darwin()
+        except VMError as exc:
+            errors.append(f"bootstrap_nix_darwin failed: {exc}")
+            return ValidationResult(success=False, fidelity=None, build_output="", errors=errors)
+
+        try:
+            build_output = await self._rebuild_switch()
+        except VMError as exc:
+            errors.append(f"darwin-rebuild switch failed: {exc}")
+            return ValidationResult(success=False, fidelity=None, build_output=build_output, errors=errors)
+
+        try:
+            vm_state = await self._scan_vm()
+        except VMError as exc:
+            errors.append(f"vm scan failed: {exc}")
+            return ValidationResult(success=False, fidelity=None, build_output=build_output, errors=errors)
+
+        report = compute_fidelity(source_state, vm_state)
+        return ValidationResult(
+            success=True,
+            fidelity=report,
+            build_output=build_output,
+            errors=[],
+        )
+
+    async def _copy_flake_to_vm(self, flake_path: Path) -> None:
+        """SCP the flake directory into the VM at :attr:`_REMOTE_FLAKE_DIR`.
+
+        Uses sshpass + scp with argument lists (no shell=True).
+        Raises :exc:`VMError` if the VM has no IP, or if the remote mkdir or scp fails.
+        """
+        ip = await self._vm.get_ip()
+        if not ip:
+            raise VMError("Cannot copy flake — VM has no IP address")
+
+        # Ensure remote destination exists.
+        ok, _out, err = await self._vm.exec_command(["mkdir", "-p", self._REMOTE_FLAKE_DIR])
+        if not ok:
+            raise VMError(f"mkdir {self._REMOTE_FLAKE_DIR!r} failed: {err.strip()}")
+
+        # scp -r <local>/. user@ip:<remote>  — uses sshpass -e for password auth.
+        # Password passed via SSHPASS env var to avoid exposure in ps aux.
+        scp_cmd = [
+            "sshpass",
+            "-e",
+            "scp",
+            "-o",
+            "StrictHostKeyChecking=no",
+            "-o",
+            "UserKnownHostsFile=/dev/null",
+            "-o",
+            "LogLevel=ERROR",
+            "-r",
+            str(flake_path) + "/.",
+            f"{self._vm.vm_user}@{ip}:{self._REMOTE_FLAKE_DIR}",
+        ]
+
+        returncode, _stdout, stderr = await async_run_command(
+            scp_cmd, timeout=120, env={"SSHPASS": self._vm.vm_password}
+        )
+        if returncode != 0:
+            raise VMError(f"scp flake to VM failed (exit {returncode}): {stderr.strip()}")
+
+        logger.debug("Flake copied to VM at %s", self._REMOTE_FLAKE_DIR)
+
+    async def _bootstrap_nix_darwin(self) -> None:
+        """Install Nix inside the VM; nix-darwin itself is applied by :meth:`_rebuild_switch`.
+
+        Raises :exc:`VMError` if any bootstrap step fails.
+        """
+        logger.debug("Bootstrapping Nix in VM")
+
+        installer_path = "/tmp/nix-installer.sh"  # noqa: S108
+
+        # Step 1: Download the Determinate Systems nix installer to a file.
+        ok, _out, err = await self._vm.exec_command(
+            [
+                "curl",
+                "--proto",
+                "=https",
+                "--tlsv1.2",
+                "-sSf",
+                "-L",
+                "https://install.determinate.systems/nix",
+                "-o",
+                installer_path,
+            ],
+            timeout=60,
+        )
+        if not ok:
+            raise VMError(f"Failed to download Nix installer: {err.strip()}")
+
+        # Step 2: Make it executable.
+        ok, _out, err = await self._vm.exec_command(["chmod", "+x", installer_path])
+        if not ok:
+            raise VMError(f"chmod +x nix-installer.sh failed: {err.strip()}")
+
+        # Step 3: Run the installer.
+        ok, _out, err = await self._vm.exec_command(
+            [installer_path, "install", "--no-confirm"],
+            timeout=300,
+        )
+        if not ok:
+            raise VMError(f"Nix installation failed: {err.strip()}")
+
+        # Nothing is sourced here: the later steps source nix-daemon.sh themselves
+        # before invoking nix. nix-darwin is applied via _rebuild_switch(), so this
+        # bootstrap only installs Nix itself.
+        logger.debug("Nix installed successfully")
+
+    async def _rebuild_switch(self) -> str:
+        """Bootstrap nix-darwin and apply the flake configuration.
+
+        On a fresh VM (no nix-darwin yet), uses ``nix run nix-darwin -- switch``
+        to install nix-darwin and apply the flake in a single step. On subsequent
+        calls, ``darwin-rebuild switch`` would suffice, but the ``nix run``
+        approach works in both cases.
+
+        Returns the combined stdout+stderr output.
+        Raises :exc:`VMError` if the rebuild fails (the message carries stderr only).
+        """
+        logger.debug("Running nix-darwin switch")
+        cmd = (
+            f"cd {self._REMOTE_FLAKE_DIR}"
+            " && . /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh"
+            " && nix run nix-darwin -- switch --flake ."
+        )
+        ok, out, err = await self._vm.exec_command(["bash", "-c", cmd], timeout=600)
+        combined = (out + "\n" + err).strip()
+        if not ok:
+            raise VMError(f"darwin-rebuild switch failed: {err.strip()}")
+
+        logger.debug("darwin-rebuild switch succeeded")
+        return combined
+
+    async def _scan_vm(self) -> SystemState:
+        """Run mac2nix inside the VM via nix run, SCP the result back, parse it.
+
+        Raises :exc:`VMError` if any step fails or the result cannot be parsed.
+        """
+        logger.debug("Running mac2nix scan in VM via nix run")
+
+        # Run mac2nix directly from GitHub using nix run — no pip needed.
+        nix_run_cmd = (
+            ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh"
+            f" && nix run github:gordon-code/mac2nix -- scan -o {self._REMOTE_SCAN_PATH}"
+        )
+        ok, _out, err = await self._vm.exec_command(["bash", "-c", nix_run_cmd], timeout=300)
+        if not ok:
+            raise VMError(f"mac2nix scan failed: {err.strip()}")
+
+        # SCP the result back to a local temp file.
+        ip = await self._vm.get_ip()
+        if not ip:
+            raise VMError("Cannot SCP scan result — VM has no IP")
+
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
+            local_path = Path(tmp.name)  # only the path is kept; the file is removed in ``finally``
+
+        try:
+            scp_cmd = [
+                "sshpass",
+                "-e",
+                "scp",
+                "-o",
+                "StrictHostKeyChecking=no",
+                "-o",
+                "UserKnownHostsFile=/dev/null",
+                "-o",
+                "LogLevel=ERROR",
+                f"{self._vm.vm_user}@{ip}:{self._REMOTE_SCAN_PATH}",
+                str(local_path),
+            ]
+
+            returncode, _stdout, stderr = await async_run_command(
+                scp_cmd, timeout=60, env={"SSHPASS": self._vm.vm_password}
+            )
+            if returncode != 0:
+                raise VMError(f"scp scan result from VM failed (exit {returncode}): {stderr.strip()}")
+
+            try:
+                return SystemState.from_json(local_path)
+            except Exception as exc:
+                raise VMError(f"Failed to parse VM scan result: {exc}") from exc
+        finally:
+            local_path.unlink(missing_ok=True)
diff --git a/tests/test_cli_vm.py b/tests/test_cli_vm.py
new file mode 100644
index 0000000..a6cf115
--- /dev/null
+++ b/tests/test_cli_vm.py
@@ -0,0 +1,271 @@
+"""Tests for the mac2nix validate and discover CLI commands."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+from click.testing import CliRunner
+
+from mac2nix.cli import main
+from mac2nix.models.system_state import SystemState
+from mac2nix.vm.discovery import DiscoveryResult
+from mac2nix.vm.validator import DomainScore, FidelityReport
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_state(**kwargs: object) -> SystemState:
+    """Return a ``SystemState`` built from stock test values; *kwargs* override them."""
+    stock: dict[str, object] = {
+        "hostname": "test-mac",
+        "macos_version": "15.3.0",
+        "architecture": "arm64",
+    }
+    return SystemState(**{**stock, **kwargs})  # type: ignore[arg-type]
+
+
+def _make_fidelity(score: float = 0.95) -> FidelityReport:
+    """Return a ``FidelityReport`` whose single "homebrew" domain is scored at *score*."""
+    homebrew = DomainScore(
+        domain="homebrew",
+        score=score,
+        total_fields=10,
+        matching_fields=int(score * 10),
+        mismatches=[],
+    )
+    return FidelityReport(
+        overall_score=score,
+        domain_scores={"homebrew": homebrew},
+        mismatches=[],
+    )
+
+
+def _make_discovery_result(package: str = "ripgrep") -> DiscoveryResult:
+    """Return a brew-type ``DiscoveryResult`` for *package* whose only artifact is ``rg``."""
+    rg_binary = "/opt/homebrew/bin/rg"
+    # The fake install created exactly one file and modified or deleted none.
+    changes = {"created_files": [rg_binary], "modified_files": [], "deleted_files": []}
+    # That same file is the only executable reported; there are no app bundles.
+    executables = {"apps": [], "binaries": [rg_binary]}
+
+    return DiscoveryResult(package=package, package_type="brew", executables_found=executables, **changes)
+
+
+# ---------------------------------------------------------------------------
+# validate command — registration and help
+# ---------------------------------------------------------------------------
+
+
+class TestValidateCommandRegistration:
+    """``validate`` is registered on the top-level CLI and advertises its options."""
+
+    def test_validate_registered(self) -> None:
+        """The top-level ``--help`` output names the subcommand."""
+        outcome = CliRunner().invoke(main, ["--help"])
+        assert outcome.exit_code == 0
+        assert "validate" in outcome.output
+
+    def test_validate_help_shows_options(self) -> None:
+        """``validate --help`` mentions each expected flag."""
+        outcome = CliRunner().invoke(main, ["validate", "--help"])
+        assert outcome.exit_code == 0
+        # Same flags in the same order; each one is asserted on its own.
+        for flag in ("--flake-path", "--scan-file", "--base-vm", "--vm-user", "--vm-password"):
+            assert flag in outcome.output
+
+
+# ---------------------------------------------------------------------------
+# validate command — tart not available
+# ---------------------------------------------------------------------------
+
+
+class TestValidateTartUnavailable:
+    def test_exits_with_error_when_tart_missing(self, tmp_path: Path) -> None:
+        """With tart reported unavailable, ``validate`` exits non-zero and mentions tart."""
+        state_json = tmp_path / "state.json"
+        state_json.write_text(_make_state().to_json())
+        flake = tmp_path / "flake"
+        flake.mkdir()
+        argv = ["validate", "--flake-path", str(flake), "--scan-file", str(state_json)]
+
+        # Make the availability check report that tart is not installed.
+        tart_missing = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=False)
+        with tart_missing:
+            outcome = CliRunner().invoke(main, argv)
+
+        assert outcome.exit_code != 0
+        assert "tart" in outcome.output.lower()
+
+
+# ---------------------------------------------------------------------------
+# validate command — successful run
+# ---------------------------------------------------------------------------
+
+
+class TestValidateSuccess:
+    def test_validate_calls_asyncio_run(self, tmp_path: Path) -> None:
+        """``validate`` passes its work to ``asyncio.run`` and exits with status 0."""
+        state_json = tmp_path / "state.json"
+        state_json.write_text(_make_state().to_json())
+        flake = tmp_path / "flake"
+        flake.mkdir()
+
+        called_with_coroutine = False
+
+        def fake_run(coro: object) -> None:  # type: ignore[return]
+            """Stand in for ``asyncio.run``: note the argument, then discard it."""
+            nonlocal called_with_coroutine
+            called_with_coroutine = coro is not None
+            # Close the coroutine to avoid RuntimeWarning
+            if hasattr(coro, "close"):
+                coro.close()
+
+        # Both patchers are built up front and entered together.
+        tart_present = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=True)
+        fake_loop = patch("mac2nix.cli.asyncio.run", side_effect=fake_run)
+        with tart_present, fake_loop:
+            outcome = CliRunner().invoke(
+                main,
+                ["validate", "--flake-path", str(flake), "--scan-file", str(state_json)],
+            )
+
+        assert outcome.exit_code == 0
+        assert called_with_coroutine, "asyncio.run was not called with a coroutine"
+
+    def test_missing_scan_file_exits_nonzero(self, tmp_path: Path) -> None:
+        """Pointing ``--scan-file`` at a path that does not exist makes ``validate`` fail."""
+        flake = tmp_path / "flake"
+        flake.mkdir()
+        absent = tmp_path / "nonexistent.json"  # deliberately never created
+
+        outcome = CliRunner().invoke(
+            main,
+            [
+                "validate",
+                "--flake-path",
+                str(flake),
+                "--scan-file",
+                str(absent),
+            ],
+        )
+        assert outcome.exit_code != 0
+
+
+# ---------------------------------------------------------------------------
+# discover command — registration and help
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverCommandRegistration:
+    """``discover`` is registered on the top-level CLI and advertises its options."""
+
+    def test_discover_registered(self) -> None:
+        """The top-level ``--help`` output names the subcommand."""
+        outcome = CliRunner().invoke(main, ["--help"])
+        assert outcome.exit_code == 0
+        assert "discover" in outcome.output
+
+    def test_discover_help_shows_options(self) -> None:
+        outcome = CliRunner().invoke(main, ["discover", "--help"])
+        assert outcome.exit_code == 0
+        # Same flags in the same order; each one is asserted on its own.
+        for flag in ("--package", "--type", "--base-vm", "--output"):
+            assert flag in outcome.output
+
+
+# ---------------------------------------------------------------------------
+# discover command — tart not available
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverTartUnavailable:
+    def test_exits_with_error_when_tart_missing(self) -> None:
+        """With tart reported unavailable, ``discover`` exits non-zero and mentions tart."""
+        tart_missing = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=False)
+        with tart_missing:
+            outcome = CliRunner().invoke(main, ["discover", "--package", "ripgrep"])
+        assert outcome.exit_code != 0
+        assert "tart" in outcome.output.lower()
+
+
+# ---------------------------------------------------------------------------
+# discover command — successful run
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverSuccess:
+    def test_prints_json_to_stdout(self) -> None:
+        """Without ``--output`` the JSON returned by the (faked) async run reaches stdout."""
+        payload = _make_discovery_result("ripgrep").model_dump_json(indent=2)
+
+        def fake_run(coro: object) -> str:
+            # Dispose of the un-awaited coroutine, then hand back the canned JSON.
+            if hasattr(coro, "close"):
+                coro.close()
+            return payload
+
+        # tart is reported available and asyncio.run never starts a real event loop.
+        tart_present = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=True)
+        fake_loop = patch("mac2nix.cli.asyncio.run", side_effect=fake_run)
+        with tart_present, fake_loop:
+            outcome = CliRunner().invoke(main, ["discover", "--package", "ripgrep"])
+
+        assert outcome.exit_code == 0
+        parsed = json.loads(outcome.output)
+        assert parsed["package"] == "ripgrep"
+        assert parsed["package_type"] == "brew"
+
+    def test_writes_json_to_file(self, tmp_path: Path) -> None:
+        """With ``--output`` the JSON is written to the named file."""
+        payload = _make_discovery_result("ripgrep").model_dump_json(indent=2)
+        destination = tmp_path / "result.json"
+
+        def fake_run(coro: object) -> str:
+            # Dispose of the un-awaited coroutine, then hand back the canned JSON.
+            if hasattr(coro, "close"):
+                coro.close()
+            return payload
+
+        # tart is reported available and asyncio.run never starts a real event loop.
+        tart_present = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=True)
+        fake_loop = patch("mac2nix.cli.asyncio.run", side_effect=fake_run)
+        with tart_present, fake_loop:
+            outcome = CliRunner().invoke(
+                main,
+                ["discover", "--package", "ripgrep", "--output", str(destination)],
+            )
+
+        assert outcome.exit_code == 0
+        assert destination.exists()
+        parsed = json.loads(destination.read_text())
+        assert parsed["package"] == "ripgrep"
+
+    def test_cask_type_accepted(self) -> None:
+        """``--type cask`` is an accepted choice."""
+        payload = _make_discovery_result("firefox").model_dump_json(indent=2)
+
+        def fake_run(coro: object) -> str:
+            # Dispose of the un-awaited coroutine, then hand back the canned JSON.
+            if hasattr(coro, "close"):
+                coro.close()
+            return payload
+
+        # tart is reported available and asyncio.run never starts a real event loop.
+        tart_present = patch("mac2nix.vm.manager.TartVMManager.is_available", return_value=True)
+        fake_loop = patch("mac2nix.cli.asyncio.run", side_effect=fake_run)
+        with tart_present, fake_loop:
+            outcome = CliRunner().invoke(
+                main,
+                ["discover", "--package", "firefox", "--type", "cask"],
+            )
+
+        assert outcome.exit_code == 0
+
+    def test_invalid_type_rejected(self) -> None:
+        """``--type pip`` is not an accepted choice, so the command exits non-zero."""
+        outcome = CliRunner().invoke(main, ["discover", "--package", "foo", "--type", "pip"])
+        assert outcome.exit_code != 0
diff --git a/tests/vm/__init__.py b/tests/vm/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/vm/test_comparator.py b/tests/vm/test_comparator.py
new file mode 100644
index 0000000..ab51703
--- /dev/null
+++ b/tests/vm/test_comparator.py
@@ -0,0 +1,667 @@
+"""Tests for vm/comparator.py — FileSystemComparator filesystem diffing and noise filters."""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from mac2nix.vm._utils import VMError
+from mac2nix.vm.comparator import _DEFAULT_EXCLUDE_DIRS, FileSystemComparator
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_vm(exec_result: tuple[bool, str, str] = (True, "", "")) -> MagicMock:
+    """Build a stand-in VM manager: awaiting ``exec_command`` always yields *exec_result*."""
+    # Passing the attribute as a constructor keyword is equivalent to assigning it afterwards.
+    exec_stub = AsyncMock(return_value=exec_result)
+    return MagicMock(exec_command=exec_stub)
+
+
+def _make_comparator(
+    exec_result: tuple[bool, str, str] = (True, "", ""),  # forwarded to _make_vm()
+    scan_root: str = "/System/Volumes/Data",
+    exclude_dirs: list[str] | None = None,
+) -> FileSystemComparator:  # comparator wired to a mock VM; keyword args pass straight through
+    return FileSystemComparator(_make_vm(exec_result), scan_root=scan_root, exclude_dirs=exclude_dirs)
+
+
+# ---------------------------------------------------------------------------
+# Constructor defaults
+# ---------------------------------------------------------------------------
+
+
+class TestConstructor:
+    def test_default_scan_root(self) -> None:
+        """Without a ``scan_root`` argument the comparator stores ``/System/Volumes/Data``."""
+        assert FileSystemComparator(_make_vm())._scan_root == "/System/Volumes/Data"
+
+    def test_custom_scan_root(self) -> None:
+        """An explicit ``scan_root`` is stored as given."""
+        assert FileSystemComparator(_make_vm(), scan_root="/custom/root")._scan_root == "/custom/root"
+
+    def test_default_exclude_dirs_matches_module_default(self) -> None:
+        """Without ``exclude_dirs`` the instance list equals the module default."""
+        assert FileSystemComparator(_make_vm())._exclude_dirs == list(_DEFAULT_EXCLUDE_DIRS)
+
+    def test_custom_exclude_dirs(self) -> None:
+        """An explicit ``exclude_dirs`` list is stored as given."""
+        assert FileSystemComparator(_make_vm(), exclude_dirs=["Foo", "Bar"])._exclude_dirs == ["Foo", "Bar"]
+
+    def test_none_exclude_dirs_uses_defaults(self) -> None:
+        """Passing ``exclude_dirs=None`` explicitly behaves like omitting it."""
+        assert FileSystemComparator(_make_vm(), exclude_dirs=None)._exclude_dirs == list(_DEFAULT_EXCLUDE_DIRS)
+
+    def test_default_exclude_dirs_is_copy_not_reference(self) -> None:
+        """Appending to an instance's exclude list must leave the module default untouched."""
+        comparator = FileSystemComparator(_make_vm())
+        comparator._exclude_dirs.append("extra")
+        assert "extra" not in _DEFAULT_EXCLUDE_DIRS
+
+
+# ---------------------------------------------------------------------------
+# filter_noise() — per-category
+# ---------------------------------------------------------------------------
+
+
+class TestFilterNoise:  # filter_noise() is called synchronously on literal path lists
+    def test_empty_list_returns_empty(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise([]) == []
+
+    def test_empty_strings_discarded(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["", "  ", "\t"]) == []
+
+    def test_signal_file_passes_through(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["/Users/admin/.config/myapp/settings.toml"])
+        assert result == ["/Users/admin/.config/myapp/settings.toml"]
+
+    def test_strips_whitespace_from_paths(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["  /Users/admin/.config/myapp/config.yml  "])
+        assert result == ["/Users/admin/.config/myapp/config.yml"]
+
+    # apple_services: paths carrying a com.apple.* identifier are dropped
+    def test_apple_services_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        noise = [
+            "/Users/admin/Library/Preferences/com.apple.finder.plist",
+            "/private/var/db/com.apple.xpc.launchd/config/user/501/com.apple.Accessibility-Settings.daemon.plist",
+        ]
+        assert fc.filter_noise(noise) == []
+
+    def test_non_apple_service_not_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        signal = ["/Users/admin/.config/starship.toml"]
+        assert fc.filter_noise(signal) == signal
+
+    # system_directories: /private/var/{spool,run}, /private/tmp, /private/etc/cups, cryptex .fseventsd
+    def test_private_var_spool_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/private/var/spool/something"]) == []
+
+    def test_private_var_run_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/private/var/run/launchd.pid"]) == []
+
+    def test_private_tmp_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/private/tmp/some_temp_file"]) == []
+
+    def test_private_etc_cups_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/private/etc/cups/printers.conf"]) == []
+
+    def test_volumes_cryptex_fseventsd_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Volumes/Preboot_Cryptex/.fseventsd/0000001"]) == []
+
+    # library_system_data: per-user Library subtrees (Caches, Logs, Keychains, Biome, CrashReporter)
+    def test_library_caches_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Caches/com.example.app/cache.db"]) == []
+
+    def test_library_logs_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Logs/DiagnosticReports/crash.ips"]) == []
+
+    def test_library_keychains_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Keychains/login.keychain-db"]) == []
+
+    def test_library_biome_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Biome/streams/public.data"]) == []
+
+    def test_library_application_support_crashreporter_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Application Support/CrashReporter/some.log"]) == []
+
+    # homebrew_noise: Homebrew internals (.git, Cellar/go, share/, *.rb, tap checkouts); bin/ entries are kept
+    def test_homebrew_git_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/.git/index"]) == []
+
+    def test_homebrew_cellar_go_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/Cellar/go/1.21.0/bin/go"]) == []
+
+    def test_homebrew_share_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/share/doc/something.txt"]) == []
+
+    def test_homebrew_cellar_share_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/Cellar/wget/1.21/share/man/man1/wget.1"]) == []
+
+    def test_homebrew_ruby_file_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/Library/Homebrew/utils/analytics.rb"]) == []
+
+    def test_homebrew_taps_git_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/Library/Taps/homebrew/homebrew-core/.git/COMMIT_EDITMSG"]) == []
+
+    def test_homebrew_bin_not_filtered(self) -> None:
+        """An installed binary in Homebrew bin should pass noise filter."""
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["/opt/homebrew/bin/wget"])
+        assert result == ["/opt/homebrew/bin/wget"]
+
+    # app_internals: bundle Frameworks/, _CodeSignature/ and Resources/ are dropped; Contents/MacOS is kept
+    def test_app_frameworks_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Applications/MyApp.app/Contents/Frameworks/Sparkle.framework/Sparkle"]) == []
+
+    def test_app_code_signature_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Applications/MyApp.app/Contents/_CodeSignature/CodeResources"]) == []
+
+    def test_app_resources_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Applications/MyApp.app/Contents/Resources/icon.icns"]) == []
+
+    def test_app_macos_binary_not_filtered(self) -> None:
+        """The app's own executable in MacOS/ should pass through."""
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["/Applications/MyApp.app/Contents/MacOS/MyApp"])
+        assert result == ["/Applications/MyApp.app/Contents/MacOS/MyApp"]
+
+    # database_files: -wal / -shm sidecar files are dropped; the main database file is kept
+    def test_sqlite_wal_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/SomeApp/data.sqlite-wal"]) == []
+
+    def test_sqlite_shm_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/SomeApp/data.sqlite-shm"]) == []
+
+    def test_sqlite3_wal_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/some/app.sqlite3-wal"]) == []
+
+    def test_db_shm_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/some/app.db-shm"]) == []
+
+    def test_plain_sqlite_not_filtered(self) -> None:
+        """A bare .sqlite file (without -wal/-shm suffix) should pass."""
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["/Users/admin/myapp/store.sqlite"])
+        assert result == ["/Users/admin/myapp/store.sqlite"]
+
+    # caches: Caches/, .cache/ and .Spotlight-V100 directories
+    def test_caches_dir_filtered(self) -> None:  # NOTE(review): same Library/Caches shape as test_library_caches_filtered
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Caches/SomeApp/somefile"]) == []
+
+    def test_lowercase_cache_dir_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/home/user/.cache/pip/something"]) == []
+
+    def test_spotlight_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Volumes/Macintosh HD/.Spotlight-V100/Store-V2/something"]) == []
+
+    # temp_and_system: .DS_Store, editor swap files, *.tmp, .git/ contents, .localized
+    def test_ds_store_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Projects/.DS_Store"]) == []
+
+    def test_swp_file_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/.config/myapp/.config.yml.swp"]) == []
+
+    def test_tmp_file_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/something.tmp"]) == []
+
+    def test_git_dir_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/projects/myrepo/.git/index"]) == []
+
+    def test_localized_file_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Documents/.localized"]) == []
+
+    # test_artifacts: files named before.txt / after.txt (presumably the snapshot listings themselves; confirm)
+    def test_before_txt_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/before.txt"]) == []
+
+    def test_after_txt_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/after.txt"]) == []
+
+    def test_non_test_artifact_txt_not_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        result = fc.filter_noise(["/Users/admin/readme.txt"])
+        assert result == ["/Users/admin/readme.txt"]
+
+    # always_modified: individually named files (diagnostic/network plists, brew update lock, zsh files, global prefs)
+    def test_submit_diag_info_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/private/var/root/Library/Preferences/SubmitDiagInfo.plist"]) == []
+
+    def test_network_interfaces_plist_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Library/Preferences/SystemConfiguration/NetworkInterfaces.plist"]) == []
+
+    def test_homebrew_locks_update_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/opt/homebrew/var/homebrew/locks/update"]) == []
+
+    def test_zsh_history_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/.zsh_history"]) == []
+
+    def test_zshenv_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/.zshenv"]) == []
+
+    def test_global_preferences_filtered(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        assert fc.filter_noise(["/Users/admin/Library/Preferences/.GlobalPreferences.plist"]) == []
+
+    # Mixed signal + noise: three of the six paths below are noise, so exactly three must remain
+    def test_mixed_list_keeps_signal_removes_noise(self) -> None:
+        fc = FileSystemComparator(_make_vm())
+        files = [
+            "/Users/admin/.config/fish/config.fish",  # signal
+            "/Users/admin/Library/Preferences/com.apple.dock.plist",  # apple_services noise
+            "/opt/homebrew/bin/fish",  # signal (homebrew binary)
+            "/private/tmp/install_tmp_file",  # system_directories noise
+            "/Users/admin/.zsh_history",  # always_modified noise
+            "/Applications/Fish.app/Contents/MacOS/fish",  # signal (app executable)
+        ]
+        result = fc.filter_noise(files)
+        assert "/Users/admin/.config/fish/config.fish" in result
+        assert "/opt/homebrew/bin/fish" in result
+        assert "/Applications/Fish.app/Contents/MacOS/fish" in result
+        assert len(result) == 3
+
+
+# ---------------------------------------------------------------------------
+# snapshot()
+# ---------------------------------------------------------------------------
+
+
+class TestSnapshot:
+    def test_calls_exec_command_with_bash_c(self) -> None:
+        """Exactly one command is sent to the VM and its argv starts with ``bash -c``."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/before.txt"))
+
+        mock_vm.exec_command.assert_called_once()
+        # The first positional argument of exec_command is the argv list.
+        argv = mock_vm.exec_command.call_args.args[0]
+        assert argv[0] == "bash"
+        assert argv[1] == "-c"
+
+    def test_pipeline_contains_find(self) -> None:
+        """The script passed to ``bash -c`` mentions ``find``."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/before.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "find" in script
+
+    def test_pipeline_contains_sort(self) -> None:
+        """The script passed to ``bash -c`` mentions ``sort``."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "sort" in script
+
+    def test_pipeline_contains_save_path(self) -> None:
+        """The path given to ``snapshot()`` appears verbatim in the script."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/my_snapshot.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "/tmp/my_snapshot.txt" in script
+
+    def test_pipeline_contains_scan_root(self) -> None:
+        """A custom ``scan_root`` appears verbatim in the script."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm, scan_root="/custom/root")
+
+        asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "/custom/root" in script
+
+    def test_pipeline_contains_sed_strip(self) -> None:
+        """Prefix stripping is checked via ``awk``; despite this test's name, ``sed`` is not asserted."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        # Pipeline uses awk substr to strip the scan_root prefix
+        # (no regex, no injection), so only "awk" is looked for here.
+        assert "awk" in script
+
+    def test_failure_raises_vm_error(self) -> None:
+        """A failed exec (ok=False) surfaces as ``VMError`` whose message mentions "snapshot"."""
+        mock_vm = _make_vm((False, "", "disk full"))
+        comparator = FileSystemComparator(mock_vm)
+
+        # pytest.raises(match=...) does a regex search on str(exc), so any
+        # message containing "snapshot" satisfies it.
+        with pytest.raises(VMError, match="snapshot"):
+            asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+    def test_uses_timeout_45(self) -> None:
+        """``exec_command`` is given ``timeout=45`` as a keyword argument."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+        # call_args.kwargs holds the keyword arguments of the most recent call.
+        keyword_args = mock_vm.exec_command.call_args.kwargs
+        assert keyword_args.get("timeout") == 45
+
+    def test_exclude_dirs_in_pipeline(self) -> None:
+        """A custom exclude directory name appears verbatim in the script."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm, exclude_dirs=["MySpecialDir"])
+
+        asyncio.run(comparator.snapshot("/tmp/snap.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "MySpecialDir" in script
+
+
+# ---------------------------------------------------------------------------
+# get_created_files()
+# ---------------------------------------------------------------------------
+
+
+class TestGetCreatedFiles:
+    def test_returns_parsed_file_list(self) -> None:
+        """Paths printed one per line by the VM command all appear in the result."""
+        stdout = "/opt/homebrew/bin/wget\n/Users/admin/.config/fish/config.fish\n"
+        mock_vm = _make_vm((True, stdout, ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        created = asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        # Membership only: the order of the returned list is not asserted.
+        assert "/opt/homebrew/bin/wget" in created
+        assert "/Users/admin/.config/fish/config.fish" in created
+
+    def test_noise_is_filtered_from_result(self) -> None:
+        """A ``.zsh_history`` entry is dropped while the Homebrew binary is kept."""
+        stdout = "/opt/homebrew/bin/wget\n/Users/admin/.zsh_history\n"
+        mock_vm = _make_vm((True, stdout, ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        created = asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        # One signal path and one noise path went in; only the signal comes out.
+        assert "/opt/homebrew/bin/wget" in created
+        assert "/Users/admin/.zsh_history" not in created
+
+    def test_calls_comm_minus_13(self) -> None:
+        """The script runs ``comm -13`` and names both snapshot files."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "comm -13" in script
+        assert "/tmp/before.txt" in script
+        assert "/tmp/after.txt" in script
+
+    def test_empty_output_returns_empty_list(self) -> None:
+        """A successful command that prints nothing yields ``[]``."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        created = asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        assert created == []
+
+    def test_failure_returns_empty_list_not_raises(self) -> None:
+        """A failed command (ok=False) yields ``[]`` rather than raising."""
+        mock_vm = _make_vm((False, "", "error"))
+        comparator = FileSystemComparator(mock_vm)
+
+        # No pytest.raises here: the call itself must complete normally.
+        created = asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        assert created == []
+
+    def test_uses_timeout_45(self) -> None:
+        """``exec_command`` is given ``timeout=45`` as a keyword argument."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.get_created_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        # call_args.kwargs holds the keyword arguments of the most recent call.
+        keyword_args = mock_vm.exec_command.call_args.kwargs
+        assert keyword_args.get("timeout") == 45
+
+
+# ---------------------------------------------------------------------------
+# get_deleted_files()
+# ---------------------------------------------------------------------------
+
+
+class TestGetDeletedFiles:
+    def test_returns_parsed_file_list(self) -> None:
+        """A path printed by the VM command appears in the result."""
+        stdout = "/Users/admin/.config/oldtool/config\n"
+        mock_vm = _make_vm((True, stdout, ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        deleted = asyncio.run(comparator.get_deleted_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        # The trailing newline of the command output is not part of the path.
+        assert "/Users/admin/.config/oldtool/config" in deleted
+
+    def test_calls_comm_minus_23(self) -> None:
+        """The script runs ``comm -23`` and names both snapshot files."""
+        mock_vm = _make_vm((True, "", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        asyncio.run(comparator.get_deleted_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        argv = mock_vm.exec_command.call_args.args[0]
+        script = argv[2]
+        assert "comm -23" in script
+        assert "/tmp/before.txt" in script
+        assert "/tmp/after.txt" in script
+
+    def test_noise_is_filtered(self) -> None:
+        """A ``.zshenv`` entry is dropped while the app config file is kept."""
+        stdout = "/Users/admin/.zshenv\n/Users/admin/.config/myapp/config.toml\n"
+        mock_vm = _make_vm((True, stdout, ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        deleted = asyncio.run(comparator.get_deleted_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        # One noise path and one signal path went in; only the signal comes out.
+        assert "/Users/admin/.config/myapp/config.toml" in deleted
+        assert "/Users/admin/.zshenv" not in deleted
+
+    def test_failure_returns_empty_list(self) -> None:
+        """A failed command (ok=False) yields ``[]`` rather than raising."""
+        mock_vm = _make_vm((False, "", "error"))
+        comparator = FileSystemComparator(mock_vm)
+
+        deleted = asyncio.run(comparator.get_deleted_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        assert deleted == []
+
+    def test_empty_output_returns_empty(self) -> None:
+        """Whitespace-only command output yields ``[]``."""
+        mock_vm = _make_vm((True, "   \n", ""))
+        comparator = FileSystemComparator(mock_vm)
+
+        deleted = asyncio.run(comparator.get_deleted_files("/tmp/before.txt", "/tmp/after.txt"))
+
+        assert deleted == []
+
+
+# ---------------------------------------------------------------------------
+# get_modified_files()
+# ---------------------------------------------------------------------------
+
+
+class TestGetModifiedFiles:
+    def _make_since(self) -> datetime:
+        return datetime(2026, 3, 18, 12, 0, 0, tzinfo=UTC)
+
+    def test_returns_parsed_file_list(self) -> None:
+        output = "/Users/admin/.config/fish/config.fish\n/opt/homebrew/bin/wget\n"
+        vm = _make_vm((True, output, ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> list[str]:
+            return await fc.get_modified_files(self._make_since())
+
+        result = asyncio.run(_run())
+        assert "/Users/admin/.config/fish/config.fish" in result
+        assert "/opt/homebrew/bin/wget" in result
+
+    def test_calls_exec_command_with_bash_c(self) -> None:
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> None:
+            await fc.get_modified_files(self._make_since())
+
+        asyncio.run(_run())
+        cmd_arg = vm.exec_command.call_args[0][0]
+        assert cmd_arg[0] == "bash"
+        assert cmd_arg[1] == "-c"
+
+    def test_pipeline_contains_newermt(self) -> None:
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> None:
+            await fc.get_modified_files(self._make_since())
+
+        asyncio.run(_run())
+        pipeline = vm.exec_command.call_args[0][0][2]
+        assert "newermt" in pipeline
+
+    def test_pipeline_contains_since_timestamp(self) -> None:
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm)
+        since = self._make_since()
+
+        async def _run() -> None:
+            await fc.get_modified_files(since)
+
+        asyncio.run(_run())
+        pipeline = vm.exec_command.call_args[0][0][2]
+        # The cutoff must appear in the pipeline as ISO 8601 text with microseconds stripped.
+        ts_iso = since.replace(microsecond=0).isoformat()
+        assert ts_iso in pipeline
+
+    def test_scan_root_override(self) -> None:
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm, scan_root="/System/Volumes/Data")
+
+        async def _run() -> None:
+            await fc.get_modified_files(self._make_since(), scan_root="/Users")
+
+        asyncio.run(_run())
+        pipeline = vm.exec_command.call_args[0][0][2]
+        assert "/Users" in pipeline
+
+    def test_noise_filtered_from_result(self) -> None:
+        output = "/Users/admin/.zsh_history\n/Users/admin/.config/fish/config.fish\n"
+        vm = _make_vm((True, output, ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> list[str]:
+            return await fc.get_modified_files(self._make_since())
+
+        result = asyncio.run(_run())
+        assert "/Users/admin/.config/fish/config.fish" in result
+        assert "/Users/admin/.zsh_history" not in result
+
+    def test_failure_returns_empty_list(self) -> None:
+        vm = _make_vm((False, "", "permission denied"))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> list[str]:
+            return await fc.get_modified_files(self._make_since())
+
+        assert asyncio.run(_run()) == []
+
+    def test_uses_timeout_45(self) -> None:
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> None:
+            await fc.get_modified_files(self._make_since())
+
+        asyncio.run(_run())
+        kwargs = vm.exec_command.call_args[1]
+        assert kwargs.get("timeout") == 45
+
+    def test_pipeline_contains_awk_cutoff(self) -> None:
+        """awk with cutoff= must be present to exclude newly-created files."""
+        vm = _make_vm((True, "", ""))
+        fc = FileSystemComparator(vm)
+
+        async def _run() -> None:
+            await fc.get_modified_files(self._make_since())
+
+        asyncio.run(_run())
+        pipeline = vm.exec_command.call_args[0][0][2]
+        assert "awk" in pipeline
+        assert "cutoff" in pipeline
diff --git a/tests/vm/test_discovery.py b/tests/vm/test_discovery.py
new file mode 100644
index 0000000..830ab98
--- /dev/null
+++ b/tests/vm/test_discovery.py
@@ -0,0 +1,953 @@
+"""Tests for vm/discovery.py — DiscoveryRunner workflow and helpers."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from pydantic import ValidationError
+
+from mac2nix.vm._utils import VMError
+from mac2nix.vm.discovery import _BINARY_PROBE_OPTIONS, DiscoveryResult, DiscoveryRunner
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_vm(
+    clone_raises: Exception | None = None,
+    start_raises: Exception | None = None,
+    exec_result: tuple[bool, str, str] = (True, "", ""),
+) -> MagicMock:
+    """Create a stand-in for TartVMManager whose async methods are AsyncMocks.
+
+    ``clone`` and ``start`` raise the given exception when one is supplied and
+    succeed otherwise; ``exec_command`` always resolves to ``exec_result``.
+    ``cleanup`` is a plain AsyncMock that records its calls.
+    """
+    manager = MagicMock()
+    # side_effect=None is AsyncMock's default, so the success case needs no branch.
+    manager.clone = AsyncMock(side_effect=clone_raises)
+    manager.start = AsyncMock(side_effect=start_raises)
+    manager.exec_command = AsyncMock(return_value=exec_result)
+    manager.cleanup = AsyncMock()
+    return manager
+
+
+def _make_comparator(
+    snapshot_raises: Exception | None = None,
+    created: list[str] | None = None,
+    deleted: list[str] | None = None,
+    modified: list[str] | None = None,
+) -> MagicMock:
+    """Create a stand-in for FileSystemComparator with canned diff results.
+
+    ``snapshot`` raises ``snapshot_raises`` when given; an omitted file list means ``[]``.
+    """
+    comparator = MagicMock()
+    comparator.snapshot = AsyncMock(side_effect=snapshot_raises)
+    comparator.get_created_files = AsyncMock(return_value=created or [])
+    comparator.get_deleted_files = AsyncMock(return_value=deleted or [])
+    comparator.get_modified_files = AsyncMock(return_value=modified or [])
+    return comparator
+
+
+def _patch_comparator(comp: MagicMock):
+    """Patch discovery's FileSystemComparator name so that calling it returns ``comp``."""
+    return patch("mac2nix.vm.discovery.FileSystemComparator", return_value=comp)
+
+
+# ---------------------------------------------------------------------------
+# DiscoveryResult model
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoveryResultModel:
+    def test_construction(self) -> None:
+        dr = DiscoveryResult(
+            package="wget",
+            package_type="brew",
+            created_files=["/opt/homebrew/bin/wget"],
+            modified_files=[],
+            deleted_files=[],
+            executables_found={"apps": [], "binaries": ["/opt/homebrew/bin/wget"]},
+        )
+        assert dr.package == "wget"
+        assert dr.package_type == "brew"
+        assert dr.created_files == ["/opt/homebrew/bin/wget"]
+        assert dr.executables_found == {"apps": [], "binaries": ["/opt/homebrew/bin/wget"]}
+
+    def test_empty_result(self) -> None:
+        dr = DiscoveryResult(
+            package="myapp",
+            package_type="cask",
+            created_files=[],
+            modified_files=[],
+            deleted_files=[],
+            executables_found={},
+        )
+        assert dr.created_files == []
+        assert dr.deleted_files == []
+        assert dr.modified_files == []
+
+    def test_serializable(self) -> None:
+        dr = DiscoveryResult(
+            package="fish",
+            package_type="brew",
+            created_files=["/opt/homebrew/bin/fish"],
+            modified_files=[],
+            deleted_files=[],
+            executables_found={"apps": [], "binaries": ["/opt/homebrew/bin/fish"]},
+        )
+        # Should not raise
+        data = dr.model_dump()
+        assert data["package"] == "fish"
+
+    def test_pydantic_validation(self) -> None:
+        with pytest.raises(ValidationError):
+            DiscoveryResult(
+                package="x",
+                # missing required fields
+            )
+
+
+# ---------------------------------------------------------------------------
+# discover() — success path
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverSuccess:
+    def test_returns_discovery_result(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(
+            created=["/opt/homebrew/bin/wget"],
+            deleted=[],
+            modified=[],
+        )
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("wget", "brew")
+
+        result = asyncio.run(_run())
+        assert isinstance(result, DiscoveryResult)
+        assert result.package == "wget"
+        assert result.package_type == "brew"
+
+    def test_created_files_populated(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(created=["/opt/homebrew/bin/wget", "/opt/homebrew/share/wget"])
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("wget")
+
+        result = asyncio.run(_run())
+        assert "/opt/homebrew/bin/wget" in result.created_files
+
+    def test_deleted_files_populated(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(deleted=["/old/config"])
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("myapp")
+
+        result = asyncio.run(_run())
+        assert "/old/config" in result.deleted_files
+
+    def test_modified_files_populated(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(modified=["/Users/admin/.config/myapp/config"])
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("myapp")
+
+        result = asyncio.run(_run())
+        assert "/Users/admin/.config/myapp/config" in result.modified_files
+
+    def test_clone_called_with_generated_name(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget", "brew")
+
+        asyncio.run(_run())
+        vm.clone.assert_called_once()
+        clone_name = vm.clone.call_args[0][0]
+        assert "wget" in clone_name
+        assert clone_name.startswith("mac2nix-discover-")
+
+    def test_start_called_after_clone(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator()
+        call_order: list[str] = []
+
+        async def recording_clone(name: str) -> None:
+            call_order.append("clone")
+
+        async def recording_start() -> None:
+            call_order.append("start")
+
+        vm.clone = AsyncMock(side_effect=recording_clone)
+        vm.start = AsyncMock(side_effect=recording_start)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        assert call_order.index("clone") < call_order.index("start")
+
+    def test_snapshots_taken_before_and_after_install(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        assert comp.snapshot.call_count == 2
+
+    def test_default_package_type_is_brew(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator()
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("wget")
+
+        result = asyncio.run(_run())
+        assert result.package_type == "brew"
+
+    def test_cask_type_passed_through(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator()
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("firefox", "cask")
+
+        result = asyncio.run(_run())
+        assert result.package_type == "cask"
+
+
+# ---------------------------------------------------------------------------
+# discover() — clone/start failure
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverCloneStartFailure:
+    def test_clone_failure_reraises_vm_error(self) -> None:
+        vm = _make_vm(clone_raises=VMError("VM not found"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                await runner.discover("wget")
+
+        with pytest.raises(VMError, match="VM not found"):
+            asyncio.run(_run())
+
+    def test_start_failure_reraises_vm_error(self) -> None:
+        vm = _make_vm(start_raises=VMError("VM failed to boot"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                await runner.discover("wget")
+
+        with pytest.raises(VMError, match="VM failed to boot"):
+            asyncio.run(_run())
+
+    def test_cleanup_called_even_after_clone_failure(self) -> None:
+        vm = _make_vm(clone_raises=VMError("clone failed"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp), contextlib.suppress(VMError):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+    def test_cleanup_called_even_after_start_failure(self) -> None:
+        vm = _make_vm(start_raises=VMError("start failed"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp), contextlib.suppress(VMError):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# discover() — pre-snapshot failure
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverPreSnapshotFailure:
+    def test_pre_snapshot_failure_returns_empty_result(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(snapshot_raises=VMError("disk full"))
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                return await runner.discover("wget")
+
+        result = asyncio.run(_run())
+        assert isinstance(result, DiscoveryResult)
+        assert result.created_files == []
+        assert result.modified_files == []
+        assert result.deleted_files == []
+
+    def test_pre_snapshot_failure_does_not_raise(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(snapshot_raises=VMError("disk full"))
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                await runner.discover("wget")  # must not raise
+
+        asyncio.run(_run())
+
+    def test_pre_snapshot_failure_still_cleans_up(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(snapshot_raises=VMError("disk full"))
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+    def test_pre_snapshot_failure_package_name_preserved(self) -> None:
+        vm = _make_vm()
+        comp = _make_comparator(snapshot_raises=VMError("error"))
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with _patch_comparator(comp):
+                return await runner.discover("mypackage", "cask")
+
+        result = asyncio.run(_run())
+        assert result.package == "mypackage"
+        assert result.package_type == "cask"
+
+
+# ---------------------------------------------------------------------------
+# discover() — install failure
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverInstallFailure:
+    """discover() when the in-VM install command reports failure."""
+
+    def test_install_failure_returns_empty_result(self) -> None:
+        """A failed install short-circuits: the comparator's canned diffs never surface."""
+        vm = _make_vm(exec_result=(False, "", "brew: package not found"))
+        comp = _make_comparator(created=["/new"], deleted=["/gone"], modified=["/changed"])
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("nonexistent-pkg")
+
+        result = asyncio.run(_run())
+        assert result.created_files == []
+        assert result.modified_files == []
+        assert result.deleted_files == []
+
+    def test_install_failure_does_not_raise(self) -> None:
+        """An install failure is reported through the result rather than an exception."""
+        vm = _make_vm(exec_result=(False, "", "brew error"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("bad-pkg")
+
+        asyncio.run(_run())  # Must not raise
+
+    def test_install_failure_still_cleans_up(self) -> None:
+        """The VM is cleaned up even though the install failed."""
+        vm = _make_vm(exec_result=(False, "", "brew error"))
+        comp = _make_comparator()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("bad-pkg")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# discover() — post-snapshot failure
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverPostSnapshotFailure:
+    """discover() when the post-install snapshot fails.
+
+    The first ``snapshot`` await (pre-install) succeeds and the second raises VMError.
+    """
+
+    @staticmethod
+    def _failing_post_snapshot(comp: MagicMock) -> MagicMock:
+        """Rig ``comp.snapshot`` to succeed once and then raise VMError; return ``comp``.
+
+        Items of an iterable ``side_effect`` are used one per call; exceptions are raised.
+        """
+        comp.snapshot = AsyncMock(side_effect=[None, VMError("post-snapshot error")])
+        return comp
+
+    def test_post_snapshot_failure_returns_empty_with_executables(self) -> None:
+        """Without a post-install snapshot nothing can be diffed, so no created files."""
+        vm = _make_vm()
+        # The canned diff is deliberately non-empty: the final assert can only pass
+        # if discover() never reports it.
+        comp = self._failing_post_snapshot(_make_comparator(created=["/new"]))
+
+        async def _run() -> DiscoveryResult:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                return await runner.discover("wget")
+
+        result = asyncio.run(_run())
+        assert result.created_files == []
+
+    def test_post_snapshot_failure_still_cleans_up(self) -> None:
+        """The VM is cleaned up even though the post-install snapshot failed."""
+        vm = _make_vm()
+        comp = self._failing_post_snapshot(_make_comparator())
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(comp),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# cleanup always runs
+# ---------------------------------------------------------------------------
+
+
+class TestCleanupAlwaysRuns:
+    def test_cleanup_called_on_success(self) -> None:
+        """cleanup() is called exactly once after a successful discovery."""
+        vm = _make_vm()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(_make_comparator()),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_called_once()
+
+    def test_cleanup_called_exactly_once_on_success(self) -> None:
+        """The cleanup coroutine is actually awaited, exactly once, not just created."""
+        vm = _make_vm()
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with (
+                _patch_comparator(_make_comparator()),
+                patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()),
+            ):
+                await runner.discover("wget")
+
+        asyncio.run(_run())
+        vm.cleanup.assert_awaited_once()
+
+
+# ---------------------------------------------------------------------------
+# _install_package()
+# ---------------------------------------------------------------------------
+
+
+class TestInstallPackage:
+    def test_brew_install_command_args(self) -> None:
+        vm = _make_vm()
+        captured: list[list[str]] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            captured.append(cmd)
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            await runner._install_package("wget", "brew")
+
+        asyncio.run(_run())
+        # Command wrapped in bash -c with shlex.quote for shell injection safety
+        assert captured == [["bash", "-c", "brew install wget"]]
+
+    def test_cask_adds_cask_flag(self) -> None:
+        vm = _make_vm()
+        captured: list[list[str]] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            captured.append(cmd)
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            await runner._install_package("firefox", "cask")
+
+        asyncio.run(_run())
+        # Expected command: ['bash', '-c', 'brew install --cask firefox']
+        bash_cmd = captured[0]
+        assert bash_cmd[0] == "bash"
+        assert "--cask" in bash_cmd[2]
+        assert "firefox" in bash_cmd[2]
+
+    def test_success_returns_true(self) -> None:
+        vm = _make_vm(exec_result=(True, "", ""))
+
+        async def _run() -> bool:
+            runner = DiscoveryRunner(vm)
+            return await runner._install_package("wget", "brew")
+
+        assert asyncio.run(_run()) is True
+
+    def test_failure_returns_false(self) -> None:
+        vm = _make_vm(exec_result=(False, "", "not found"))
+
+        async def _run() -> bool:
+            runner = DiscoveryRunner(vm)
+            return await runner._install_package("bad-pkg", "brew")
+
+        assert asyncio.run(_run()) is False
+
+    def test_uses_timeout_1800(self) -> None:
+        vm = _make_vm()
+        captured_kwargs: list[dict] = []
+
+        async def recording_exec(cmd: list[str], **kwargs) -> tuple[bool, str, str]:
+            captured_kwargs.append(kwargs)
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            await runner._install_package("wget", "brew")
+
+        asyncio.run(_run())
+        assert captured_kwargs[0].get("timeout") == 1800
+
+
+# ---------------------------------------------------------------------------
+# _find_new_executables()
+# ---------------------------------------------------------------------------
+
+
+class TestFindNewExecutables:
+    def test_returns_empty_when_find_fails(self) -> None:
+        vm = _make_vm(exec_result=(False, "", "find: error"))
+
+        async def _run() -> dict:
+            runner = DiscoveryRunner(vm)
+            return await runner._find_new_executables("wget")
+
+        result = asyncio.run(_run())
+        assert result == {"apps": [], "binaries": []}
+
+    def test_app_bundles_classified_separately(self) -> None:
+        vm = _make_vm()
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:  # find pipeline
+                return (True, "", "")
+            if call_count == 2:  # comm -13
+                return (True, "/Applications/MyApp.app\n/opt/homebrew/bin/myapp\n", "")
+            return (True, "", "")  # rm cleanup
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> dict:
+            runner = DiscoveryRunner(vm)
+            return await runner._find_new_executables("myapp")
+
+        result = asyncio.run(_run())
+        assert "/Applications/MyApp.app" in result["apps"]
+        assert "/opt/homebrew/bin/myapp" in result["binaries"]
+
+    def test_non_app_paths_classified_as_binaries(self) -> None:
+        vm = _make_vm()
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return (True, "", "")
+            if call_count == 2:
+                return (True, "/opt/homebrew/bin/wget\n/usr/local/bin/curl\n", "")
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> dict:
+            runner = DiscoveryRunner(vm)
+            return await runner._find_new_executables("wget")
+
+        result = asyncio.run(_run())
+        assert result["apps"] == []
+        assert "/opt/homebrew/bin/wget" in result["binaries"]
+        assert "/usr/local/bin/curl" in result["binaries"]
+
+    def test_empty_lines_discarded(self) -> None:
+        vm = _make_vm()
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return (True, "", "")
+            if call_count == 2:
+                return (True, "\n/opt/homebrew/bin/wget\n\n", "")
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> dict:
+            runner = DiscoveryRunner(vm)
+            return await runner._find_new_executables("wget")
+
+        result = asyncio.run(_run())
+        assert "" not in result["binaries"]
+        assert len(result["binaries"]) == 1
+
+    def test_returns_empty_when_comm_fails(self) -> None:
+        vm = _make_vm()
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:  # find succeeds
+                return (True, "", "")
+            if call_count == 2:  # comm fails
+                return (False, "", "comm: error")
+            return (True, "", "")  # rm
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> dict:
+            runner = DiscoveryRunner(vm)
+            return await runner._find_new_executables("wget")
+
+        result = asyncio.run(_run())
+        assert result == {"apps": [], "binaries": []}
+
+    def test_temp_file_removed_after_comm(self) -> None:
+        """The third exec_command call, after find and comm both succeed, must be ``rm -f``."""
+        vm = _make_vm()
+        rm_called = False
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count, rm_called
+            call_count += 1
+            if call_count == 3 and cmd[:2] == ["rm", "-f"]:
+                rm_called = True
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            await runner._find_new_executables("wget")
+
+        asyncio.run(_run())
+        assert rm_called
+
+    def test_package_name_in_temp_path(self) -> None:
+        """Temp file path should include the package name."""
+        vm = _make_vm()
+        find_pipeline: list[str] = []
+        call_count = 0
+
+        async def exec_side_effect(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                find_pipeline.append(cmd[2])  # bash -c <pipeline>
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            await runner._find_new_executables("myspecialpkg")
+
+        asyncio.run(_run())
+        assert find_pipeline
+        assert "myspecialpkg" in find_pipeline[0]
+
+
+# ---------------------------------------------------------------------------
+# _execute_found()
+# ---------------------------------------------------------------------------
+
+
+class TestExecuteFound:
+    def test_no_op_when_empty(self) -> None:
+        """With no apps and no binaries, nothing is executed on the VM."""
+        vm = _make_vm()
+
+        async def _run() -> None:
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await DiscoveryRunner(vm)._execute_found({"apps": [], "binaries": []})
+
+        asyncio.run(_run())
+        vm.exec_command.assert_not_called()
+
+    def test_probe_options_used_for_binaries(self) -> None:
+        """Every entry of _BINARY_PROBE_OPTIONS appears in the single probe command."""
+        vm = _make_vm()
+        captured_cmds: list[list[str]] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            captured_cmds.append(cmd)
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found({"apps": [], "binaries": ["/opt/homebrew/bin/wget"]})
+
+        asyncio.run(_run())
+        assert len(captured_cmds) == 1  # one exec_command call for the probe batch
+        probe_cmd = captured_cmds[0][2]  # bash -c <pipeline>
+        for option in _BINARY_PROBE_OPTIONS:
+            assert option in probe_cmd
+
+    def test_probe_contains_binary_path(self) -> None:  # the path must reach a VM command
+        vm = _make_vm()
+        captured_pipeline: list[str] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            if len(cmd) >= 3:
+                captured_pipeline.append(cmd[2])
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found({"apps": [], "binaries": ["/opt/homebrew/bin/mybin"]})
+
+        asyncio.run(_run())
+        assert any("/opt/homebrew/bin/mybin" in p for p in captured_pipeline)
+
+    def test_sleep_called_after_execution(self) -> None:
+        vm = _make_vm()
+        sleep_calls: list[float] = []
+
+        async def recording_sleep(secs: float) -> None:
+            sleep_calls.append(secs)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", side_effect=recording_sleep):
+                await runner._execute_found({"apps": [], "binaries": ["/opt/homebrew/bin/wget"]})
+
+        asyncio.run(_run())
+        assert 10 in sleep_calls  # a sleep of 10 is expected once something ran
+
+    def test_no_sleep_when_nothing_to_execute(self) -> None:
+        """Empty input must not trigger any asyncio.sleep call."""
+        vm = _make_vm()
+        sleep_calls: list[float] = []
+
+        async def recording_sleep(secs: float) -> None:
+            sleep_calls.append(secs)
+
+        async def _run() -> None:
+            with patch("mac2nix.vm.discovery.asyncio.sleep", side_effect=recording_sleep):
+                await DiscoveryRunner(vm)._execute_found({"apps": [], "binaries": []})
+
+        asyncio.run(_run())
+        assert sleep_calls == []
+
+    def test_app_bundle_info_plist_read(self) -> None:
+        """The first command reads CFBundleExecutable for the given app bundle."""
+        vm = _make_vm()
+        captured_cmds: list[list[str]] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            captured_cmds.append(cmd)
+            return (True, "MyApp", "")  # defaults read returns executable name
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found({"apps": ["/Applications/MyApp.app"], "binaries": []})
+
+        asyncio.run(_run())
+        pipeline0 = captured_cmds[0][2]  # first call: the defaults read
+        assert "CFBundleExecutable" in pipeline0
+        assert "MyApp.app" in pipeline0
+
+    def test_app_launch_uses_executable_name(self) -> None:
+        """The second command launches Contents/MacOS/<name returned by the first>."""
+        vm = _make_vm()
+        captured_cmds: list[list[str]] = []
+        call_count = 0
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1
+            captured_cmds.append(cmd)
+            if call_count == 1:
+                return (True, "MyApp", "")  # CFBundleExecutable = "MyApp"
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found({"apps": ["/Applications/MyApp.app"], "binaries": []})
+
+        asyncio.run(_run())
+        assert len(captured_cmds) >= 2
+        launch_pipeline = captured_cmds[1][2]  # second call: launch the app
+        assert "MyApp.app/Contents/MacOS/MyApp" in launch_pipeline
+
+    def test_app_bundle_skipped_when_defaults_read_fails(self) -> None:
+        """If defaults read fails, no Contents/MacOS launch is issued and nothing raises."""
+        vm = _make_vm()
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            return (False, "", "defaults: error")  # all calls fail
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found({"apps": ["/Applications/Broken.app"], "binaries": []})
+
+        asyncio.run(_run())  # Must not raise
+        assert not any("Contents/MacOS" in str(c) for c in vm.exec_command.call_args_list)
+
+    def test_multiple_binaries_batched_into_one_command(self) -> None:
+        """Two binaries are probed through one exec_command call, not one call each."""
+        vm = _make_vm()
+        probe_calls: list[list[str]] = []
+
+        async def recording_exec(cmd: list[str], **_kw) -> tuple[bool, str, str]:
+            probe_calls.append(cmd)
+            return (True, "", "")
+
+        vm.exec_command = AsyncMock(side_effect=recording_exec)
+
+        async def _run() -> None:
+            runner = DiscoveryRunner(vm)
+            with patch("mac2nix.vm.discovery.asyncio.sleep", new=AsyncMock()):
+                await runner._execute_found(
+                    {
+                        "apps": [],
+                        "binaries": ["/opt/homebrew/bin/wget", "/opt/homebrew/bin/curl"],
+                    }
+                )
+
+        asyncio.run(_run())
+        assert len(probe_calls) == 1
+        pipeline = probe_calls[0][2]
+        assert "wget" in pipeline
+        assert "curl" in pipeline
diff --git a/tests/vm/test_manager.py b/tests/vm/test_manager.py
new file mode 100644
index 0000000..15d6fcb
--- /dev/null
+++ b/tests/vm/test_manager.py
@@ -0,0 +1,974 @@
+"""Tests for vm/manager.py — TartVMManager async VM lifecycle."""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from mac2nix.vm._utils import VMConnectionError, VMError, VMTimeoutError
+from mac2nix.vm.manager import TartVMManager
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_manager(
+    base_vm: str = "sequoia-base",
+    vm_user: str = "admin",
+    vm_password: str = "admin",
+) -> TartVMManager:  # thin factory; credentials default to admin/admin
+    return TartVMManager(base_vm, vm_user=vm_user, vm_password=vm_password)
+
+
+def _cloned_manager(clone_name: str = "test-clone") -> TartVMManager:
+    """Build a default manager whose ``_current_clone`` is preset to *clone_name*."""
+    manager = _make_manager()
+    manager._current_clone = clone_name
+    return manager
+
+
+def _make_bg_proc() -> MagicMock:
+    """Stand-in for the background subprocess handle: fixed pid, mocked kill()."""
+    return MagicMock(
+        pid=12345,
+        kill=MagicMock(),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Constructor and availability
+# ---------------------------------------------------------------------------
+
+
+class TestConstructor:
+    def test_defaults(self) -> None:
+        manager = TartVMManager("base")
+        assert manager._base_vm == "base"
+        assert (manager._vm_user, manager._vm_password) == ("admin", "admin")
+        # A new manager has no clone and no VM process recorded yet.
+        assert manager._current_clone is None
+        assert manager._vm_process is None
+
+    def test_custom_credentials(self) -> None:
+        manager = TartVMManager("base", vm_user="user1", vm_password="s3cr3t")
+        credentials = (manager._vm_user, manager._vm_password)
+        assert credentials == ("user1", "s3cr3t")
+
+    def test_is_available_true(self) -> None:  # which() reports a tart binary
+        with patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"):
+            assert TartVMManager.is_available() is True
+
+    def test_is_available_false(self) -> None:  # which() finds nothing
+        with patch("mac2nix.vm.manager.shutil.which", return_value=None):
+            assert TartVMManager.is_available() is False
+
+
+# ---------------------------------------------------------------------------
+# _require_clone guard
+# ---------------------------------------------------------------------------
+
+
+class TestRequireClone:
+    def test_raises_when_no_clone(self) -> None:
+        fresh = _make_manager()
+        with pytest.raises(VMError, match="No active VM clone"):
+            fresh._require_clone()
+
+    def test_returns_clone_name_when_set(self) -> None:
+        expected = "my-clone"
+        assert _cloned_manager(expected)._require_clone() == expected
+
+
+# ---------------------------------------------------------------------------
+# _is_disconnect
+# ---------------------------------------------------------------------------
+
+
+class TestIsDisconnect:  # _is_disconnect is called on the class with one stderr string
+    def test_received_disconnect(self) -> None:
+        assert TartVMManager._is_disconnect("Received disconnect from 10.0.0.1")
+
+    def test_connection_closed(self) -> None:
+        assert TartVMManager._is_disconnect("Connection closed by remote host")
+
+    def test_connection_reset_by_peer(self) -> None:
+        assert TartVMManager._is_disconnect("Connection reset by peer")
+
+    def test_broken_pipe(self) -> None:  # marker text follows a "Write failed:" prefix
+        assert TartVMManager._is_disconnect("Write failed: Broken pipe")
+
+    def test_connection_refused(self) -> None:  # marker text sits at the end of the message
+        assert TartVMManager._is_disconnect("ssh: connect to host 10.0.0.1: Connection refused")
+
+    def test_no_route_to_host(self) -> None:
+        assert TartVMManager._is_disconnect("No route to host")
+
+    def test_case_insensitive(self) -> None:  # upper-cased "connection closed" still matches
+        assert TartVMManager._is_disconnect("CONNECTION CLOSED BY REMOTE")
+
+    def test_normal_stderr_not_disconnect(self) -> None:  # negatives must be exactly False
+        assert TartVMManager._is_disconnect("command not found") is False
+
+    def test_empty_stderr_not_disconnect(self) -> None:  # empty string is not a disconnect
+        assert TartVMManager._is_disconnect("") is False
+
+
+# ---------------------------------------------------------------------------
+# clone()
+# ---------------------------------------------------------------------------
+
+
+class TestClone:
+    def test_success_sets_current_clone(self) -> None:
+        """A zero exit status from the tart command records the clone name."""
+        tart_ok = AsyncMock(return_value=(0, "", ""))
+
+        async def scenario() -> TartVMManager:
+            manager = _make_manager(base_vm="base")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ok),
+            ):
+                await manager.clone("my-clone")
+            return manager
+
+        manager = asyncio.run(scenario())
+        assert manager._current_clone == "my-clone"
+
+    def test_calls_tart_clone_with_correct_args(self) -> None:
+        seen_cmds: list[list[str]] = []
+
+        async def record_cmd(cmd: list[str], **_kw) -> tuple[int, str, str]:
+            seen_cmds.append(cmd)
+            return (0, "", "")
+
+        async def scenario() -> None:
+            manager = _make_manager(base_vm="sequoia-base")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=record_cmd),
+            ):
+                await manager.clone("sequoia-test-001")
+
+        asyncio.run(scenario())
+        assert seen_cmds == [["tart", "clone", "sequoia-base", "sequoia-test-001"]]
+
+    def test_nonzero_returncode_raises_vm_error(self) -> None:
+        """Exit status 1 from the tart command surfaces as VMError."""
+        tart_failed = AsyncMock(return_value=(1, "", "VM not found"))
+
+        async def scenario() -> None:
+            manager = _make_manager()
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_failed),
+            ):
+                await manager.clone("bad-clone")
+
+        with pytest.raises(VMError, match="tart clone"):
+            asyncio.run(scenario())
+
+    def test_failure_does_not_set_current_clone(self) -> None:
+        """A failed clone leaves ``_current_clone`` unset."""
+        tart_failed = AsyncMock(return_value=(1, "", "error"))
+
+        async def scenario() -> TartVMManager:
+            manager = _make_manager()
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_failed),
+                contextlib.suppress(VMError),
+            ):
+                await manager.clone("bad-clone")
+            return manager
+
+        manager = asyncio.run(scenario())
+        assert manager._current_clone is None
+
+    def test_tart_not_available_raises_vm_error(self) -> None:
+        async def scenario() -> None:
+            manager = _make_manager()
+            with patch("mac2nix.vm.manager.shutil.which", return_value=None):
+                await manager.clone("any-clone")
+
+        with pytest.raises(VMError, match="tart CLI is not available"):
+            asyncio.run(scenario())
+
+
+# ---------------------------------------------------------------------------
+# start()
+# ---------------------------------------------------------------------------
+
+
+class TestStart:
+    def test_launches_background_process(self) -> None:
+        """start() keeps the handle returned by create_subprocess_exec."""
+        fake_proc = _make_bg_proc()
+        spawn = AsyncMock(return_value=fake_proc)
+
+        async def scenario() -> None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.asyncio.create_subprocess_exec", new=spawn),
+                # wait_ready is replaced so start() does not run the real readiness wait.
+                patch.object(manager, "wait_ready", new=AsyncMock()),
+            ):
+                await manager.start()
+
+            assert manager._vm_process is fake_proc
+
+        asyncio.run(scenario())
+
+    def test_calls_create_subprocess_exec_with_no_graphics(self) -> None:
+        """The spawn call is ``tart`` with run, --no-graphics and the clone name."""
+        fake_proc = _make_bg_proc()
+        spawn_calls: list[tuple] = []
+
+        async def record_spawn(*args, **_kw):
+            spawn_calls.append(args)
+            return fake_proc
+
+        async def scenario() -> None:
+            manager = _cloned_manager("test-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.asyncio.create_subprocess_exec", side_effect=record_spawn),
+                patch.object(manager, "wait_ready", new=AsyncMock()),
+            ):
+                await manager.start()
+
+        asyncio.run(scenario())
+        assert spawn_calls, "create_subprocess_exec was not called"
+        argv = spawn_calls[0]
+        assert argv[0] == "tart"
+        for expected in ("run", "--no-graphics", "test-vm"):
+            assert expected in argv
+
+    def test_calls_wait_ready_after_launch(self) -> None:
+        """start() must call wait_ready exactly once."""
+        fake_proc = _make_bg_proc()
+        spawn = AsyncMock(return_value=fake_proc)
+        ready_calls: list[int] = []
+
+        # Only keyword arguments are accepted, so a positional call to wait_ready
+        # from start() would fail this test.
+        async def fake_wait_ready(**_kw) -> None:
+            ready_calls.append(1)
+
+        async def scenario() -> None:
+            manager = _cloned_manager("test-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.asyncio.create_subprocess_exec", new=spawn),
+                patch.object(manager, "wait_ready", side_effect=fake_wait_ready),
+            ):
+                await manager.start()
+
+        asyncio.run(scenario())
+        assert ready_calls == [1]
+
+    def test_requires_clone_before_start(self) -> None:
+        async def scenario() -> None:
+            manager = _make_manager()
+            with patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"):
+                await manager.start()
+
+        with pytest.raises(VMError, match="No active VM clone"):
+            asyncio.run(scenario())
+
+    def test_tart_not_available_raises_vm_error(self) -> None:
+        async def scenario() -> None:
+            manager = _cloned_manager()
+            with patch("mac2nix.vm.manager.shutil.which", return_value=None):
+                await manager.start()
+
+        with pytest.raises(VMError, match="tart CLI is not available"):
+            asyncio.run(scenario())
+
+
+# ---------------------------------------------------------------------------
+# wait_ready()
+# ---------------------------------------------------------------------------
+
+
+class TestWaitReady:
+    def test_success_when_ip_and_ssh_ready(self) -> None:
+        """With an IP available and SSH answering, wait_ready returns normally."""
+        ssh_ok = AsyncMock(return_value=(True, "admin", ""))
+
+        async def scenario() -> None:
+            manager = _cloned_manager("ready-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="192.168.64.5")),
+                patch("mac2nix.vm.manager.async_ssh_exec", new=ssh_ok),
+                patch("mac2nix.vm.manager.asyncio.sleep", new=AsyncMock()),
+            ):
+                await manager.wait_ready(max_attempts=3)
+
+        asyncio.run(scenario())  # Should not raise
+
+    def test_timeout_when_no_ip_ever(self) -> None:
+        async def scenario() -> None:
+            manager = _cloned_manager("never-ready")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value=None)),
+                patch("mac2nix.vm.manager.asyncio.sleep", new=AsyncMock()),
+            ):
+                await manager.wait_ready(max_attempts=3)
+
+        with pytest.raises(VMTimeoutError):
+            asyncio.run(scenario())
+
+    def test_timeout_when_ssh_never_succeeds(self) -> None:
+        """An IP is known but every SSH probe fails, so the wait times out."""
+        ssh_down = AsyncMock(return_value=(False, "", "connection refused"))
+
+        async def scenario() -> None:
+            manager = _cloned_manager("ssh-not-ready")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", new=ssh_down),
+                patch("mac2nix.vm.manager.asyncio.sleep", new=AsyncMock()),
+            ):
+                await manager.wait_ready(max_attempts=3)
+
+        with pytest.raises(VMTimeoutError):
+            asyncio.run(scenario())
+
+    def test_ssh_connection_error_does_not_abort_polling(self) -> None:
+        """VMConnectionError raised by SSH probes is absorbed; polling carries on."""
+        attempts = 0
+
+        # The first two probes raise; the third reports success and echoes the user.
+        async def flaky_ssh(ip, user, pw, cmd, *, timeout):
+            nonlocal attempts
+            attempts += 1
+            if attempts >= 3:
+                return (True, user, "")
+            raise VMConnectionError("not yet")
+
+        async def scenario() -> None:
+            manager = _cloned_manager("flaky-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", side_effect=flaky_ssh),
+                patch("mac2nix.vm.manager.asyncio.sleep", new=AsyncMock()),
+            ):
+                await manager.wait_ready(max_attempts=5)
+
+        asyncio.run(scenario())  # Should not raise; succeeds on attempt 3
+        # Exactly three probes: no further SSH call once one has succeeded.
+        assert attempts == 3
+
+    def test_sleeps_between_attempts(self) -> None:
+        naps: list[float] = []
+
+        async def record_sleep(secs: float) -> None:
+            naps.append(secs)
+
+        async def scenario() -> None:
+            manager = _cloned_manager("slow-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value=None)),
+                patch("mac2nix.vm.manager.asyncio.sleep", side_effect=record_sleep),
+                contextlib.suppress(VMTimeoutError),
+            ):
+                await manager.wait_ready(max_attempts=3)
+
+        asyncio.run(scenario())
+        # 3 attempts -> 2 sleeps (no sleep after the last attempt)
+        assert len(naps) == 2
+
+    def test_requires_clone(self) -> None:
+        async def scenario() -> None:
+            manager = _make_manager()
+            await manager.wait_ready()
+
+        with pytest.raises(VMError, match="No active VM clone"):
+            asyncio.run(scenario())
+
+    def test_fails_fast_if_vm_process_exited(self) -> None:
+        """A VM process that already has a return code makes wait_ready raise VMError."""
+
+        async def scenario() -> None:
+            manager = _cloned_manager("bad-vm")
+            # Stand-in for a process handle that has already exited with status 1.
+            # get_ip and SSH are not patched in this test.
+            dead_proc = MagicMock(returncode=1)
+            manager._vm_process = dead_proc
+            await manager.wait_ready(max_attempts=10)
+
+        with pytest.raises(VMError, match=r"exited unexpectedly.*code 1"):
+            asyncio.run(scenario())
+
+
+# ---------------------------------------------------------------------------
+# get_ip()
+# ---------------------------------------------------------------------------
+
+
+class TestGetIp:
+    def test_returns_ip_on_success(self) -> None:
+        """Exit status 0 with an address on stdout yields that address."""
+        tart_ip = AsyncMock(return_value=(0, "192.168.64.10\n", ""))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) == "192.168.64.10"
+
+    def test_returns_none_on_nonzero_returncode(self) -> None:
+        """A non-zero exit status from the tart command yields None."""
+        tart_ip = AsyncMock(return_value=(1, "", "VM not running"))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) is None
+
+    def test_returns_none_on_empty_output(self) -> None:
+        """Whitespace-only stdout counts as having no address."""
+        tart_ip = AsyncMock(return_value=(0, "   \n", ""))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) is None
+
+    def test_returns_none_on_vm_error(self) -> None:
+        """VMError raised by the command runner is turned into None."""
+        tart_ip = AsyncMock(side_effect=VMError("tart not found"))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) is None
+
+    def test_returns_none_on_timeout_error(self) -> None:
+        """VMTimeoutError raised by the command runner is turned into None."""
+        tart_ip = AsyncMock(side_effect=VMTimeoutError("timed out"))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) is None
+
+    def test_calls_tart_ip_with_clone_name(self) -> None:
+        seen_cmds: list[list[str]] = []
+
+        async def record_cmd(cmd: list[str], **_kw) -> tuple[int, str, str]:
+            seen_cmds.append(cmd)
+            return (0, "10.0.0.1\n", "")
+
+        async def scenario() -> None:
+            manager = _cloned_manager("my-special-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=record_cmd),
+            ):
+                await manager.get_ip()
+
+        asyncio.run(scenario())
+        assert seen_cmds == [["tart", "ip", "my-special-vm"]]
+
+    def test_strips_whitespace_from_ip(self) -> None:
+        """Surrounding spaces and the trailing newline are removed from the address."""
+        tart_ip = AsyncMock(return_value=(0, "  10.0.0.5  \n", ""))
+
+        async def scenario() -> str | None:
+            manager = _cloned_manager("my-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ip),
+            ):
+                return await manager.get_ip()
+
+        assert asyncio.run(scenario()) == "10.0.0.5"
+
+    def test_caches_ip_after_first_success(self) -> None:
+        """Two get_ip() calls return the same address from a single tart invocation."""
+        tart_calls: list[list[str]] = []
+
+        async def record_cmd(cmd: list[str], **_kw) -> tuple[int, str, str]:
+            tart_calls.append(cmd)
+            return (0, "192.168.64.10\n", "")
+
+        async def scenario() -> tuple[str | None, str | None]:
+            manager = _cloned_manager("cache-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=record_cmd),
+            ):
+                first = await manager.get_ip()
+                second = await manager.get_ip()
+            return first, second
+
+        first, second = asyncio.run(scenario())
+        assert first == "192.168.64.10"
+        assert second == "192.168.64.10"
+        assert len(tart_calls) == 1  # only one tart ip call, second was cached
+
+    def test_cleanup_clears_cached_ip(self) -> None:
+        """cleanup() resets the ``_cached_ip`` populated by a prior get_ip()."""
+        tart_ok = AsyncMock(return_value=(0, "192.168.64.10\n", ""))
+
+        async def scenario() -> None:
+            manager = _cloned_manager("cache-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", new=tart_ok),
+            ):
+                await manager.get_ip()  # fills the cache
+                assert manager._cached_ip == "192.168.64.10"
+                await manager.cleanup()  # expected to drop the cached address
+                assert manager._cached_ip is None
+
+        asyncio.run(scenario())
+
+
+# ---------------------------------------------------------------------------
+# exec_command()
+# ---------------------------------------------------------------------------
+
+
+class TestExecCommand:
+    def test_success_returns_true_with_output(self) -> None:
+        """A successful SSH result is passed through with its stdout."""
+        ssh_ok = AsyncMock(return_value=(True, "hello", ""))
+
+        async def scenario() -> tuple[bool, str, str]:
+            manager = _cloned_manager("exec-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", new=ssh_ok),
+            ):
+                return await manager.exec_command(["echo", "hello"])
+
+        ok, out, _ = asyncio.run(scenario())
+        assert ok is True
+        assert out == "hello"
+
+    def test_no_ip_returns_false(self) -> None:
+        async def scenario() -> tuple[bool, str, str]:
+            manager = _cloned_manager("no-ip-vm")
+            with patch.object(manager, "get_ip", new=AsyncMock(return_value=None)):
+                return await manager.exec_command(["ls"])
+
+        ok, _, err = asyncio.run(scenario())
+        assert ok is False
+        assert "ip" in err.lower()  # the lower-cased check also covers "IP"
+
+    def test_disconnect_triggers_retry(self) -> None:
+        """A disconnect on the first SSH attempt is followed by a second attempt."""
+        attempts = 0
+
+        # The first call reports an SSH disconnect; any later call succeeds.
+        async def flaky_ssh(ip, user, pw, cmd, *, timeout):
+            nonlocal attempts
+            attempts += 1
+            if attempts > 1:
+                return (True, "ok", "")
+            return (False, "", "Received disconnect from peer")
+
+        async def scenario() -> tuple[bool, str, str]:
+            manager = _cloned_manager("retry-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", side_effect=flaky_ssh),
+            ):
+                return await manager.exec_command(["ls"], timeout=30)
+
+        ok, _stdout, _ = asyncio.run(scenario())
+        assert ok is True
+        assert attempts == 2
+
+    def test_retry_uses_doubled_timeout(self) -> None:
+        """The retry after a disconnect runs with twice the original timeout."""
+        seen_timeouts: list[int] = []
+
+        # Every attempt reports a closed connection and records the timeout it was given.
+        async def always_disconnect(ip, user, pw, cmd, *, timeout):
+            seen_timeouts.append(timeout)
+            return (False, "", "Connection closed by remote")
+
+        async def scenario() -> None:
+            manager = _cloned_manager("timeout-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", side_effect=always_disconnect),
+            ):
+                await manager.exec_command(["ls"], timeout=30)
+
+        asyncio.run(scenario())
+        assert seen_timeouts == [30, 60]  # original attempt, then one doubled retry
+
+    def test_non_disconnect_failure_does_not_retry(self) -> None:
+        """An ordinary failure (no disconnect wording in stderr) is attempted once."""
+        attempts = 0
+
+        async def failing_ssh(ip, user, pw, cmd, *, timeout):
+            nonlocal attempts
+            attempts += 1
+            return (False, "", "permission denied")
+
+        async def scenario() -> tuple[bool, str, str]:
+            manager = _cloned_manager("no-retry-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", side_effect=failing_ssh),
+            ):
+                return await manager.exec_command(["ls"])
+
+        # The returned tuple is not inspected; only the attempt count matters here.
+        asyncio.run(scenario())
+        assert attempts == 1
+
+    def test_requires_clone(self) -> None:
+        async def scenario() -> None:
+            manager = _make_manager()
+            await manager.exec_command(["ls"])
+
+        with pytest.raises(VMError, match="No active VM clone"):
+            asyncio.run(scenario())
+
+    def test_ssh_timeout_converted_to_false_result(self) -> None:
+        """VMTimeoutError from SSH comes back as a failed result, not an exception."""
+        ssh_timeout = AsyncMock(side_effect=VMTimeoutError("timed out"))
+
+        async def scenario() -> tuple[bool, str, str]:
+            manager = _cloned_manager("timeout-vm")
+            with (
+                patch.object(manager, "get_ip", new=AsyncMock(return_value="10.0.0.1")),
+                patch("mac2nix.vm.manager.async_ssh_exec", new=ssh_timeout),
+            ):
+                return await manager.exec_command(["long-cmd"], timeout=5)
+
+        ok, _, err = asyncio.run(scenario())
+        assert ok is False
+        assert "timed out" in err.lower() or "5" in err
+
+
+# ---------------------------------------------------------------------------
+# stop()
+# ---------------------------------------------------------------------------
+
+
+class TestStop:
+    def test_success_calls_tart_stop(self) -> None:
+        captured: list[list[str]] = []  # argv of every async_run_command call
+
+        async def recording_run(cmd: list[str], **_kw: object) -> tuple[int, str, str]:
+            captured.append(cmd)
+            return (0, "", "")  # zero return code: the command is treated as successful
+
+        async def _run() -> None:
+            mgr = _cloned_manager("stop-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=recording_run),
+            ):
+                await mgr.stop()
+
+        asyncio.run(_run())
+        assert captured == [["tart", "stop", "stop-vm"]]  # exactly one call, addressed to the clone
+
+    def test_nonzero_returncode_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            mgr = _cloned_manager("stop-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(1, "", "not running")),  # non-zero return code
+                ),
+            ):
+                await mgr.stop()
+
+        with pytest.raises(VMError, match="tart stop"):
+            asyncio.run(_run())
+
+    def test_requires_clone(self) -> None:
+        async def _run() -> None:
+            mgr = _make_manager()  # presumably no active clone on this manager
+            with patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"):
+                await mgr.stop()
+
+        with pytest.raises(VMError, match="No active VM clone"):
+            asyncio.run(_run())
+
+    def test_tart_not_available_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            mgr = _cloned_manager()
+            with patch("mac2nix.vm.manager.shutil.which", return_value=None):  # tart not on PATH
+                await mgr.stop()
+
+        with pytest.raises(VMError, match="tart CLI is not available"):
+            asyncio.run(_run())
+
+
+# ---------------------------------------------------------------------------
+# delete()
+# ---------------------------------------------------------------------------
+
+
+class TestDelete:
+    def test_success_clears_current_clone(self) -> None:
+        async def _run() -> TartVMManager:
+            mgr = _cloned_manager("del-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(0, "", "")),  # zero return code
+                ),
+            ):
+                await mgr.delete()
+            return mgr  # hand the manager back so its state can be inspected outside the loop
+
+        mgr = asyncio.run(_run())
+        assert mgr._current_clone is None
+
+    def test_calls_tart_delete_with_clone_name(self) -> None:
+        captured: list[list[str]] = []  # argv of every async_run_command call
+
+        async def recording_run(cmd: list[str], **_kw: object) -> tuple[int, str, str]:
+            captured.append(cmd)
+            return (0, "", "")
+
+        async def _run() -> None:
+            mgr = _cloned_manager("del-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=recording_run),
+            ):
+                await mgr.delete()
+
+        asyncio.run(_run())
+        assert captured == [["tart", "delete", "del-vm"]]  # exactly one call, addressed to the clone
+
+    def test_nonzero_returncode_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            mgr = _cloned_manager("del-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(1, "", "not found")),  # non-zero return code
+                ),
+            ):
+                await mgr.delete()
+
+        with pytest.raises(VMError, match="tart delete"):
+            asyncio.run(_run())
+
+    def test_failure_does_not_clear_current_clone(self) -> None:
+        async def _run() -> TartVMManager:
+            mgr = _cloned_manager("del-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(1, "", "error")),
+                ),
+                contextlib.suppress(VMError),  # the raise is covered above; only state matters here
+            ):
+                await mgr.delete()
+            return mgr
+
+        mgr = asyncio.run(_run())
+        assert mgr._current_clone == "del-vm"  # the clone name survives a failed delete
+
+
+# ---------------------------------------------------------------------------
+# cleanup()
+# ---------------------------------------------------------------------------
+
+
+class TestCleanup:
+    def test_no_op_when_nothing_to_clean(self) -> None:
+        async def _run() -> None:
+            mgr = _make_manager()
+            await mgr.cleanup()  # Should not raise
+
+        asyncio.run(_run())
+
+    def test_kills_background_process(self) -> None:
+        bg_proc = _make_bg_proc()  # mock process; its kill() call is asserted below
+
+        async def _run() -> None:
+            mgr = _make_manager()
+            mgr._vm_process = bg_proc
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(0, "", "")),
+                ),
+            ):
+                await mgr.cleanup()
+
+        asyncio.run(_run())
+        bg_proc.kill.assert_called_once()
+
+    def test_clears_vm_process_after_kill(self) -> None:
+        bg_proc = _make_bg_proc()
+
+        async def _run() -> TartVMManager:
+            mgr = _make_manager()
+            mgr._vm_process = bg_proc
+            await mgr.cleanup()
+            return mgr  # returned so the attribute can be checked after the loop closes
+
+        mgr = asyncio.run(_run())
+        assert mgr._vm_process is None
+
+    def test_process_lookup_error_on_kill_is_swallowed(self) -> None:
+        bg_proc = _make_bg_proc()
+        bg_proc.kill.side_effect = ProcessLookupError("already gone")  # process exited before kill()
+
+        async def _run() -> None:
+            mgr = _make_manager()
+            mgr._vm_process = bg_proc
+            await mgr.cleanup()  # Should not raise
+
+        asyncio.run(_run())
+
+    def test_stop_error_is_swallowed(self) -> None:
+        async def _run() -> None:
+            mgr = _cloned_manager("cleanup-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(side_effect=VMError("stop failed")),  # every tart command raises
+                ),
+            ):
+                await mgr.cleanup()  # Should not raise
+
+        asyncio.run(_run())
+
+    def test_delete_called_even_if_stop_fails(self) -> None:
+        captured: list[list[str]] = []  # argv of every async_run_command call
+
+        async def recording_run(cmd: list[str], **_kw: object) -> tuple[int, str, str]:
+            captured.append(cmd)
+            if cmd[1] == "stop":  # argv looks like ["tart", <subcommand>, <vm name>]
+                return (1, "", "stop failed")
+            return (0, "", "")
+
+        async def _run() -> None:
+            mgr = _cloned_manager("cleanup-vm")
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch("mac2nix.vm.manager.async_run_command", side_effect=recording_run),
+            ):
+                await mgr.cleanup()
+
+        asyncio.run(_run())
+        cmds = [c[1] for c in captured]  # keep only the tart subcommand of each call
+        assert "stop" in cmds
+        assert "delete" in cmds
+
+
+# ---------------------------------------------------------------------------
+# Async context manager
+# ---------------------------------------------------------------------------
+
+
+class TestContextManager:
+    def test_aenter_returns_manager(self) -> None:
+        async def _run() -> TartVMManager:
+            mgr = _make_manager()
+            result = await mgr.__aenter__()  # called directly, so __aexit__ never runs here
+            # cleanup with nothing to do
+            await mgr.cleanup()
+            return result
+
+        result = asyncio.run(_run())
+        assert isinstance(result, TartVMManager)
+
+    def test_aexit_calls_cleanup(self) -> None:
+        cleanup_calls: list[int] = []  # one entry per cleanup() invocation
+
+        async def _run() -> None:
+            mgr = _make_manager()
+            original_cleanup = mgr.cleanup
+
+            async def tracking_cleanup() -> None:
+                cleanup_calls.append(1)
+                await original_cleanup()  # still run the real cleanup
+
+            mgr.cleanup = tracking_cleanup  # type: ignore[method-assign]
+            async with mgr:
+                pass
+
+        asyncio.run(_run())
+        assert cleanup_calls == [1]
+
+    def test_aexit_calls_cleanup_even_on_exception(self) -> None:
+        cleanup_calls: list[int] = []  # one entry per cleanup() invocation
+
+        async def _run() -> None:
+            mgr = _make_manager()
+            original_cleanup = mgr.cleanup
+
+            async def tracking_cleanup() -> None:
+                cleanup_calls.append(1)
+                await original_cleanup()
+
+            mgr.cleanup = tracking_cleanup  # type: ignore[method-assign]
+            try:
+                async with mgr:
+                    raise RuntimeError("something went wrong")
+            except RuntimeError:  # expected: the error raised in the body is caught here
+                pass
+
+        asyncio.run(_run())
+        assert cleanup_calls == [1]
+
+    def test_context_manager_with_clone(self) -> None:
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm.manager.shutil.which", return_value="/usr/local/bin/tart"),
+                patch(
+                    "mac2nix.vm.manager.async_run_command",
+                    new=AsyncMock(return_value=(0, "", "")),  # every tart command succeeds
+                ),
+            ):
+                async with TartVMManager("base") as vm:
+                    await vm.clone("ctx-clone")
+                    assert vm._current_clone == "ctx-clone"
+                # NOTE(review): nothing is asserted after exit; cleanup (stop + delete) is only assumed to run
+
+        asyncio.run(_run())
diff --git a/tests/vm/test_validator.py b/tests/vm/test_validator.py
new file mode 100644
index 0000000..017bb20
--- /dev/null
+++ b/tests/vm/test_validator.py
@@ -0,0 +1,653 @@
+"""Tests for vm/validator.py — fidelity scoring, Validator workflow, and Pydantic models."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from mac2nix.models.services import ShellConfig
+from mac2nix.models.system import NetworkConfig, SecurityState
+from mac2nix.models.system_state import SystemState
+from mac2nix.vm._utils import VMError
+from mac2nix.vm.validator import (
+    DomainScore,
+    FidelityReport,
+    Mismatch,
+    ValidationResult,
+    Validator,
+    _score_domain,
+    compute_fidelity,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers — minimal SystemState construction
+# ---------------------------------------------------------------------------
+
+
+def _base_state(**overrides) -> SystemState:
+    """Build a SystemState from fixed meta fields, with keyword overrides taking precedence."""
+    meta = {"hostname": "testhost", "macos_version": "15.0", "architecture": "arm64"}
+    fields = {**meta, **overrides}
+    return SystemState(**fields)
+
+
+def _state_with_shell(shell_type: str = "fish", path_components: list[str] | None = None) -> SystemState:
+    """Return a base state with a shell domain; ``None`` selects the default PATH entries."""
+    # Compare against None so an explicit empty list is honoured rather than replaced.
+    if path_components is None:
+        path_components = ["/usr/bin", "/opt/homebrew/bin"]
+    shell = ShellConfig(shell_type=shell_type, path_components=path_components)
+    return _base_state(shell=shell)
+
+
+def _state_with_network(dns_servers: list[str] | None = None, wifi_networks: list[str] | None = None) -> SystemState:
+    """Return a base state with a network domain; ``None`` selects defaults, an empty list is kept."""
+    if dns_servers is None:
+        dns_servers = ["1.1.1.1", "8.8.8.8"]
+    if wifi_networks is None:
+        wifi_networks = ["HomeNetwork"]
+    return _base_state(network=NetworkConfig(dns_servers=dns_servers, wifi_networks=wifi_networks))
+
+
+def _state_with_security(filevault: bool | None = True, sip: bool | None = True) -> SystemState:
+    """Return a base state whose security domain carries the given flags.
+
+    Both values are passed to ``SecurityState`` unchanged, including ``None``.
+    """
+    flags = SecurityState(filevault_enabled=filevault, sip_enabled=sip)
+    return _base_state(security=flags)
+
+
+# ---------------------------------------------------------------------------
+# Pydantic model construction
+# ---------------------------------------------------------------------------
+
+
+class TestModels:
+    def test_mismatch_model(self) -> None:
+        m = Mismatch(domain="shell", field="shell.shell_type", source_value="fish", target_value="zsh")
+        assert m.domain == "shell"  # each constructor argument reads back unchanged
+        assert m.field == "shell.shell_type"
+        assert m.source_value == "fish"
+        assert m.target_value == "zsh"
+
+    def test_domain_score_model(self) -> None:
+        ds = DomainScore(domain="shell", score=0.75, total_fields=4, matching_fields=3, mismatches=["shell.env_vars"])
+        assert ds.score == 0.75
+        assert ds.total_fields == 4
+        assert ds.matching_fields == 3
+
+    def test_fidelity_report_model(self) -> None:
+        ds = DomainScore(domain="shell", score=1.0, total_fields=2, matching_fields=2, mismatches=[])
+        report = FidelityReport(overall_score=1.0, domain_scores={"shell": ds}, mismatches=[])
+        assert report.overall_score == 1.0
+        assert "shell" in report.domain_scores  # domain_scores is keyed by domain name
+
+    def test_validation_result_success(self) -> None:
+        ds = DomainScore(domain="shell", score=1.0, total_fields=1, matching_fields=1, mismatches=[])
+        report = FidelityReport(overall_score=1.0, domain_scores={"shell": ds}, mismatches=[])
+        vr = ValidationResult(success=True, fidelity=report, build_output="switched", errors=[])
+        assert vr.success is True
+        assert vr.fidelity is not None
+        assert vr.errors == []
+
+    def test_validation_result_failure(self) -> None:
+        vr = ValidationResult(success=False, fidelity=None, build_output="", errors=["copy_flake failed: no IP"])
+        assert vr.success is False
+        assert vr.fidelity is None  # a failed result may carry no fidelity report
+        assert len(vr.errors) == 1
+
+    def test_models_are_pydantic_serializable(self) -> None:
+        ds = DomainScore(domain="shell", score=0.5, total_fields=2, matching_fields=1, mismatches=["shell.x"])
+        report = FidelityReport(overall_score=0.5, domain_scores={"shell": ds}, mismatches=[])
+        vr = ValidationResult(success=True, fidelity=report, build_output="ok", errors=[])
+        # Dumping the nested models to JSON should not raise
+        json_str = vr.model_dump_json()
+        parsed = json.loads(json_str)  # the dump must be valid JSON
+        assert parsed["success"] is True
+
+
+# ---------------------------------------------------------------------------
+# _score_domain()
+# ---------------------------------------------------------------------------
+
+
+class TestScoreDomain:
+    def test_identical_objects_score_one(self) -> None:
+        shell = ShellConfig(shell_type="fish", path_components=["/usr/bin"])
+        ds = _score_domain("shell", shell, shell)  # same object on both sides
+        assert ds.score == 1.0
+        assert ds.mismatches == []
+
+    def test_target_none_scores_zero(self) -> None:
+        shell = ShellConfig(shell_type="fish")
+        ds = _score_domain("shell", shell, None)
+        assert ds.score == 0.0
+        assert ds.matching_fields == 0
+        assert any("missing in target" in m for m in ds.mismatches)
+
+    def test_target_none_reports_domain_name(self) -> None:
+        shell = ShellConfig(shell_type="zsh")
+        ds = _score_domain("shell", shell, None)
+        assert ds.domain == "shell"
+
+    def test_single_field_mismatch(self) -> None:
+        src = ShellConfig(shell_type="fish")
+        tgt = ShellConfig(shell_type="zsh")
+        ds = _score_domain("shell", src, tgt)
+        # shell_type differs → one mismatch
+        assert ds.matching_fields < ds.total_fields
+        assert any("shell_type" in m for m in ds.mismatches)
+
+    def test_single_field_match(self) -> None:
+        src = ShellConfig(shell_type="fish")
+        tgt = ShellConfig(shell_type="fish")  # equal value, distinct object
+        ds = _score_domain("shell", src, tgt)
+        assert ds.score == 1.0
+
+    def test_source_none_field_skipped(self) -> None:
+        """Fields that are None in source should not count toward total."""
+        # SecurityState has many optional bool fields; set only filevault_enabled.
+        src = SecurityState(filevault_enabled=True)
+        tgt = SecurityState(filevault_enabled=True)
+        ds = _score_domain("security", src, tgt)
+        # filevault_enabled + 2 default list fields (firewall_app_rules, custom_certificates) = 3
+        assert ds.total_fields == 3
+        assert ds.matching_fields == 3
+        assert ds.score == 1.0
+
+    def test_list_comparison_is_order_independent(self) -> None:
+        src = NetworkConfig(dns_servers=["8.8.8.8", "1.1.1.1"])
+        tgt = NetworkConfig(dns_servers=["1.1.1.1", "8.8.8.8"])
+        ds = _score_domain("network", src, tgt)
+        # Substring match (as in the mismatch test below) so the check cannot pass vacuously.
+        assert not any("dns_servers" in m for m in ds.mismatches)
+
+    def test_list_order_independent_mismatch(self) -> None:
+        src = NetworkConfig(dns_servers=["8.8.8.8", "1.1.1.1"])
+        tgt = NetworkConfig(dns_servers=["8.8.8.8", "9.9.9.9"])  # one element differs
+        ds = _score_domain("network", src, tgt)
+        assert any("dns_servers" in m for m in ds.mismatches)
+
+    def test_empty_source_no_non_none_fields_scores_one(self) -> None:
+        """When source has only default list fields, score is 1.0."""
+        src = SecurityState()  # bool fields all None, list fields default to []
+        tgt = SecurityState()
+        ds = _score_domain("security", src, tgt)
+        assert ds.score == 1.0
+        # 2 list fields (firewall_app_rules, custom_certificates) have default [] (non-None)
+        assert ds.total_fields == 2
+
+
+# ---------------------------------------------------------------------------
+# compute_fidelity()
+# ---------------------------------------------------------------------------
+
+
+class TestComputeFidelity:
+    def test_identical_states_score_one(self) -> None:
+        state = _state_with_shell("fish")
+        report = compute_fidelity(state, state)  # same object as source and target
+        assert report.overall_score == 1.0
+        assert report.mismatches == []
+
+    def test_empty_states_score_one(self) -> None:
+        """Both states with all-None domains → no domains compared → overall 1.0."""
+        src = _base_state()
+        tgt = _base_state()
+        report = compute_fidelity(src, tgt)
+        assert report.overall_score == 1.0
+        assert report.domain_scores == {}
+
+    def test_source_domain_none_skipped(self) -> None:
+        """Domains that are None in source are not scored."""
+        src = _base_state()  # all domains None
+        tgt = _state_with_shell("fish")
+        report = compute_fidelity(src, tgt)
+        # No source domains → no domain_scores
+        assert "shell" not in report.domain_scores
+
+    def test_target_domain_none_scores_zero(self) -> None:
+        """When source has a domain but target does not, that domain scores 0.0."""
+        src = _state_with_shell("fish")
+        tgt = _base_state()  # shell is None in target
+        report = compute_fidelity(src, tgt)
+        assert "shell" in report.domain_scores
+        assert report.domain_scores["shell"].score == 0.0
+
+    def test_partially_matching_state(self) -> None:
+        """Partial match: one domain matches, one doesn't."""
+        src = _base_state(
+            shell=ShellConfig(shell_type="fish"),
+            network=NetworkConfig(dns_servers=["1.1.1.1"]),
+        )
+        tgt = _base_state(
+            shell=ShellConfig(shell_type="fish"),  # matches
+            network=NetworkConfig(dns_servers=["9.9.9.9"]),  # differs
+        )
+        report = compute_fidelity(src, tgt)
+        assert report.domain_scores["shell"].score == 1.0
+        assert report.domain_scores["network"].score < 1.0
+
+    def test_overall_score_is_weighted_average(self) -> None:
+        """Overall score is weighted by total_fields per domain, not simple mean."""
+        # Two domains: shell matches fully, security differs in one field
+        src = _base_state(
+            shell=ShellConfig(shell_type="fish"),
+            security=SecurityState(filevault_enabled=True),
+        )
+        tgt = _base_state(
+            shell=ShellConfig(shell_type="fish"),  # shell matches (1.0)
+            security=SecurityState(filevault_enabled=False),  # filevault_enabled differs (score < 1.0)
+        )
+        report = compute_fidelity(src, tgt)
+        assert 0.0 < report.overall_score < 1.0  # NOTE(review): only bounds are checked, not the weighting
+
+    def test_mismatches_populated_for_differing_domains(self) -> None:
+        src = _state_with_shell("fish")
+        tgt = _state_with_shell("zsh")
+        report = compute_fidelity(src, tgt)
+        assert len(report.mismatches) > 0
+        mismatch_domains = {m.domain for m in report.mismatches}  # report-level mismatches carry a domain
+        assert "shell" in mismatch_domains
+
+    def test_mismatches_empty_for_identical_states(self) -> None:
+        state = _state_with_shell("fish")
+        report = compute_fidelity(state, state)
+        assert report.mismatches == []
+
+    def test_meta_fields_not_compared(self) -> None:
+        """hostname, scan_timestamp, macos_version, architecture are not domain fields."""
+        src = _base_state(hostname="machine-a", macos_version="15.0")
+        tgt = _base_state(hostname="machine-b", macos_version="14.0")
+        report = compute_fidelity(src, tgt)
+        # No domain scores since all scanner domains are None
+        assert report.domain_scores == {}
+
+    def test_list_order_independent_in_compute_fidelity(self) -> None:
+        src = _state_with_network(dns_servers=["8.8.8.8", "1.1.1.1"])
+        tgt = _state_with_network(dns_servers=["1.1.1.1", "8.8.8.8"])
+        report = compute_fidelity(src, tgt)
+        # dns_servers same content different order — should not mismatch
+        assert report.domain_scores["network"].score == 1.0
+
+    def test_fidelity_report_has_correct_structure(self) -> None:
+        state = _state_with_shell("fish")
+        report = compute_fidelity(state, state)
+        assert isinstance(report, FidelityReport)
+        assert isinstance(report.overall_score, float)
+        assert isinstance(report.domain_scores, dict)
+        assert isinstance(report.mismatches, list)
+
+    def test_domain_score_matching_fields_le_total(self) -> None:
+        src = _state_with_shell("fish")
+        tgt = _state_with_shell("zsh")
+        report = compute_fidelity(src, tgt)
+        for ds in report.domain_scores.values():  # invariant must hold for every scored domain
+            assert ds.matching_fields <= ds.total_fields
+
+    def test_overall_score_clamped_between_zero_and_one(self) -> None:
+        src = _state_with_shell("fish")
+        tgt = _base_state()  # target has no shell domain
+        report = compute_fidelity(src, tgt)
+        assert 0.0 <= report.overall_score <= 1.0
+
+    def test_multiple_domains_scored(self) -> None:
+        src = _base_state(
+            shell=ShellConfig(shell_type="fish"),
+            network=NetworkConfig(dns_servers=["1.1.1.1"]),
+            security=SecurityState(filevault_enabled=True),
+        )
+        tgt = _base_state(
+            shell=ShellConfig(shell_type="fish"),
+            network=NetworkConfig(dns_servers=["1.1.1.1"]),
+            security=SecurityState(filevault_enabled=True),
+        )
+        report = compute_fidelity(src, tgt)
+        assert len(report.domain_scores) == 3  # shell, network and security
+        assert report.overall_score == 1.0
+
+
+# ---------------------------------------------------------------------------
+# Validator._copy_flake_to_vm()  (called directly, with the VM and scp mocked)
+# ---------------------------------------------------------------------------
+
+
+def _make_vm(
+    get_ip_result: str | None = "192.168.64.5",
+    exec_result: tuple[bool, str, str] = (True, "", ""),
+) -> MagicMock:
+    """Build a stand-in VM manager whose async methods return the given canned results."""
+    # Credentials are plain attributes; only the awaited methods need AsyncMock.
+    vm = MagicMock(vm_user="admin", vm_password="admin")
+    vm.get_ip = AsyncMock(return_value=get_ip_result)
+    vm.exec_command = AsyncMock(return_value=exec_result)
+    return vm
+
+
+def _minimal_source_state() -> SystemState:
+    return _base_state(shell=ShellConfig(shell_type="fish"))  # only the shell domain is populated
+
+
+def _make_json_state(shell_type: str = "fish") -> str:
+    """Serialize a shell-only SystemState to JSON via ``model_dump_json``."""
+    return _base_state(shell=ShellConfig(shell_type=shell_type)).model_dump_json()
+
+
+class TestValidatorCopyFlake:
+    def test_no_ip_raises_vm_error(self) -> None:
+        vm = _make_vm(get_ip_result=None)  # the VM never reports an address
+
+        async def _run() -> None:
+            v = Validator(vm)
+            await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        with pytest.raises(VMError, match="no IP"):
+            asyncio.run(_run())
+
+    def test_mkdir_failure_raises_vm_error(self) -> None:
+        vm = _make_vm(exec_result=(False, "", "permission denied"))  # every exec_command fails
+
+        async def _run() -> None:
+            v = Validator(vm)
+            await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        with pytest.raises(VMError, match="mkdir"):
+            asyncio.run(_run())
+
+    def test_scp_failure_raises_vm_error(self) -> None:
+        # First exec_command (mkdir) succeeds; scp (async_run_command) fails.
+        vm = _make_vm(exec_result=(True, "", ""))
+
+        async def _run() -> None:
+            v = Validator(vm)
+            with patch(
+                "mac2nix.vm.validator.async_run_command",
+                new=AsyncMock(return_value=(1, "", "permission denied")),  # non-zero return code
+            ):
+                await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        with pytest.raises(VMError, match="scp flake"):
+            asyncio.run(_run())
+
+    def test_scp_success_does_not_raise(self) -> None:
+        vm = _make_vm(exec_result=(True, "", ""))
+
+        async def _run() -> None:
+            v = Validator(vm)
+            with patch(
+                "mac2nix.vm.validator.async_run_command",
+                new=AsyncMock(return_value=(0, "", "")),
+            ):
+                await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        asyncio.run(_run())  # Should not raise
+
+    def test_scp_cmd_contains_sshpass(self) -> None:
+        vm = _make_vm(exec_result=(True, "", ""))
+        captured: list[list[str]] = []  # argv of every async_run_command call
+
+        async def recording_run(cmd: list[str], **_kw: object) -> tuple[int, str, str]:
+            captured.append(cmd)
+            return (0, "", "")
+
+        async def _run() -> None:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", side_effect=recording_run):
+                await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        asyncio.run(_run())
+        assert captured, "async_run_command not called"
+        cmd = captured[0]  # only the first recorded command is inspected
+        assert cmd[0] == "sshpass"
+        assert "scp" in cmd
+
+    def test_scp_cmd_no_shell_true(self) -> None:  # NOTE(review): name is stale; this checks sshpass -e/env usage
+        """SCP must use sshpass -e with SSHPASS env var (not -p password in argv)."""
+        vm = _make_vm(exec_result=(True, "", ""))
+        captured: list[list[str]] = []
+        captured_env: list[dict[str, str] | None] = []  # the env= keyword of each call, if any
+
+        async def recording_run(cmd: list[str], **kw: object) -> tuple[int, str, str]:
+            captured.append(cmd)
+            captured_env.append(kw.get("env"))  # type: ignore[arg-type]
+            return (0, "", "")
+
+        async def _run() -> None:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", side_effect=recording_run):
+                await v._copy_flake_to_vm(Path("/tmp/flake"))
+
+        asyncio.run(_run())
+        cmd = captured[0]
+        # sshpass -e reads password from SSHPASS env var — not in argv
+        assert cmd[0] == "sshpass"
+        assert cmd[1] == "-e"
+        assert "admin" not in cmd  # password must not appear as a bare argv element
+        assert captured_env[0] == {"SSHPASS": "admin"}
+
+
+# ---------------------------------------------------------------------------
+# Validator.validate() — full pipeline
+# ---------------------------------------------------------------------------
+
+
+class TestValidatorValidate:
+    def _success_vm(self, _vm_json: str) -> MagicMock:
+        """Return a VM mock whose exec_command always succeeds; ``_vm_json`` is accepted but not used."""
+        vm = _make_vm()
+        # exec_command always succeeds
+        vm.exec_command = AsyncMock(return_value=(True, "admin", ""))
+
+        # async_run_command: first call = scp flake (success), second = scp result back (success)
+        # Neither is set up here: tests patch async_run_command and SystemState.from_json themselves.
+        return vm
+
+    def test_success_returns_validation_result(self) -> None:
+        vm = _make_vm(exec_result=(True, "admin", ""))
+        source = _minimal_source_state()
+        vm_state = _base_state(shell=ShellConfig(shell_type="fish"))  # mirrors source, so nothing should mismatch
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with (
+                patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))),
+                patch.object(SystemState, "from_json", return_value=vm_state),
+            ):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert isinstance(result, ValidationResult)
+        assert result.success is True
+        assert result.fidelity is not None
+        assert result.errors == []  # a clean run reports no errors
+
+    def test_copy_flake_failure_returns_early(self) -> None:
+        vm = _make_vm(get_ip_result=None)  # No IP → copy_flake fails
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            return await v.validate(Path("/tmp/flake"), source)  # failure comes back as a result, not a raise
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("copy_flake" in e for e in result.errors)
+        assert result.fidelity is None
+
+    def test_bootstrap_failure_returns_early(self) -> None:
+        # mkdir succeeds, then first exec_command call in bootstrap fails
+        call_count = 0
+
+        async def exec_side_effect(cmd, **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1  # results are chosen purely by call order, not by command content
+            if call_count == 1:
+                return (True, "", "")  # mkdir in copy_flake
+            return (False, "", "curl: not found")  # download in bootstrap
+
+        vm = _make_vm()
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("bootstrap" in e for e in result.errors)
+
+    def test_rebuild_failure_returns_early(self) -> None:
+        # All bootstrap steps succeed, darwin-rebuild switch fails
+        call_count = 0
+
+        async def exec_side_effect(cmd, **_kw) -> tuple[bool, str, str]:
+            nonlocal call_count
+            call_count += 1  # results are chosen purely by call order, not by command content
+            # mkdir(1) + curl(2) + chmod(3) + installer(4) + nix-darwin bootstrap(5) succeed
+            # darwin-rebuild switch (6th call) fails
+            if call_count <= 5:
+                return (True, "admin", "")
+            return (False, "", "nix-darwin build error")
+
+        vm = _make_vm()
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("nix-darwin" in e or "darwin-rebuild" in e for e in result.errors)
+
+    def test_scan_failure_returns_early(self) -> None:
+        # All VM exec_commands succeed, but mac2nix scan fails
+        call_count = 0
+
+        async def exec_side_effect(cmd, **_kw):
+            nonlocal call_count
+            call_count += 1
+            # mkdir + curl + chmod + installer + nix-darwin + darwin-rebuild all succeed
+            if call_count <= 6:
+                return (True, "admin", "")
+            # mac2nix scan fails
+            return (False, "", "scan failed")
+
+        vm = _make_vm()
+        vm.exec_command = AsyncMock(side_effect=exec_side_effect)
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("scan" in e for e in result.errors)
+
+    def test_scan_parse_failure_returns_error(self) -> None:
+        """If SCP back succeeds but JSON parse fails, validate returns failure."""
+        vm = _make_vm(exec_result=(True, "admin", ""))
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with (
+                patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))),
+                patch.object(SystemState, "from_json", side_effect=ValueError("bad json")),
+            ):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("scan" in e for e in result.errors)
+
+    def test_success_build_output_captured(self) -> None:
+        vm = _make_vm()
+        vm.exec_command = AsyncMock(return_value=(True, "build output text", ""))
+        source = _minimal_source_state()
+        vm_state = _base_state(shell=ShellConfig(shell_type="fish"))
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with (
+                patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))),
+                patch.object(SystemState, "from_json", return_value=vm_state),
+            ):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is True
+        # build_output should carry darwin-rebuild's stdout+stderr; only its type is asserted here
+        assert isinstance(result.build_output, str)
+
+    def test_success_fidelity_report_populated(self) -> None:
+        vm = _make_vm(exec_result=(True, "admin", ""))
+        source = _base_state(shell=ShellConfig(shell_type="fish"))
+        vm_state = _base_state(shell=ShellConfig(shell_type="fish"))
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with (
+                patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))),
+                patch.object(SystemState, "from_json", return_value=vm_state),
+            ):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.fidelity is not None
+        assert isinstance(result.fidelity, FidelityReport)
+        assert result.fidelity.overall_score == 1.0
+
+    def test_fidelity_reflects_vm_state_difference(self) -> None:
+        """Fidelity < 1.0 when VM state differs from source."""
+        vm = _make_vm(exec_result=(True, "admin", ""))
+        source = _base_state(shell=ShellConfig(shell_type="fish"))
+        vm_state = _base_state(shell=ShellConfig(shell_type="zsh"))  # different
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with (
+                patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))),
+                patch.object(SystemState, "from_json", return_value=vm_state),
+            ):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is True
+        assert result.fidelity is not None
+        assert result.fidelity.overall_score < 1.0
+
+    def test_scp_result_back_no_ip_returns_scan_error(self) -> None:
+        """If VM has no IP when SCPing result back, scan fails gracefully."""
+        get_ip_calls = 0
+        vm = _make_vm()
+
+        async def get_ip_side_effect():
+            nonlocal get_ip_calls
+            get_ip_calls += 1
+            # First two calls succeed (copy_flake needs an IP); all later calls return None
+            return "192.168.64.5" if get_ip_calls <= 2 else None
+
+        vm.get_ip = AsyncMock(side_effect=get_ip_side_effect)
+        vm.exec_command = AsyncMock(return_value=(True, "admin", ""))
+        source = _minimal_source_state()
+
+        async def _run() -> ValidationResult:
+            v = Validator(vm)
+            with patch("mac2nix.vm.validator.async_run_command", new=AsyncMock(return_value=(0, "", ""))):
+                return await v.validate(Path("/tmp/flake"), source)
+
+        result = asyncio.run(_run())
+        assert result.success is False
+        assert any("scan" in e for e in result.errors)
diff --git a/tests/vm/test_vm_utils.py b/tests/vm/test_vm_utils.py
new file mode 100644
index 0000000..4a74c50
--- /dev/null
+++ b/tests/vm/test_vm_utils.py
@@ -0,0 +1,480 @@
+"""Tests for vm/_utils.py — async subprocess helpers and exception hierarchy."""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from mac2nix.vm._utils import (
+    VMConnectionError,
+    VMError,
+    VMTimeoutError,
+    async_run_command,
+    async_ssh_exec,
+    is_sshpass_available,
+    is_tart_available,
+)
+
+# ---------------------------------------------------------------------------
+# Exception hierarchy
+# ---------------------------------------------------------------------------
+
+
+class TestExceptionHierarchy:
+    def test_vm_error_is_exception(self) -> None:
+        assert issubclass(VMError, Exception)
+
+    def test_vm_connection_error_is_vm_error(self) -> None:
+        assert issubclass(VMConnectionError, VMError)
+
+    def test_vm_timeout_error_is_vm_error(self) -> None:
+        assert issubclass(VMTimeoutError, VMError)
+
+    def test_vm_connection_error_can_be_caught_as_vm_error(self) -> None:
+        with pytest.raises(VMError):
+            raise VMConnectionError("connection failed")
+
+    def test_vm_timeout_error_can_be_caught_as_vm_error(self) -> None:
+        with pytest.raises(VMError):
+            raise VMTimeoutError("timed out")
+
+    def test_vm_error_message(self) -> None:
+        exc = VMError("something went wrong")
+        assert str(exc) == "something went wrong"
+
+
+# ---------------------------------------------------------------------------
+# Tool availability
+# ---------------------------------------------------------------------------
+
+
+class TestIsToolAvailable:
+    def test_tart_available_when_on_path(self) -> None:
+        with patch("mac2nix.vm._utils.shutil.which", return_value="/usr/local/bin/tart"):
+            assert is_tart_available() is True
+
+    def test_tart_unavailable_when_not_on_path(self) -> None:
+        with patch("mac2nix.vm._utils.shutil.which", return_value=None):
+            assert is_tart_available() is False
+
+    def test_sshpass_available_when_on_path(self) -> None:
+        with patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/sshpass"):
+            assert is_sshpass_available() is True
+
+    def test_sshpass_unavailable_when_not_on_path(self) -> None:
+        with patch("mac2nix.vm._utils.shutil.which", return_value=None):
+            assert is_sshpass_available() is False
+
+    def test_tart_checks_tart_binary(self) -> None:
+        calls: list[str] = []
+
+        def recording_which(name: str) -> str | None:
+            calls.append(name)
+            return "/usr/local/bin/tart"
+
+        with patch("mac2nix.vm._utils.shutil.which", side_effect=recording_which):
+            is_tart_available()
+
+        assert calls == ["tart"]
+
+    def test_sshpass_checks_sshpass_binary(self) -> None:
+        calls: list[str] = []
+
+        def recording_which(name: str) -> str | None:
+            calls.append(name)
+            return "/usr/bin/sshpass"
+
+        with patch("mac2nix.vm._utils.shutil.which", side_effect=recording_which):
+            is_sshpass_available()
+
+        assert calls == ["sshpass"]
+
+
+# ---------------------------------------------------------------------------
+# async_run_command
+# ---------------------------------------------------------------------------
+
+
+def _make_proc(returncode: int = 0, stdout: bytes = b"", stderr: bytes = b"") -> MagicMock:
+    """Build a mock asyncio.subprocess.Process-like object."""
+    proc = MagicMock()
+    proc.returncode = returncode
+    proc.communicate = AsyncMock(return_value=(stdout, stderr))
+    proc.kill = MagicMock()
+    return proc
+
+
+class TestAsyncRunCommand:
+    def test_success_returns_tuple(self) -> None:
+        proc = _make_proc(returncode=0, stdout=b"hello\n", stderr=b"")
+
+        async def _run() -> tuple[int, str, str]:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/echo"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(return_value=proc)),
+            ):
+                return await async_run_command(["echo", "hello"])
+
+        rc, stdout, stderr = asyncio.run(_run())
+        assert rc == 0
+        assert stdout == "hello\n"
+        assert stderr == ""
+
+    def test_nonzero_returncode_returned_not_raised(self) -> None:
+        proc = _make_proc(returncode=1, stdout=b"", stderr=b"error")
+
+        async def _run() -> tuple[int, str, str]:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/false"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(return_value=proc)),
+            ):
+                return await async_run_command(["false"])
+
+        rc, _stdout, stderr = asyncio.run(_run())
+        assert rc == 1
+        assert stderr == "error"
+
+    def test_executable_not_found_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            with patch("mac2nix.vm._utils.shutil.which", return_value=None):
+                await async_run_command(["nonexistent-tool"])
+
+        with pytest.raises(VMError, match="Executable not found"):
+            asyncio.run(_run())
+
+    def test_executable_not_found_is_not_timeout_error(self) -> None:
+        async def _run() -> None:
+            with patch("mac2nix.vm._utils.shutil.which", return_value=None):
+                await async_run_command(["nonexistent-tool"])
+
+        with pytest.raises(VMError) as exc_info:
+            asyncio.run(_run())
+        assert not isinstance(exc_info.value, VMTimeoutError)
+
+    def test_timeout_raises_vm_timeout_error(self) -> None:
+        proc = MagicMock()
+        proc.returncode = None
+        proc.kill = MagicMock()
+        proc.communicate = AsyncMock(side_effect=[TimeoutError(), (b"", b"")])
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/sleep"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(return_value=proc)),
+                patch("mac2nix.vm._utils.asyncio.wait_for", side_effect=TimeoutError),
+            ):
+                await async_run_command(["sleep", "999"], timeout=1)
+
+        with pytest.raises(VMTimeoutError, match="timed out"):
+            asyncio.run(_run())
+
+    def test_timeout_kills_process(self) -> None:
+        proc = MagicMock()
+        proc.returncode = None
+        proc.kill = MagicMock()
+        # communicate() returns empty bytes on every call, including the one made after kill
+        proc.communicate = AsyncMock(return_value=(b"", b""))
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/sleep"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(return_value=proc)),
+                patch("mac2nix.vm._utils.asyncio.wait_for", side_effect=TimeoutError),
+            ):
+                await async_run_command(["sleep", "999"], timeout=1)
+
+        with pytest.raises(VMTimeoutError):
+            asyncio.run(_run())
+
+        proc.kill.assert_called_once()
+
+    def test_file_not_found_during_exec_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/gone"),
+                patch(
+                    "mac2nix.vm._utils.asyncio.create_subprocess_exec",
+                    new=AsyncMock(side_effect=FileNotFoundError("gone")),
+                ),
+            ):
+                await async_run_command(["gone"])
+
+        with pytest.raises(VMError, match="Executable not found during execution"):
+            asyncio.run(_run())
+
+    def test_os_error_raises_vm_error(self) -> None:
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/badcmd"),
+                patch(
+                    "mac2nix.vm._utils.asyncio.create_subprocess_exec",
+                    new=AsyncMock(side_effect=OSError("permission denied")),
+                ),
+            ):
+                await async_run_command(["badcmd"])
+
+        with pytest.raises(VMError, match="OS error"):
+            asyncio.run(_run())
+
+    def test_stdout_decoded_from_bytes(self) -> None:
+        proc = _make_proc(returncode=0, stdout="output with unicode \u00e9".encode(), stderr=b"")
+
+        async def _run() -> tuple[int, str, str]:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/cmd"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(return_value=proc)),
+            ):
+                return await async_run_command(["cmd"])
+
+        _, stdout, _ = asyncio.run(_run())
+        assert "output with unicode" in stdout
+
+    def test_uses_no_shell(self) -> None:
+        """Verify create_subprocess_exec is called without shell=True."""
+        proc = _make_proc(returncode=0)
+        captured_kwargs: list[dict] = []
+
+        async def fake_exec(*args, **kwargs):
+            captured_kwargs.append(kwargs)
+            return proc
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.shutil.which", return_value="/usr/bin/cmd"),
+                patch("mac2nix.vm._utils.asyncio.create_subprocess_exec", new=AsyncMock(side_effect=fake_exec)),
+            ):
+                await async_run_command(["cmd", "arg1"])
+
+        asyncio.run(_run())
+        assert captured_kwargs, "create_subprocess_exec was not called"
+        assert "shell" not in captured_kwargs[0] or captured_kwargs[0].get("shell") is not True
+
+
+# ---------------------------------------------------------------------------
+# async_ssh_exec
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncSshExec:
+    def test_raises_vm_connection_error_when_sshpass_missing(self) -> None:
+        async def _run() -> None:
+            with patch("mac2nix.vm._utils.is_sshpass_available", return_value=False):
+                await async_ssh_exec("192.168.1.1", "admin", "pass", ["ls"])
+
+        with pytest.raises(VMConnectionError, match="sshpass is not available"):
+            asyncio.run(_run())
+
+    def test_success_returns_true_on_zero_returncode(self) -> None:
+        async def _run() -> tuple[bool, str, str]:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch(
+                    "mac2nix.vm._utils.async_run_command",
+                    new=AsyncMock(return_value=(0, "output", "")),
+                ),
+            ):
+                return await async_ssh_exec("10.0.0.1", "user", "secret", ["ls", "/"])
+
+        success, stdout, _stderr = asyncio.run(_run())
+        assert success is True
+        assert stdout == "output"
+
+    def test_nonzero_returncode_returns_false(self) -> None:
+        async def _run() -> tuple[bool, str, str]:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch(
+                    "mac2nix.vm._utils.async_run_command",
+                    new=AsyncMock(return_value=(1, "", "command not found")),
+                ),
+            ):
+                return await async_ssh_exec("10.0.0.1", "user", "secret", ["bad-cmd"])
+
+        success, _stdout, stderr = asyncio.run(_run())
+        assert success is False
+        assert stderr == "command not found"
+
+    def test_ssh_argument_list_construction(self) -> None:
+        """Verify the exact SSH argument list passed to async_run_command."""
+        captured_cmd: list[list[str]] = []
+        captured_env: list[dict[str, str] | None] = []
+
+        async def capturing_run(
+            cmd: list[str], *, timeout: int = 30, env: dict[str, str] | None = None
+        ) -> tuple[int, str, str]:
+            captured_cmd.append(cmd)
+            captured_env.append(env)
+            return (0, "", "")
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch("mac2nix.vm._utils.async_run_command", side_effect=capturing_run),
+            ):
+                await async_ssh_exec("192.168.64.10", "admin", "mypassword", ["uname", "-a"], timeout=30)
+
+        asyncio.run(_run())
+        assert captured_cmd, "async_run_command was not called"
+        cmd = captured_cmd[0]
+
+        expected_prefix = [
+            "sshpass",
+            "-e",
+            "ssh",
+            "-o",
+            "StrictHostKeyChecking=no",
+            "-o",
+            "UserKnownHostsFile=/dev/null",
+            "-o",
+            "LogLevel=ERROR",
+            "-o",
+            "ConnectTimeout=15",  # max(30 // 2, 5) — SSH timeout is half the process timeout
+            "admin@192.168.64.10",
+            "--",
+        ]
+        assert cmd[: len(expected_prefix)] == expected_prefix
+        # Remote command appended after --
+        assert cmd[len(expected_prefix) :] == ["uname", "-a"]
+        # Password passed via env, not argv
+        assert captured_env[0] == {"SSHPASS": "mypassword"}
+
+    def test_connect_timeout_embedded_in_arg_list(self) -> None:
+        """ConnectTimeout is half the process timeout (min 5s) to avoid race."""
+        captured_cmd: list[list[str]] = []
+
+        async def capturing_run(
+            cmd: list[str], *, timeout: int = 30, env: dict[str, str] | None = None
+        ) -> tuple[int, str, str]:
+            captured_cmd.append(cmd)
+            return (0, "", "")
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch("mac2nix.vm._utils.async_run_command", side_effect=capturing_run),
+            ):
+                await async_ssh_exec("10.0.0.1", "admin", "pw", ["true"], timeout=60)
+
+        asyncio.run(_run())
+        cmd = captured_cmd[0]
+        assert "ConnectTimeout=30" in cmd  # max(60 // 2, 5) = 30
+
+    def test_password_not_shell_joined(self) -> None:
+        """Password must be passed via SSHPASS env var, not as a command-line argument."""
+        captured_cmd: list[list[str]] = []
+        captured_env: list[dict[str, str] | None] = []
+
+        async def capturing_run(
+            cmd: list[str], *, timeout: int = 30, env: dict[str, str] | None = None
+        ) -> tuple[int, str, str]:
+            captured_cmd.append(cmd)
+            captured_env.append(env)
+            return (0, "", "")
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch("mac2nix.vm._utils.async_run_command", side_effect=capturing_run),
+            ):
+                await async_ssh_exec("10.0.0.1", "u", "p@$$w0rd!", ["id"])
+
+        asyncio.run(_run())
+        cmd = captured_cmd[0]
+        # sshpass -e (reads from env) — password must NOT appear in argv
+        assert cmd[0] == "sshpass"
+        assert cmd[1] == "-e"
+        assert "p@$$w0rd!" not in cmd
+        # Password passed safely via SSHPASS env var
+        assert captured_env[0] == {"SSHPASS": "p@$$w0rd!"}
+
+    def test_vm_error_from_run_command_becomes_connection_error(self) -> None:
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch(
+                    "mac2nix.vm._utils.async_run_command",
+                    new=AsyncMock(side_effect=VMError("exec failed")),
+                ),
+            ):
+                await async_ssh_exec("10.0.0.1", "u", "p", ["ls"])
+
+        with pytest.raises(VMConnectionError, match="SSH connection"):
+            asyncio.run(_run())
+
+    def test_timeout_error_propagated_as_vm_timeout_error(self) -> None:
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch(
+                    "mac2nix.vm._utils.async_run_command",
+                    new=AsyncMock(side_effect=VMTimeoutError("timed out")),
+                ),
+            ):
+                await async_ssh_exec("10.0.0.1", "u", "p", ["sleep", "100"])
+
+        with pytest.raises(VMTimeoutError):
+            asyncio.run(_run())
+
+    def test_timeout_not_wrapped_in_connection_error(self) -> None:
+        """VMTimeoutError must not be caught and re-raised as VMConnectionError."""
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch(
+                    "mac2nix.vm._utils.async_run_command",
+                    new=AsyncMock(side_effect=VMTimeoutError("timed out")),
+                ),
+            ):
+                await async_ssh_exec("10.0.0.1", "u", "p", ["sleep", "100"])
+
+        with pytest.raises(VMTimeoutError) as exc_info:
+            asyncio.run(_run())
+        assert not isinstance(exc_info.value, VMConnectionError)
+
+    def test_user_at_ip_format_in_arg_list(self) -> None:
+        """SSH target must be formatted as user@ip as a single argument."""
+        captured_cmd: list[list[str]] = []
+
+        async def capturing_run(
+            cmd: list[str], *, timeout: int = 30, env: dict[str, str] | None = None
+        ) -> tuple[int, str, str]:
+            captured_cmd.append(cmd)
+            return (0, "", "")
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch("mac2nix.vm._utils.async_run_command", side_effect=capturing_run),
+            ):
+                await async_ssh_exec("192.168.64.5", "testuser", "pw", ["echo", "hi"])
+
+        asyncio.run(_run())
+        cmd = captured_cmd[0]
+        assert "testuser@192.168.64.5" in cmd
+
+    def test_double_dash_separator_present(self) -> None:
+        """-- must appear in the argument list to separate SSH options from remote cmd."""
+        captured_cmd: list[list[str]] = []
+
+        async def capturing_run(
+            cmd: list[str], *, timeout: int = 30, env: dict[str, str] | None = None
+        ) -> tuple[int, str, str]:
+            captured_cmd.append(cmd)
+            return (0, "", "")
+
+        async def _run() -> None:
+            with (
+                patch("mac2nix.vm._utils.is_sshpass_available", return_value=True),
+                patch("mac2nix.vm._utils.async_run_command", side_effect=capturing_run),
+            ):
+                await async_ssh_exec("10.0.0.1", "u", "p", ["ls", "-la"])
+
+        asyncio.run(_run())
+        cmd = captured_cmd[0]
+        assert "--" in cmd
+        # -- must appear before the remote command
+        dash_idx = cmd.index("--")
+        assert cmd[dash_idx + 1 :] == ["ls", "-la"]

From 2ff46a080516f2377fba87a685349bb009408690 Mon Sep 17 00:00:00 2001
From: testvalue <wgordon@redhat.com>
Date: Wed, 18 Mar 2026 19:15:30 -0400
Subject: [PATCH 2/2] refactor(vm): removes unused Mismatch fields, documents
 SSH

---
 src/mac2nix/vm/_utils.py    |  3 +++
 src/mac2nix/vm/validator.py | 11 +----------
 tests/vm/test_validator.py  |  4 +---
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/mac2nix/vm/_utils.py b/src/mac2nix/vm/_utils.py
index 576be4f..cd0498c 100644
--- a/src/mac2nix/vm/_utils.py
+++ b/src/mac2nix/vm/_utils.py
@@ -138,6 +138,9 @@ async def async_ssh_exec(
 
     # Use sshpass -e (reads password from SSHPASS env var) to avoid exposing
     # the password in process listings (ps aux shows argv, not environment).
+    # StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null: safe because
+    # target VMs are ephemeral Tart clones running on this machine — no persistent
+    # host identity to verify, and the IP/key changes on every clone.
     ssh_cmd = [
         "sshpass",
         "-e",
diff --git a/src/mac2nix/vm/validator.py b/src/mac2nix/vm/validator.py
index 258c749..cea33ad 100644
--- a/src/mac2nix/vm/validator.py
+++ b/src/mac2nix/vm/validator.py
@@ -23,8 +23,6 @@
 class Mismatch(BaseModel):
     domain: str
     field: str
-    source_value: Any
-    target_value: Any
 
 
 class DomainScore(BaseModel):
@@ -143,14 +141,7 @@ def compute_fidelity(source: SystemState, target: SystemState) -> FidelityReport
         for mismatch_desc in ds.mismatches:
             # mismatch_desc is a full dot-path like "domain.field" or "domain.sub.field".
             # Store the full path — nested paths make getattr lookups fragile.
-            all_mismatches.append(
-                Mismatch(
-                    domain=field_name,
-                    field=mismatch_desc,
-                    source_value=None,
-                    target_value=None,
-                )
-            )
+            all_mismatches.append(Mismatch(domain=field_name, field=mismatch_desc))
 
     # Overall score: weighted by total_fields per domain.
     total_weight = sum(ds.total_fields for ds in domain_scores.values())
diff --git a/tests/vm/test_validator.py b/tests/vm/test_validator.py
index 017bb20..3e02ce8 100644
--- a/tests/vm/test_validator.py
+++ b/tests/vm/test_validator.py
@@ -69,11 +69,9 @@ def _state_with_security(filevault: bool | None = True, sip: bool | None = True)
 
 class TestModels:
     def test_mismatch_model(self) -> None:
-        m = Mismatch(domain="shell", field="shell.shell_type", source_value="fish", target_value="zsh")
+        m = Mismatch(domain="shell", field="shell.shell_type")
         assert m.domain == "shell"
         assert m.field == "shell.shell_type"
-        assert m.source_value == "fish"
-        assert m.target_value == "zsh"
 
     def test_domain_score_model(self) -> None:
         ds = DomainScore(domain="shell", score=0.75, total_fields=4, matching_fields=3, mismatches=["shell.env_vars"])