From ad1f6b08d1edba4b1fe87c62970eb40c9c94edcc Mon Sep 17 00:00:00 2001 From: testvalue Date: Sat, 14 Mar 2026 11:36:58 -0400 Subject: [PATCH 1/3] feat(cli): adds scan command with orchestrator --- pyproject.toml | 1 + src/mac2nix/cli.py | 87 +++- src/mac2nix/orchestrator.py | 219 ++++++++++ src/mac2nix/scanners/_utils.py | 47 +++ src/mac2nix/scanners/audio.py | 31 +- src/mac2nix/scanners/cron.py | 15 +- src/mac2nix/scanners/display.py | 32 +- src/mac2nix/scanners/launch_agents.py | 40 +- src/mac2nix/scanners/system_scanner.py | 21 +- tests/scanners/test_prefetch_injection.py | 488 ++++++++++++++++++++++ tests/test_cli.py | 268 ++++++++++++ tests/test_orchestrator.py | 442 ++++++++++++++++++++ uv.lock | 36 ++ 13 files changed, 1684 insertions(+), 43 deletions(-) create mode 100644 src/mac2nix/orchestrator.py create mode 100644 tests/scanners/test_prefetch_injection.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_orchestrator.py diff --git a/pyproject.toml b/pyproject.toml index 728a161..51b5ce6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "pydantic>=2.0", "jinja2>=3.1", "pyyaml>=6.0", + "rich>=13.0", ] [project.scripts] diff --git a/src/mac2nix/cli.py b/src/mac2nix/cli.py index 2b02c67..7276f22 100644 --- a/src/mac2nix/cli.py +++ b/src/mac2nix/cli.py @@ -1,6 +1,17 @@ """mac2nix CLI.""" +from __future__ import annotations + +import asyncio +import time +from pathlib import Path + import click +from rich.console import Console +from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + +from mac2nix.orchestrator import run_scan +from mac2nix.scanners import get_all_scanners @click.group() @@ -10,9 +21,81 @@ def main() -> None: @main.command() -def scan() -> None: +@click.option( + "--output", + "-o", + type=click.Path(path_type=Path), + default=None, + help="Write JSON output to FILE instead of stdout.", + metavar="FILE", +) +@click.option( + "--scanner", + "-s", + "selected_scanners", + multiple=True, + help="Run only this scanner (repeatable). Defaults to all scanners.", + metavar="NAME", +) +def scan(output: Path | None, selected_scanners: tuple[str, ...]) -> None: """Scan the current macOS system state.""" - click.echo("scan: not yet implemented") + all_names = list(get_all_scanners().keys()) + scanners: list[str] | None = list(selected_scanners) if selected_scanners else None + + # Validate any explicitly requested scanner names + if scanners is not None: + unknown = [s for s in scanners if s not in all_names] + if unknown: + available = ", ".join(sorted(all_names)) + raise click.UsageError(f"Unknown scanner(s): {', '.join(unknown)}. Available: {available}") + + total = len(scanners) if scanners is not None else len(all_names) + + completed: int = 0 + start = time.monotonic() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + MofNCompleteColumn(), + TimeElapsedColumn(), + console=Console(stderr=True), + transient=True, + redirect_stdout=False, + redirect_stderr=False, + ) as progress: + task_id = progress.add_task("Scanning...", total=total) + + def progress_callback(name: str) -> None: + nonlocal completed + completed += 1 + progress.advance(task_id) + progress.update(task_id, description=f"[bold cyan]{name}[/] done") + + try: + state = asyncio.run(run_scan(scanners=scanners, progress_callback=progress_callback)) + except RuntimeError as e: + raise click.ClickException(str(e)) from e + + elapsed = time.monotonic() - start + scanner_count = completed + + json_output = state.to_json() + + if output is not None: + output.parent.mkdir(parents=True, exist_ok=True) + output.write_text(json_output) + click.echo( + f"Scanned {scanner_count} scanner(s) in {elapsed:.1f}s — wrote {output}", + err=True, + ) + else: + click.echo( + f"Scanned {scanner_count} scanner(s) in {elapsed:.1f}s", + err=True, + ) + click.echo(json_output) @main.command() diff --git a/src/mac2nix/orchestrator.py b/src/mac2nix/orchestrator.py new file mode 100644 index 0000000..85f4d1b --- /dev/null +++ b/src/mac2nix/orchestrator.py @@ -0,0 +1,219 @@ +"""Async scan orchestrator — dispatches all scanners concurrently.""" + +from __future__ import annotations + +import asyncio +import json +import logging +import platform +import shutil +import socket +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from pydantic import BaseModel + +from mac2nix.models.system_state import SystemState +from mac2nix.scanners import get_all_scanners +from mac2nix.scanners._utils import read_launchd_plists, run_command +from mac2nix.scanners.audio import AudioScanner +from mac2nix.scanners.cron import CronScanner +from mac2nix.scanners.display import DisplayScanner +from mac2nix.scanners.launch_agents import LaunchAgentsScanner +from mac2nix.scanners.system_scanner import SystemScanner + +logger = logging.getLogger(__name__) + +# Scanners that share pre-fetched data — excluded from generic dispatch. +_PREFETCH_SCANNERS = frozenset({"display", "audio", "system", "launch_agents", "cron"}) + + +def _get_system_metadata() -> tuple[str, str, str]: + """Return (hostname, macos_version, architecture).""" + hostname = socket.gethostname() + architecture = platform.machine() # arm64 or x86_64 + + # Try sw_vers for macOS version string + result = run_command(["sw_vers", "-productVersion"]) + if result is not None and result.returncode == 0: + macos_version = result.stdout.strip() + else: + macos_version = platform.mac_ver()[0] or "unknown" + + return hostname, macos_version, architecture + + +def _fetch_system_profiler_batch() -> dict[str, Any]: + """Run a single batched system_profiler call for display, audio, and hardware data. + + Returns the parsed JSON dict with ``SPDisplaysDataType``, + ``SPAudioDataType``, and ``SPHardwareDataType`` as top-level keys. + Returns an empty dict on failure. + """ + if shutil.which("system_profiler") is None: + logger.warning("system_profiler not found — display/audio/hardware data unavailable") + return {} + + result = run_command( + ["system_profiler", "SPDisplaysDataType", "SPAudioDataType", "SPHardwareDataType", "-json"], + timeout=20, + ) + if result is None or result.returncode != 0: + logger.warning("Batched system_profiler call failed") + return {} + + try: + return json.loads(result.stdout) # type: ignore[no-any-return] + except (json.JSONDecodeError, ValueError): + logger.warning("Failed to parse batched system_profiler output") + return {} + + +async def _run_scanner_async( + scanner_name: str, + scanner_cls: type, + kwargs: dict[str, Any], + progress_callback: Callable[[str], None] | None, +) -> tuple[str, BaseModel | None]: + """Dispatch a single scanner in a thread and return (name, result).""" + try: + scanner = scanner_cls(**kwargs) + if not scanner.is_available(): + logger.info("Scanner '%s' not available — skipping", scanner_name) + return scanner_name, None + + result: BaseModel = await asyncio.to_thread(scanner.scan) + logger.debug("Scanner '%s' completed", scanner_name) + return scanner_name, result + except Exception: + logger.exception("Scanner '%s' raised an exception", scanner_name) + return scanner_name, None + finally: + if progress_callback is not None: + progress_callback(scanner_name) + + +async def run_scan( + scanners: list[str] | None = None, + progress_callback: Callable[[str], None] | None = None, +) -> SystemState: + """Run all (or selected) scanners concurrently and return a SystemState. + + Args: + scanners: List of scanner names to run. ``None`` runs all registered + scanners. Unknown names are silently ignored. + progress_callback: Optional callable invoked with the scanner name after + each scanner completes (or is skipped). Suitable for updating a + progress bar. Called from the asyncio event loop thread. + + Returns: + Populated :class:`~mac2nix.models.system_state.SystemState`. + """ + hostname, macos_version, architecture = _get_system_metadata() + + all_registered = get_all_scanners() + if scanners is not None: + selected = {name: cls for name, cls in all_registered.items() if name in scanners} + else: + selected = dict(all_registered) + + need_sp = "display" in selected or "audio" in selected or "system" in selected + need_launchd = "launch_agents" in selected or "cron" in selected + + # --- Dispatch independent scanners immediately --- + # These 14 scanners need no pre-fetched data; start them now. + tasks: list[asyncio.Task[tuple[str, BaseModel | None]]] = [] + for name, cls in selected.items(): + if name in _PREFETCH_SCANNERS: + continue + tasks.append( + asyncio.create_task( + _run_scanner_async(name, cls, {}, progress_callback), + name=f"scanner-{name}", + ) + ) + + # --- Run pre-fetches concurrently while independent scanners are running --- + batched_sp: dict[str, Any] + launchd_plists: list[tuple[Path, str, dict[str, Any]]] | None + batched_sp, launchd_plists = await asyncio.gather( + asyncio.to_thread(_fetch_system_profiler_batch) if need_sp else asyncio.sleep(0, result={}), + asyncio.to_thread(read_launchd_plists) if need_launchd else asyncio.sleep(0, result=None), + ) + + # --- Dispatch prefetch-dependent scanners --- + if "display" in selected: + tasks.append( + asyncio.create_task( + _run_scanner_async( + "display", + DisplayScanner, + {"prefetched_data": batched_sp}, + progress_callback, + ), + name="scanner-display", + ) + ) + if "audio" in selected: + tasks.append( + asyncio.create_task( + _run_scanner_async( + "audio", + AudioScanner, + {"prefetched_data": batched_sp}, + progress_callback, + ), + name="scanner-audio", + ) + ) + if "system" in selected: + tasks.append( + asyncio.create_task( + _run_scanner_async( + "system", + SystemScanner, + {"prefetched_data": batched_sp}, + progress_callback, + ), + name="scanner-system", + ) + ) + if "launch_agents" in selected: + tasks.append( + asyncio.create_task( + _run_scanner_async( + "launch_agents", + LaunchAgentsScanner, + {"launchd_plists": launchd_plists}, + progress_callback, + ), + name="scanner-launch_agents", + ) + ) + if "cron" in selected: + tasks.append( + asyncio.create_task( + _run_scanner_async( + "cron", + CronScanner, + {"launchd_plists": launchd_plists}, + progress_callback, + ), + name="scanner-cron", + ) + ) + + results = await asyncio.gather(*tasks, return_exceptions=False) + + # --- Assemble SystemState --- + domain_results: dict[str, BaseModel | None] = {} + for scanner_name, result in results: + domain_results[scanner_name] = result + + return SystemState( + hostname=hostname, + macos_version=macos_version, + architecture=architecture, + **{k: v for k, v in domain_results.items() if v is not None}, # type: ignore[arg-type] + ) diff --git a/src/mac2nix/scanners/_utils.py b/src/mac2nix/scanners/_utils.py index 2816656..72f7a94 100644 --- a/src/mac2nix/scanners/_utils.py +++ b/src/mac2nix/scanners/_utils.py @@ -15,6 +15,53 @@ logger = logging.getLogger(__name__) +WALK_SKIP_DIRS = frozenset( + { + # Caches & transient data + "Caches", + "Cache", + "cache", + ".cache", + "GPUCache", + "ShaderCache", + "Code Cache", + "CachedData", + "Service Worker", + "blob_storage", + "IndexedDB", + "GrShaderCache", + "component_crx_cache", + # Logs + "Logs", + "logs", + "log", + # VCS + ".git", + ".svn", + ".hg", + # Build artifacts & dependency trees + "node_modules", + "__pycache__", + ".tox", + ".nox", + "DerivedData", + "Build", + ".build", + "build", + "target", + "dist", + ".next", + ".nuxt", + # Temp + "tmp", + "temp", + ".tmp", + # Trash + ".Trash", + ".Trashes", + } +) + LAUNCHD_DIRS: list[tuple[Path, str]] = [ (Path.home() / "Library" / "LaunchAgents", "user"), (Path("/Library/LaunchAgents"), "system"), diff --git a/src/mac2nix/scanners/audio.py b/src/mac2nix/scanners/audio.py index 729ea00..8cab0af 100644 --- a/src/mac2nix/scanners/audio.py +++ b/src/mac2nix/scanners/audio.py @@ -5,6 +5,7 @@ import json import logging import shutil +from typing import Any from mac2nix.models.hardware import AudioConfig, AudioDevice from mac2nix.scanners._utils import run_command @@ -29,6 +30,17 @@ def _parse_float(value: str) -> float | None: @register("audio") class AudioScanner(BaseScannerPlugin): + def __init__(self, prefetched_data: dict[str, Any] | None = None) -> None: + """Initialise the audio scanner. + + Args: + prefetched_data: Pre-parsed JSON dict from a batched system_profiler call. + When provided, the scanner skips its own system_profiler invocation. + Must contain the ``SPAudioDataType`` key. Defaults to ``None`` + (the scanner fetches data itself). + """ + self._prefetched_data = prefetched_data + @property def name(self) -> str: return "audio" @@ -51,17 +63,24 @@ def scan(self) -> AudioConfig: output_muted=output_muted, ) - def _get_audio_devices( - self, - ) -> tuple[list[AudioDevice], list[AudioDevice], str | None, str | None]: + def _load_audio_data(self) -> dict[str, Any] | None: + """Return parsed SPAudio JSON, from prefetch or a fresh subprocess call.""" + if self._prefetched_data is not None: + return self._prefetched_data result = run_command(["system_profiler", "SPAudioDataType", "-json"], timeout=15) if result is None or result.returncode != 0: - return [], [], None, None - + return None try: - data = json.loads(result.stdout) + return json.loads(result.stdout) # type: ignore[no-any-return] except (json.JSONDecodeError, ValueError): logger.warning("Failed to parse system_profiler audio output") + return None + + def _get_audio_devices( + self, + ) -> tuple[list[AudioDevice], list[AudioDevice], str | None, str | None]: + data = self._load_audio_data() + if data is None: return [], [], None, None input_devices: list[AudioDevice] = [] diff --git a/src/mac2nix/scanners/cron.py b/src/mac2nix/scanners/cron.py index 1aaf8c7..7677b7d 100644 --- a/src/mac2nix/scanners/cron.py +++ b/src/mac2nix/scanners/cron.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging +from pathlib import Path +from typing import Any from mac2nix.models.services import CronEntry, LaunchdScheduledJob, ScheduledTasks from mac2nix.scanners._utils import read_launchd_plists, run_command @@ -13,6 +15,16 @@ @register("cron") class CronScanner(BaseScannerPlugin): + def __init__(self, launchd_plists: list[tuple[Path, str, dict[str, Any]]] | None = None) -> None: + """Initialise the cron scanner. + + Args: + launchd_plists: Pre-computed launchd plist tuples as returned by + ``read_launchd_plists()``. When provided, the scanner skips its own + disk read. Defaults to ``None`` (the scanner reads plists itself). + """ + self._launchd_plists = launchd_plists + @property def name(self) -> str: return "cron" @@ -68,8 +80,9 @@ def _get_cron_entries(self) -> tuple[list[CronEntry], dict[str, str]]: def _get_launchd_scheduled(self) -> list[LaunchdScheduledJob]: """Find launchd plists with scheduling keys.""" + plists = self._launchd_plists if self._launchd_plists is not None else read_launchd_plists() jobs: list[LaunchdScheduledJob] = [] - for _plist_path, _source_key, data in read_launchd_plists(): + for _plist_path, _source_key, data in plists: label = data.get("Label") if not label: continue diff --git a/src/mac2nix/scanners/display.py b/src/mac2nix/scanners/display.py index 3316480..a93d1f3 100644 --- a/src/mac2nix/scanners/display.py +++ b/src/mac2nix/scanners/display.py @@ -18,6 +18,17 @@ @register("display") class DisplayScanner(BaseScannerPlugin): + def __init__(self, prefetched_data: dict[str, Any] | None = None) -> None: + """Initialise the display scanner. + + Args: + prefetched_data: Pre-parsed JSON dict from a batched system_profiler call. + When provided, the scanner skips its own system_profiler invocation. + Must contain the ``SPDisplaysDataType`` key. Defaults to ``None`` + (the scanner fetches data itself). + """ + self._prefetched_data = prefetched_data + @property def name(self) -> str: return "display" @@ -26,15 +37,18 @@ def is_available(self) -> bool: return shutil.which("system_profiler") is not None def scan(self) -> DisplayConfig: - result = run_command(["system_profiler", "SPDisplaysDataType", "-json"], timeout=15) - if result is None or result.returncode != 0: - return DisplayConfig() - - try: - data = json.loads(result.stdout) - except (json.JSONDecodeError, ValueError): - logger.warning("Failed to parse system_profiler display output") - return DisplayConfig() + if self._prefetched_data is not None: + data = self._prefetched_data + else: + result = run_command(["system_profiler", "SPDisplaysDataType", "-json"], timeout=15) + if result is None or result.returncode != 0: + return DisplayConfig() + + try: + data = json.loads(result.stdout) + except (json.JSONDecodeError, ValueError): + logger.warning("Failed to parse system_profiler display output") + return DisplayConfig() monitors: list[Monitor] = [] gpu_list = data.get("SPDisplaysDataType", []) diff --git a/src/mac2nix/scanners/launch_agents.py b/src/mac2nix/scanners/launch_agents.py index f13e0a3..2fca092 100644 --- a/src/mac2nix/scanners/launch_agents.py +++ b/src/mac2nix/scanners/launch_agents.py @@ -2,7 +2,6 @@ from __future__ import annotations -import copy import logging import os import re @@ -28,6 +27,16 @@ @register("launch_agents") class LaunchAgentsScanner(BaseScannerPlugin): + def __init__(self, launchd_plists: list[tuple[Path, str, dict[str, Any]]] | None = None) -> None: + """Initialise the launch agents scanner. + + Args: + launchd_plists: Pre-computed launchd plist tuples as returned by + ``read_launchd_plists()``. When provided, the scanner skips its own + disk read. Defaults to ``None`` (the scanner reads plists itself). + """ + self._launchd_plists = launchd_plists + @property def name(self) -> str: return "launch_agents" @@ -35,8 +44,9 @@ def name(self) -> str: def scan(self) -> LaunchAgentsResult: entries: list[LaunchAgentEntry] = [] + plists = self._launchd_plists if self._launchd_plists is not None else read_launchd_plists() # Scan plist directories using shared reader - for plist_path, source_key, data in read_launchd_plists(): + for plist_path, source_key, data in plists: source = _SOURCE_MAP[source_key] entry = self._parse_agent_data(plist_path, source, data) if entry is not None: @@ -63,10 +73,16 @@ def _parse_agent_data( program_arguments = data.get("ProgramArguments", []) run_at_load = data.get("RunAtLoad", False) - # Deep copy data for raw_plist to avoid mutating the shared cache - raw_plist = copy.deepcopy(data) - # Redact sensitive environment variables in raw_plist - self._redact_sensitive_env(raw_plist) + # Shallow copy + replace env vars to avoid mutating the shared prefetch data + raw_plist = dict(data) + env_raw = data.get("EnvironmentVariables") + if isinstance(env_raw, dict): + redacted = { + k: "***REDACTED***" if any(p in k.upper() for p in _SENSITIVE_ENV_PATTERNS) else v + for k, v in env_raw.items() + } + if redacted != env_raw: + raw_plist["EnvironmentVariables"] = redacted # Extract filtered environment variables env_vars = data.get("EnvironmentVariables") @@ -103,23 +119,13 @@ def _parse_agent_data( group_name=data.get("GroupName"), ) - @staticmethod - def _redact_sensitive_env(plist: dict[str, Any]) -> None: - """Redact sensitive keys from EnvironmentVariables in the plist dict.""" - env_vars = plist.get("EnvironmentVariables") - if not isinstance(env_vars, dict): - return - for key in list(env_vars.keys()): - if any(p in key.upper() for p in _SENSITIVE_ENV_PATTERNS): - env_vars[key] = "***REDACTED***" - def _get_login_items(self) -> list[LaunchAgentEntry]: """Parse login items from sfltool dumpbtm text output. Filters to the current user's UID section and extracts items with type "login item". """ - result = run_command(["sfltool", "dumpbtm"]) + result = run_command(["sfltool", "dumpbtm"], timeout=15) if result is None or result.returncode != 0: return [] diff --git a/src/mac2nix/scanners/system_scanner.py b/src/mac2nix/scanners/system_scanner.py index 4abbdac..d3e0dc5 100644 --- a/src/mac2nix/scanners/system_scanner.py +++ b/src/mac2nix/scanners/system_scanner.py @@ -26,6 +26,9 @@ @register("system") class SystemScanner(BaseScannerPlugin): + def __init__(self, prefetched_data: dict[str, Any] | None = None) -> None: + self._prefetched_data = prefetched_data + @property def name(self) -> str: return "system" @@ -177,14 +180,16 @@ def _get_hardware_info( self, ) -> tuple[str | None, str | None, str | None, str | None]: """Parse system_profiler SPHardwareDataType for hardware info.""" - result = run_command(["system_profiler", "SPHardwareDataType", "-json"], timeout=15) - if result is None or result.returncode != 0: - return None, None, None, None - - try: - data = json.loads(result.stdout) - except (json.JSONDecodeError, ValueError): - return None, None, None, None + if self._prefetched_data is not None: + data = self._prefetched_data + else: + result = run_command(["system_profiler", "SPHardwareDataType", "-json"], timeout=15) + if result is None or result.returncode != 0: + return None, None, None, None + try: + data = json.loads(result.stdout) + except (json.JSONDecodeError, ValueError): + return None, None, None, None hw_list = data.get("SPHardwareDataType", []) if not hw_list: diff --git a/tests/scanners/test_prefetch_injection.py b/tests/scanners/test_prefetch_injection.py new file mode 100644 index 0000000..7605fce --- /dev/null +++ b/tests/scanners/test_prefetch_injection.py @@ -0,0 +1,488 @@ +"""Tests for prefetch data injection into display, audio, launch_agents, and cron scanners. + +These tests verify that when prefetched_data is provided, scanners use it +instead of making their own subprocess/IO calls, and that the results are +identical to self-fetched data. +""" + +from __future__ import annotations + +import json +import subprocess +from pathlib import Path +from typing import Any +from unittest.mock import patch + +from mac2nix.models.hardware import AudioConfig, DisplayConfig +from mac2nix.models.services import LaunchAgentSource, LaunchAgentsResult, ScheduledTasks +from mac2nix.scanners.audio import AudioScanner +from mac2nix.scanners.cron import CronScanner +from mac2nix.scanners.display import DisplayScanner +from mac2nix.scanners.launch_agents import LaunchAgentsScanner + +# --------------------------------------------------------------------------- +# Shared test data +# --------------------------------------------------------------------------- + +_SP_DISPLAYS_DATA = [ + { + "_name": "Apple M3 Pro", + "spdisplays_ndrvs": [ + { + "_name": "Built-in Retina Display", + "_spdisplays_resolution": "2880 x 1864 Retina", + "spdisplays_main": "spdisplays_yes", + "spdisplays_display_type": "spdisplays_retina", + } + ], + } +] + +_SP_AUDIO_DATA = [ + { + "_name": "MacBook Pro Speakers", + "_items": [ + { + "_name": "MacBook Pro Speakers", + "coreaudio_device_uid": "BuiltInSpeaker", + "coreaudio_device_output": "yes", + "coreaudio_default_audio_output_device": "yes", + }, + { + "_name": "MacBook Pro Microphone", + "coreaudio_device_uid": "BuiltInMic", + "coreaudio_device_input": "yes", + }, + ], + } +] + +_LAUNCHD_PLISTS = [ + ( + Path("/Users/test/Library/LaunchAgents/com.test.prefetch.plist"), + "user", + { + "Label": "com.test.prefetch", + "Program": "/usr/bin/test", + "RunAtLoad": True, + }, + ) +] + +_SCHEDULED_PLISTS = [ + ( + Path("/Users/test/Library/LaunchAgents/com.test.scheduled.plist"), + "user", + { + "Label": "com.test.scheduled", + "StartCalendarInterval": {"Hour": 6, "Minute": 0}, + }, + ) +] + + +# --------------------------------------------------------------------------- +# DisplayScanner prefetch injection +# --------------------------------------------------------------------------- + + +class TestDisplayScannerPrefetch: + def test_prefetch_bypasses_run_command(self) -> None: + """When prefetched_data is provided, DisplayScanner must not call run_command for system_profiler.""" + sp_data = {"SPDisplaysDataType": _SP_DISPLAYS_DATA} + scanner = DisplayScanner(prefetched_data=sp_data) + + with ( + patch("mac2nix.scanners.display.run_command") as mock_cmd, + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + ): + result = scanner.scan() + + # run_command should NOT have been called with system_profiler SPDisplaysDataType + for call in mock_cmd.call_args_list: + args = call[0][0] if call[0] else [] + assert "SPDisplaysDataType" not in args, ( + "DisplayScanner called system_profiler despite having prefetched data" + ) + + assert isinstance(result, DisplayConfig) + assert len(result.monitors) == 1 + assert result.monitors[0].name == "Built-in Retina Display" + + def test_prefetch_produces_same_result_as_self_fetch(self, cmd_result: Any) -> None: + """Prefetched data should produce the same monitors as self-fetched data.""" + sp_data = {"SPDisplaysDataType": _SP_DISPLAYS_DATA} + + # Self-fetched path + with ( + patch( + "mac2nix.scanners.display.run_command", + return_value=cmd_result(json.dumps(sp_data)), + ), + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + ): + self_fetched = DisplayScanner().scan() + + # Prefetched path + with patch("mac2nix.scanners.display.read_plist_safe", return_value=None): + prefetched = DisplayScanner(prefetched_data=sp_data).scan() + + assert len(self_fetched.monitors) == len(prefetched.monitors) + for sf_mon, pf_mon in zip(self_fetched.monitors, prefetched.monitors, strict=True): + assert sf_mon.name == pf_mon.name + assert sf_mon.resolution == pf_mon.resolution + assert sf_mon.retina == pf_mon.retina + + def test_none_prefetch_falls_back_to_run_command(self, cmd_result: Any) -> None: + """When prefetched_data is None, DisplayScanner fetches data itself (backward compat).""" + sp_data = {"SPDisplaysDataType": _SP_DISPLAYS_DATA} + scanner = DisplayScanner(prefetched_data=None) + + with ( + patch( + "mac2nix.scanners.display.run_command", + return_value=cmd_result(json.dumps(sp_data)), + ), + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + ): + result = scanner.scan() + + assert isinstance(result, DisplayConfig) + assert len(result.monitors) == 1 + + def test_no_prefetch_arg_falls_back_to_run_command(self, cmd_result: Any) -> None: + """Default (no prefetched_data kwarg) should preserve existing behavior.""" + sp_data = {"SPDisplaysDataType": _SP_DISPLAYS_DATA} + scanner = DisplayScanner() # no prefetched_data + + with ( + patch( + "mac2nix.scanners.display.run_command", + return_value=cmd_result(json.dumps(sp_data)), + ), + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + ): + result = scanner.scan() + + assert isinstance(result, DisplayConfig) + + def test_empty_prefetch_produces_empty_monitors(self) -> None: + """Prefetched data with empty SPDisplaysDataType should yield no monitors.""" + sp_data = {"SPDisplaysDataType": []} + scanner = DisplayScanner(prefetched_data=sp_data) + + with ( + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + patch("mac2nix.scanners.display.run_command", return_value=None), + ): + result = scanner.scan() + + assert isinstance(result, DisplayConfig) + assert result.monitors == [] + + +# --------------------------------------------------------------------------- +# AudioScanner prefetch injection +# --------------------------------------------------------------------------- + + +class TestAudioScannerPrefetch: + def test_prefetch_bypasses_system_profiler_call(self) -> None: + """When prefetched_data is provided, AudioScanner must not call system_profiler.""" + sp_data = {"SPAudioDataType": _SP_AUDIO_DATA} + scanner = AudioScanner(prefetched_data=sp_data) + + with patch("mac2nix.scanners.audio.run_command", return_value=None) as mock_cmd: + result = scanner.scan() + + # run_command should NOT have been called with SPAudioDataType + for call in mock_cmd.call_args_list: + args = call[0][0] if call[0] else [] + assert "SPAudioDataType" not in args, "AudioScanner called system_profiler despite having prefetched data" + + assert isinstance(result, AudioConfig) + assert len(result.output_devices) >= 1 + + def test_prefetch_produces_same_result_as_self_fetch(self, cmd_result: Any) -> None: + """Prefetched data should produce the same devices as self-fetched data.""" + sp_data = {"SPAudioDataType": _SP_AUDIO_DATA} + + # Self-fetched path + with patch( + "mac2nix.scanners.audio.run_command", + return_value=cmd_result(json.dumps(sp_data)), + ): + self_fetched = AudioScanner().scan() + + # Prefetched path + with patch("mac2nix.scanners.audio.run_command", return_value=None): + prefetched = AudioScanner(prefetched_data=sp_data).scan() + + assert len(self_fetched.output_devices) == len(prefetched.output_devices) + assert self_fetched.default_output == prefetched.default_output + + def test_none_prefetch_falls_back_to_run_command(self, cmd_result: Any) -> None: + """When prefetched_data is None, AudioScanner fetches data itself (backward compat).""" + sp_data = {"SPAudioDataType": _SP_AUDIO_DATA} + scanner = AudioScanner(prefetched_data=None) + + with patch( + "mac2nix.scanners.audio.run_command", + return_value=cmd_result(json.dumps(sp_data)), + ): + result = scanner.scan() + + assert isinstance(result, AudioConfig) + assert len(result.output_devices) >= 1 + + def test_no_prefetch_arg_falls_back_to_run_command(self) -> None: + """Default (no prefetched_data kwarg) should preserve existing behavior.""" + scanner = AudioScanner() + + with patch("mac2nix.scanners.audio.run_command", return_value=None): + result = scanner.scan() + + assert isinstance(result, AudioConfig) + + def test_empty_prefetch_produces_empty_devices(self) -> None: + """Empty SPAudioDataType in prefetch should yield no devices.""" + sp_data = {"SPAudioDataType": []} + scanner = AudioScanner(prefetched_data=sp_data) + + with patch("mac2nix.scanners.audio.run_command", return_value=None): + result = scanner.scan() + + assert isinstance(result, AudioConfig) + assert result.output_devices == [] + assert result.input_devices == [] + + def test_volume_settings_still_fetched_with_prefetch(self, cmd_result: Any) -> None: + """Volume settings (osascript) should still be fetched even with prefetched device data.""" + sp_data = {"SPAudioDataType": _SP_AUDIO_DATA} + scanner = AudioScanner(prefetched_data=sp_data) + + def _side_effect(cmd: list[str], **_kwargs: Any) -> subprocess.CompletedProcess[str] | None: + if "osascript" in cmd: + return cmd_result("output volume:50, input volume:75, alert volume:100, output muted:false") + return None + + with patch("mac2nix.scanners.audio.run_command", side_effect=_side_effect): + result = scanner.scan() + + assert isinstance(result, AudioConfig) + assert result.output_volume == 50 + assert result.input_volume == 75 + + +# --------------------------------------------------------------------------- +# LaunchAgentsScanner prefetch injection +# --------------------------------------------------------------------------- + + +class TestLaunchAgentsScannerPrefetch: + def test_prefetch_bypasses_read_launchd_plists(self) -> None: + """When launchd_plists is provided, LaunchAgentsScanner must not call read_launchd_plists.""" + scanner = LaunchAgentsScanner(launchd_plists=_LAUNCHD_PLISTS) + + with ( + patch("mac2nix.scanners.launch_agents.read_launchd_plists") as mock_read, + patch("mac2nix.scanners.launch_agents.run_command", return_value=None), + ): + result = scanner.scan() + + mock_read.assert_not_called() + assert isinstance(result, LaunchAgentsResult) + assert len(result.entries) == 1 + assert result.entries[0].label == "com.test.prefetch" + + def test_prefetch_produces_same_result_as_self_fetch(self) -> None: + """Pre-computed launchd plists should produce same entries as self-fetched.""" + # Self-fetched path + with ( + patch( + "mac2nix.scanners.launch_agents.read_launchd_plists", + return_value=_LAUNCHD_PLISTS, + ), + patch("mac2nix.scanners.launch_agents.run_command", return_value=None), + ): + self_fetched = LaunchAgentsScanner().scan() + + # Prefetched path + with patch("mac2nix.scanners.launch_agents.run_command", return_value=None): + prefetched = LaunchAgentsScanner(launchd_plists=_LAUNCHD_PLISTS).scan() + + assert len(self_fetched.entries) == len(prefetched.entries) + assert self_fetched.entries[0].label == prefetched.entries[0].label + + def test_none_prefetch_falls_back_to_read_launchd_plists(self) -> None: + """When launchd_plists is None, scanner reads plists itself (backward compat).""" + scanner = LaunchAgentsScanner(launchd_plists=None) + + with ( + patch( + "mac2nix.scanners.launch_agents.read_launchd_plists", + return_value=_LAUNCHD_PLISTS, + ) as mock_read, + patch("mac2nix.scanners.launch_agents.run_command", return_value=None), + ): + result = scanner.scan() + + mock_read.assert_called_once() + assert isinstance(result, LaunchAgentsResult) + + def test_no_prefetch_arg_falls_back_to_read_launchd_plists(self) -> None: + """Default (no launchd_plists kwarg) should preserve existing behavior.""" + with ( + patch( + "mac2nix.scanners.launch_agents.read_launchd_plists", + return_value=[], + ) as mock_read, + patch("mac2nix.scanners.launch_agents.run_command", return_value=None), + ): + result = LaunchAgentsScanner().scan() + + mock_read.assert_called_once() + assert isinstance(result, LaunchAgentsResult) + + def test_empty_prefetch_produces_no_plist_entries(self) -> None: + """Empty launchd_plists should yield no plist entries (login items may still appear).""" + with patch("mac2nix.scanners.launch_agents.run_command", return_value=None): + result = LaunchAgentsScanner(launchd_plists=[]).scan() + + assert isinstance(result, LaunchAgentsResult) + # No plist-sourced entries + plist_entries = [e for e in result.entries if e.source != LaunchAgentSource.LOGIN_ITEM] + assert plist_entries == [] + + +# --------------------------------------------------------------------------- +# CronScanner prefetch injection +# --------------------------------------------------------------------------- + + +class TestCronScannerPrefetch: + def test_prefetch_bypasses_read_launchd_plists(self) -> None: + """When launchd_plists is provided, CronScanner must not call read_launchd_plists.""" + scanner = CronScanner(launchd_plists=_SCHEDULED_PLISTS) + + with ( + patch("mac2nix.scanners.cron.read_launchd_plists") as mock_read, + patch("mac2nix.scanners.cron.run_command", return_value=None), + ): + result = scanner.scan() + + mock_read.assert_not_called() + assert isinstance(result, ScheduledTasks) + assert len(result.launchd_scheduled) == 1 + assert result.launchd_scheduled[0].label == "com.test.scheduled" + + def test_prefetch_produces_same_result_as_self_fetch(self) -> None: + """Pre-computed launchd plists should produce same scheduled tasks as self-fetched.""" + # Self-fetched path + with ( + patch("mac2nix.scanners.cron.run_command", return_value=None), + patch( + "mac2nix.scanners.cron.read_launchd_plists", + return_value=_SCHEDULED_PLISTS, + ), + ): + self_fetched = CronScanner().scan() + + # Prefetched path + with patch("mac2nix.scanners.cron.run_command", return_value=None): + prefetched = CronScanner(launchd_plists=_SCHEDULED_PLISTS).scan() + + assert len(self_fetched.launchd_scheduled) == len(prefetched.launchd_scheduled) + assert self_fetched.launchd_scheduled[0].label == prefetched.launchd_scheduled[0].label + assert self_fetched.launchd_scheduled[0].trigger_type == prefetched.launchd_scheduled[0].trigger_type + + def test_none_prefetch_falls_back_to_read_launchd_plists(self) -> None: + """When launchd_plists is None, CronScanner reads plists itself (backward compat).""" + scanner = CronScanner(launchd_plists=None) + + with ( + patch("mac2nix.scanners.cron.run_command", return_value=None), + patch( + "mac2nix.scanners.cron.read_launchd_plists", + return_value=[], + ) as mock_read, + ): + result = scanner.scan() + + mock_read.assert_called_once() + assert isinstance(result, ScheduledTasks) + + def test_no_prefetch_arg_falls_back_to_read_launchd_plists(self) -> None: + """Default (no launchd_plists kwarg) should preserve existing behavior.""" + with ( + patch("mac2nix.scanners.cron.run_command", return_value=None), + patch( + "mac2nix.scanners.cron.read_launchd_plists", + return_value=[], + ) as mock_read, + ): + result = CronScanner().scan() + + mock_read.assert_called_once() + assert isinstance(result, ScheduledTasks) + + def test_empty_prefetch_produces_no_launchd_scheduled(self) -> None: + """Empty launchd_plists should yield no launchd_scheduled entries.""" + with patch("mac2nix.scanners.cron.run_command", return_value=None): + result = CronScanner(launchd_plists=[]).scan() + + assert isinstance(result, ScheduledTasks) + assert result.launchd_scheduled == [] + + def test_cron_entries_still_fetched_with_prefetch(self, cmd_result: Any) -> None: + """Crontab entries (from run_command crontab -l) should still be fetched with prefetch.""" + crontab = "0 5 * * * /usr/bin/backup\n" + + with patch("mac2nix.scanners.cron.run_command", return_value=cmd_result(crontab)): + result = CronScanner(launchd_plists=[]).scan() + + assert isinstance(result, ScheduledTasks) + assert len(result.cron_entries) == 1 + assert result.cron_entries[0].command == "/usr/bin/backup" + + +# --------------------------------------------------------------------------- +# Backward compatibility: existing tests still pass +# --------------------------------------------------------------------------- + + +class TestPrefetchBackwardCompatibility: + """Verify that instantiating scanners without prefetched_data still works identically.""" + + def test_display_scanner_no_args(self) -> None: + scanner = DisplayScanner() + with ( + patch("mac2nix.scanners.display.run_command", return_value=None), + patch("mac2nix.scanners.display.read_plist_safe", return_value=None), + ): + result = scanner.scan() + assert isinstance(result, DisplayConfig) + + def test_audio_scanner_no_args(self) -> None: + scanner = AudioScanner() + with patch("mac2nix.scanners.audio.run_command", return_value=None): + result = scanner.scan() + assert isinstance(result, AudioConfig) + + def test_launch_agents_scanner_no_args(self) -> None: + scanner = LaunchAgentsScanner() + with ( + patch("mac2nix.scanners.launch_agents.read_launchd_plists", return_value=[]), + patch("mac2nix.scanners.launch_agents.run_command", return_value=None), + ): + result = scanner.scan() + assert isinstance(result, LaunchAgentsResult) + + def test_cron_scanner_no_args(self) -> None: + scanner = CronScanner() + with ( + patch("mac2nix.scanners.cron.run_command", return_value=None), + patch("mac2nix.scanners.cron.read_launchd_plists", return_value=[]), + ): + result = scanner.scan() + assert isinstance(result, ScheduledTasks) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..955250d --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,268 @@ +"""Tests for the mac2nix scan CLI command.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from mac2nix.cli import main +from mac2nix.models.system_state import SystemState + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_state(**kwargs: Any) -> SystemState: + defaults: dict[str, Any] = { + "hostname": "test-mac", + "macos_version": "15.3.0", + "architecture": "arm64", + } + defaults.update(kwargs) + return SystemState(**defaults) + + +def _extract_json(output: str) -> str: + """Extract the first JSON object from mixed CLI output (stdout+stderr mixed by CliRunner).""" + start = output.find("{") + if start == -1: + return output + return output[start:] + + +# --------------------------------------------------------------------------- +# CLI command registration +# --------------------------------------------------------------------------- + + +class TestCliCommandRegistration: + def test_scan_command_is_registered(self) -> None: + runner = CliRunner() + result = runner.invoke(main, ["--help"]) + + assert result.exit_code == 0 + assert "scan" in result.output + + def test_scan_help_shows_output_option(self) -> None: + runner = CliRunner() + result = runner.invoke(main, ["scan", "--help"]) + + assert result.exit_code == 0 + assert "--output" in result.output or "-o" in result.output + + def test_scan_help_shows_scanner_option(self) -> None: + runner = CliRunner() + result = runner.invoke(main, ["scan", "--help"]) + + assert result.exit_code == 0 + assert "--scanner" in result.output or "-s" in result.output + + +# --------------------------------------------------------------------------- +# Basic invocation +# --------------------------------------------------------------------------- + + +class TestScanCommandBasic: + def test_scan_exits_zero(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan"]) + + assert result.exit_code == 0 + + def test_scan_outputs_valid_json(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan"]) + + assert result.exit_code == 0 + try: + # CliRunner mixes stderr+stdout; extract the JSON portion + parsed = json.loads(_extract_json(result.output)) + except json.JSONDecodeError: + pytest.fail(f"scan output does not contain valid JSON: {result.output!r}") + + assert parsed["hostname"] == "test-mac" + assert parsed["macos_version"] == "15.3.0" + assert parsed["architecture"] == "arm64" + + def test_scan_json_round_trips(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan"]) + + assert result.exit_code == 0 + recovered = SystemState.from_json(_extract_json(result.output)) + assert recovered.hostname == state.hostname + assert recovered.macos_version == state.macos_version + assert recovered.architecture == state.architecture + + +# --------------------------------------------------------------------------- +# --output / -o option +# --------------------------------------------------------------------------- + + +class TestScanOutputOption: + def test_output_writes_to_file(self, tmp_path: Path) -> None: + runner = CliRunner() + state = _make_state() + output_file = tmp_path / "scan.json" + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "--output", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + parsed = json.loads(output_file.read_text()) + assert parsed["hostname"] == "test-mac" + + def test_short_flag_o_works(self, tmp_path: Path) -> None: + runner = CliRunner() + state = _make_state() + output_file = tmp_path / "scan-short.json" + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "-o", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + def test_output_to_file_stdout_is_not_json_blob(self, tmp_path: Path) -> None: + """When --output is given, stdout should not be the full JSON blob.""" + runner = CliRunner() + state = _make_state() + output_file = tmp_path / "scan.json" + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "--output", str(output_file)]) + + assert result.exit_code == 0 + # stdout should be empty when output goes to a file (summary goes to stderr) + assert "scan_timestamp" not in result.output + + def test_output_creates_parent_dirs(self, tmp_path: Path) -> None: + runner = CliRunner() + state = _make_state() + output_file = tmp_path / "nested" / "dir" / "scan.json" + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "--output", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() + + +# --------------------------------------------------------------------------- +# --scanner / -s option +# --------------------------------------------------------------------------- + + +class TestScanScannerOption: + def test_scanner_short_flag_accepted(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "-s", "display"]) + + assert result.exit_code == 0 + + def test_scanner_repeatable(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "--scanner", "display", "--scanner", "audio"]) + + assert result.exit_code == 0 + + def test_unknown_scanner_exits_nonzero(self) -> None: + runner = CliRunner() + + result = runner.invoke(main, ["scan", "--scanner", "bogus_scanner_xyz_nonexistent"]) + + assert result.exit_code != 0 + + def test_no_scanner_option_exits_zero(self) -> None: + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan"]) + + assert result.exit_code == 0 + + +# --------------------------------------------------------------------------- +# Progress on stderr +# --------------------------------------------------------------------------- + + +class TestScanProgressOutput: + def test_json_portion_is_parseable(self) -> None: + """When no --output flag, the JSON portion of output must be parseable.""" + runner = CliRunner() + state = _make_state() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan"]) + + assert result.exit_code == 0 + try: + json.loads(_extract_json(result.output)) + except json.JSONDecodeError: + pytest.fail(f"output does not contain valid JSON: {result.output!r}") + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + + +class TestScanErrorHandling: + def test_orchestrator_exception_exits_nonzero(self) -> None: + runner = CliRunner() + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.side_effect = RuntimeError("orchestrator failed") + result = runner.invoke(main, ["scan"]) + + assert result.exit_code != 0 + + def test_summary_shown_after_writing_to_file(self, tmp_path: Path) -> None: + """After a successful scan to file, the output file must exist.""" + runner = CliRunner() + state = _make_state() + output_file = tmp_path / "scan.json" + + with patch("mac2nix.cli.asyncio") as mock_asyncio: + mock_asyncio.run.return_value = state + result = runner.invoke(main, ["scan", "--output", str(output_file)]) + + assert result.exit_code == 0 + assert output_file.exists() diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py new file mode 100644 index 0000000..84135d3 --- /dev/null +++ b/tests/test_orchestrator.py @@ -0,0 +1,442 @@ +"""Tests for the async scan orchestrator.""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +from pydantic import BaseModel + +from mac2nix.models.hardware import AudioConfig, DisplayConfig +from mac2nix.models.services import LaunchAgentsResult, ScheduledTasks +from mac2nix.models.system_state import SystemState +from mac2nix.orchestrator import _fetch_system_profiler_batch, _get_system_metadata, run_scan + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_minimal_scanner(name: str, result: Any, *, available: bool = True) -> type: + """Create a minimal mock scanner class that returns a fixed result.""" + + class _MockScanner: + def __init__(self, **_kwargs: object) -> None: + pass + + @property + def name(self) -> str: + return name + + def is_available(self) -> bool: + return available + + def scan(self) -> Any: + return result + + return _MockScanner + + +# --------------------------------------------------------------------------- +# _get_system_metadata +# --------------------------------------------------------------------------- + + +class TestGetSystemMetadata: + def test_returns_three_strings(self) -> None: + with patch("mac2nix.orchestrator.run_command") as mock_cmd: + mock_cmd.return_value = MagicMock(returncode=0, stdout="14.3.1\n") + hostname, macos_version, architecture = _get_system_metadata() + + assert isinstance(hostname, str) + assert isinstance(macos_version, str) + assert isinstance(architecture, str) + + def test_sw_vers_fallback(self) -> None: + with ( + patch("mac2nix.orchestrator.run_command", return_value=None), + patch("mac2nix.orchestrator.platform.mac_ver", return_value=("13.0", (), "")), + ): + _, macos_version, _ = _get_system_metadata() + + assert macos_version == "13.0" + + def test_sw_vers_fallback_unknown(self) -> None: + with ( + patch("mac2nix.orchestrator.run_command", return_value=None), + patch("mac2nix.orchestrator.platform.mac_ver", return_value=("", (), "")), + ): + _, macos_version, _ = _get_system_metadata() + + assert macos_version == "unknown" + + def test_hostname_is_string(self) -> None: + with patch("mac2nix.orchestrator.run_command") as mock_cmd: + mock_cmd.return_value = MagicMock(returncode=0, stdout="14.0\n") + hostname, _, _ = _get_system_metadata() + assert len(hostname) > 0 + + +# --------------------------------------------------------------------------- +# _fetch_system_profiler_batch +# --------------------------------------------------------------------------- + + +class TestFetchSystemProfilerBatch: + def test_returns_parsed_dict(self) -> None: + payload = '{"SPDisplaysDataType": [], "SPAudioDataType": []}' + with ( + patch("mac2nix.orchestrator.shutil.which", return_value="/usr/sbin/system_profiler"), + patch("mac2nix.orchestrator.run_command") as mock_cmd, + ): + mock_cmd.return_value = MagicMock(returncode=0, stdout=payload) + result = _fetch_system_profiler_batch() + + assert "SPDisplaysDataType" in result + assert "SPAudioDataType" in result + + def test_returns_empty_when_not_found(self) -> None: + with patch("mac2nix.orchestrator.shutil.which", return_value=None): + result = _fetch_system_profiler_batch() + + assert result == {} + + def test_returns_empty_on_command_failure(self) -> None: + with ( + patch("mac2nix.orchestrator.shutil.which", return_value="/usr/sbin/system_profiler"), + patch("mac2nix.orchestrator.run_command", return_value=None), + ): + result = _fetch_system_profiler_batch() + + assert result == {} + + def test_returns_empty_on_nonzero_returncode(self) -> None: + with ( + patch("mac2nix.orchestrator.shutil.which", return_value="/usr/sbin/system_profiler"), + patch("mac2nix.orchestrator.run_command") as mock_cmd, + ): + mock_cmd.return_value = MagicMock(returncode=1, stdout="") + result = _fetch_system_profiler_batch() + + assert result == {} + + def test_returns_empty_on_invalid_json(self) -> None: + with ( + patch("mac2nix.orchestrator.shutil.which", return_value="/usr/sbin/system_profiler"), + patch("mac2nix.orchestrator.run_command") as mock_cmd, + ): + mock_cmd.return_value = MagicMock(returncode=0, stdout="{not valid json!!!") + result = _fetch_system_profiler_batch() + + assert result == {} + + +# --------------------------------------------------------------------------- +# run_scan +# --------------------------------------------------------------------------- + + +class _FakeResult(BaseModel): + """Trivial Pydantic model used as a scanner result placeholder in tests.""" + + +class TestRunScan: + def _make_pydantic_result(self) -> _FakeResult: + return _FakeResult() + + def _make_registry(self, names: list[str]) -> dict[str, type]: + """Create a registry of scanners that report as unavailable (result = None).""" + return {name: _make_minimal_scanner(name, _FakeResult(), available=False) for name in names} + + def test_returns_system_state(self) -> None: + # Use a scanner name that doesn't match any SystemState field — extra fields are ignored + registry = self._make_registry(["_fake_scanner_a"]) + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + state = asyncio.run(run_scan()) + + assert isinstance(state, SystemState) + assert state.hostname == "host" + assert state.macos_version == "14.0" + assert state.architecture == "arm64" + + def test_unavailable_scanner_produces_none(self) -> None: + registry = {"shell": _make_minimal_scanner("shell", _FakeResult(), available=False)} + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + state = asyncio.run(run_scan()) + + assert isinstance(state, SystemState) + assert state.shell is None + + def test_scanner_exception_does_not_crash_orchestrator(self) -> None: + class _CrashingScanner: + def __init__(self, **_kwargs: object) -> None: + pass + + @property + def name(self) -> str: + return "_fake_crash" + + def is_available(self) -> bool: + return True + + def scan(self) -> Any: + msg = "boom" + raise RuntimeError(msg) + + registry: dict[str, type] = {"_fake_crash": _CrashingScanner} + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + state = asyncio.run(run_scan()) + + assert isinstance(state, SystemState) + # Crash scanner produces None — shell field should still be None + assert state.shell is None + assert state.audio is None + + def test_progress_callback_called_for_each_scanner(self) -> None: + registry = self._make_registry(["_fake_a", "_fake_b"]) + called: list[str] = [] + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(progress_callback=called.append)) + + assert sorted(called) == sorted(["_fake_a", "_fake_b"]) + + def test_progress_callback_called_for_unavailable(self) -> None: + """Unavailable scanners should still trigger the progress callback.""" + registry = {"_fake": _make_minimal_scanner("_fake", _FakeResult(), available=False)} + called: list[str] = [] + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(progress_callback=called.append)) + + assert "_fake" in called + + def test_display_receives_prefetched_data(self) -> None: + """Display scanner should be instantiated with prefetched_data from the batch call.""" + batched = {"SPDisplaysDataType": [], "SPAudioDataType": []} + init_kwargs: list[dict[str, Any]] = [] + + class _TrackingDisplay: + def __init__(self, **kwargs: Any) -> None: + init_kwargs.append(kwargs) + + @property + def name(self) -> str: + return "display" + + def is_available(self) -> bool: + return True + + def scan(self) -> DisplayConfig: + return DisplayConfig() + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value={"display": _TrackingDisplay}), + patch("mac2nix.orchestrator.DisplayScanner", _TrackingDisplay), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value=batched), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(scanners=["display"])) + + assert init_kwargs, "Display scanner was not instantiated" + assert init_kwargs[0].get("prefetched_data") == batched + + def test_audio_receives_prefetched_data(self) -> None: + """Audio scanner should be instantiated with prefetched_data from the batch call.""" + batched = {"SPDisplaysDataType": [], "SPAudioDataType": []} + init_kwargs: list[dict[str, Any]] = [] + + class _TrackingAudio: + def __init__(self, **kwargs: Any) -> None: + init_kwargs.append(kwargs) + + @property + def name(self) -> str: + return "audio" + + def is_available(self) -> bool: + return True + + def scan(self) -> AudioConfig: + return AudioConfig() + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value={"audio": _TrackingAudio}), + patch("mac2nix.orchestrator.AudioScanner", _TrackingAudio), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value=batched), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(scanners=["audio"])) + + assert init_kwargs, "Audio scanner was not instantiated" + assert init_kwargs[0].get("prefetched_data") == batched + + def test_launch_agents_and_cron_share_plists(self) -> None: + """Both launch_agents and cron should receive the same pre-read plist list.""" + plist_data: list[tuple[Path, str, dict[str, Any]]] = [] + la_kwargs: list[dict[str, Any]] = [] + cron_kwargs: list[dict[str, Any]] = [] + + class _TrackingLA: + def __init__(self, **kwargs: Any) -> None: + la_kwargs.append(kwargs) + + @property + def name(self) -> str: + return "launch_agents" + + def is_available(self) -> bool: + return True + + def scan(self) -> LaunchAgentsResult: + return LaunchAgentsResult() + + class _TrackingCron: + def __init__(self, **kwargs: Any) -> None: + cron_kwargs.append(kwargs) + + @property + def name(self) -> str: + return "cron" + + def is_available(self) -> bool: + return True + + def scan(self) -> ScheduledTasks: + return ScheduledTasks() + + with ( + patch( + "mac2nix.orchestrator.get_all_scanners", + return_value={"launch_agents": _TrackingLA, "cron": _TrackingCron}, + ), + patch("mac2nix.orchestrator.LaunchAgentsScanner", _TrackingLA), + patch("mac2nix.orchestrator.CronScanner", _TrackingCron), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=plist_data), + ): + asyncio.run(run_scan(scanners=["launch_agents", "cron"])) + + assert la_kwargs[0].get("launchd_plists") is plist_data + assert cron_kwargs[0].get("launchd_plists") is plist_data + + def test_empty_scan_produces_valid_system_state(self) -> None: + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value={}), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("myhost", "15.0", "x86_64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + state = asyncio.run(run_scan()) + + assert state.hostname == "myhost" + assert state.display is None + assert state.audio is None + + def test_launchd_not_read_when_not_selected(self) -> None: + """read_launchd_plists should NOT be called when launch_agents and cron not selected.""" + registry = {"shell": _make_minimal_scanner("shell", _FakeResult(), available=False)} + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists") as mock_read, + ): + asyncio.run(run_scan(scanners=["shell"])) + + mock_read.assert_not_called() + + def test_system_profiler_not_called_when_not_selected(self) -> None: + """Batched system_profiler should NOT be called when display+audio not selected.""" + registry = {"shell": _make_minimal_scanner("shell", _FakeResult(), available=False)} + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch") as mock_sp, + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(scanners=["shell"])) + + mock_sp.assert_not_called() + + def test_scanner_filter_excludes_unselected(self) -> None: + """Scanners not in the filter list should not run.""" + called: list[str] = [] + + class _TrackingScanner: + def __init__(self, **_kwargs: object) -> None: + pass + + @property + def name(self) -> str: + return "_fake_selected" + + def is_available(self) -> bool: + return True + + def scan(self) -> _FakeResult: + called.append("_fake_selected") + return _FakeResult() + + class _UnselectedScanner: + def __init__(self, **_kwargs: object) -> None: + pass + + @property + def name(self) -> str: + return "_fake_other" + + def is_available(self) -> bool: + return True + + def scan(self) -> _FakeResult: + called.append("_fake_other") + return _FakeResult() + + registry: dict[str, type] = {"_fake_selected": _TrackingScanner, "_fake_other": _UnselectedScanner} + + with ( + patch("mac2nix.orchestrator.get_all_scanners", return_value=registry), + patch("mac2nix.orchestrator._get_system_metadata", return_value=("host", "14.0", "arm64")), + patch("mac2nix.orchestrator._fetch_system_profiler_batch", return_value={}), + patch("mac2nix.orchestrator.read_launchd_plists", return_value=[]), + ): + asyncio.run(run_scan(scanners=["_fake_selected"])) + + assert "_fake_selected" in called + assert "_fake_other" not in called diff --git a/uv.lock b/uv.lock index 0cce97d..31415e1 100644 --- a/uv.lock +++ b/uv.lock @@ -107,6 +107,7 @@ dependencies = [ { name = "jinja2" }, { name = "pydantic" }, { name = "pyyaml" }, + { name = "rich" }, ] [package.dev-dependencies] @@ -124,6 +125,7 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1" }, { name = "pydantic", specifier = ">=2.0" }, { name = "pyyaml", specifier = ">=6.0" }, + { name = "rich", specifier = ">=13.0" }, ] [package.metadata.requires-dev] @@ -135,6 +137,18 @@ dev = [ { name = "ruff" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -187,6 +201,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -407,6 +430,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + [[package]] name = "ruff" version = "0.15.5" From 4d5e0d86cc2f41a4c9fdcfb123fead34fc6482d5 Mon Sep 17 00:00:00 2001 From: testvalue Date: Sat, 14 Mar 2026 11:37:34 -0400 Subject: [PATCH 2/3] refactor(scanners): merges library_audit + app_config into library --- src/mac2nix/models/__init__.py | 6 +- src/mac2nix/models/files.py | 9 +- src/mac2nix/models/system_state.py | 5 +- src/mac2nix/scanners/__init__.py | 3 +- src/mac2nix/scanners/app_config.py | 152 --- .../{library_audit.py => library_scanner.py} | 253 +++- tests/models/test_dotfile.py | 39 +- tests/models/test_remaining.py | 40 +- tests/models/test_system_state.py | 14 +- tests/scanners/test_app_config.py | 324 ----- tests/scanners/test_library_audit.py | 512 -------- tests/scanners/test_library_scanner.py | 1138 +++++++++++++++++ 12 files changed, 1372 insertions(+), 1123 deletions(-) delete mode 100644 src/mac2nix/scanners/app_config.py rename src/mac2nix/scanners/{library_audit.py => library_scanner.py} (66%) delete mode 100644 tests/scanners/test_app_config.py delete mode 100644 tests/scanners/test_library_audit.py create mode 100644 tests/scanners/test_library_scanner.py diff --git a/src/mac2nix/models/__init__.py b/src/mac2nix/models/__init__.py index 79e0ef6..e5f6a05 100644 --- a/src/mac2nix/models/__init__.py +++ b/src/mac2nix/models/__init__.py @@ -14,7 +14,6 @@ ) from mac2nix.models.files import ( AppConfigEntry, - AppConfigResult, BundleEntry, ConfigFileType, DotfileEntry, @@ -25,9 +24,9 @@ FontSource, FontsResult, KeyBindingEntry, - LibraryAuditResult, LibraryDirEntry, LibraryFileEntry, + LibraryResult, WorkflowEntry, ) from mac2nix.models.hardware import ( @@ -93,7 +92,6 @@ __all__ = [ "AppConfigEntry", - "AppConfigResult", "AppSource", "ApplicationsResult", "AudioConfig", @@ -131,9 +129,9 @@ "LaunchAgentSource", "LaunchAgentsResult", "LaunchdScheduledJob", - "LibraryAuditResult", "LibraryDirEntry", "LibraryFileEntry", + "LibraryResult", "MacPortsPackage", "MacPortsState", "ManagedRuntime", diff --git a/src/mac2nix/models/files.py b/src/mac2nix/models/files.py index e1a022a..7bb9068 100644 --- a/src/mac2nix/models/files.py +++ b/src/mac2nix/models/files.py @@ -1,4 +1,4 @@ -"""Dotfile, app config, font, and library audit models.""" +"""Dotfile, app config, font, and library models.""" from __future__ import annotations @@ -56,10 +56,6 @@ class AppConfigEntry(BaseModel): modified_time: datetime | None = None -class AppConfigResult(BaseModel): - entries: list[AppConfigEntry] - - class FontSource(StrEnum): USER = "user" # ~/Library/Fonts SYSTEM = "system" # /Library/Fonts @@ -121,7 +117,8 @@ class KeyBindingEntry(BaseModel): action: str | dict[str, Any] -class LibraryAuditResult(BaseModel): +class LibraryResult(BaseModel): + app_configs: list[AppConfigEntry] = [] bundles: list[BundleEntry] = [] directories: list[LibraryDirEntry] = [] uncovered_files: list[LibraryFileEntry] = [] diff --git a/src/mac2nix/models/system_state.py b/src/mac2nix/models/system_state.py index 9fa46d6..b1505df 100644 --- a/src/mac2nix/models/system_state.py +++ b/src/mac2nix/models/system_state.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field from mac2nix.models.application import ApplicationsResult, HomebrewState -from mac2nix.models.files import AppConfigResult, DotfilesResult, FontsResult, LibraryAuditResult +from mac2nix.models.files import DotfilesResult, FontsResult, LibraryResult from mac2nix.models.hardware import AudioConfig, DisplayConfig from mac2nix.models.package_managers import ( ContainersResult, @@ -37,7 +37,6 @@ class SystemState(BaseModel): applications: ApplicationsResult | None = None homebrew: HomebrewState | None = None dotfiles: DotfilesResult | None = None - app_config: AppConfigResult | None = None fonts: FontsResult | None = None launch_agents: LaunchAgentsResult | None = None shell: ShellConfig | None = None @@ -47,7 +46,7 @@ class SystemState(BaseModel): display: DisplayConfig | None = None audio: AudioConfig | None = None cron: ScheduledTasks | None = None - library_audit: LibraryAuditResult | None = None + library: LibraryResult | None = None nix_state: NixState | None = None version_managers: VersionManagersResult | None = None package_managers: PackageManagersResult | None = None diff --git a/src/mac2nix/scanners/__init__.py b/src/mac2nix/scanners/__init__.py index 8d99231..6713930 100644 --- a/src/mac2nix/scanners/__init__.py +++ b/src/mac2nix/scanners/__init__.py @@ -1,7 +1,6 @@ """Scanner plugins for macOS system state discovery.""" from mac2nix.scanners import ( # noqa: F401 - app_config, applications, audio, containers, @@ -11,7 +10,7 @@ fonts, homebrew, launch_agents, - library_audit, + library_scanner, network, nix_state, package_managers_scanner, diff --git a/src/mac2nix/scanners/app_config.py b/src/mac2nix/scanners/app_config.py deleted file mode 100644 index aead506..0000000 --- a/src/mac2nix/scanners/app_config.py +++ /dev/null @@ -1,152 +0,0 @@ -"""App config scanner — discovers application configuration files.""" - -from __future__ import annotations - -import logging -import os -from datetime import UTC, datetime -from pathlib import Path - -from mac2nix.models.files import AppConfigEntry, AppConfigResult, ConfigFileType -from mac2nix.scanners._utils import hash_file -from mac2nix.scanners.base import BaseScannerPlugin, register - -logger = logging.getLogger(__name__) - -_EXTENSION_MAP: dict[str, ConfigFileType] = { - ".json": ConfigFileType.JSON, - ".plist": ConfigFileType.PLIST, - ".toml": ConfigFileType.TOML, - ".yaml": ConfigFileType.YAML, - ".yml": ConfigFileType.YAML, - ".xml": ConfigFileType.XML, - ".conf": ConfigFileType.CONF, - ".cfg": ConfigFileType.CONF, - ".ini": ConfigFileType.CONF, - ".sqlite": ConfigFileType.DATABASE, - ".db": ConfigFileType.DATABASE, - ".sqlite3": ConfigFileType.DATABASE, -} - -_SKIP_DIRS = frozenset( - { - "Caches", - "Cache", - "Logs", - "logs", - "tmp", - "temp", - "__pycache__", - "node_modules", - ".git", - ".svn", - ".hg", - "DerivedData", - "Build", - ".build", - "IndexedDB", - "GPUCache", - "ShaderCache", - "Service Worker", - "Code Cache", - "CachedData", - "blob_storage", - } -) - -_MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB -_MAX_FILES_PER_APP = 500 - - -@register("app_config") -class AppConfigScanner(BaseScannerPlugin): - @property - def name(self) -> str: - return "app_config" - - def scan(self) -> AppConfigResult: - home = Path.home() - entries: list[AppConfigEntry] = [] - - scan_dirs = [ - home / "Library" / "Application Support", - home / "Library" / "Group Containers", - ] - - # Add Containers app support dirs - containers_dir = home / "Library" / "Containers" - if containers_dir.is_dir(): - try: - for container in sorted(containers_dir.iterdir()): - app_support = container / "Data" / "Library" / "Application Support" - if app_support.is_dir() and os.access(app_support, os.R_OK): - scan_dirs.append(app_support) - except PermissionError: - logger.warning("Permission denied reading: %s", containers_dir) - - for base_dir in scan_dirs: - if not base_dir.is_dir(): - continue - try: - app_dirs = sorted(base_dir.iterdir()) - except PermissionError: - logger.warning("Permission denied reading: %s", base_dir) - continue - for app_dir in app_dirs: - if not app_dir.is_dir(): - continue - if not os.access(app_dir, os.R_OK): - logger.debug("Skipping TCC-protected directory: %s", app_dir) - continue - self._scan_app_dir(app_dir, entries) - - return AppConfigResult(entries=entries) - - def _scan_app_dir(self, app_dir: Path, entries: list[AppConfigEntry]) -> None: - app_name = app_dir.name - file_count = 0 - - try: - for dirpath, dirnames, filenames in os.walk(app_dir, followlinks=False): - # Prune skipped directories in-place - dirnames[:] = [d for d in dirnames if d not in _SKIP_DIRS] - - for filename in filenames: - if file_count >= _MAX_FILES_PER_APP: - logger.warning( - "Reached %d file cap for app directory: %s", - _MAX_FILES_PER_APP, - app_dir, - ) - return - - filepath = Path(dirpath) / filename - try: - stat = filepath.stat() - except OSError: - continue - - # Skip files over 10MB - if stat.st_size > _MAX_FILE_SIZE: - continue - - ext = filepath.suffix.lower() - file_type = _EXTENSION_MAP.get(ext, ConfigFileType.UNKNOWN) - scannable = file_type != ConfigFileType.DATABASE - - content_hash = hash_file(filepath) if scannable else None - modified_time = datetime.fromtimestamp(stat.st_mtime, tz=UTC) - - entries.append( - AppConfigEntry( - app_name=app_name, - path=filepath, - file_type=file_type, - content_hash=content_hash, - scannable=scannable, - modified_time=modified_time, - ) - ) - file_count += 1 - except PermissionError: - logger.warning("Permission denied reading app config dir: %s", app_dir) diff --git a/src/mac2nix/scanners/library_audit.py b/src/mac2nix/scanners/library_scanner.py similarity index 66% rename from src/mac2nix/scanners/library_audit.py rename to src/mac2nix/scanners/library_scanner.py index 3dcc47d..9370a90 100644 --- a/src/mac2nix/scanners/library_audit.py +++ b/src/mac2nix/scanners/library_scanner.py @@ -1,7 +1,8 @@ -"""Library audit scanner — discovers uncovered ~/Library and /Library content.""" +"""Library scanner — discovers ~/Library content, app configs, and system bundles.""" from __future__ import annotations +import contextlib import logging import os import sqlite3 @@ -10,25 +11,49 @@ from typing import Any from mac2nix.models.files import ( + AppConfigEntry, BundleEntry, + ConfigFileType, KeyBindingEntry, - LibraryAuditResult, LibraryDirEntry, LibraryFileEntry, + LibraryResult, WorkflowEntry, ) -from mac2nix.scanners._utils import hash_file, read_plist_safe, run_command +from mac2nix.scanners._utils import WALK_SKIP_DIRS, hash_file, read_plist_safe, run_command from mac2nix.scanners.base import BaseScannerPlugin, register logger = logging.getLogger(__name__) +# --- App config constants --- + +_EXTENSION_MAP: dict[str, ConfigFileType] = { + ".json": ConfigFileType.JSON, + ".plist": ConfigFileType.PLIST, + ".toml": ConfigFileType.TOML, + ".yaml": ConfigFileType.YAML, + ".yml": ConfigFileType.YAML, + ".xml": ConfigFileType.XML, + ".conf": ConfigFileType.CONF, + ".cfg": ConfigFileType.CONF, + ".ini": ConfigFileType.CONF, + ".sqlite": ConfigFileType.DATABASE, + ".db": ConfigFileType.DATABASE, + ".sqlite3": ConfigFileType.DATABASE, +} + +_MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB +_MAX_FILES_PER_APP = 500 + +# --- Library audit constants --- + _COVERED_DIRS: dict[str, str] = { "Preferences": "preferences", - "Application Support": "app_config", + "Application Support": "library", "Fonts": "fonts", "LaunchAgents": "launch_agents", - "Containers": "preferences+app_config", - "Group Containers": "app_config", + "Containers": "preferences+library", + "Group Containers": "library", "FontCollections": "fonts", "SyncedPreferences": "preferences", } @@ -52,7 +77,8 @@ _SENSITIVE_KEY_PATTERNS = {"_KEY", "_TOKEN", "_SECRET", "_PASSWORD", "_CREDENTIAL", "_AUTH"} -_MAX_FILES_PER_DIR = 200 +_MAX_FILES_PER_DIR = 1000 +_MAX_SCRIPTS = 50 _SYSTEM_SCAN_PATTERNS: dict[str, str] = { "Extensions": "*.kext", @@ -87,32 +113,37 @@ def _redact_sensitive_keys(data: dict[str, Any]) -> None: _redact_sensitive_keys(item) -@register("library_audit") -class LibraryAuditScanner(BaseScannerPlugin): +@register("library") +class LibraryScanner(BaseScannerPlugin): @property def name(self) -> str: - return "library_audit" + return "library" + + def scan(self) -> LibraryResult: + home = Path.home() + home_lib = home / "Library" - def scan(self) -> LibraryAuditResult: - home_lib = Path.home() / "Library" + # --- Library directory audit --- directories = self._audit_directories(home_lib) uncovered_files: list[LibraryFileEntry] = [] workflows: list[WorkflowEntry] = [] + bundles: list[BundleEntry] = [] key_bindings = self._scan_key_bindings(home_lib) spelling_words, spelling_dicts = self._scan_spelling(home_lib) text_replacements = self._scan_text_replacements(home_lib) input_methods = self._scan_bundles_in_dir(home_lib / "Input Methods") - keyboard_layouts = self._scan_file_hashes(home_lib / "Keyboard Layouts", ".keylayout") - color_profiles = self._scan_file_hashes(home_lib / "ColorSync" / "Profiles", ".icc", ".icm") - compositions = self._scan_file_hashes(home_lib / "Compositions", ".qtz") + keyboard_layouts = self._list_files_by_extension(home_lib / "Keyboard Layouts", ".keylayout") + color_profiles = self._list_files_by_extension(home_lib / "ColorSync" / "Profiles", ".icc", ".icm") + compositions = self._list_files_by_extension(home_lib / "Compositions", ".qtz") scripts = self._scan_scripts(home_lib) - # Capture uncovered files and workflows from uncovered directories + # Capture uncovered files, workflows, and bundles from uncovered directories for d in directories: if d.covered_by_scanner is None and d.name not in _TRANSIENT_DIRS: - files, wf = self._capture_uncovered_dir(d.path) + files, wf, bdl = self._capture_uncovered_dir(d.path) uncovered_files.extend(files) workflows.extend(wf) + bundles.extend(bdl) # Scan workflows from known Workflows/Services dirs for wf_dir_name in ["Workflows", "Services"]: @@ -120,9 +151,15 @@ def scan(self) -> LibraryAuditResult: if wf_dir.is_dir(): workflows.extend(self._scan_workflows(wf_dir)) + # --- App config scanning --- + entries = self._scan_app_configs(home_lib) + + # --- system library bundles --- system_bundles = self._scan_system_library() - return LibraryAuditResult( + return LibraryResult( + app_configs=entries, + bundles=bundles, directories=directories, uncovered_files=uncovered_files, workflows=workflows, @@ -138,6 +175,93 @@ def scan(self) -> LibraryAuditResult: system_bundles=system_bundles, ) + # --- App config scanning --- + + def _scan_app_configs(self, home_lib: Path) -> list[AppConfigEntry]: + """Walk Application Support, Group Containers, and Containers for app configs.""" + entries: list[AppConfigEntry] = [] + + scan_dirs = [ + home_lib / "Application Support", + home_lib / "Group Containers", + ] + + # Add Containers app support dirs + containers_dir = home_lib / "Containers" + if containers_dir.is_dir(): + try: + for container in sorted(containers_dir.iterdir()): + app_support = container / "Data" / "Library" / "Application Support" + if app_support.is_dir() and os.access(app_support, os.R_OK): + scan_dirs.append(app_support) + except PermissionError: + logger.warning("Permission denied reading: %s", containers_dir) + + for base_dir in scan_dirs: + if not base_dir.is_dir(): + continue + try: + app_dirs = sorted(base_dir.iterdir()) + except PermissionError: + logger.warning("Permission denied reading: %s", base_dir) + continue + for app_dir in app_dirs: + if not app_dir.is_dir(): + continue + if not os.access(app_dir, os.R_OK): + logger.debug("Skipping TCC-protected directory: %s", app_dir) + continue + self._scan_app_dir(app_dir, entries) + + return entries + + def _scan_app_dir(self, app_dir: Path, entries: list[AppConfigEntry]) -> None: + app_name = app_dir.name + file_count = 0 + + try: + for dirpath, dirnames, filenames in os.walk(app_dir, followlinks=False): + # Prune skipped directories in-place + dirnames[:] = [d for d in dirnames if d not in WALK_SKIP_DIRS] + + for filename in filenames: + if file_count >= _MAX_FILES_PER_APP: + logger.info("Reached %d file cap for app: %s", _MAX_FILES_PER_APP, app_name) + return + + filepath = Path(dirpath) / filename + try: + stat = filepath.stat() + except OSError: + continue + + # Skip files over 10MB + if stat.st_size > _MAX_FILE_SIZE: + continue + + ext = filepath.suffix.lower() + file_type = _EXTENSION_MAP.get(ext, ConfigFileType.UNKNOWN) + scannable = file_type != ConfigFileType.DATABASE + + content_hash = hash_file(filepath) if scannable else None + modified_time = datetime.fromtimestamp(stat.st_mtime, tz=UTC) + + entries.append( + AppConfigEntry( + app_name=app_name, + path=filepath, + file_type=file_type, + content_hash=content_hash, + scannable=scannable, + modified_time=modified_time, + ) + ) + file_count += 1 + except PermissionError: + logger.warning("Permission denied reading app config dir: %s", app_dir) + + # --- Library audit scanning --- + def _audit_directories(self, lib_path: Path) -> list[LibraryDirEntry]: """Walk top-level ~/Library directories and collect metadata.""" if not lib_path.is_dir(): @@ -174,6 +298,8 @@ def _dir_stats(path: Path) -> tuple[int | None, int | None, datetime | None]: total_size = 0 newest = 0.0 for entry in path.iterdir(): + if entry.is_symlink(): + continue try: st = entry.stat() file_count += 1 @@ -186,44 +312,44 @@ def _dir_stats(path: Path) -> tuple[int | None, int | None, datetime | None]: except PermissionError: return None, None, None - def _capture_uncovered_dir(self, dir_path: Path) -> tuple[list[LibraryFileEntry], list[WorkflowEntry]]: - """Capture files from an uncovered directory (capped).""" + def _capture_uncovered_dir( + self, dir_path: Path + ) -> tuple[list[LibraryFileEntry], list[WorkflowEntry], list[BundleEntry]]: + """Capture files, workflows, and bundles from an uncovered directory.""" files: list[LibraryFileEntry] = [] workflows: list[WorkflowEntry] = [] + bundles: list[BundleEntry] = [] count = 0 try: for dirpath, dirnames, filenames in os.walk(dir_path, followlinks=False): for filename in filenames: if count >= _MAX_FILES_PER_DIR: - logger.warning( - "Reached %d file cap for directory: %s", - _MAX_FILES_PER_DIR, - dir_path, - ) - return files, workflows + logger.info("Reached %d file cap for directory: %s", _MAX_FILES_PER_DIR, dir_path) + return files, workflows, bundles filepath = Path(dirpath) / filename + count += 1 entry = self._classify_file(filepath) if entry is not None: files.append(entry) - count += 1 - # Check dirnames for workflow bundles (they're directories, not files) - # and prune them + transient/cache subdirectories in a single pass - _skip = {"Caches", "Cache", "Logs", "tmp", "__pycache__"} + # Check dirnames for workflow/bundle directories + # and prune known non-config directories in a single pass kept: list[str] = [] for dirname in dirnames: + sub_path = Path(dirpath) / dirname if dirname.endswith(".workflow"): - wf_path = Path(dirpath) / dirname - wf = self._parse_workflow(wf_path) + wf = self._parse_workflow(sub_path) if wf is not None: workflows.append(wf) - elif dirname not in _skip: + elif any(dirname.endswith(ext) for ext in _BUNDLE_EXTENSIONS): + bundles.append(self._parse_bundle(sub_path)) + elif dirname not in WALK_SKIP_DIRS: kept.append(dirname) dirnames[:] = kept except PermissionError: logger.warning("Permission denied walking: %s", dir_path) - return files, workflows + return files, workflows, bundles def _classify_file(self, filepath: Path) -> LibraryFileEntry | None: """Classify and capture a file from an uncovered directory.""" @@ -235,26 +361,29 @@ def _classify_file(self, filepath: Path) -> LibraryFileEntry | None: size = stat.st_size suffix = filepath.suffix.lower() file_type = suffix.lstrip(".") if suffix else "unknown" - content_hash = hash_file(filepath) plist_content: dict[str, Any] | None = None text_content: str | None = None - strategy = "hash_only" + content_hash: str | None = None + # Only do expensive IO on known config file types if suffix == ".plist": raw_plist = read_plist_safe(filepath) if isinstance(raw_plist, dict): plist_content = raw_plist _redact_sensitive_keys(plist_content) - strategy = "plist_capture" - elif suffix in {".txt", ".md", ".cfg", ".conf", ".ini", ".yaml", ".yml", ".json", ".xml"}: + content_hash = hash_file(filepath) + strategy = "plist_capture" if plist_content else "hash_only" + elif suffix in {".txt", ".md", ".cfg", ".conf", ".ini", ".yaml", ".yml", ".json", ".xml", ".toml"}: + content_hash = hash_file(filepath) if size < 65536: - try: + with contextlib.suppress(OSError): text_content = filepath.read_text(errors="replace") - strategy = "text_capture" - except OSError: - pass + strategy = "text_capture" if text_content else "hash_only" elif suffix in _BUNDLE_EXTENSIONS: strategy = "bundle" + else: + # Non-config file: record path + size only, no IO + strategy = "metadata_only" return LibraryFileEntry( path=filepath, @@ -355,6 +484,7 @@ def _parse_workflow(wf_path: Path) -> WorkflowEntry | None: if doc_plist.is_file(): raw = read_plist_safe(doc_plist) if isinstance(raw, dict): + _redact_sensitive_keys(raw) definition = raw return WorkflowEntry( @@ -371,33 +501,15 @@ def _scan_bundles_in_dir(self, dir_path: Path) -> list[BundleEntry]: bundles: list[BundleEntry] = [] try: for item in sorted(dir_path.iterdir()): - if not item.is_dir(): + if item.is_symlink() or not item.is_dir(): continue - info_plist = item / "Contents" / "Info.plist" - if not info_plist.is_file(): - info_plist = item / "Info.plist" - bundle_id: str | None = None - version: str | None = None - if info_plist.is_file(): - data = read_plist_safe(info_plist) - if isinstance(data, dict): - bundle_id = data.get("CFBundleIdentifier") - version = data.get("CFBundleShortVersionString") - bundles.append( - BundleEntry( - name=item.name, - path=item, - bundle_id=bundle_id, - version=version, - bundle_type=item.suffix.lstrip(".") if item.suffix else None, - ) - ) + bundles.append(self._parse_bundle(item)) except PermissionError: logger.debug("Permission denied reading: %s", dir_path) return bundles @staticmethod - def _scan_file_hashes(dir_path: Path, *extensions: str) -> list[str]: + def _list_files_by_extension(dir_path: Path, *extensions: str) -> list[str]: """Scan files in a directory and return their names.""" if not dir_path.is_dir(): return [] @@ -419,6 +531,9 @@ def _scan_scripts(self, lib_path: Path) -> list[str]: scripts: list[str] = [] try: for f in sorted(scripts_dir.iterdir()): + if len(scripts) >= _MAX_SCRIPTS: + logger.info("Reached %d script cap for: %s", _MAX_SCRIPTS, scripts_dir) + break if f.is_file(): if f.suffix == ".scpt": # Try to decompile AppleScript @@ -449,9 +564,7 @@ def _scan_system_library(self) -> list[BundleEntry]: try: for item in sorted(scan_dir.glob(pattern)): if item.is_dir(): - bundle = self._parse_system_bundle(item) - if bundle is not None: - bundles.append(bundle) + bundles.append(self._parse_bundle(item)) except PermissionError: logger.debug("Permission denied reading: %s", scan_dir) @@ -475,16 +588,14 @@ def _scan_audio_plugins(self, audio_plugins: Path) -> list[BundleEntry]: if subdir.is_dir(): for item in sorted(subdir.iterdir()): if item.is_dir() and item.suffix in _BUNDLE_EXTENSIONS: - bundle = self._parse_system_bundle(item) - if bundle is not None: - bundles.append(bundle) + bundles.append(self._parse_bundle(item)) except PermissionError: pass return bundles @staticmethod - def _parse_system_bundle(item: Path) -> BundleEntry | None: - """Parse a system-level bundle.""" + def _parse_bundle(item: Path) -> BundleEntry: + """Parse a bundle directory, reading Info.plist for metadata.""" info_plist = item / "Contents" / "Info.plist" if not info_plist.is_file(): info_plist = item / "Info.plist" diff --git a/tests/models/test_dotfile.py b/tests/models/test_dotfile.py index a861dd0..997ba5b 100644 --- a/tests/models/test_dotfile.py +++ b/tests/models/test_dotfile.py @@ -1,10 +1,9 @@ -"""Tests for dotfile, app_config, and font models.""" +"""Tests for dotfile, app config entry, and font models.""" from pathlib import Path from mac2nix.models.files import ( AppConfigEntry, - AppConfigResult, ConfigFileType, DotfileEntry, DotfileManager, @@ -92,42 +91,6 @@ def test_defaults(self): assert entry.scannable is True -class TestAppConfigResult: - def test_with_entries(self): - entries = [ - AppConfigEntry( - app_name="VSCode", - path=Path("~/Library/Application Support/Code/settings.json"), - file_type=ConfigFileType.JSON, - ), - AppConfigEntry( - app_name="Safari", - path=Path("~/Library/Safari/History.db"), - file_type=ConfigFileType.DATABASE, - scannable=False, - ), - ] - result = AppConfigResult(entries=entries) - assert len(result.entries) == 2 - - def test_json_roundtrip(self): - original = AppConfigResult( - entries=[ - AppConfigEntry( - app_name="iTerm2", - app_bundle_id="com.googlecode.iterm2", - path=Path("~/Library/Preferences/com.googlecode.iterm2.plist"), - file_type=ConfigFileType.PLIST, - content_hash="def456", - ), - ], - ) - json_str = original.model_dump_json() - restored = AppConfigResult.model_validate_json(json_str) - assert restored.entries[0].app_name == "iTerm2" - assert restored.entries[0].file_type == ConfigFileType.PLIST - - class TestFontEntry: def test_user_source(self): entry = FontEntry( diff --git a/tests/models/test_remaining.py b/tests/models/test_remaining.py index 71d0b8a..8892657 100644 --- a/tests/models/test_remaining.py +++ b/tests/models/test_remaining.py @@ -7,12 +7,14 @@ from mac2nix.models.application import BinarySource, BrewService, PathBinary from mac2nix.models.files import ( + AppConfigEntry, BundleEntry, + ConfigFileType, DotfileEntry, DotfileManager, FontCollection, - LibraryAuditResult, LibraryFileEntry, + LibraryResult, WorkflowEntry, ) from mac2nix.models.hardware import AudioConfig, AudioDevice, DisplayConfig, Monitor, NightShiftConfig @@ -371,9 +373,10 @@ def test_defaults(self) -> None: assert job.trigger_type == "calendar" -class TestLibraryAuditResult: +class TestLibraryResult: def test_all_defaults_empty(self) -> None: - result = LibraryAuditResult() + result = LibraryResult() + assert result.app_configs == [] assert result.bundles == [] assert result.directories == [] assert result.uncovered_files == [] @@ -390,16 +393,45 @@ def test_all_defaults_empty(self) -> None: assert result.system_bundles == [] def test_with_populated_fields(self) -> None: - result = LibraryAuditResult( + result = LibraryResult( + app_configs=[ + AppConfigEntry( + app_name="iTerm2", + path=Path("~/Library/Application Support/iTerm2/settings.json"), + file_type=ConfigFileType.JSON, + ), + ], bundles=[BundleEntry(name="Test.bundle", path=Path("/Library/Bundles/Test.bundle"))], spelling_words=["nix", "darwin"], keyboard_layouts=["US", "Dvorak"], text_replacements=[{"shortcut": "omw", "phrase": "On my way!"}], ) + assert len(result.app_configs) == 1 + assert result.app_configs[0].app_name == "iTerm2" assert len(result.bundles) == 1 assert result.spelling_words == ["nix", "darwin"] assert len(result.text_replacements) == 1 + def test_json_roundtrip_with_app_configs(self) -> None: + original = LibraryResult( + app_configs=[ + AppConfigEntry( + app_name="iTerm2", + app_bundle_id="com.googlecode.iterm2", + path=Path("~/Library/Application Support/iTerm2/settings.json"), + file_type=ConfigFileType.JSON, + content_hash="abc123", + ), + ], + spelling_words=["nix"], + ) + json_str = original.model_dump_json() + restored = LibraryResult.model_validate_json(json_str) + assert len(restored.app_configs) == 1 + assert restored.app_configs[0].app_name == "iTerm2" + assert restored.app_configs[0].file_type == ConfigFileType.JSON + assert restored.spelling_words == ["nix"] + class TestBundleEntry: def test_construction(self) -> None: diff --git a/tests/models/test_system_state.py b/tests/models/test_system_state.py index 7e9443d..d1f0be9 100644 --- a/tests/models/test_system_state.py +++ b/tests/models/test_system_state.py @@ -7,7 +7,7 @@ from mac2nix.models import ( BrewFormula, HomebrewState, - LibraryAuditResult, + LibraryResult, PreferencesDomain, PreferencesResult, SystemState, @@ -104,23 +104,23 @@ def test_with_domain_data(self): assert restored.homebrew is not None assert len(restored.homebrew.formulae) == 2 - def test_library_audit_field(self): + def test_library_field(self): state = SystemState( hostname="test-mac", macos_version="15.3", architecture="arm64", - library_audit=LibraryAuditResult( + library=LibraryResult( spelling_words=["nix", "darwin"], keyboard_layouts=["US"], ), ) - assert state.library_audit is not None - assert state.library_audit.spelling_words == ["nix", "darwin"] + assert state.library is not None + assert state.library.spelling_words == ["nix", "darwin"] - def test_library_audit_default_none(self): + def test_library_default_none(self): state = SystemState( hostname="test-mac", macos_version="15.3", architecture="arm64", ) - assert state.library_audit is None + assert state.library is None diff --git a/tests/scanners/test_app_config.py b/tests/scanners/test_app_config.py deleted file mode 100644 index 21206c9..0000000 --- a/tests/scanners/test_app_config.py +++ /dev/null @@ -1,324 +0,0 @@ -"""Tests for app config scanner.""" - -from pathlib import Path -from unittest.mock import patch - -from mac2nix.models.files import AppConfigResult, ConfigFileType -from mac2nix.scanners.app_config import AppConfigScanner - - -def _setup_app_support(tmp_path: Path) -> Path: - app_support = tmp_path / "Library" / "Application Support" - app_support.mkdir(parents=True) - return app_support - - -class TestAppConfigScanner: - def test_name_property(self) -> None: - assert AppConfigScanner().name == "app_config" - - def test_json_config(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "MyApp" - app_dir.mkdir() - (app_dir / "settings.json").write_text('{"key": "value"}') - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].file_type == ConfigFileType.JSON - assert result.entries[0].app_name == "MyApp" - assert result.entries[0].scannable is True - - def test_plist_config(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "SomeApp" - app_dir.mkdir() - (app_dir / "config.plist").write_text("") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.PLIST - assert result.entries[0].scannable is True - - def test_database_not_scannable(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "DBApp" - app_dir.mkdir() - (app_dir / "data.sqlite").write_bytes(b"SQLite format 3\x00") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.DATABASE - assert result.entries[0].scannable is False - - def test_unknown_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "OtherApp" - app_dir.mkdir() - (app_dir / "data.xyz").write_text("unknown format") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.UNKNOWN - - def test_conf_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "ConfApp" - app_dir.mkdir() - (app_dir / "app.conf").write_text("[section]\nkey=value") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.CONF - - def test_content_hash_computed(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "HashApp" - app_dir.mkdir() - (app_dir / "config.json").write_text('{"a": 1}') - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].content_hash is not None - assert len(result.entries[0].content_hash) == 16 - - def test_empty_app_support(self, tmp_path: Path) -> None: - _setup_app_support(tmp_path) - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries == [] - - def test_database_hash_skipped(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "DBApp" - app_dir.mkdir() - (app_dir / "data.db").write_bytes(b"SQLite format 3\x00" + b"\x00" * 100) - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].file_type == ConfigFileType.DATABASE - assert result.entries[0].scannable is False - assert result.entries[0].content_hash is None - - def test_group_containers(self, tmp_path: Path) -> None: - group_containers = tmp_path / "Library" / "Group Containers" - group_containers.mkdir(parents=True) - app_dir = group_containers / "group.com.example.app" - app_dir.mkdir() - (app_dir / "settings.json").write_text('{"key": "value"}') - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].app_name == "group.com.example.app" - assert result.entries[0].file_type == ConfigFileType.JSON - - def test_yaml_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "YamlApp" - app_dir.mkdir() - (app_dir / "config.yaml").write_text("key: value") - (app_dir / "settings.yml").write_text("other: true") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 2 - assert all(e.file_type == ConfigFileType.YAML for e in result.entries) - - def test_xml_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "XmlApp" - app_dir.mkdir() - (app_dir / "config.xml").write_text("") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].file_type == ConfigFileType.XML - - def test_returns_app_config_result(self, tmp_path: Path) -> None: - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - - def test_containers_app_support(self, tmp_path: Path) -> None: - _setup_app_support(tmp_path) - container = tmp_path / "Library" / "Containers" / "com.test.app" / "Data" / "Library" / "Application Support" - container.mkdir(parents=True) - app_dir = container / "TestApp" - app_dir.mkdir() - (app_dir / "config.json").write_text('{"key": "value"}') - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].app_name == "TestApp" - - def test_skip_dirs_pruned(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "MyApp" - app_dir.mkdir() - (app_dir / "settings.json").write_text("{}") - cache_dir = app_dir / "Caches" - cache_dir.mkdir() - (cache_dir / "cached.json").write_text("{}") - git_dir = app_dir / ".git" - git_dir.mkdir() - (git_dir / "config").write_text("[core]") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - paths = {str(e.path) for e in result.entries} - assert any("settings.json" in p for p in paths) - assert not any("Caches" in p for p in paths) - assert not any(".git" in p for p in paths) - - def test_large_file_skipped(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "BigApp" - app_dir.mkdir() - (app_dir / "small.json").write_text("{}") - big_file = app_dir / "huge.json" - # Write just over 10MB - big_file.write_bytes(b"x" * (10 * 1024 * 1024 + 1)) - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 1 - assert result.entries[0].path.name == "small.json" - - def test_max_files_per_app_cap(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "ManyFilesApp" - app_dir.mkdir() - # Create 501 files to hit the cap (500) - for i in range(501): - (app_dir / f"file{i:04d}.json").write_text("{}") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - app_entries = [e for e in result.entries if e.app_name == "ManyFilesApp"] - assert len(app_entries) == 500 - - def test_toml_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "TomlApp" - app_dir.mkdir() - (app_dir / "config.toml").write_text("[section]\nkey = 'value'") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.TOML - - def test_ini_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "IniApp" - app_dir.mkdir() - (app_dir / "config.ini").write_text("[section]\nkey=value") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.CONF - - def test_cfg_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "CfgApp" - app_dir.mkdir() - (app_dir / "app.cfg").write_text("key=value") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.CONF - - def test_sqlite3_extension(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "Sqlite3App" - app_dir.mkdir() - (app_dir / "data.sqlite3").write_bytes(b"SQLite format 3\x00") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].file_type == ConfigFileType.DATABASE - assert result.entries[0].scannable is False - - def test_nested_config_files(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "Chrome" - profile = app_dir / "Default" - profile.mkdir(parents=True) - (profile / "Preferences").write_text('{"key": "value"}') - (app_dir / "Local State").write_text('{"other": true}') - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert len(result.entries) == 2 - - def test_modified_time_set(self, tmp_path: Path) -> None: - app_support = _setup_app_support(tmp_path) - app_dir = app_support / "TimeApp" - app_dir.mkdir() - (app_dir / "config.json").write_text("{}") - - with patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path): - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) - assert result.entries[0].modified_time is not None - - def test_permission_denied_containers(self, tmp_path: Path) -> None: - _setup_app_support(tmp_path) - containers = tmp_path / "Library" / "Containers" - containers.mkdir(parents=True) - - with ( - patch("mac2nix.scanners.app_config.Path.home", return_value=tmp_path), - patch("pathlib.Path.iterdir", side_effect=PermissionError("denied")), - ): - # Should not crash — gracefully handles permission error - result = AppConfigScanner().scan() - - assert isinstance(result, AppConfigResult) diff --git a/tests/scanners/test_library_audit.py b/tests/scanners/test_library_audit.py deleted file mode 100644 index 168b467..0000000 --- a/tests/scanners/test_library_audit.py +++ /dev/null @@ -1,512 +0,0 @@ -"""Tests for library audit scanner.""" - -import sqlite3 -from pathlib import Path -from unittest.mock import MagicMock, patch - -from mac2nix.models.files import LibraryAuditResult -from mac2nix.scanners.library_audit import ( - _COVERED_DIRS, - _TRANSIENT_DIRS, - LibraryAuditScanner, - _redact_sensitive_keys, -) - - -class TestLibraryAuditScanner: - def test_name_property(self) -> None: - assert LibraryAuditScanner().name == "library_audit" - - def test_returns_library_audit_result(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - assert isinstance(result, LibraryAuditResult) - - def test_audit_directories_covered(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - prefs = lib / "Preferences" - prefs.mkdir() - (prefs / "com.apple.finder.plist").write_bytes(b"data") - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - pref_dir = next(d for d in result.directories if d.name == "Preferences") - assert pref_dir.covered_by_scanner == "preferences" - assert pref_dir.has_user_content is False - - def test_audit_directories_uncovered(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - custom = lib / "CustomDir" - custom.mkdir() - (custom / "file.txt").write_text("hello") - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - custom_dir = next(d for d in result.directories if d.name == "CustomDir") - assert custom_dir.covered_by_scanner is None - assert custom_dir.has_user_content is True - - def test_audit_directories_transient_not_user_content(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - caches = lib / "Caches" - caches.mkdir() - (caches / "something.cache").write_bytes(b"data") - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - cache_dir = next(d for d in result.directories if d.name == "Caches") - assert cache_dir.covered_by_scanner is None - assert cache_dir.has_user_content is False - - def test_dir_stats(self, tmp_path: Path) -> None: - (tmp_path / "file1.txt").write_text("hello") - (tmp_path / "file2.txt").write_text("world!") - - file_count, total_size, newest_mod = LibraryAuditScanner._dir_stats(tmp_path) - - assert file_count == 2 - assert total_size is not None - assert total_size > 0 - assert newest_mod is not None - - def test_dir_stats_permission_denied(self, tmp_path: Path) -> None: - protected = tmp_path / "protected" - protected.mkdir() - - with patch.object(Path, "iterdir", side_effect=PermissionError("denied")): - file_count, total_size, newest_mod = LibraryAuditScanner._dir_stats(protected) - - assert file_count is None - assert total_size is None - assert newest_mod is None - - def test_classify_file_plist(self, tmp_path: Path) -> None: - plist_file = tmp_path / "test.plist" - plist_file.write_bytes(b"data") - - with ( - patch("mac2nix.scanners.library_audit.read_plist_safe", return_value={"key": "value"}), - patch("mac2nix.scanners.library_audit.hash_file", return_value="abc123"), - ): - entry = LibraryAuditScanner()._classify_file(plist_file) - - assert entry is not None - assert entry.file_type == "plist" - assert entry.migration_strategy == "plist_capture" - assert entry.plist_content == {"key": "value"} - - def test_classify_file_text(self, tmp_path: Path) -> None: - txt_file = tmp_path / "readme.txt" - txt_file.write_text("some text content") - - with patch("mac2nix.scanners.library_audit.hash_file", return_value="def456"): - entry = LibraryAuditScanner()._classify_file(txt_file) - - assert entry is not None - assert entry.file_type == "txt" - assert entry.migration_strategy == "text_capture" - assert entry.text_content == "some text content" - - def test_classify_file_text_too_large(self, tmp_path: Path) -> None: - large_file = tmp_path / "big.txt" - large_file.write_text("x" * 70000) - - with patch("mac2nix.scanners.library_audit.hash_file", return_value="abc"): - entry = LibraryAuditScanner()._classify_file(large_file) - - assert entry is not None - assert entry.migration_strategy == "hash_only" - assert entry.text_content is None - - def test_classify_file_bundle_extension(self, tmp_path: Path) -> None: - bundle = tmp_path / "plugin.component" - bundle.write_bytes(b"data") - - with patch("mac2nix.scanners.library_audit.hash_file", return_value="hash"): - entry = LibraryAuditScanner()._classify_file(bundle) - - assert entry is not None - assert entry.migration_strategy == "bundle" - - def test_classify_file_unknown(self, tmp_path: Path) -> None: - binary_file = tmp_path / "data.bin" - binary_file.write_bytes(b"\x00\x01\x02") - - with patch("mac2nix.scanners.library_audit.hash_file", return_value="hash"): - entry = LibraryAuditScanner()._classify_file(binary_file) - - assert entry is not None - assert entry.migration_strategy == "hash_only" - - def test_key_bindings(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - kb_dir = lib / "KeyBindings" - kb_dir.mkdir(parents=True) - kb_file = kb_dir / "DefaultKeyBinding.dict" - kb_file.write_bytes(b"dummy") - - with patch( - "mac2nix.scanners.library_audit.read_plist_safe", - return_value={"^w": "deleteWordBackward:", "~f": "moveWordForward:"}, - ): - result = LibraryAuditScanner()._scan_key_bindings(lib) - - assert len(result) == 2 - keys = {e.key for e in result} - assert "^w" in keys - assert "~f" in keys - - def test_key_bindings_no_file(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - result = LibraryAuditScanner()._scan_key_bindings(lib) - assert result == [] - - def test_spelling_words(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - spelling = lib / "Spelling" - spelling.mkdir(parents=True) - local_dict = spelling / "LocalDictionary" - local_dict.write_text("nix\ndarwin\nhomebrew\n") - (spelling / "en_US").write_text("") - - words, dicts = LibraryAuditScanner()._scan_spelling(lib) - - assert words == ["nix", "darwin", "homebrew"] - assert "en_US" in dicts - - def test_spelling_no_dir(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - words, dicts = LibraryAuditScanner()._scan_spelling(lib) - assert words == [] - assert dicts == [] - - def test_scan_workflows(self, tmp_path: Path) -> None: - wf_dir = tmp_path / "Services" - wf = wf_dir / "MyService.workflow" - contents = wf / "Contents" - contents.mkdir(parents=True) - info = contents / "Info.plist" - info.write_bytes(b"dummy") - - with patch( - "mac2nix.scanners.library_audit.read_plist_safe", - return_value={"CFBundleIdentifier": "com.example.myservice"}, - ): - result = LibraryAuditScanner()._scan_workflows(wf_dir) - - assert len(result) == 1 - assert result[0].name == "MyService" - assert result[0].identifier == "com.example.myservice" - - def test_scan_workflows_no_dir(self, tmp_path: Path) -> None: - result = LibraryAuditScanner()._scan_workflows(tmp_path / "nonexistent") - assert result == [] - - def test_scan_bundles_in_dir(self, tmp_path: Path) -> None: - im_dir = tmp_path / "Input Methods" - bundle = im_dir / "MyInput.app" - contents = bundle / "Contents" - contents.mkdir(parents=True) - info = contents / "Info.plist" - info.write_bytes(b"dummy") - - with patch( - "mac2nix.scanners.library_audit.read_plist_safe", - return_value={ - "CFBundleIdentifier": "com.example.input", - "CFBundleShortVersionString": "1.0", - }, - ): - result = LibraryAuditScanner()._scan_bundles_in_dir(im_dir) - - assert len(result) == 1 - assert result[0].bundle_id == "com.example.input" - assert result[0].version == "1.0" - - def test_scan_bundles_no_dir(self) -> None: - result = LibraryAuditScanner()._scan_bundles_in_dir(Path("/nonexistent")) - assert result == [] - - def test_scan_file_hashes(self, tmp_path: Path) -> None: - (tmp_path / "layout1.keylayout").write_text("xml") - (tmp_path / "layout2.keylayout").write_text("xml") - (tmp_path / "other.txt").write_text("ignored") - - result = LibraryAuditScanner._scan_file_hashes(tmp_path, ".keylayout") - - assert len(result) == 2 - assert "layout1.keylayout" in result - assert "layout2.keylayout" in result - - def test_scan_file_hashes_no_dir(self) -> None: - result = LibraryAuditScanner._scan_file_hashes(Path("/nonexistent"), ".icc") - assert result == [] - - def test_scan_scripts_with_applescript(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - scripts = lib / "Scripts" - scripts.mkdir(parents=True) - scpt = scripts / "hello.scpt" - scpt.write_bytes(b"compiled") - sh = scripts / "cleanup.sh" - sh.write_text("#!/bin/bash\necho cleanup") - - with patch( - "mac2nix.scanners.library_audit.run_command", - return_value=MagicMock(returncode=0, stdout='display dialog "Hello"'), - ): - result = LibraryAuditScanner()._scan_scripts(lib) - - assert len(result) == 2 - script_names = [s.split(":")[0] if ":" in s else s for s in result] - assert "cleanup.sh" in script_names - assert "hello.scpt" in script_names - - def test_scan_scripts_applescript_decompile_fails(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - scripts = lib / "Scripts" - scripts.mkdir(parents=True) - scpt = scripts / "broken.scpt" - scpt.write_bytes(b"compiled") - - with patch("mac2nix.scanners.library_audit.run_command", return_value=None): - result = LibraryAuditScanner()._scan_scripts(lib) - - assert result == ["broken.scpt"] - - def test_scan_scripts_no_dir(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - result = LibraryAuditScanner()._scan_scripts(lib) - assert result == [] - - def test_text_replacements(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - ks_dir = lib / "KeyboardServices" - ks_dir.mkdir(parents=True) - db_path = ks_dir / "TextReplacements.db" - db_path.write_bytes(b"dummy") - - mock_rows = [("omw", "On my way!"), ("addr", "123 Main St")] - mock_cursor = type("MockCursor", (), {"fetchall": lambda _self: mock_rows})() - mock_conn = type( - "MockConn", - (), - { - "execute": lambda _self, _query: mock_cursor, - "close": lambda _self: None, - }, - )() - - with patch("mac2nix.scanners.library_audit.sqlite3.connect", return_value=mock_conn): - result = LibraryAuditScanner()._scan_text_replacements(lib) - - assert len(result) == 2 - assert result[0] == {"shortcut": "omw", "phrase": "On my way!"} - - def test_text_replacements_no_db(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - result = LibraryAuditScanner()._scan_text_replacements(lib) - assert result == [] - - def test_capture_uncovered_dir_capped(self, tmp_path: Path) -> None: - for i in range(210): - (tmp_path / f"file{i:03d}.txt").write_text(f"content {i}") - - with ( - patch("mac2nix.scanners.library_audit.hash_file", return_value="hash"), - patch("mac2nix.scanners.library_audit.read_plist_safe", return_value=None), - ): - files, _workflows = LibraryAuditScanner()._capture_uncovered_dir(tmp_path) - - assert len(files) <= 200 - - def test_uncovered_files_collected(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - custom = lib / "CustomStuff" - custom.mkdir() - (custom / "config.json").write_text('{"key": "value"}') - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - patch("mac2nix.scanners.library_audit.hash_file", return_value="hash"), - patch("mac2nix.scanners.library_audit.read_plist_safe", return_value=None), - ): - result = LibraryAuditScanner().scan() - - assert len(result.uncovered_files) >= 1 - json_file = next(f for f in result.uncovered_files if "config.json" in str(f.path)) - assert json_file.file_type == "json" - - def test_workflows_from_services_dir(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - services = lib / "Services" - wf = services / "Convert.workflow" - contents = wf / "Contents" - contents.mkdir(parents=True) - (contents / "Info.plist").write_bytes(b"dummy") - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - patch( - "mac2nix.scanners.library_audit.read_plist_safe", - return_value={"CFBundleIdentifier": "com.example.convert"}, - ), - ): - result = LibraryAuditScanner().scan() - - assert any(w.name == "Convert" for w in result.workflows) - - def test_empty_library(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - lib.mkdir() - - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - assert isinstance(result, LibraryAuditResult) - assert result.directories == [] - assert result.uncovered_files == [] - - def test_no_library_dir(self, tmp_path: Path) -> None: - with ( - patch("mac2nix.scanners.library_audit.Path.home", return_value=tmp_path), - patch.object(LibraryAuditScanner, "_scan_system_library", return_value=[]), - ): - result = LibraryAuditScanner().scan() - - assert isinstance(result, LibraryAuditResult) - assert result.directories == [] - - -class TestRedactSensitiveKeys: - def test_redacts_api_key(self) -> None: - data = {"API_KEY": "secret123", "name": "test"} - _redact_sensitive_keys(data) - - redacted = "***REDACTED***" - assert data["API_KEY"] == redacted - assert data["name"] == "test" - - def test_redacts_nested_dict(self) -> None: - data = {"config": {"DB_PASSWORD": "secret", "host": "localhost"}} - _redact_sensitive_keys(data) - - redacted = "***REDACTED***" - assert data["config"]["DB_PASSWORD"] == redacted - assert data["config"]["host"] == "localhost" - - def test_redacts_in_list(self) -> None: - data = {"items": [{"ACCESS_TOKEN": "token123"}, {"normal": "value"}]} - _redact_sensitive_keys(data) - - redacted = "***REDACTED***" - assert data["items"][0]["ACCESS_TOKEN"] == redacted - assert data["items"][1]["normal"] == "value" - - def test_case_insensitive_match(self) -> None: - data = {"my_auth_header": "Bearer xyz"} - _redact_sensitive_keys(data) - - redacted = "***REDACTED***" - assert data["my_auth_header"] == redacted - - def test_no_sensitive_keys(self) -> None: - data = {"name": "test", "count": 42} - _redact_sensitive_keys(data) - assert data == {"name": "test", "count": 42} - - -class TestCoveredDirsMapping: - def test_known_covered_dirs(self) -> None: - assert _COVERED_DIRS["Preferences"] == "preferences" - assert _COVERED_DIRS["Application Support"] == "app_config" - assert _COVERED_DIRS["LaunchAgents"] == "launch_agents" - assert _COVERED_DIRS["Fonts"] == "fonts" - - def test_transient_dirs(self) -> None: - assert "Caches" in _TRANSIENT_DIRS - assert "Logs" in _TRANSIENT_DIRS - assert "Saved Application State" in _TRANSIENT_DIRS - - -class TestScanAudioPlugins: - def test_finds_component_bundles(self, tmp_path: Path) -> None: - components = tmp_path / "Components" - components.mkdir() - plugin = components / "MyPlugin.component" - plugin.mkdir() - info = plugin / "Contents" / "Info.plist" - info.parent.mkdir() - info.write_bytes(b"dummy") - - with patch( - "mac2nix.scanners.library_audit.read_plist_safe", - return_value={"CFBundleIdentifier": "com.test.plugin", "CFBundleShortVersionString": "1.0"}, - ): - result = LibraryAuditScanner()._scan_audio_plugins(tmp_path) - - assert len(result) == 1 - assert result[0].name == "MyPlugin.component" - assert result[0].bundle_id == "com.test.plugin" - - def test_skips_non_bundle_dirs(self, tmp_path: Path) -> None: - components = tmp_path / "Components" - components.mkdir() - regular_dir = components / "NotABundle" - regular_dir.mkdir() - - result = LibraryAuditScanner()._scan_audio_plugins(tmp_path) - assert result == [] - - def test_empty_audio_dir(self, tmp_path: Path) -> None: - result = LibraryAuditScanner()._scan_audio_plugins(tmp_path / "nonexistent") - assert result == [] - - -class TestTextReplacementsCorrupted: - def test_corrupted_db_returns_empty(self, tmp_path: Path) -> None: - lib = tmp_path / "Library" - ks_dir = lib / "KeyboardServices" - ks_dir.mkdir(parents=True) - db_path = ks_dir / "TextReplacements.db" - db_path.write_bytes(b"not a sqlite database") - - with patch( - "mac2nix.scanners.library_audit.sqlite3.connect", - side_effect=sqlite3.OperationalError("not a database"), - ): - result = LibraryAuditScanner()._scan_text_replacements(lib) - - assert result == [] diff --git a/tests/scanners/test_library_scanner.py b/tests/scanners/test_library_scanner.py new file mode 100644 index 0000000..844be6e --- /dev/null +++ b/tests/scanners/test_library_scanner.py @@ -0,0 +1,1138 @@ +"""Tests for library scanner.""" + +import sqlite3 +from pathlib import Path +from unittest.mock import MagicMock, patch + +from mac2nix.models.files import ConfigFileType, LibraryResult +from mac2nix.scanners.library_scanner import ( + _COVERED_DIRS, + _TRANSIENT_DIRS, + LibraryScanner, + _redact_sensitive_keys, +) + + +def _setup_app_support(tmp_path: Path) -> Path: + app_support = tmp_path / "Library" / "Application Support" + app_support.mkdir(parents=True) + return app_support + + +class TestLibraryScanner: + def test_name_property(self) -> None: + assert LibraryScanner().name == "library" + + # --- App config tests (via scan()) --- + + def test_json_config(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "MyApp" + app_dir.mkdir() + (app_dir / "settings.json").write_text('{"key": "value"}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].file_type == ConfigFileType.JSON + assert result.app_configs[0].app_name == "MyApp" + assert result.app_configs[0].scannable is True + + def test_plist_config(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "SomeApp" + app_dir.mkdir() + (app_dir / "config.plist").write_text("") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.PLIST + assert result.app_configs[0].scannable is True + + def test_database_not_scannable(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "DBApp" + app_dir.mkdir() + (app_dir / "data.sqlite").write_bytes(b"SQLite format 3\x00") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.DATABASE + assert result.app_configs[0].scannable is False + + def test_unknown_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "OtherApp" + app_dir.mkdir() + (app_dir / "data.xyz").write_text("unknown format") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.UNKNOWN + + def test_conf_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "ConfApp" + app_dir.mkdir() + (app_dir / "app.conf").write_text("[section]\nkey=value") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.CONF + + def test_content_hash_computed(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "HashApp" + app_dir.mkdir() + (app_dir / "config.json").write_text('{"a": 1}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].content_hash is not None + assert len(result.app_configs[0].content_hash) == 16 + + def test_empty_app_support(self, tmp_path: Path) -> None: + _setup_app_support(tmp_path) + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs == [] + + def test_database_hash_skipped(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "DBApp" + app_dir.mkdir() + (app_dir / "data.db").write_bytes(b"SQLite format 3\x00" + b"\x00" * 100) + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].file_type == ConfigFileType.DATABASE + assert result.app_configs[0].scannable is False + assert result.app_configs[0].content_hash is None + + def test_group_containers(self, tmp_path: Path) -> None: + group_containers = tmp_path / "Library" / "Group Containers" + group_containers.mkdir(parents=True) + app_dir = group_containers / "group.com.example.app" + app_dir.mkdir() + (app_dir / "settings.json").write_text('{"key": "value"}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].app_name == "group.com.example.app" + assert result.app_configs[0].file_type == ConfigFileType.JSON + + def test_yaml_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "YamlApp" + app_dir.mkdir() + (app_dir / "config.yaml").write_text("key: value") + (app_dir / "settings.yml").write_text("other: true") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 2 + assert all(e.file_type == ConfigFileType.YAML for e in result.app_configs) + + def test_xml_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "XmlApp" + app_dir.mkdir() + (app_dir / "config.xml").write_text("") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].file_type == ConfigFileType.XML + + def test_returns_library_result(self, tmp_path: Path) -> None: + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + + def test_containers_app_support(self, tmp_path: Path) -> None: + _setup_app_support(tmp_path) + container = tmp_path / "Library" / "Containers" / "com.test.app" / "Data" / "Library" / "Application Support" + container.mkdir(parents=True) + app_dir = container / "TestApp" + app_dir.mkdir() + (app_dir / "config.json").write_text('{"key": "value"}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].app_name == "TestApp" + + def test_skip_dirs_pruned(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "MyApp" + app_dir.mkdir() + (app_dir / "settings.json").write_text("{}") + cache_dir = app_dir / "Caches" + cache_dir.mkdir() + (cache_dir / "cached.json").write_text("{}") + git_dir = app_dir / ".git" + git_dir.mkdir() + (git_dir / "config").write_text("[core]") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + paths = {str(e.path) for e in result.app_configs} + assert any("settings.json" in p for p in paths) + assert not any("Caches" in p for p in paths) + assert not any(".git" in p for p in paths) + + def test_large_file_skipped(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "BigApp" + app_dir.mkdir() + (app_dir / "small.json").write_text("{}") + big_file = app_dir / "huge.json" + # Write just over 10MB + big_file.write_bytes(b"x" * (10 * 1024 * 1024 + 1)) + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 1 + assert result.app_configs[0].path.name == "small.json" + + def test_max_files_per_app_cap(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "ManyFilesApp" + app_dir.mkdir() + for i in range(501): + (app_dir / f"file{i:04d}.json").write_text("{}") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + app_entries = [e for e in result.app_configs if e.app_name == "ManyFilesApp"] + assert len(app_entries) == 500 + + def test_skips_non_config_dirs(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "TestApp" + app_dir.mkdir() + (app_dir / "config.json").write_text("{}") + for skip_name in ["node_modules", ".git", "Caches"]: + skip_dir = app_dir / skip_name + skip_dir.mkdir() + (skip_dir / "junk.json").write_text("{}") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + app_entries = [e for e in result.app_configs if e.app_name == "TestApp"] + paths = {str(e.path) for e in app_entries} + assert any("config.json" in p for p in paths) + assert not any("junk.json" in p for p in paths) + + def test_toml_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "TomlApp" + app_dir.mkdir() + (app_dir / "config.toml").write_text("[section]\nkey = 'value'") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.TOML + + def test_ini_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "IniApp" + app_dir.mkdir() + (app_dir / "config.ini").write_text("[section]\nkey=value") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.CONF + + def test_cfg_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "CfgApp" + app_dir.mkdir() + (app_dir / "app.cfg").write_text("key=value") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.CONF + + def test_sqlite3_extension(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "Sqlite3App" + app_dir.mkdir() + (app_dir / "data.sqlite3").write_bytes(b"SQLite format 3\x00") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].file_type == ConfigFileType.DATABASE + assert result.app_configs[0].scannable is False + + def test_nested_config_files(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "Chrome" + profile = app_dir / "Default" + profile.mkdir(parents=True) + (profile / "Preferences").write_text('{"key": "value"}') + (app_dir / "Local State").write_text('{"other": true}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert len(result.app_configs) == 2 + + def test_modified_time_set(self, tmp_path: Path) -> None: + app_support = _setup_app_support(tmp_path) + app_dir = app_support / "TimeApp" + app_dir.mkdir() + (app_dir / "config.json").write_text("{}") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.app_configs[0].modified_time is not None + + def test_permission_denied_containers(self, tmp_path: Path) -> None: + _setup_app_support(tmp_path) + containers = tmp_path / "Library" / "Containers" + containers.mkdir(parents=True) + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch("pathlib.Path.iterdir", side_effect=PermissionError("denied")), + ): + # Should not crash — gracefully handles permission error + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + + # --- Library audit tests --- + + def test_audit_directories_covered(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + prefs = lib / "Preferences" + prefs.mkdir() + (prefs / "com.apple.finder.plist").write_bytes(b"data") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + pref_dir = next(d for d in result.directories if d.name == "Preferences") + assert pref_dir.covered_by_scanner == "preferences" + assert pref_dir.has_user_content is False + + def test_audit_directories_uncovered(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + custom = lib / "CustomDir" + custom.mkdir() + (custom / "file.txt").write_text("hello") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + custom_dir = next(d for d in result.directories if d.name == "CustomDir") + assert custom_dir.covered_by_scanner is None + assert custom_dir.has_user_content is True + + def test_audit_directories_transient_not_user_content(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + caches = lib / "Caches" + caches.mkdir() + (caches / "something.cache").write_bytes(b"data") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + cache_dir = next(d for d in result.directories if d.name == "Caches") + assert cache_dir.covered_by_scanner is None + assert cache_dir.has_user_content is False + + def test_dir_stats(self, tmp_path: Path) -> None: + (tmp_path / "file1.txt").write_text("hello") + (tmp_path / "file2.txt").write_text("world!") + + file_count, total_size, newest_mod = LibraryScanner._dir_stats(tmp_path) + + assert file_count == 2 + assert total_size is not None + assert total_size > 0 + assert newest_mod is not None + + def test_dir_stats_permission_denied(self, tmp_path: Path) -> None: + protected = tmp_path / "protected" + protected.mkdir() + + with patch.object(Path, "iterdir", side_effect=PermissionError("denied")): + file_count, total_size, newest_mod = LibraryScanner._dir_stats(protected) + + assert file_count is None + assert total_size is None + assert newest_mod is None + + def test_classify_file_plist(self, tmp_path: Path) -> None: + plist_file = tmp_path / "test.plist" + plist_file.write_bytes(b"data") + + with ( + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value={"key": "value"}), + patch("mac2nix.scanners.library_scanner.hash_file", return_value="abc123"), + ): + entry = LibraryScanner()._classify_file(plist_file) + + assert entry is not None + assert entry.file_type == "plist" + assert entry.migration_strategy == "plist_capture" + assert entry.plist_content == {"key": "value"} + + def test_classify_file_text(self, tmp_path: Path) -> None: + txt_file = tmp_path / "readme.txt" + txt_file.write_text("some text content") + + with patch("mac2nix.scanners.library_scanner.hash_file", return_value="def456"): + entry = LibraryScanner()._classify_file(txt_file) + + assert entry is not None + assert entry.file_type == "txt" + assert entry.migration_strategy == "text_capture" + assert entry.text_content == "some text content" + + def test_classify_file_text_too_large(self, tmp_path: Path) -> None: + large_file = tmp_path / "big.txt" + large_file.write_text("x" * 70000) + + with patch("mac2nix.scanners.library_scanner.hash_file", return_value="abc"): + entry = LibraryScanner()._classify_file(large_file) + + assert entry is not None + assert entry.migration_strategy == "hash_only" + assert entry.text_content is None + + def test_classify_file_bundle_extension(self, tmp_path: Path) -> None: + bundle = tmp_path / "plugin.component" + bundle.write_bytes(b"data") + + with patch("mac2nix.scanners.library_scanner.hash_file", return_value="hash"): + entry = LibraryScanner()._classify_file(bundle) + + assert entry is not None + assert entry.migration_strategy == "bundle" + + def test_classify_file_unknown(self, tmp_path: Path) -> None: + binary_file = tmp_path / "data.bin" + binary_file.write_bytes(b"\x00\x01\x02") + + entry = LibraryScanner()._classify_file(binary_file) + + assert entry is not None + assert entry.migration_strategy == "metadata_only" + assert entry.content_hash is None + + def test_key_bindings(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + kb_dir = lib / "KeyBindings" + kb_dir.mkdir(parents=True) + kb_file = kb_dir / "DefaultKeyBinding.dict" + kb_file.write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"^w": "deleteWordBackward:", "~f": "moveWordForward:"}, + ): + result = LibraryScanner()._scan_key_bindings(lib) + + assert len(result) == 2 + keys = {e.key for e in result} + assert "^w" in keys + assert "~f" in keys + + def test_key_bindings_no_file(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + result = LibraryScanner()._scan_key_bindings(lib) + assert result == [] + + def test_spelling_words(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + spelling = lib / "Spelling" + spelling.mkdir(parents=True) + local_dict = spelling / "LocalDictionary" + local_dict.write_text("nix\ndarwin\nhomebrew\n") + (spelling / "en_US").write_text("") + + words, dicts = LibraryScanner()._scan_spelling(lib) + + assert words == ["nix", "darwin", "homebrew"] + assert "en_US" in dicts + + def test_spelling_no_dir(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + words, dicts = LibraryScanner()._scan_spelling(lib) + assert words == [] + assert dicts == [] + + def test_scan_workflows(self, tmp_path: Path) -> None: + wf_dir = tmp_path / "Services" + wf = wf_dir / "MyService.workflow" + contents = wf / "Contents" + contents.mkdir(parents=True) + info = contents / "Info.plist" + info.write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.myservice"}, + ): + result = LibraryScanner()._scan_workflows(wf_dir) + + assert len(result) == 1 + assert result[0].name == "MyService" + assert result[0].identifier == "com.example.myservice" + + def test_scan_workflows_no_dir(self, tmp_path: Path) -> None: + result = LibraryScanner()._scan_workflows(tmp_path / "nonexistent") + assert result == [] + + def test_scan_bundles_in_dir(self, tmp_path: Path) -> None: + im_dir = tmp_path / "Input Methods" + bundle = im_dir / "MyInput.app" + contents = bundle / "Contents" + contents.mkdir(parents=True) + info = contents / "Info.plist" + info.write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={ + "CFBundleIdentifier": "com.example.input", + "CFBundleShortVersionString": "1.0", + }, + ): + result = LibraryScanner()._scan_bundles_in_dir(im_dir) + + assert len(result) == 1 + assert result[0].bundle_id == "com.example.input" + assert result[0].version == "1.0" + + def test_scan_bundles_no_dir(self) -> None: + result = LibraryScanner()._scan_bundles_in_dir(Path("/nonexistent")) + assert result == [] + + def test_list_files_by_extension(self, tmp_path: Path) -> None: + (tmp_path / "layout1.keylayout").write_text("xml") + (tmp_path / "layout2.keylayout").write_text("xml") + (tmp_path / "other.txt").write_text("ignored") + + result = LibraryScanner._list_files_by_extension(tmp_path, ".keylayout") + + assert len(result) == 2 + assert "layout1.keylayout" in result + assert "layout2.keylayout" in result + + def test_list_files_by_extension_no_dir(self) -> None: + result = LibraryScanner._list_files_by_extension(Path("/nonexistent"), ".icc") + assert result == [] + + def test_scan_scripts_with_applescript(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + scripts = lib / "Scripts" + scripts.mkdir(parents=True) + scpt = scripts / "hello.scpt" + scpt.write_bytes(b"compiled") + sh = scripts / "cleanup.sh" + sh.write_text("#!/bin/bash\necho cleanup") + + with patch( + "mac2nix.scanners.library_scanner.run_command", + return_value=MagicMock(returncode=0, stdout='display dialog "Hello"'), + ): + result = LibraryScanner()._scan_scripts(lib) + + assert len(result) == 2 + script_names = [s.split(":")[0] if ":" in s else s for s in result] + assert "cleanup.sh" in script_names + assert "hello.scpt" in script_names + + def test_scan_scripts_applescript_decompile_fails(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + scripts = lib / "Scripts" + scripts.mkdir(parents=True) + scpt = scripts / "broken.scpt" + scpt.write_bytes(b"compiled") + + with patch("mac2nix.scanners.library_scanner.run_command", return_value=None): + result = LibraryScanner()._scan_scripts(lib) + + assert result == ["broken.scpt"] + + def test_scan_scripts_no_dir(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + result = LibraryScanner()._scan_scripts(lib) + assert result == [] + + def test_text_replacements(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + ks_dir = lib / "KeyboardServices" + ks_dir.mkdir(parents=True) + db_path = ks_dir / "TextReplacements.db" + db_path.write_bytes(b"dummy") + + mock_rows = [("omw", "On my way!"), ("addr", "123 Main St")] + mock_cursor = type("MockCursor", (), {"fetchall": lambda _self: mock_rows})() + mock_conn = type( + "MockConn", + (), + { + "execute": lambda _self, _query: mock_cursor, + "close": lambda _self: None, + }, + )() + + with patch("mac2nix.scanners.library_scanner.sqlite3.connect", return_value=mock_conn): + result = LibraryScanner()._scan_text_replacements(lib) + + assert len(result) == 2 + assert result[0] == {"shortcut": "omw", "phrase": "On my way!"} + + def test_text_replacements_no_db(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + result = LibraryScanner()._scan_text_replacements(lib) + assert result == [] + + def test_capture_uncovered_dir_walks_below_cap(self, tmp_path: Path) -> None: + for i in range(210): + (tmp_path / f"file{i:03d}.txt").write_text(f"content {i}") + + with ( + patch("mac2nix.scanners.library_scanner.hash_file", return_value="hash"), + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value=None), + ): + files, _workflows, _bundles = LibraryScanner()._capture_uncovered_dir(tmp_path) + + assert len(files) == 210 # all files captured (below 1000 cap) + + def test_capture_uncovered_dir_skips_non_config_dirs(self, tmp_path: Path) -> None: + config_dir = tmp_path / "real_config" + config_dir.mkdir() + (config_dir / "settings.json").write_text("{}") + + for skip_name in ["node_modules", ".git", "Caches", "DerivedData"]: + skip_dir = tmp_path / skip_name + skip_dir.mkdir() + (skip_dir / "junk.txt").write_text("should be skipped") + + with ( + patch("mac2nix.scanners.library_scanner.hash_file", return_value="hash"), + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value=None), + ): + files, _workflows, _bundles = LibraryScanner()._capture_uncovered_dir(tmp_path) + + paths = {str(f.path) for f in files} + assert any("settings.json" in p for p in paths) + assert not any("junk.txt" in p for p in paths) + + def test_uncovered_files_collected(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + custom = lib / "CustomStuff" + custom.mkdir() + (custom / "config.json").write_text('{"key": "value"}') + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + patch("mac2nix.scanners.library_scanner.hash_file", return_value="hash"), + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value=None), + ): + result = LibraryScanner().scan() + + assert len(result.uncovered_files) >= 1 + json_file = next(f for f in result.uncovered_files if "config.json" in str(f.path)) + assert json_file.file_type == "json" + + def test_workflows_from_services_dir(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + services = lib / "Services" + wf = services / "Convert.workflow" + contents = wf / "Contents" + contents.mkdir(parents=True) + (contents / "Info.plist").write_bytes(b"dummy") + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.convert"}, + ), + ): + result = LibraryScanner().scan() + + assert any(w.name == "Convert" for w in result.workflows) + + def test_empty_library(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + lib.mkdir() + + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.directories == [] + assert result.uncovered_files == [] + + def test_no_library_dir(self, tmp_path: Path) -> None: + with ( + patch("mac2nix.scanners.library_scanner.Path.home", return_value=tmp_path), + patch.object(LibraryScanner, "_scan_system_library", return_value=[]), + ): + result = LibraryScanner().scan() + + assert isinstance(result, LibraryResult) + assert result.directories == [] + + +class TestRedactSensitiveKeys: + def test_redacts_api_key(self) -> None: + data = {"API_KEY": "secret123", "name": "test"} + _redact_sensitive_keys(data) + + redacted = "***REDACTED***" + assert data["API_KEY"] == redacted + assert data["name"] == "test" + + def test_redacts_nested_dict(self) -> None: + data = {"config": {"DB_PASSWORD": "secret", "host": "localhost"}} + _redact_sensitive_keys(data) + + redacted = "***REDACTED***" + assert data["config"]["DB_PASSWORD"] == redacted + assert data["config"]["host"] == "localhost" + + def test_redacts_in_list(self) -> None: + data = {"items": [{"ACCESS_TOKEN": "token123"}, {"normal": "value"}]} + _redact_sensitive_keys(data) + + redacted = "***REDACTED***" + assert data["items"][0]["ACCESS_TOKEN"] == redacted + assert data["items"][1]["normal"] == "value" + + def test_case_insensitive_match(self) -> None: + data = {"my_auth_header": "Bearer xyz"} + _redact_sensitive_keys(data) + + redacted = "***REDACTED***" + assert data["my_auth_header"] == redacted + + def test_no_sensitive_keys(self) -> None: + data = {"name": "test", "count": 42} + _redact_sensitive_keys(data) + assert data == {"name": "test", "count": 42} + + +class TestCoveredDirsMapping: + def test_known_covered_dirs(self) -> None: + assert _COVERED_DIRS["Preferences"] == "preferences" + assert _COVERED_DIRS["Application Support"] == "library" + assert _COVERED_DIRS["LaunchAgents"] == "launch_agents" + assert _COVERED_DIRS["Fonts"] == "fonts" + + def test_transient_dirs(self) -> None: + assert "Caches" in _TRANSIENT_DIRS + assert "Logs" in _TRANSIENT_DIRS + assert "Saved Application State" in _TRANSIENT_DIRS + + +class TestScanAudioPlugins: + def test_finds_component_bundles(self, tmp_path: Path) -> None: + components = tmp_path / "Components" + components.mkdir() + plugin = components / "MyPlugin.component" + plugin.mkdir() + info = plugin / "Contents" / "Info.plist" + info.parent.mkdir() + info.write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.test.plugin", "CFBundleShortVersionString": "1.0"}, + ): + result = LibraryScanner()._scan_audio_plugins(tmp_path) + + assert len(result) == 1 + assert result[0].name == "MyPlugin.component" + assert result[0].bundle_id == "com.test.plugin" + + def test_skips_non_bundle_dirs(self, tmp_path: Path) -> None: + components = tmp_path / "Components" + components.mkdir() + regular_dir = components / "NotABundle" + regular_dir.mkdir() + + result = LibraryScanner()._scan_audio_plugins(tmp_path) + assert result == [] + + def test_empty_audio_dir(self, tmp_path: Path) -> None: + result = LibraryScanner()._scan_audio_plugins(tmp_path / "nonexistent") + assert result == [] + + +class TestTextReplacementsCorrupted: + def test_corrupted_db_returns_empty(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + ks_dir = lib / "KeyboardServices" + ks_dir.mkdir(parents=True) + db_path = ks_dir / "TextReplacements.db" + db_path.write_bytes(b"not a sqlite database") + + with patch( + "mac2nix.scanners.library_scanner.sqlite3.connect", + side_effect=sqlite3.OperationalError("not a database"), + ): + result = LibraryScanner()._scan_text_replacements(lib) + + assert result == [] + + def test_null_rows_filtered(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + ks_dir = lib / "KeyboardServices" + ks_dir.mkdir(parents=True) + db_path = ks_dir / "TextReplacements.db" + db_path.write_bytes(b"dummy") + + mock_rows = [("omw", "On my way!"), (None, "no shortcut"), ("notext", None), (None, None)] + mock_cursor = type("MockCursor", (), {"fetchall": lambda _self: mock_rows})() + mock_conn = type( + "MockConn", + (), + { + "execute": lambda _self, _query: mock_cursor, + "close": lambda _self: None, + }, + )() + + with patch("mac2nix.scanners.library_scanner.sqlite3.connect", return_value=mock_conn): + result = LibraryScanner()._scan_text_replacements(lib) + + assert len(result) == 1 + assert result[0] == {"shortcut": "omw", "phrase": "On my way!"} + + +class TestCaptureUncoveredDirEdgeCases: + def test_file_cap_enforced(self, tmp_path: Path) -> None: + for i in range(1050): + (tmp_path / f"file{i:04d}.txt").write_text(f"content {i}") + + with ( + patch("mac2nix.scanners.library_scanner.hash_file", return_value="hash"), + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value=None), + ): + files, _workflows, _bundles = LibraryScanner()._capture_uncovered_dir(tmp_path) + + # Count includes all files encountered, but total should be capped + assert len(files) <= 1000 + + def test_workflow_bundles_discovered(self, tmp_path: Path) -> None: + wf = tmp_path / "MyAction.workflow" + contents = wf / "Contents" + contents.mkdir(parents=True) + (contents / "Info.plist").write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.action"}, + ): + _files, workflows, _bundles = LibraryScanner()._capture_uncovered_dir(tmp_path) + + assert len(workflows) == 1 + assert workflows[0].name == "MyAction" + assert workflows[0].identifier == "com.example.action" + + def test_bundle_dirs_discovered(self, tmp_path: Path) -> None: + plugin = tmp_path / "MyPlugin.component" + plugin_contents = plugin / "Contents" + plugin_contents.mkdir(parents=True) + (plugin_contents / "Info.plist").write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.plugin", "CFBundleShortVersionString": "2.0"}, + ): + _files, _workflows, bundles = LibraryScanner()._capture_uncovered_dir(tmp_path) + + assert len(bundles) == 1 + assert bundles[0].name == "MyPlugin.component" + assert bundles[0].bundle_id == "com.example.plugin" + assert bundles[0].bundle_type == "component" + + +class TestClassifyFileEdgeCases: + def test_plist_returns_non_dict(self, tmp_path: Path) -> None: + plist_file = tmp_path / "list.plist" + plist_file.write_bytes(b"data") + + with ( + patch("mac2nix.scanners.library_scanner.read_plist_safe", return_value=["item1", "item2"]), + patch("mac2nix.scanners.library_scanner.hash_file", return_value="abc123"), + ): + entry = LibraryScanner()._classify_file(plist_file) + + assert entry is not None + assert entry.migration_strategy == "hash_only" + assert entry.plist_content is None + assert entry.content_hash == "abc123" + + +class TestKeyBindingsEdgeCases: + def test_non_string_dict_actions_filtered(self, tmp_path: Path) -> None: + lib = tmp_path / "Library" + kb_dir = lib / "KeyBindings" + kb_dir.mkdir(parents=True) + (kb_dir / "DefaultKeyBinding.dict").write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={ + "^w": "deleteWordBackward:", + "^x": 42, + "^y": ["a", "b"], + "^z": {"moveRight:": "moveWordRight:"}, + }, + ): + result = LibraryScanner()._scan_key_bindings(lib) + + assert len(result) == 2 + keys = {e.key for e in result} + assert "^w" in keys + assert "^z" in keys + assert "^x" not in keys + assert "^y" not in keys + + +class TestScanSystemLibrary: + def test_discovers_kext_bundles(self, tmp_path: Path) -> None: + extensions = tmp_path / "Extensions" + kext = extensions / "MyDriver.kext" + kext_contents = kext / "Contents" + kext_contents.mkdir(parents=True) + (kext_contents / "Info.plist").write_bytes(b"dummy") + + with ( + patch("mac2nix.scanners.library_scanner.Path", wraps=Path) as mock_path, + patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.driver", "CFBundleShortVersionString": "1.0"}, + ), + ): + mock_path.side_effect = lambda p: tmp_path if p == "/Library" else Path(p) + scanner = LibraryScanner() + # Directly call with our tmp_path standing in for /Library + result = scanner._scan_bundles_in_dir(extensions) + + # At minimum, verify the bundle parsing works + # (full _scan_system_library integration is hard to mock cleanly) + assert len(result) == 1 + assert result[0].bundle_id == "com.example.driver" + + def test_discovers_audio_plugins(self, tmp_path: Path) -> None: + components = tmp_path / "Components" + components.mkdir() + vst = components / "Synth.vst" + vst_contents = vst / "Contents" + vst_contents.mkdir(parents=True) + (vst_contents / "Info.plist").write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.synth"}, + ): + result = LibraryScanner()._scan_audio_plugins(tmp_path) + + assert len(result) == 1 + assert result[0].bundle_id == "com.example.synth" + assert result[0].bundle_type == "vst" + + def test_parse_bundle_fallback_info_plist(self, tmp_path: Path) -> None: + bundle = tmp_path / "MyBundle.plugin" + bundle.mkdir() + # Info.plist at root level (no Contents/ dir) + (bundle / "Info.plist").write_bytes(b"dummy") + + with patch( + "mac2nix.scanners.library_scanner.read_plist_safe", + return_value={"CFBundleIdentifier": "com.example.root"}, + ): + result = LibraryScanner._parse_bundle(bundle) + + assert result.bundle_id == "com.example.root" + + def test_parse_bundle_no_info_plist(self, tmp_path: Path) -> None: + bundle = tmp_path / "Empty.plugin" + bundle.mkdir() + + result = LibraryScanner._parse_bundle(bundle) + + assert result.name == "Empty.plugin" + assert result.bundle_id is None + assert result.version is None + assert result.bundle_type == "plugin" + + def test_parse_bundle_no_suffix(self, tmp_path: Path) -> None: + bundle = tmp_path / "WeirdBundle" + bundle.mkdir() + + result = LibraryScanner._parse_bundle(bundle) + + assert result.name == "WeirdBundle" + assert result.bundle_type is None + + +class TestSymlinkSafety: + def test_dir_stats_skips_symlinks(self, tmp_path: Path) -> None: + (tmp_path / "real_file.txt").write_text("hello") + (tmp_path / "link").symlink_to(tmp_path / "real_file.txt") + + file_count, _total_size, _ = LibraryScanner._dir_stats(tmp_path) + + # Symlink should be skipped — only real_file counted + assert file_count == 1 + + def test_scan_bundles_in_dir_skips_symlinks(self, tmp_path: Path) -> None: + real_bundle = tmp_path / "Real.app" + real_bundle.mkdir() + (tmp_path / "Linked.app").symlink_to(real_bundle) + + result = LibraryScanner()._scan_bundles_in_dir(tmp_path) + + names = [b.name for b in result] + assert "Real.app" in names + assert "Linked.app" not in names From e49ddcee6c2db8ff40d5d52bdac4b9ecc1db106e Mon Sep 17 00:00:00 2001 From: testvalue Date: Sat, 14 Mar 2026 13:45:19 -0400 Subject: [PATCH 3/3] =?UTF-8?q?fix(scanners):=20review=20fixes=20=E2=80=94?= =?UTF-8?q?=20PII=20redaction,=20callback=20safety,=20naming?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove hardware_serial capture (PII with no migration value) - Wrap progress callback in try/except (broken terminal can't crash scan) - deepcopy raw_plist to protect shared prefetch data from mutation - Expand nix_state _PRUNE_DIRS to skip macOS non-project directories - Rename convert_datetimes → sanitize_plist_values (handles bytes, UIDs too) --- src/mac2nix/orchestrator.py | 7 ++++++- src/mac2nix/scanners/_utils.py | 8 ++++---- src/mac2nix/scanners/launch_agents.py | 5 +++-- src/mac2nix/scanners/nix_state.py | 20 +++++++++++++++++++- src/mac2nix/scanners/preferences.py | 4 ++-- src/mac2nix/scanners/system_scanner.py | 4 ++-- tests/scanners/test_system_scanner.py | 2 +- tests/scanners/test_utils.py | 14 +++++++------- 8 files changed, 44 insertions(+), 20 deletions(-) diff --git a/src/mac2nix/orchestrator.py b/src/mac2nix/orchestrator.py index 85f4d1b..46fb31b 100644 --- a/src/mac2nix/orchestrator.py +++ b/src/mac2nix/orchestrator.py @@ -90,8 +90,13 @@ async def _run_scanner_async( logger.exception("Scanner '%s' raised an exception", scanner_name) return scanner_name, None finally: + # Safe: this runs on the event loop thread (after the await), not the + # worker thread, so the callback sees serialised access. if progress_callback is not None: - progress_callback(scanner_name) + try: + progress_callback(scanner_name) + except Exception: + logger.debug("Progress callback failed for '%s'", scanner_name) async def run_scan( diff --git a/src/mac2nix/scanners/_utils.py b/src/mac2nix/scanners/_utils.py index 72f7a94..d0a162f 100644 --- a/src/mac2nix/scanners/_utils.py +++ b/src/mac2nix/scanners/_utils.py @@ -69,7 +69,7 @@ ] -def convert_datetimes(obj: Any) -> Any: +def sanitize_plist_values(obj: Any) -> Any: """Recursively convert non-JSON-safe plist values. plistlib returns datetime objects (for NSDate), bytes objects (for NSData), @@ -82,9 +82,9 @@ def convert_datetimes(obj: Any) -> Any: if isinstance(obj, plistlib.UID): return int(obj) if isinstance(obj, dict): - return {k: convert_datetimes(v) for k, v in obj.items()} + return {k: sanitize_plist_values(v) for k, v in obj.items()} if isinstance(obj, list): - return [convert_datetimes(item) for item in obj] + return [sanitize_plist_values(item) for item in obj] return obj @@ -143,7 +143,7 @@ def read_plist_safe(path: Path) -> dict[str, Any] | list[Any] | None: logger.warning("Failed to read plist %s: %s", path, exc) return None - return convert_datetimes(data) + return sanitize_plist_values(data) def _read_plist_via_plutil(path: Path) -> dict[str, Any] | None: diff --git a/src/mac2nix/scanners/launch_agents.py b/src/mac2nix/scanners/launch_agents.py index 2fca092..1eb973b 100644 --- a/src/mac2nix/scanners/launch_agents.py +++ b/src/mac2nix/scanners/launch_agents.py @@ -2,6 +2,7 @@ from __future__ import annotations +import copy import logging import os import re @@ -73,8 +74,8 @@ def _parse_agent_data( program_arguments = data.get("ProgramArguments", []) run_at_load = data.get("RunAtLoad", False) - # Shallow copy + replace env vars to avoid mutating the shared prefetch data - raw_plist = dict(data) + # Deep copy to avoid mutating the shared prefetch data + raw_plist = copy.deepcopy(data) env_raw = data.get("EnvironmentVariables") if isinstance(env_raw, dict): redacted = { diff --git a/src/mac2nix/scanners/nix_state.py b/src/mac2nix/scanners/nix_state.py index 188f858..9476900 100644 --- a/src/mac2nix/scanners/nix_state.py +++ b/src/mac2nix/scanners/nix_state.py @@ -35,7 +35,25 @@ _PACKAGE_CAP = 500 _ADJACENT_CAP = 50 _ADJACENT_MAX_DEPTH = 2 -_PRUNE_DIRS = {".git", "node_modules", ".direnv", "__pycache__", ".venv"} +_PRUNE_DIRS = { + # VCS / build + ".git", + "node_modules", + ".direnv", + "__pycache__", + ".venv", + # macOS non-project directories (avoid wasting IO at depth 0-1) + "Library", + "Applications", + "Downloads", + "Movies", + "Music", + "Pictures", + "Public", + ".Trash", + ".cache", + ".local", +} _SYSTEM_NIX_CONF = Path("/etc/nix/nix.conf") _VERSION_RE = re.compile(r"(\d+\.\d+[\w.]*)") diff --git a/src/mac2nix/scanners/preferences.py b/src/mac2nix/scanners/preferences.py index ab7ca4a..c5653cd 100644 --- a/src/mac2nix/scanners/preferences.py +++ b/src/mac2nix/scanners/preferences.py @@ -7,7 +7,7 @@ from pathlib import Path from mac2nix.models.preferences import PreferencesDomain, PreferencesResult, PreferenceValue -from mac2nix.scanners._utils import convert_datetimes, read_plist_safe, run_command +from mac2nix.scanners._utils import read_plist_safe, run_command, sanitize_plist_values from mac2nix.scanners.base import BaseScannerPlugin, register logger = logging.getLogger(__name__) @@ -95,4 +95,4 @@ def _export_domain(domain_name: str) -> dict[str, PreferenceValue] | None: return None if not isinstance(data, dict): return None - return convert_datetimes(data) + return sanitize_plist_values(data) diff --git a/src/mac2nix/scanners/system_scanner.py b/src/mac2nix/scanners/system_scanner.py index d3e0dc5..da9d716 100644 --- a/src/mac2nix/scanners/system_scanner.py +++ b/src/mac2nix/scanners/system_scanner.py @@ -199,9 +199,9 @@ def _get_hardware_info( model = hw.get("machine_model") or hw.get("machine_name") chip = hw.get("chip_type") or hw.get("cpu_type") memory = hw.get("physical_memory") - serial = hw.get("serial_number") + # serial_number is PII with no nix-darwin migration value — do not capture - return model, chip, memory, serial + return model, chip, memory, None def _get_additional_hostnames(self) -> tuple[str | None, str | None]: """Get LocalHostName and HostName separately.""" diff --git a/tests/scanners/test_system_scanner.py b/tests/scanners/test_system_scanner.py index 131b076..1e55bf0 100644 --- a/tests/scanners/test_system_scanner.py +++ b/tests/scanners/test_system_scanner.py @@ -194,7 +194,7 @@ def side_effect(cmd: list[str], **_kwargs: object) -> subprocess.CompletedProces assert result.hardware_model == "Mac14,2" assert result.hardware_chip == "Apple M2" assert result.hardware_memory == "16 GB" - assert result.hardware_serial == "XYZ123456" + assert result.hardware_serial is None # serial is PII — never captured def test_hardware_info_fallback_keys(self, cmd_result) -> None: hw_data = { diff --git a/tests/scanners/test_utils.py b/tests/scanners/test_utils.py index 1e49fef..913abb7 100644 --- a/tests/scanners/test_utils.py +++ b/tests/scanners/test_utils.py @@ -7,11 +7,11 @@ from unittest.mock import patch from mac2nix.scanners._utils import ( - convert_datetimes, hash_file, read_launchd_plists, read_plist_safe, run_command, + sanitize_plist_values, ) @@ -138,29 +138,29 @@ def test_read_plist_safe_converts_datetimes(self, tmp_path: Path) -> None: class TestConvertDatetimes: def test_datetime_converted(self) -> None: dt = datetime(2026, 3, 7, 12, 0, 0, tzinfo=UTC) - result = convert_datetimes(dt) + result = sanitize_plist_values(dt) assert isinstance(result, str) assert "2026-03-07" in result def test_nested_dict(self) -> None: dt = datetime(2026, 1, 1, 0, 0, 0, tzinfo=UTC) data = {"outer": {"inner": dt, "keep": "string"}} - result = convert_datetimes(data) + result = sanitize_plist_values(data) assert isinstance(result["outer"]["inner"], str) assert result["outer"]["keep"] == "string" def test_nested_list(self) -> None: dt = datetime(2026, 6, 15, 8, 0, 0, tzinfo=UTC) data = [dt, "plain", 42] - result = convert_datetimes(data) + result = sanitize_plist_values(data) assert isinstance(result[0], str) assert result[1] == "plain" assert result[2] == 42 def test_passthrough_non_datetime(self) -> None: - assert convert_datetimes("hello") == "hello" - assert convert_datetimes(42) == 42 - assert convert_datetimes(None) is None + assert sanitize_plist_values("hello") == "hello" + assert sanitize_plist_values(42) == 42 + assert sanitize_plist_values(None) is None class TestHashFile: