From e0576ffea9b589b86e86ea2d894ed06d2f493a38 Mon Sep 17 00:00:00 2001 From: "Ian H. Pittwood" Date: Thu, 11 Jun 2026 13:43:11 -0600 Subject: [PATCH 1/2] feat: parallelize ci publish with retry via a unified imagetools plugin Apply the parallel execution module to `bakery ci publish`: each target's oras index-create -> soci convert -> index-copy -> verify sequence now runs as one job on the parallel executor, so independent targets publish concurrently, and every registry command is wrapped in retry-with-backoff to absorb the transient GHCR eventual-consistency failures described in #591 (not found / manifest unknown / 5xx / timeouts; permanent auth/reference errors fail fast). Consolidate the oras and soci plugins into a single `imagetools` plugin that owns the full pipeline. `bakery oras merge`, `soci convert`, `ci publish`, and `ci merge` all route through it (command names preserved); the soci tool options still parse via `tool: soci`. Parallel module gains RetryPolicy, CommandResult, CommandRunner, ShellJob, JobResult, and run_jobs() alongside the existing one-command ShellTask path, sharing one tracked-spawn primitive so timeout + process-group termination + Ctrl-C safety are identical. Co-Authored-By: Claude Opus 4.8 (1M context) --- posit-bakery/posit_bakery/cli/ci.py | 96 +---- .../posit_bakery/parallel/__init__.py | 10 + .../posit_bakery/parallel/executor.py | 314 ++++++++++++--- posit-bakery/posit_bakery/parallel/retry.py | 50 +++ .../plugins/builtin/imagetools/__init__.py | 357 ++++++++++++++++++ .../builtin/{soci => imagetools}/options.py | 0 .../builtin/{oras => imagetools}/oras.py | 71 +++- .../plugins/builtin/imagetools/publish.py | 160 ++++++++ .../plugins/builtin/imagetools/retry.py | 63 ++++ .../builtin/{soci => imagetools}/soci.py | 75 +++- .../plugins/builtin/oras/__init__.py | 169 --------- .../plugins/builtin/soci/__init__.py | 266 ------------- posit-bakery/posit_bakery/plugins/registry.py | 7 +- posit-bakery/pyproject.toml | 3 +- posit-bakery/test/cli/test_ci.py | 15 +- posit-bakery/test/cli/test_ci_publish.py | 2 +- posit-bakery/test/parallel/test_jobs.py | 227 +++++++++++ posit-bakery/test/parallel/test_retry.py | 72 ++++ .../builtin/{oras => imagetools}/__init__.py | 0 .../imagetools/test_cli_registration.py | 71 ++++ .../builtin/{oras => imagetools}/test_oras.py | 6 +- .../builtin/imagetools/test_oras_runner.py | 129 +++++++ .../plugins/builtin/imagetools/test_plugin.py | 93 +++++ .../builtin/imagetools/test_publish.py | 215 +++++++++++ .../plugins/builtin/imagetools/test_retry.py | 68 ++++ .../test_soci_cli.py} | 0 .../test_soci_command_base.py} | 2 +- .../test_soci_convert.py} | 2 +- .../builtin/imagetools/test_soci_discovery.py | 31 ++ .../test_soci_options.py} | 2 +- .../imagetools/test_soci_plugin_execute.py | 71 ++++ .../test_soci_plugin_results.py} | 12 +- .../builtin/imagetools/test_soci_runner.py | 81 ++++ .../test_soci_workflow.py} | 10 +- .../plugins/builtin/oras/test_oras_plugin.py | 223 ----------- .../test/plugins/builtin/soci/__init__.py | 0 .../plugins/builtin/soci/test_discovery.py | 15 - .../builtin/soci/test_plugin_execute.py | 283 -------------- 38 files changed, 2138 insertions(+), 1133 deletions(-) create mode 100644 posit-bakery/posit_bakery/parallel/retry.py create mode 100644 posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py rename posit-bakery/posit_bakery/plugins/builtin/{soci => imagetools}/options.py (100%) rename posit-bakery/posit_bakery/plugins/builtin/{oras => imagetools}/oras.py (87%) create mode 100644 posit-bakery/posit_bakery/plugins/builtin/imagetools/publish.py create mode 100644 posit-bakery/posit_bakery/plugins/builtin/imagetools/retry.py rename posit-bakery/posit_bakery/plugins/builtin/{soci => imagetools}/soci.py (74%) delete mode 100644 posit-bakery/posit_bakery/plugins/builtin/oras/__init__.py delete mode 100644 posit-bakery/posit_bakery/plugins/builtin/soci/__init__.py create mode 100644 posit-bakery/test/parallel/test_jobs.py create mode 100644 posit-bakery/test/parallel/test_retry.py rename posit-bakery/test/plugins/builtin/{oras => imagetools}/__init__.py (100%) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_cli_registration.py rename posit-bakery/test/plugins/builtin/{oras => imagetools}/test_oras.py (99%) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_oras_runner.py create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_plugin.py create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_publish.py create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_retry.py rename posit-bakery/test/plugins/builtin/{soci/test_cli.py => imagetools/test_soci_cli.py} (100%) rename posit-bakery/test/plugins/builtin/{soci/test_command_base.py => imagetools/test_soci_command_base.py} (96%) rename posit-bakery/test/plugins/builtin/{soci/test_convert.py => imagetools/test_soci_convert.py} (96%) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_soci_discovery.py rename posit-bakery/test/plugins/builtin/{soci/test_options.py => imagetools/test_soci_options.py} (97%) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_execute.py rename posit-bakery/test/plugins/builtin/{soci/test_plugin_results.py => imagetools/test_soci_plugin_results.py} (77%) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_soci_runner.py rename posit-bakery/test/plugins/builtin/{soci/test_workflow.py => imagetools/test_soci_workflow.py} (92%) delete mode 100644 posit-bakery/test/plugins/builtin/oras/test_oras_plugin.py delete mode 100644 posit-bakery/test/plugins/builtin/soci/__init__.py delete mode 100644 posit-bakery/test/plugins/builtin/soci/test_discovery.py delete mode 100644 posit-bakery/test/plugins/builtin/soci/test_plugin_execute.py diff --git a/posit-bakery/posit_bakery/cli/ci.py b/posit-bakery/posit_bakery/cli/ci.py index 2cc3509ab..8c5d7711b 100644 --- a/posit-bakery/posit_bakery/cli/ci.py +++ b/posit-bakery/posit_bakery/cli/ci.py @@ -289,12 +289,6 @@ def publish( """ # Imports kept local to mirror existing patterns and to avoid bloating # module load time when this command isn't invoked. - from posit_bakery.plugins.builtin.oras.oras import ( - OrasIndexCopyWorkflow, - OrasIndexCreateWorkflow, - OrasIndexVerifyWorkflow, - find_oras_bin, - ) from posit_bakery.plugins.registry import get_plugin if dev_stream is not None: @@ -337,8 +331,6 @@ def publish( log.info(f"Found {len(loaded_targets)} targets") log.debug(", ".join(loaded_targets)) - oras_bin = find_oras_bin(config.base_path) - # Act only on targets that were actually present in the provided metadata # files, not every target defined in the config. Publishing a single set of # files (e.g. one version / dev stream) otherwise drags in every other @@ -350,86 +342,14 @@ def publish( key=lambda t: t.push_sort_key, ) - # Phase 1: index create. Failures abort. - temp_refs: dict[str, str] = {} - for t in targets: - if not t.get_merge_sources(): - log.debug(f"Skipping target '{t}' (no merge sources).") - continue - if not t.settings.temp_registry: - log.error(f"Cannot publish '{t}': temp_registry not configured.") - raise typer.Exit(code=1) - res = OrasIndexCreateWorkflow( - oras_bin=oras_bin, - image_target=t, - annotations=t.labels, - ).run(dry_run=dry_run) - if not res.success: - log.error(f"index-create failed for '{t}': {res.error}") - raise typer.Exit(code=1) - temp_refs[t.uid] = res.temp_ref - - # Phase 2: SOCI convert. Driven by per-target config; targets whose - # resolved SOCI options have enabled=False are skipped by the plugin. - soci = get_plugin("soci") - soci_results = soci.execute( - config.base_path, - targets, - source_refs=temp_refs, - dry_run=dry_run, - ) - soci_failed = False - for r in soci_results: - artifacts = r.artifacts or {} - if artifacts.get("skipped"): - continue - wf = artifacts.get("workflow_result") - if r.exit_code != 0: - soci_failed = True - continue - if wf and getattr(wf, "destination_ref", None): - temp_refs[r.target.uid] = wf.destination_ref - if soci_failed: - soci.results(soci_results) # raises typer.Exit(1) - - # Phase 3: index copy. - copy_failed = False - copied_targets: list = [] - for t in targets: - if t.uid not in temp_refs: - continue - copy = OrasIndexCopyWorkflow( - oras_bin=oras_bin, - image_target=t, - ).run(source=temp_refs[t.uid], dry_run=dry_run) - if not copy.success: - log.error(f"index-copy failed for '{t}': {copy.error}") - copy_failed = True - else: - copied_targets.append(t) - - # Phase 4: verify each final destination tag resolves. This replaces the - # `docker buildx imagetools inspect` check the old `bakery ci merge` ran; - # ORAS is faster and more reliable for the existence check. - verify_failed = False - if not dry_run: - for t in copied_targets: - verify = OrasIndexVerifyWorkflow( - oras_bin=oras_bin, - image_target=t, - ).run(dry_run=dry_run) - if not verify.success: - log.error(f"verification failed for '{t}': {verify.error}") - verify_failed = True - else: - log.info(f"Verified '{t}' -> {', '.join(verify.verified)}") - - # The temporary indexes (and any SOCI-converted variants) are intentionally - # left in place; they are cleaned up out-of-band by the clean.yml workflow - # (bakery clean temp-registry) rather than deleted here. - - if copy_failed or verify_failed: - raise typer.Exit(code=1) + # Each target's create -> soci -> copy -> verify sequence runs as one job on the parallel + # executor (independent targets publish concurrently), and every registry command is + # retried-with-backoff on transient errors. SOCI conversion is driven per-target by the + # resolved `soci` options. Temporary indexes are intentionally left in place; clean.yml + # (bakery clean temp-registry) reaps them out-of-band. + imagetools = get_plugin("imagetools") + results = imagetools.execute(config.base_path, targets, dry_run=dry_run) + imagetools.results(results) # raises typer.Exit(1) on any failure @app.command() diff --git a/posit-bakery/posit_bakery/parallel/__init__.py b/posit-bakery/posit_bakery/parallel/__init__.py index b88c666a2..bec7ab7fa 100644 --- a/posit-bakery/posit_bakery/parallel/__init__.py +++ b/posit-bakery/posit_bakery/parallel/__init__.py @@ -1,12 +1,22 @@ from posit_bakery.parallel.executor import ( + CommandResult, + CommandRunner, + JobResult, ParallelShellExecutor, + ShellJob, ShellResult, ShellTask, resolve_max_workers, ) +from posit_bakery.parallel.retry import RetryPolicy __all__ = [ + "CommandResult", + "CommandRunner", + "JobResult", "ParallelShellExecutor", + "RetryPolicy", + "ShellJob", "ShellResult", "ShellTask", "resolve_max_workers", diff --git a/posit-bakery/posit_bakery/parallel/executor.py b/posit-bakery/posit_bakery/parallel/executor.py index 6ed287c84..cf82273cd 100644 --- a/posit-bakery/posit_bakery/parallel/executor.py +++ b/posit-bakery/posit_bakery/parallel/executor.py @@ -9,7 +9,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from pathlib import Path -from typing import Any, Callable +from typing import TYPE_CHECKING, Any, Callable from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn @@ -17,6 +17,9 @@ from posit_bakery.log import stderr_console from posit_bakery.settings import SETTINGS +if TYPE_CHECKING: + from posit_bakery.parallel.retry import RetryPolicy + # Grace period (seconds) to let a SIGTERM'd child group exit cleanly — e.g. dgoss removing # its container via its EXIT trap — before we escalate to SIGKILL. TERMINATE_GRACE_SECONDS = 10.0 @@ -73,6 +76,69 @@ def ok(self) -> bool: return self.exception is None and self.returncode == 0 +@dataclass +class CommandResult: + """The outcome of a single command run through a :class:`CommandRunner`. + + Unlike :class:`ShellResult` (one task = one command), this is the per-command result a + multi-step job sees, so a job can branch on each step's outcome before running the next. + """ + + cmd: list[str] + returncode: int | None + stdout: bytes + stderr: bytes + duration: float + timed_out: bool = False + exception: Exception | None = None + + @property + def ok(self) -> bool: + """True when the process spawned and exited zero.""" + return self.exception is None and self.returncode == 0 + + +@dataclass +class ShellJob: + """A unit of work that runs an ordered sequence of commands. + + ``run`` receives a :class:`CommandRunner` bound to this job and uses it to invoke each + command in order; the executor runs jobs concurrently up to the worker bound. The + callable's return value is surfaced on :attr:`JobResult.value`. + + ``retry`` and ``command_timeout`` are the default policy the bound runner applies to every + command the job runs (a command may still override them per call), so a job declares its + flakiness/timeout handling once rather than threading it through each step. + """ + + key: str + run: Callable[["CommandRunner"], Any] + label: str | None = None + payload: Any = None # opaque caller data, passed through unchanged to JobResult.job.payload + retry: "RetryPolicy | None" = None + command_timeout: float | None = None + + @property + def display_label(self) -> str: + """Human-facing label for live output; falls back to the job key.""" + return self.label or self.key + + +@dataclass +class JobResult: + """The outcome of running a :class:`ShellJob`.""" + + job: ShellJob + value: Any = None + exception: Exception | None = None + duration: float = 0.0 + + @property + def ok(self) -> bool: + """True when the job callable returned without raising.""" + return self.exception is None + + def resolve_max_workers(jobs: int | None, n_tasks: int) -> int: """Resolve the worker count: --jobs override, else SETTINGS, clamped to [1, n_tasks]. @@ -128,22 +194,36 @@ def _mark_running(self, task: ShellTask) -> None: progress.start_task(task_id) progress.update(task_id, description=f"[bright_blue]running {task.display_label}") - def _run_one(self, task: ShellTask) -> ShellResult: - """Run one task as a tracked child process, enforcing task.timeout if set.""" - start = time.monotonic() + def _spawn_and_communicate( + self, + cmd: list[str], + *, + env: dict[str, str] | None, + cwd: Path | None, + timeout: float | None, + on_started: Callable[[], None] | None = None, + ) -> tuple[int | None, bytes, bytes, bool, Exception | None]: + """Spawn ``cmd`` as a tracked child process group, enforcing ``timeout`` if set. + + The single point where this executor launches subprocesses, so both the one-task path + and multi-step jobs share the same registration, timeout, and interrupt-safety logic. + Honors the shutdown race: a child spawned after an interrupt began is stopped at once. + ``on_started`` (if given) fires after the child is registered, before it is awaited. + + :returns: ``(returncode, stdout, stderr, timed_out, exception)``. On spawn failure, + ``returncode`` is ``None`` and ``exception`` is set. + """ try: proc = subprocess.Popen( - task.cmd, - env=task.env, - cwd=task.cwd, + cmd, + env=env, + cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=True, ) except Exception as exc: # spawn failures (FileNotFoundError, PermissionError, ...) - return ShellResult( - task=task, returncode=None, stdout=b"", stderr=b"", duration=time.monotonic() - start, exception=exc - ) + return None, b"", b"", False, exc with self._lock: if self._shutdown: @@ -156,22 +236,16 @@ def _run_one(self, task: ShellTask) -> ShellResult: # cannot escape termination. The returned result is discarded as run() unwinds. self._stop(proc) stdout, stderr = proc.communicate() - return ShellResult( - task=task, - returncode=proc.returncode, - stdout=stdout or b"", - stderr=stderr or b"", - duration=time.monotonic() - start, - exception=None, - timed_out=False, - ) - self._mark_running(task) + return proc.returncode, stdout or b"", stderr or b"", False, None + + if on_started is not None: + on_started() timed_out = False exception: Exception | None = None - timeout = task.timeout if (task.timeout is not None and task.timeout > 0) else None + eff_timeout = timeout if (timeout is not None and timeout > 0) else None try: try: - stdout, stderr = proc.communicate(timeout=timeout) + stdout, stderr = proc.communicate(timeout=eff_timeout) except subprocess.TimeoutExpired as exc: timed_out = True exception = exc @@ -181,16 +255,64 @@ def _run_one(self, task: ShellTask) -> ShellResult: with self._lock: self._active.discard(proc) + return proc.returncode, stdout or b"", stderr or b"", timed_out, exception + + def _run_one(self, task: ShellTask) -> ShellResult: + """Run one task as a tracked child process, enforcing task.timeout if set.""" + start = time.monotonic() + returncode, stdout, stderr, timed_out, exception = self._spawn_and_communicate( + task.cmd, + env=task.env, + cwd=task.cwd, + timeout=task.timeout, + on_started=lambda: self._mark_running(task), + ) return ShellResult( task=task, - returncode=proc.returncode, - stdout=stdout or b"", - stderr=stderr or b"", + returncode=returncode, + stdout=stdout, + stderr=stderr, duration=time.monotonic() - start, exception=exception, timed_out=timed_out, ) + def _run_job(self, job: ShellJob) -> JobResult: + """Run one job's callable, handing it a CommandRunner bound to this job.""" + start = time.monotonic() + runner = CommandRunner( + self, + job.key, + job.display_label, + default_retry=job.retry, + default_timeout=job.command_timeout, + ) + try: + value = job.run(runner) + except Exception as exc: # job glue raised; surface it without crashing the pool + return JobResult(job=job, value=None, exception=exc, duration=time.monotonic() - start) + return JobResult(job=job, value=value, exception=None, duration=time.monotonic() - start) + + def _mark_step(self, key: str, label: str, step_label: str | None, attempt: int) -> None: + """Update a job's live-table row to show the command/step it is currently running. + + Safe to call from worker threads (Rich Progress mutations are internally locked). + No-op when no live Progress is attached. + """ + progress = self._progress + if progress is None: + return + task_id = self._progress_ids.get(key) + if task_id is None: + return + progress.start_task(task_id) + description = f"[bright_blue]running {label}" + if step_label: + description += f" — {step_label}" + if attempt > 1: + description += f" (retry {attempt})" + progress.update(task_id, description=description) + @staticmethod def _stop(proc: subprocess.Popen, grace: float = TERMINATE_GRACE_SECONDS) -> None: """Stop a child group gracefully: SIGTERM the group, wait up to grace, then SIGKILL it.""" @@ -221,20 +343,51 @@ def run( *, on_result: Callable[[ShellResult], None] | None = None, ) -> list[ShellResult]: - """Run all tasks, returning results in input order. ``on_result`` fires per task on the main thread. + """Run all tasks (one command each), returning results in input order. - On KeyboardInterrupt (or any BaseException) the executor cancels queued tasks and - terminates in-flight child processes before re-raising, so Ctrl-C is responsive and - does not leave orphaned children. + ``on_result`` fires per task on the main thread. On KeyboardInterrupt (or any + BaseException) queued tasks are cancelled and in-flight children terminated before + re-raising, so Ctrl-C is responsive and does not leave orphaned children. """ - if not tasks: + return self._execute_pool(tasks, worker=self._run_one, ok_of=lambda r: r.ok, on_result=on_result) + + def run_jobs( + self, + jobs: list[ShellJob], + *, + on_result: Callable[[JobResult], None] | None = None, + ) -> list[JobResult]: + """Run all jobs (each an ordered command sequence) concurrently, results in input order. + + Each job's callable runs on a worker thread with a :class:`CommandRunner` bound to it; + the commands it spawns are tracked exactly like one-task runs, so timeout enforcement, + process-group termination, and interrupt-safety all apply. ``on_result`` fires per job + on the main thread (so callers mutate shared state without locks). + """ + return self._execute_pool(jobs, worker=self._run_job, ok_of=lambda r: r.ok, on_result=on_result) + + def _execute_pool( + self, + units: list, + *, + worker: Callable[[Any], Any], + ok_of: Callable[[Any], bool], + on_result: Callable[[Any], None] | None, + ) -> list: + """Shared driver for :meth:`run` and :meth:`run_jobs`. + + ``units`` each expose ``.key`` and ``.display_label``; ``worker`` maps a unit to its + result and ``ok_of`` reports success for the live table's ✓/✗. Owns the thread pool, + live-table lifecycle, interrupt handling, and input-order result assembly. + """ + if not units: return [] self._shutdown = False self._progress = None self._progress_ids = {} - use_live = self._resolve_use_live(len(tasks)) - results_by_key: dict[str, ShellResult] = {} + use_live = self._resolve_use_live(len(units)) + results_by_key: dict[str, Any] = {} progress: Progress | None = None progress_ids: dict[str, Any] = {} @@ -250,13 +403,14 @@ def run( def consume(future_map: dict) -> None: for future in as_completed(future_map): + unit = future_map[future] result = future.result() - results_by_key[result.task.key] = result + results_by_key[unit.key] = result if progress is not None: - status = "[green3]✓" if result.ok else "[bright_red]✗" + status = "[green3]✓" if ok_of(result) else "[bright_red]✗" progress.update( - progress_ids[result.task.key], - description=f"{status} {result.task.display_label}", + progress_ids[unit.key], + description=f"{status} {unit.display_label}", completed=1, ) if on_result is not None: @@ -266,18 +420,18 @@ def consume(future_map: dict) -> None: try: if progress is not None: with progress: - for task in tasks: - progress_ids[task.key] = progress.add_task( - f"[quiet]{'queued':<7} {task.display_label}", total=1, start=False + for unit in units: + progress_ids[unit.key] = progress.add_task( + f"[quiet]{'queued':<7} {unit.display_label}", total=1, start=False ) self._progress_ids = progress_ids - future_map = {pool.submit(self._run_one, task): task for task in tasks} + future_map = {pool.submit(worker, unit): unit for unit in units} consume(future_map) else: - future_map = {pool.submit(self._run_one, task): task for task in tasks} + future_map = {pool.submit(worker, unit): unit for unit in units} consume(future_map) except BaseException: - # Interrupted (e.g. Ctrl-C): stop accepting/continuing work, drop queued tasks, + # Interrupted (e.g. Ctrl-C): stop accepting/continuing work, drop queued units, # and terminate running children, then propagate. with self._lock: self._shutdown = True @@ -290,4 +444,78 @@ def consume(future_map: dict) -> None: self._progress = None self._progress_ids = {} - return [results_by_key[task.key] for task in tasks] + return [results_by_key[unit.key] for unit in units] + + +class CommandRunner: + """Per-job handle for running commands as tracked child processes, with optional retry. + + Handed to a :class:`ShellJob`'s callable. Each :meth:`run` spawns through the owning + executor's tracked-spawn primitive (so timeout, process-group termination, and the + shutdown race all apply) and, when a :class:`RetryPolicy` is supplied, re-runs on + retryable failures with exponential backoff. + """ + + def __init__( + self, + executor: "ParallelShellExecutor", + key: str, + label: str | None = None, + *, + default_retry: "RetryPolicy | None" = None, + default_timeout: float | None = None, + ) -> None: + self._executor = executor + self._key = key + self._label = label or key + self._default_retry = default_retry + self._default_timeout = default_timeout + + def run( + self, + cmd: list[str], + *, + env: dict[str, str] | None = None, + cwd: Path | None = None, + timeout: float | None = None, + retry: "RetryPolicy | None" = None, + step_label: str | None = None, + ) -> CommandResult: + """Run ``cmd`` once (or until it succeeds / retries are exhausted) and return the outcome. + + ``timeout`` and ``retry`` fall back to the runner's job-level defaults when not given, + so most callers only pass ``cmd`` (and optionally ``step_label``). + + :param retry: When set (or defaulted from the job), failed results matching + ``retry.retry_on`` are retried up to ``retry.max_attempts`` with backoff sleeps. + :param step_label: Optional label for this step, surfaced on the job's live-table row. + """ + if retry is None: + retry = self._default_retry + if timeout is None: + timeout = self._default_timeout + attempt = 0 + while True: + attempt += 1 + start = time.monotonic() + current_attempt = attempt + returncode, stdout, stderr, timed_out, exception = self._executor._spawn_and_communicate( + cmd, + env=env, + cwd=cwd, + timeout=timeout, + on_started=lambda a=current_attempt: self._executor._mark_step(self._key, self._label, step_label, a), + ) + result = CommandResult( + cmd=cmd, + returncode=returncode, + stdout=stdout, + stderr=stderr, + duration=time.monotonic() - start, + timed_out=timed_out, + exception=exception, + ) + if retry is not None and retry.should_retry(result, attempt): + time.sleep(retry.delay_for(attempt)) + continue + return result diff --git a/posit-bakery/posit_bakery/parallel/retry.py b/posit-bakery/posit_bakery/parallel/retry.py new file mode 100644 index 000000000..354f15f08 --- /dev/null +++ b/posit-bakery/posit_bakery/parallel/retry.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import random +from dataclasses import dataclass +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from posit_bakery.parallel.executor import CommandResult + + +@dataclass +class RetryPolicy: + """Retry-with-backoff configuration for a single tracked command. + + Backoff is exponential and capped: the delay before the retry that follows a given + (1-based) attempt is ``base_delay * multiplier**(attempt-1)``, clamped to ``max_delay``. + With ``jitter`` enabled the delay is randomized within ``[0, capped]`` (full jitter) to + avoid synchronized retry storms across parallel jobs. + + ``retry_on`` decides whether a *failed* result is worth retrying (e.g. a transient + registry error vs. a permanent auth failure). When it is ``None`` nothing is retried. + """ + + max_attempts: int = 5 + base_delay: float = 2.0 + max_delay: float = 32.0 + multiplier: float = 2.0 + jitter: bool = True + retry_on: Callable[[CommandResult], bool] | None = None + + def delay_for(self, attempt: int) -> float: + """Backoff delay (seconds) before the retry that follows ``attempt`` (1-based).""" + capped = min(self.base_delay * self.multiplier ** max(0, attempt - 1), self.max_delay) + if self.jitter: + return random.uniform(0.0, capped) + return capped + + def should_retry(self, result: CommandResult, attempt: int) -> bool: + """True when ``result`` (which just failed on ``attempt``) should be retried. + + :param result: The outcome of the just-finished attempt. + :param attempt: The 1-based attempt number that produced ``result``. + """ + if attempt >= self.max_attempts: + return False + if self.retry_on is None: + return False + if getattr(result, "ok", False): + return False + return self.retry_on(result) diff --git a/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py new file mode 100644 index 000000000..fc3a1b5b7 --- /dev/null +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py @@ -0,0 +1,357 @@ +"""imagetools plugin: the unified multi-platform publish pipeline (oras + soci). + +A single plugin owns the oras and soci tooling. It registers three command groups — +``bakery ci publish`` (via the ci app), ``bakery oras merge``, and ``bakery soci convert`` — +that all run the same pipeline (restricted to the relevant phases). Each target's +create -> soci -> copy -> verify sequence runs as one job on the parallel executor, so +independent targets publish concurrently and every registry command retries-with-backoff on +transient failures (issue #591). +""" + +import logging +from pathlib import Path +from typing import Any + +import typer + +from posit_bakery.error import BakeryToolNotFoundError +from posit_bakery.image.image_target import ImageTarget +from posit_bakery.parallel import ( + JobResult, + ParallelShellExecutor, + ShellJob, + resolve_max_workers, +) +from posit_bakery.plugins.builtin.imagetools import soci as soci_mod +from posit_bakery.plugins.builtin.imagetools.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.oras import find_oras_bin +from posit_bakery.plugins.builtin.imagetools.publish import ( + ALL_PHASES, + MERGE_PHASES, + SOCI_PHASES, + PublishPhase, + PublishResult, + PublishWorkflow, +) +from posit_bakery.plugins.builtin.imagetools.retry import DEFAULT_COMMAND_TIMEOUT, DEFAULT_REGISTRY_RETRY +from posit_bakery.plugins.builtin.imagetools.soci import find_soci_bin +from posit_bakery.plugins.protocol import BakeryToolPlugin, ToolCallResult + +log = logging.getLogger(__name__) + + +def _resolve_bin(finder: Any, base_path: Path, dry_run: bool, fallback: str) -> str: + """Resolve a tool path, tolerating a missing tool on a dry run (which executes nothing).""" + try: + return finder(base_path) + except BakeryToolNotFoundError: + if dry_run: + return fallback + raise + + +class ImageToolsPlugin(BakeryToolPlugin): + name: str = "imagetools" + description: str = "Publish multi-platform images (oras index + soci convert)" + # The soci tool options (``tool: soci`` in bakery.yaml) belong to this consolidated plugin. + tool_options_class = SociOptions + + def execute( + self, + base_path: Path, + targets: list[ImageTarget], + *, + source_refs: dict[str, str] | None = None, + phases: frozenset[PublishPhase] = ALL_PHASES, + dry_run: bool = False, + jobs: int | None = None, + **kwargs: Any, + ) -> list[ToolCallResult]: + """Run the (subset of the) publish pipeline for each target, in parallel. + + :param source_refs: ``target.uid`` -> source ref, used to seed the soci-only path. + :param phases: Which pipeline phases to run (full publish by default). + :param jobs: Concurrency override; otherwise BAKERY_MAX_CONCURRENCY / default. + """ + # Latest pushes last so registries that order tags by push time display it on top. + targets = sorted(targets, key=lambda t: t.push_sort_key) + if not targets: + return [] + source_refs = source_refs or {} + + oras_bin = _resolve_bin(find_oras_bin, base_path, dry_run, "oras") + needs_soci = PublishPhase.SOCI in phases and any( + soci_mod.get_soci_options_for_target(t).enabled for t in targets + ) + soci_bin = _resolve_bin(find_soci_bin, base_path, dry_run, "soci") if needs_soci else "soci" + + shell_jobs: list[ShellJob] = [] + for target in targets: + workflow = PublishWorkflow( + image_target=target, + oras_bin=oras_bin, + soci_bin=soci_bin, + source_ref=source_refs.get(target.uid), + ) + shell_jobs.append( + ShellJob( + key=target.uid, + label=str(target), + run=lambda runner, wf=workflow: wf.run(runner, phases=phases, dry_run=dry_run), + payload=target, + retry=DEFAULT_REGISTRY_RETRY, + command_timeout=DEFAULT_COMMAND_TIMEOUT, + ) + ) + + def on_result(job_result: JobResult) -> None: + # Main thread: safe to log. Surface progress per finished target. + target = job_result.job.payload + if job_result.exception is not None: + log.error(f"publish failed for '{target}': {job_result.exception}") + return + pr: PublishResult = job_result.value + if pr.skipped: + log.debug(f"Skipped '{target}' ({pr.skip_reason}).") + elif not pr.success: + log.error(f"{pr.failed_phase} failed for '{target}': {pr.error}") + else: + if pr.soci_destination_ref: + log.info(f"SOCI converted '{target}' -> {pr.soci_destination_ref}") + if pr.verified: + log.info(f"Verified '{target}' -> {', '.join(pr.verified)}") + + max_workers = resolve_max_workers(jobs, len(shell_jobs)) + executor = ParallelShellExecutor(max_workers=max_workers) + job_results = executor.run_jobs(shell_jobs, on_result=on_result) + return [self._to_tool_result(jr) for jr in job_results] + + @staticmethod + def _to_tool_result(job_result: JobResult) -> ToolCallResult: + target = job_result.job.payload + if job_result.exception is not None: + return ToolCallResult( + exit_code=1, + tool_name="imagetools", + target=target, + stdout="", + stderr=str(job_result.exception), + artifacts={"error": str(job_result.exception)}, + ) + pr: PublishResult = job_result.value + if pr.skipped: + return ToolCallResult( + exit_code=0, + tool_name="imagetools", + target=target, + stdout="", + stderr="", + artifacts={"skipped": True, "reason": pr.skip_reason, "publish_result": pr}, + ) + return ToolCallResult( + exit_code=0 if pr.success else 1, + tool_name="imagetools", + target=target, + stdout="", + stderr=pr.error or "", + artifacts={"publish_result": pr}, + ) + + def results(self, results: list[ToolCallResult]) -> None: + """Print a publish summary and raise typer.Exit(1) if any target failed.""" + from posit_bakery.log import stderr_console + + has_errors = False + for result in results: + artifacts = result.artifacts or {} + if artifacts.get("skipped"): + log.debug(f"imagetools skipped '{result.target}': {artifacts.get('reason')}") + continue + if result.exit_code != 0: + has_errors = True + stderr_console.print(f"Publish failed for '{result.target}': {result.stderr}", style="error") + + if has_errors: + stderr_console.print("❌ Publish failed", style="error") + raise typer.Exit(code=1) + + stderr_console.print("✅ Publish completed", style="success") + + def register_cli(self, app: typer.Typer) -> None: + """Register ``imagetools publish``, ``oras merge``, and ``soci convert``. + + All three drive :meth:`execute` with the appropriate phase subset; the historical + ``oras``/``soci`` command groups are preserved even though one plugin now owns them. + """ + from posit_bakery.cli.ci import publish as ci_publish + + plugin = self + + # `imagetools publish` mirrors `ci publish` without duplicating its metadata-loading. + imagetools_app = typer.Typer(no_args_is_help=True) + imagetools_app.command(name="publish")(ci_publish) + app.add_typer(imagetools_app, name="imagetools", help=self.description) + + app.add_typer(self._build_oras_app(plugin), name="oras", help="Merge multi-platform images using ORAS") + app.add_typer(self._build_soci_app(plugin), name="soci", help="Convert images to SOCI-enabled images") + + @staticmethod + def _build_oras_app(plugin: "ImageToolsPlugin") -> typer.Typer: + import glob as glob_module + from typing import Annotated, Optional + + from posit_bakery.cli.common import with_verbosity_flags + from posit_bakery.config.config import BakeryConfig, BakerySettings + from posit_bakery.const import DevVersionInclusionEnum, MatrixVersionInclusionEnum + from posit_bakery.util import auto_path + + oras_app = typer.Typer(no_args_is_help=True) + + @oras_app.command() + @with_verbosity_flags + def merge( + metadata_file: Annotated[ + list[Path], typer.Argument(help="Path to input build metadata JSON file(s) to merge.") + ], + context: Annotated[ + Path, + typer.Option(help="The root path to use. Defaults to the current working directory where invoked."), + ] = auto_path(), + temp_registry: Annotated[ + Optional[str], + typer.Option( + help="Temporary registry to use for multiplatform split/merge builds.", + rich_help_panel="Build Configuration & Outputs", + ), + ] = None, + dry_run: Annotated[ + bool, typer.Option(help="If set, the merged images will not be pushed to the registry.") + ] = False, + ): + """Merge multi-platform images from build metadata files using ORAS. + + \b + Takes one or more build metadata JSON files (produced by `bakery build --strategy build`) + and merges platform-specific images into multi-platform manifest indexes. + """ + settings = BakerySettings( + dev_versions=DevVersionInclusionEnum.INCLUDE, + matrix_versions=MatrixVersionInclusionEnum.INCLUDE, + clean_temporary=False, + temp_registry=temp_registry, + ) + config: BakeryConfig = BakeryConfig.from_context(context, settings) + + resolved_files: list[Path] = [] + for file in metadata_file: + if "*" in str(file) or "?" in str(file) or "[" in str(file): + resolved_files.extend(sorted(Path(x).absolute() for x in glob_module.glob(str(file)))) + else: + resolved_files.append(file.absolute()) + metadata_file = resolved_files + + log.info(f"Reading targets from {', '.join(f.name for f in metadata_file)}") + + files_ok = True + loaded_targets: list[str] = [] + for file in metadata_file: + try: + loaded_targets.extend(config.load_build_metadata_from_file(file)) + except Exception as e: + log.error(f"Failed to load metadata from file '{file}'") + log.error(str(e)) + files_ok = False + loaded_targets = list(set(loaded_targets)) + + if not files_ok: + log.error("One or more metadata files are invalid, aborting merge.") + raise typer.Exit(code=1) + + log.info(f"Found {len(loaded_targets)} targets") + log.debug(", ".join(loaded_targets)) + + results = plugin.execute(config.base_path, config.targets, phases=MERGE_PHASES, dry_run=dry_run) + plugin.results(results) + + return oras_app + + @staticmethod + def _build_soci_app(plugin: "ImageToolsPlugin") -> typer.Typer: + import glob as glob_module + from typing import Annotated, Optional + + from posit_bakery.cli.common import with_verbosity_flags + from posit_bakery.config.config import BakeryConfig, BakerySettings + from posit_bakery.const import DevVersionInclusionEnum, MatrixVersionInclusionEnum + from posit_bakery.util import auto_path + + soci_app = typer.Typer(no_args_is_help=True) + + @soci_app.command() + @with_verbosity_flags + def convert( + metadata_file: Annotated[list[Path], typer.Argument(help="Path to input build metadata JSON file(s).")], + context: Annotated[ + Path, + typer.Option(help="The root path to use. Defaults to the current working directory."), + ] = auto_path(), + temp_registry: Annotated[ + Optional[str], + typer.Option(help="Temporary registry to use for split/merge builds."), + ] = None, + dry_run: Annotated[ + bool, + typer.Option(help="Log commands without executing them."), + ] = False, + ) -> None: + """Convert images referenced by build-metadata JSON files into SOCI-enabled images. + + \b + Conversion runs in standalone (no-containerd) mode via oras. Targets + without `tool: soci, enabled: true` in bakery.yaml are skipped. + """ + settings = BakerySettings( + dev_versions=DevVersionInclusionEnum.INCLUDE, + matrix_versions=MatrixVersionInclusionEnum.INCLUDE, + clean_temporary=False, + temp_registry=temp_registry, + ) + config: BakeryConfig = BakeryConfig.from_context(context, settings) + + resolved_files: list[Path] = [] + for f in metadata_file: + s = str(f) + if "*" in s or "?" in s or "[" in s: + resolved_files.extend(sorted(Path(x).absolute() for x in glob_module.glob(s))) + else: + resolved_files.append(f.absolute()) + metadata_file = resolved_files + + log.info(f"Reading targets from {', '.join(f.name for f in metadata_file)}") + files_ok = True + for f in metadata_file: + try: + config.load_build_metadata_from_file(f) + except Exception as e: + log.error(f"Failed to load metadata from file '{f}': {e}") + files_ok = False + if not files_ok: + raise typer.Exit(code=1) + + # Build source_refs from each target's most recent build metadata. + source_refs: dict[str, str] = {} + for t in config.targets: + if t.build_metadata: + latest = max(t.build_metadata, key=lambda m: m.created_at) + source_refs[t.uid] = latest.image_ref + + results = plugin.execute( + config.base_path, + config.targets, + source_refs=source_refs, + phases=SOCI_PHASES, + dry_run=dry_run, + ) + plugin.results(results) + + return soci_app diff --git a/posit-bakery/posit_bakery/plugins/builtin/soci/options.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/options.py similarity index 100% rename from posit-bakery/posit_bakery/plugins/builtin/soci/options.py rename to posit-bakery/posit_bakery/plugins/builtin/imagetools/options.py diff --git a/posit-bakery/posit_bakery/plugins/builtin/oras/oras.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py similarity index 87% rename from posit-bakery/posit_bakery/plugins/builtin/oras/oras.py rename to posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py index 4eb02cb9b..9f0878b25 100644 --- a/posit-bakery/posit_bakery/plugins/builtin/oras/oras.py +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py @@ -4,7 +4,7 @@ import subprocess from abc import ABC, abstractmethod from pathlib import Path -from typing import Annotated, Self +from typing import TYPE_CHECKING, Annotated, Self from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -12,6 +12,9 @@ from posit_bakery.image.image_target import ImageTarget, Tag from posit_bakery.util import find_bin +if TYPE_CHECKING: + from posit_bakery.parallel import CommandRunner + log = logging.getLogger(__name__) @@ -47,10 +50,20 @@ def command(self) -> list[str]: """Return the full command to execute.""" ... - def run(self, dry_run: bool = False) -> subprocess.CompletedProcess: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + step_label: str | None = None, + ) -> subprocess.CompletedProcess: """Execute the oras command. :param dry_run: If True, log the command without executing it. + :param runner: When given, execute through this CommandRunner (tracked-spawn, plus the + runner's retry/timeout policy) instead of calling subprocess directly. Production + callers always pass a runner; the direct path remains for low-level unit tests. + :param step_label: Live-table step label forwarded to the runner. :return: The completed process result. :raises BakeryToolRuntimeError: If the command fails. """ @@ -61,6 +74,21 @@ def run(self, dry_run: bool = False) -> subprocess.CompletedProcess: log.info(f"[DRY RUN] Would execute: {' '.join(cmd)}") return subprocess.CompletedProcess(args=cmd, returncode=0, stdout=b"", stderr=b"") + if runner is not None: + result = runner.run(cmd, step_label=step_label) + if not result.ok: + raise BakeryToolRuntimeError( + message="oras command failed", + tool_name="oras", + cmd=cmd, + stdout=result.stdout, + stderr=result.stderr, + exit_code=result.returncode if result.returncode is not None else 1, + ) + return subprocess.CompletedProcess( + args=cmd, returncode=result.returncode or 0, stdout=result.stdout, stderr=result.stderr + ) + result = subprocess.run(cmd, capture_output=True) if result.returncode != 0: @@ -208,7 +236,12 @@ def temp_index_tag(self) -> str: f"{self.image_target.temp_registry}/{self.image_target.image_name}/tmp:{self.image_target.uid}{source_hash}" ) - def run(self, dry_run: bool = False) -> OrasIndexCreateResult: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + ) -> OrasIndexCreateResult: try: OrasManifestIndexCreate( oras_bin=self.oras_bin, @@ -216,7 +249,7 @@ def run(self, dry_run: bool = False) -> OrasIndexCreateResult: destination=self.temp_index_tag, annotations=self.annotations, plain_http=self.plain_http, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="index create") return OrasIndexCreateResult(success=True, temp_ref=self.temp_index_tag) except BakeryToolRuntimeError as e: log.error(f"oras index-create failed: {e}") @@ -240,7 +273,13 @@ class OrasIndexCopyWorkflow(BaseModel): image_target: Annotated[ImageTarget, Field(description="Target whose tags to fan out to.")] plain_http: Annotated[bool, Field(default=False)] - def run(self, source: str, dry_run: bool = False) -> OrasIndexCopyResult: + def run( + self, + source: str, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + ) -> OrasIndexCopyResult: try: destinations = [] for destination, tags in itertools.groupby(self.image_target.tags, lambda x: x.destination): @@ -250,7 +289,7 @@ def run(self, source: str, dry_run: bool = False) -> OrasIndexCopyResult: source=source, destination=combined, plain_http=self.plain_http, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="index copy") destinations.append(combined) return OrasIndexCopyResult(success=True, destinations=destinations) except BakeryToolRuntimeError as e: @@ -286,7 +325,12 @@ class OrasIndexVerifyWorkflow(BaseModel): image_target: Annotated[ImageTarget, Field(description="Target whose destination tags to verify.")] plain_http: Annotated[bool, Field(default=False)] - def run(self, dry_run: bool = False) -> OrasIndexVerifyResult: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + ) -> OrasIndexVerifyResult: verified: list[str] = [] try: for ref in self.image_target.tags.as_strings(): @@ -295,7 +339,7 @@ def run(self, dry_run: bool = False) -> OrasIndexVerifyResult: reference=ref, descriptor=True, plain_http=self.plain_http, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="verify") verified.append(ref) return OrasIndexVerifyResult(success=True, verified=verified) except BakeryToolRuntimeError as e: @@ -350,7 +394,12 @@ def temp_index_tag(self) -> str: f"{self.image_target.temp_registry}/{self.image_target.image_name}/tmp:{self.image_target.uid}{source_hash}" ) - def run(self, dry_run: bool = False) -> OrasMergeWorkflowResult: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + ) -> OrasMergeWorkflowResult: """Compose create → copy. Preserved as a single call for back-compat with the `bakery oras merge` CLI. @@ -364,7 +413,7 @@ def run(self, dry_run: bool = False) -> OrasMergeWorkflowResult: image_target=self.image_target, annotations=self.annotations, plain_http=self.plain_http, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner) if not create.success: return OrasMergeWorkflowResult( success=False, @@ -377,7 +426,7 @@ def run(self, dry_run: bool = False) -> OrasMergeWorkflowResult: oras_bin=self.oras_bin, image_target=self.image_target, plain_http=self.plain_http, - ).run(source=create.temp_ref, dry_run=dry_run) + ).run(source=create.temp_ref, dry_run=dry_run, runner=runner) return OrasMergeWorkflowResult( success=copy.success, diff --git a/posit-bakery/posit_bakery/plugins/builtin/imagetools/publish.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/publish.py new file mode 100644 index 000000000..aaa5a0af5 --- /dev/null +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/publish.py @@ -0,0 +1,160 @@ +"""Per-target publish pipeline: oras index-create -> soci convert -> oras index-copy -> +oras verify, executed through a CommandRunner so each registry command is tracked and +retried-with-backoff. One PublishWorkflow runs per target; the plugin fans targets out across +the parallel executor. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from enum import Enum +from typing import TYPE_CHECKING + +# Referenced via their module objects (not direct names) so that monkeypatching the workflow +# classes and option lookup on these modules is honoured at call time. +from posit_bakery.plugins.builtin.imagetools import oras as oras_mod +from posit_bakery.plugins.builtin.imagetools import soci as soci_mod + +if TYPE_CHECKING: + from posit_bakery.image.image_target import ImageTarget + from posit_bakery.parallel import CommandRunner + +log = logging.getLogger(__name__) + + +class PublishPhase(str, Enum): + """A stage of the publish pipeline. Phase subsets back the standalone entrypoints.""" + + CREATE = "create" + SOCI = "soci" + COPY = "copy" + VERIFY = "verify" + + +ALL_PHASES = frozenset(PublishPhase) +MERGE_PHASES = frozenset({PublishPhase.CREATE, PublishPhase.COPY}) # bakery oras merge +SOCI_PHASES = frozenset({PublishPhase.SOCI}) # bakery soci convert + + +@dataclass +class PublishResult: + """Outcome of running the pipeline for one target.""" + + target: "ImageTarget" + success: bool = True + skipped: bool = False + skip_reason: str | None = None + temp_ref: str | None = None + destinations: list[str] = field(default_factory=list) + verified: list[str] = field(default_factory=list) + soci_destination_ref: str | None = None + soci_skipped: bool = False + error: str | None = None + failed_phase: str | None = None + + +class PublishWorkflow: + """Run the (subset of the) publish pipeline for a single image target through a runner. + + ``source_ref`` seeds the pipeline when CREATE is not part of ``phases`` (the soci-only + standalone path, where the ref comes from build metadata rather than an index-create). + """ + + def __init__( + self, + *, + image_target: "ImageTarget", + oras_bin: str, + soci_bin: str, + source_ref: str | None = None, + ) -> None: + self.image_target = image_target + self.oras_bin = oras_bin + self.soci_bin = soci_bin + self.source_ref = source_ref + + def run( + self, + runner: "CommandRunner", + *, + phases: frozenset[PublishPhase] = ALL_PHASES, + dry_run: bool = False, + ) -> PublishResult: + target = self.image_target + result = PublishResult(target=target) + temp_ref = self.source_ref + + if PublishPhase.CREATE in phases: + if not target.get_merge_sources(): + result.skipped = True + result.skip_reason = "no merge sources" + return result + if not target.settings.temp_registry: + result.success = False + result.failed_phase = "create" + result.error = f"temp_registry not configured for '{target}'" + return result + create = oras_mod.OrasIndexCreateWorkflow( + oras_bin=self.oras_bin, + image_target=target, + annotations=target.labels, + ).run(dry_run=dry_run, runner=runner) + if not create.success: + result.success = False + result.failed_phase = "create" + result.error = create.error + return result + temp_ref = create.temp_ref + result.temp_ref = temp_ref + + if PublishPhase.SOCI in phases: + options = soci_mod.get_soci_options_for_target(target) + if options.enabled and temp_ref: + soci = soci_mod.SociConvertWorkflow( + soci_bin=self.soci_bin, + oras_bin=self.oras_bin, + image_target=target, + options=options, + source_ref=temp_ref, + ).run(dry_run=dry_run, runner=runner) + if not soci.success: + result.success = False + result.failed_phase = "soci" + result.error = soci.error + return result + temp_ref = soci.destination_ref + result.soci_destination_ref = temp_ref + result.temp_ref = temp_ref + else: + result.soci_skipped = True + + if PublishPhase.COPY in phases: + if not temp_ref: + result.skipped = True + result.skip_reason = "no source ref to copy" + return result + copy = oras_mod.OrasIndexCopyWorkflow( + oras_bin=self.oras_bin, + image_target=target, + ).run(source=temp_ref, dry_run=dry_run, runner=runner) + result.destinations = copy.destinations + if not copy.success: + result.success = False + result.failed_phase = "copy" + result.error = copy.error + return result + + if PublishPhase.VERIFY in phases and not dry_run: + verify = oras_mod.OrasIndexVerifyWorkflow( + oras_bin=self.oras_bin, + image_target=target, + ).run(dry_run=dry_run, runner=runner) + result.verified = verify.verified + if not verify.success: + result.success = False + result.failed_phase = "verify" + result.error = verify.error + return result + + return result diff --git a/posit-bakery/posit_bakery/plugins/builtin/imagetools/retry.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/retry.py new file mode 100644 index 000000000..9a0d8b0d4 --- /dev/null +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/retry.py @@ -0,0 +1,63 @@ +"""Retry policy for the registry-touching commands the publish pipeline runs. + +Issue #591: GHCR eventual-consistency makes digest-addressed manifests intermittently report +as "not found" / "manifest unknown" when a sibling runner references them seconds after they +were pushed. These are transient and self-heal on a re-run, so we retry them with backoff; +permanent errors (auth, bad references) fail fast. +""" + +from posit_bakery.parallel import CommandResult, RetryPolicy + +# Upper bound on a single registry command before it is treated as hung and killed. Generous +# because a SOCI conversion pulls and rewrites a full image; ordinary oras calls finish quickly. +DEFAULT_COMMAND_TIMEOUT = 900.0 + +# Substrings (lower-cased) that mark a failure as a transient registry/network blip worth a retry. +_TRANSIENT_MARKERS = ( + "not found", + "manifest unknown", + "blob unknown", + "500 internal server error", + "internal server error", + "502 bad gateway", + "bad gateway", + "503 service unavailable", + "service unavailable", + "504 gateway timeout", + "gateway timeout", + "too many requests", + "connection refused", + "connection reset", + "i/o timeout", + "tls handshake timeout", + "temporarily unavailable", + "unexpected eof", + "eof", +) + +# Substrings that mark a failure as permanent — never retried even if a transient marker also +# appears (auth/reference errors will not heal on their own). +_PERMANENT_MARKERS = ( + "401", + "unauthorized", + "403", + "forbidden", + "denied", + "invalid reference", +) + + +def is_transient_registry_error(result: CommandResult) -> bool: + """True when a failed command looks like a transient registry/network error worth retrying.""" + if result.ok: + return False + if result.timed_out: + return True + stderr = (result.stderr or b"").decode("utf-8", errors="replace").lower() + if any(marker in stderr for marker in _PERMANENT_MARKERS): + return False + return any(marker in stderr for marker in _TRANSIENT_MARKERS) + + +# Hardcoded sensible defaults: up to 5 attempts, exponential 2s -> 32s with full jitter. +DEFAULT_REGISTRY_RETRY = RetryPolicy(retry_on=is_transient_registry_error) diff --git a/posit-bakery/posit_bakery/plugins/builtin/soci/soci.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/soci.py similarity index 74% rename from posit-bakery/posit_bakery/plugins/builtin/soci/soci.py rename to posit-bakery/posit_bakery/plugins/builtin/imagetools/soci.py index a618efaf5..6fc08d792 100644 --- a/posit-bakery/posit_bakery/plugins/builtin/soci/soci.py +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/soci.py @@ -7,16 +7,19 @@ import tempfile from abc import ABC, abstractmethod from pathlib import Path -from typing import Annotated, Literal +from typing import TYPE_CHECKING, Annotated, Literal from pydantic import BaseModel, ConfigDict, Field from posit_bakery.error import BakeryToolRuntimeError from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.oras.oras import OrasCopy -from posit_bakery.plugins.builtin.soci.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.oras import OrasCopy +from posit_bakery.plugins.builtin.imagetools.options import SociOptions from posit_bakery.util import find_bin +if TYPE_CHECKING: + from posit_bakery.parallel import CommandRunner + log = logging.getLogger(__name__) @@ -42,10 +45,19 @@ def command(self) -> list[str]: """Return the full command to execute.""" ... - def run(self, dry_run: bool = False) -> subprocess.CompletedProcess: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + step_label: str | None = None, + ) -> subprocess.CompletedProcess: """Execute the soci command. :param dry_run: If True, log the command without executing it. + :param runner: When given, execute through this CommandRunner (tracked-spawn, plus the + runner's retry/timeout policy) instead of calling subprocess directly. + :param step_label: Live-table step label forwarded to the runner. :return: The completed process result. :raises BakeryToolRuntimeError: On non-zero exit. """ @@ -56,6 +68,21 @@ def run(self, dry_run: bool = False) -> subprocess.CompletedProcess: log.info(f"[DRY RUN] Would execute: {' '.join(cmd)}") return subprocess.CompletedProcess(args=cmd, returncode=0, stdout=b"", stderr=b"") + if runner is not None: + result = runner.run(cmd, step_label=step_label) + if not result.ok: + raise BakeryToolRuntimeError( + message="soci command failed", + tool_name="soci", + cmd=cmd, + stdout=result.stdout, + stderr=result.stderr, + exit_code=result.returncode if result.returncode is not None else 1, + ) + return subprocess.CompletedProcess( + args=cmd, returncode=result.returncode or 0, stdout=result.stdout, stderr=result.stderr + ) + result = subprocess.run(cmd, capture_output=True) if result.returncode != 0: @@ -168,7 +195,12 @@ def _read_converted_digest(layout: Path) -> str: index = json.loads((layout / "index.json").read_text()) return index["manifests"][0]["digest"] - def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: + def run( + self, + dry_run: bool = False, + *, + runner: "CommandRunner | None" = None, + ) -> SociConvertWorkflowResult: """Pull the source ref into an OCI layout, convert it to SOCI with ``soci convert --standalone``, and push the converted layout back to the registry. @@ -176,6 +208,9 @@ def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: oras bridges the registry on both ends: ``--to-oci-layout`` to materialize the source as an OCI layout, ``--from-oci-layout`` to push the converted result. The scratch layouts are removed afterward. + + When a ``runner`` is supplied each step executes through it (tracked-spawn, plus the + runner's retry/timeout policy); otherwise the steps run via direct subprocess. """ log.info(f"Running SOCI conversion on {self.image_target.uid}") scratch = Path(tempfile.mkdtemp(prefix="soci-standalone-")) @@ -189,7 +224,7 @@ def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: source=self.source_ref, destination=f"{src_layout}:image", to_oci_layout=True, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="soci pull") # 2. convert local layout -> local layout (directory so we can read # the resulting index.json for its digest). @@ -197,7 +232,7 @@ def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: source=str(src_layout), destination=str(out_layout), output_format="oci-dir", - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="soci convert") # 3. push converted layout -> registry, referenced by digest since # the converted layout carries no tag. @@ -207,7 +242,7 @@ def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: source=f"{out_layout}@{digest}", destination=self.destination_ref, from_oci_layout=True, - ).run(dry_run=dry_run) + ).run(dry_run=dry_run, runner=runner, step_label="soci push") except BakeryToolRuntimeError as e: return SociConvertWorkflowResult( success=False, @@ -221,3 +256,27 @@ def run(self, dry_run: bool = False) -> SociConvertWorkflowResult: success=True, destination_ref=self.destination_ref, ) + + +def get_soci_options_for_target(target: ImageTarget) -> SociOptions: + """Resolve effective SociOptions for the given target, merging + variant-level options over image-version-parent-level options where + both exist. Returns a defaulted SociOptions (enabled=False) if no + soci configuration is present. + """ + # Local helper to keep the resolution logic in one place. + image_opts = None + variant_opts = None + parent = getattr(target.image_version, "parent", None) + for opt in getattr(parent, "options", []) or []: + if isinstance(opt, SociOptions): + image_opts = opt + break + variant = getattr(target, "image_variant", None) + for opt in getattr(variant, "options", []) or []: + if isinstance(opt, SociOptions): + variant_opts = opt + break + if variant_opts and image_opts: + return variant_opts.update(image_opts) + return variant_opts or image_opts or SociOptions() diff --git a/posit-bakery/posit_bakery/plugins/builtin/oras/__init__.py b/posit-bakery/posit_bakery/plugins/builtin/oras/__init__.py deleted file mode 100644 index 414338efe..000000000 --- a/posit-bakery/posit_bakery/plugins/builtin/oras/__init__.py +++ /dev/null @@ -1,169 +0,0 @@ -import logging -from pathlib import Path - -import typer - -from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.oras.oras import OrasMergeWorkflow, find_oras_bin -from posit_bakery.plugins.protocol import BakeryToolPlugin, ToolCallResult - -log = logging.getLogger(__name__) - - -class OrasPlugin(BakeryToolPlugin): - name: str = "oras" - description: str = "Merge multi-platform images using ORAS" - - def register_cli(self, app: typer.Typer) -> None: - """Register the oras CLI commands with the given Typer app.""" - import glob as glob_module - from typing import Annotated, Optional - - from posit_bakery.cli.common import with_verbosity_flags - from posit_bakery.config.config import BakeryConfig, BakerySettings - from posit_bakery.const import DevVersionInclusionEnum, MatrixVersionInclusionEnum - from posit_bakery.util import auto_path - - oras_app = typer.Typer(no_args_is_help=True) - plugin = self - - @oras_app.command() - @with_verbosity_flags - def merge( - metadata_file: Annotated[ - list[Path], typer.Argument(help="Path to input build metadata JSON file(s) to merge.") - ], - context: Annotated[ - Path, - typer.Option(help="The root path to use. Defaults to the current working directory where invoked."), - ] = auto_path(), - temp_registry: Annotated[ - Optional[str], - typer.Option( - help="Temporary registry to use for multiplatform split/merge builds.", - rich_help_panel="Build Configuration & Outputs", - ), - ] = None, - dry_run: Annotated[ - bool, typer.Option(help="If set, the merged images will not be pushed to the registry.") - ] = False, - ): - """Merge multi-platform images from build metadata files using ORAS. - - \b - Takes one or more build metadata JSON files (produced by `bakery build --strategy build`) - and merges platform-specific images into multi-platform manifest indexes. - """ - settings = BakerySettings( - dev_versions=DevVersionInclusionEnum.INCLUDE, - matrix_versions=MatrixVersionInclusionEnum.INCLUDE, - clean_temporary=False, - temp_registry=temp_registry, - ) - config: BakeryConfig = BakeryConfig.from_context(context, settings) - - # Resolve glob patterns in metadata_file arguments - resolved_files: list[Path] = [] - for file in metadata_file: - if "*" in str(file) or "?" in str(file) or "[" in str(file): - resolved_files.extend(sorted(Path(x).absolute() for x in glob_module.glob(str(file)))) - else: - resolved_files.append(file.absolute()) - metadata_file = resolved_files - - log.info(f"Reading targets from {', '.join(f.name for f in metadata_file)}") - - files_ok = True - loaded_targets: list[str] = [] - for file in metadata_file: - try: - loaded_targets.extend(config.load_build_metadata_from_file(file)) - except Exception as e: - log.error(f"Failed to load metadata from file '{file}'") - log.error(str(e)) - files_ok = False - loaded_targets = list(set(loaded_targets)) - - if not files_ok: - log.error("One or more metadata files are invalid, aborting merge.") - raise typer.Exit(code=1) - - log.info(f"Found {len(loaded_targets)} targets") - log.debug(", ".join(loaded_targets)) - - results = plugin.execute(config.base_path, config.targets, dry_run=dry_run) - plugin.results(results) - - app.add_typer(oras_app, name="oras", help="Merge multi-platform images using ORAS") - - def execute( - self, - base_path: Path, - targets: list[ImageTarget], - *, - dry_run: bool = False, - **kwargs, - ) -> list[ToolCallResult]: - """Execute ORAS merge workflow against the given image targets.""" - # Sort so latest pushes last; Docker Hub displays tags by push-time order. - targets = sorted(targets, key=lambda t: t.push_sort_key) - log.info("ORAS merge order: %s", ", ".join(str(t) for t in targets)) - results = [] - - for target in targets: - # Skip targets without merge sources - if not target.get_merge_sources(): - log.debug(f"Skipping target '{target}' — no merge sources.") - continue - - # Validate temp_registry - if not target.settings.temp_registry: - results.append( - ToolCallResult( - exit_code=1, - tool_name="oras", - target=target, - stdout="", - stderr=f"Cannot merge '{target}': temp_registry must be configured in settings.", - ) - ) - continue - - log.info(f"Merging sources for image UID '{target.uid}'") - workflow = OrasMergeWorkflow.from_image_target(target) - workflow_result = workflow.run(dry_run=dry_run) - - results.append( - ToolCallResult( - exit_code=0 if workflow_result.success else 1, - tool_name="oras", - target=target, - stdout="", - stderr=workflow_result.error or "", - artifacts={"workflow_result": workflow_result}, - ) - ) - - return results - - def results(self, results: list[ToolCallResult]) -> None: - """Display ORAS merge results and exit non-zero on failures.""" - from posit_bakery.log import stderr_console - - has_errors = False - for result in results: - workflow_result = result.artifacts.get("workflow_result") if result.artifacts else None - if result.exit_code != 0: - has_errors = True - stderr_console.print( - f"Error merging '{result.target}': {result.stderr}", - style="error", - ) - elif workflow_result: - log.info(f"Merged '{result.target}' -> {', '.join(workflow_result.destinations)}") - - if has_errors: - stderr_console.print("❌ ORAS merge(s) failed", style="error") - raise typer.Exit(code=1) - - stderr_console.print("✅ ORAS merge completed", style="success") diff --git a/posit-bakery/posit_bakery/plugins/builtin/soci/__init__.py b/posit-bakery/posit_bakery/plugins/builtin/soci/__init__.py deleted file mode 100644 index 78aa693d2..000000000 --- a/posit-bakery/posit_bakery/plugins/builtin/soci/__init__.py +++ /dev/null @@ -1,266 +0,0 @@ -"""SOCI plugin: convert built images into SOCI-enabled images.""" - -import logging -from pathlib import Path -from typing import Any - -import typer - -from posit_bakery.error import BakeryToolNotFoundError -from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.soci.options import SociOptions -from posit_bakery.plugins.builtin.oras.oras import find_oras_bin -from posit_bakery.plugins.builtin.soci.soci import ( - SociConvertWorkflow, - find_soci_bin, -) -from posit_bakery.plugins.protocol import BakeryToolPlugin, ToolCallResult - -log = logging.getLogger(__name__) - - -def get_soci_options_for_target(target: ImageTarget) -> SociOptions: - """Resolve effective SociOptions for the given target, merging - variant-level options over image-version-parent-level options where - both exist. Returns a defaulted SociOptions (enabled=False) if no - soci configuration is present. - """ - # Local helper to keep the resolution logic in one place. - image_opts = None - variant_opts = None - parent = getattr(target.image_version, "parent", None) - for opt in getattr(parent, "options", []) or []: - if isinstance(opt, SociOptions): - image_opts = opt - break - variant = getattr(target, "image_variant", None) - for opt in getattr(variant, "options", []) or []: - if isinstance(opt, SociOptions): - variant_opts = opt - break - if variant_opts and image_opts: - return variant_opts.update(image_opts) - return variant_opts or image_opts or SociOptions() - - -class SociPlugin(BakeryToolPlugin): - name: str = "soci" - description: str = "Convert images to SOCI-enabled images" - tool_options_class = SociOptions - - def register_cli(self, app: typer.Typer) -> None: - """Register the soci CLI commands.""" - import glob as glob_module - from typing import Annotated, Optional - - from posit_bakery.cli.common import with_verbosity_flags - from posit_bakery.config.config import BakeryConfig, BakerySettings - from posit_bakery.const import DevVersionInclusionEnum, MatrixVersionInclusionEnum - from posit_bakery.util import auto_path - - soci_app = typer.Typer(no_args_is_help=True) - plugin = self - - @soci_app.command() - @with_verbosity_flags - def convert( - metadata_file: Annotated[list[Path], typer.Argument(help="Path to input build metadata JSON file(s).")], - context: Annotated[ - Path, - typer.Option(help="The root path to use. Defaults to the current working directory."), - ] = auto_path(), - temp_registry: Annotated[ - Optional[str], - typer.Option(help="Temporary registry to use for split/merge builds."), - ] = None, - dry_run: Annotated[ - bool, - typer.Option(help="Log commands without executing them."), - ] = False, - ) -> None: - """Convert images referenced by build-metadata JSON files into SOCI-enabled images. - - \b - Conversion runs in standalone (no-containerd) mode via oras. Targets - without `tool: soci, enabled: true` in bakery.yaml are skipped. - """ - settings = BakerySettings( - dev_versions=DevVersionInclusionEnum.INCLUDE, - matrix_versions=MatrixVersionInclusionEnum.INCLUDE, - clean_temporary=False, - temp_registry=temp_registry, - ) - config: BakeryConfig = BakeryConfig.from_context(context, settings) - - resolved_files: list[Path] = [] - for f in metadata_file: - s = str(f) - if "*" in s or "?" in s or "[" in s: - resolved_files.extend(sorted(Path(x).absolute() for x in glob_module.glob(s))) - else: - resolved_files.append(f.absolute()) - metadata_file = resolved_files - - log.info(f"Reading targets from {', '.join(f.name for f in metadata_file)}") - files_ok = True - for f in metadata_file: - try: - config.load_build_metadata_from_file(f) - except Exception as e: - log.error(f"Failed to load metadata from file '{f}': {e}") - files_ok = False - if not files_ok: - raise typer.Exit(code=1) - - # Build source_refs from each target's most recent build metadata. - source_refs: dict[str, str] = {} - for t in config.targets: - if t.build_metadata: - latest = max(t.build_metadata, key=lambda m: m.created_at) - source_refs[t.uid] = latest.image_ref - - results = plugin.execute( - config.base_path, - config.targets, - source_refs=source_refs, - dry_run=dry_run, - ) - plugin.results(results) - - app.add_typer(soci_app, name="soci", help=self.description) - - def execute( - self, - base_path: Path, - targets: list[ImageTarget], - *, - source_refs: dict[str, str] | None = None, - dry_run: bool = False, - **kwargs: Any, - ) -> list[ToolCallResult]: - """Run SOCI convert workflows against eligible targets. - - ``source_refs`` maps ``target.uid`` -> the temp-registry ref to - convert (typically produced by the oras index-create phase). The refs - are registry refs; the OCI image layouts that ``soci convert - --standalone`` reads and writes are internal scratch that the workflow - materializes and pushes via oras. - - Conversion always runs in standalone (containerd-free, oras-based) - mode. - - Targets whose resolved SociOptions has ``enabled=False`` are - skipped with a ``skipped=True`` artifact entry. - """ - source_refs = source_refs or {} - - eligible: list[tuple[ImageTarget, SociOptions, str]] = [] - results: list[ToolCallResult] = [] - for target in targets: - opts = get_soci_options_for_target(target) - if not opts.enabled: - results.append( - ToolCallResult( - exit_code=0, - tool_name="soci", - target=target, - stdout="", - stderr="", - artifacts={"skipped": True, "reason": "soci.enabled is false"}, - ) - ) - continue - ref = source_refs.get(target.uid) - if not ref: - # SOCI is enabled for this target but it is not part of the - # current run — no source ref was produced for it (e.g. it has - # no merge sources / build metadata in the provided metadata, - # as happens for the other versions and dev streams when - # publishing a single set of files). There is nothing to - # convert, so skip it like a disabled target rather than - # reporting a spurious conversion failure. - results.append( - ToolCallResult( - exit_code=0, - tool_name="soci", - target=target, - stdout="", - stderr="", - artifacts={"skipped": True, "reason": "no source ref provided for this run"}, - ) - ) - continue - eligible.append((target, opts, ref)) - - if not eligible: - log.info( - "soci.execute: no targets have SOCI enabled (or no source refs " - "were provided for the enabled ones); skipping conversion." - ) - return results - - # Standalone conversion bridges the registry with oras and never - # touches containerd; both soci and oras must be present. - def resolve_bin(finder: Any, fallback: str) -> str: - # A tool only has to resolve when it will actually be executed: a - # dry run executes nothing, so fall back to the bare name purely - # for any logged command. When the tool resolves we keep its real - # path so output stays accurate. A tool a real run needs but cannot - # find is still a hard error. - try: - return finder(base_path) - except BakeryToolNotFoundError: - if dry_run: - return fallback - raise - - soci_bin = resolve_bin(find_soci_bin, "soci") - oras_bin = resolve_bin(find_oras_bin, "oras") - - for target, opts, ref in eligible: - workflow = SociConvertWorkflow( - soci_bin=soci_bin, - oras_bin=oras_bin, - image_target=target, - options=opts, - source_ref=ref, - ) - wf_result = workflow.run(dry_run=dry_run) - results.append( - ToolCallResult( - exit_code=0 if wf_result.success else 1, - tool_name="soci", - target=target, - stdout="", - stderr=wf_result.error or "", - artifacts={"workflow_result": wf_result}, - ) - ) - - return results - - def results(self, results: list[ToolCallResult]) -> None: - """Display SOCI conversion results and raise typer.Exit(1) on failure.""" - from posit_bakery.log import stderr_console - - has_errors = False - for r in results: - artifacts = r.artifacts or {} - if artifacts.get("skipped"): - log.info(f"SOCI skipped for {r.target}: {artifacts.get('reason')}") - continue - wf = artifacts.get("workflow_result") - if r.exit_code != 0: - has_errors = True - stderr_console.print( - f"SOCI convert failed for '{r.target}': {r.stderr}", - style="error", - ) - elif wf: - log.info(f"SOCI converted '{r.target}' -> {wf.destination_ref}") - - if has_errors: - stderr_console.print("❌ SOCI conversion(s) failed", style="error") - raise typer.Exit(code=1) - - stderr_console.print("✅ SOCI conversion(s) completed", style="success") diff --git a/posit-bakery/posit_bakery/plugins/registry.py b/posit-bakery/posit_bakery/plugins/registry.py index c67bd1908..20888c700 100644 --- a/posit-bakery/posit_bakery/plugins/registry.py +++ b/posit-bakery/posit_bakery/plugins/registry.py @@ -51,7 +51,12 @@ def _register_plugin_tool_options() -> None: for plugin in _plugins.values(): tool_options_class = getattr(plugin, "tool_options_class", None) if tool_options_class is not None: - register_tool_options(plugin.name, tool_options_class) + # Key by the options class's own `tool` discriminator rather than the plugin name: + # a single plugin may own a tool whose name differs from it (e.g. imagetools owns + # the `soci` tool options). Falls back to the plugin name if the field is absent. + tool_field = tool_options_class.model_fields.get("tool") + tool_name = getattr(tool_field, "default", None) or plugin.name + register_tool_options(tool_name, tool_options_class) registered_any = True if registered_any: diff --git a/posit-bakery/pyproject.toml b/posit-bakery/pyproject.toml index 23a5e160d..016ba4fdb 100644 --- a/posit-bakery/pyproject.toml +++ b/posit-bakery/pyproject.toml @@ -57,10 +57,9 @@ bakery = "posit_bakery.cli.main:app" [project.entry-points."bakery.plugins"] dgoss = "posit_bakery.plugins.builtin.dgoss:DGossPlugin" -oras = "posit_bakery.plugins.builtin.oras:OrasPlugin" hadolint = "posit_bakery.plugins.builtin.hadolint:HadolintPlugin" wizcli = "posit_bakery.plugins.builtin.wizcli:WizCLIPlugin" -soci = "posit_bakery.plugins.builtin.soci:SociPlugin" +imagetools = "posit_bakery.plugins.builtin.imagetools:ImageToolsPlugin" [build-system] requires = ["hatchling", "uv-dynamic-versioning"] diff --git a/posit-bakery/test/cli/test_ci.py b/posit-bakery/test/cli/test_ci.py index af21aa04f..2f3321113 100644 --- a/posit-bakery/test/cli/test_ci.py +++ b/posit-bakery/test/cli/test_ci.py @@ -50,7 +50,7 @@ def __init__(self, oras_bin, image_target, annotations, plain_http=False): self.annotations = annotations self.plain_http = plain_http - def run(self, dry_run=False): + def run(self, dry_run=False, **kwargs): sources = self.image_target.get_merge_sources() calls.append((sources, dry_run)) result = MagicMock() @@ -63,9 +63,10 @@ def __init__(self, oras_bin, image_target): self.image_target = image_target self.oras_bin = oras_bin - def run(self, source, dry_run=False): + def run(self, source, dry_run=False, **kwargs): result = MagicMock() result.success = True + result.destinations = self.image_target.tags.as_strings() return result class MockOrasIndexVerifyWorkflow: @@ -73,7 +74,7 @@ def __init__(self, oras_bin, image_target): self.image_target = image_target self.oras_bin = oras_bin - def run(self, dry_run=False): + def run(self, dry_run=False, **kwargs): result = MagicMock() result.success = True result.verified = self.image_target.tags.as_strings() @@ -81,19 +82,19 @@ def run(self, dry_run=False): # Patch the imports inside the publish function mocker.patch( - "posit_bakery.plugins.builtin.oras.oras.OrasIndexCreateWorkflow", + "posit_bakery.plugins.builtin.imagetools.oras.OrasIndexCreateWorkflow", MockOrasIndexCreateWorkflow, ) mocker.patch( - "posit_bakery.plugins.builtin.oras.oras.OrasIndexCopyWorkflow", + "posit_bakery.plugins.builtin.imagetools.oras.OrasIndexCopyWorkflow", MockOrasIndexCopyWorkflow, ) mocker.patch( - "posit_bakery.plugins.builtin.oras.oras.OrasIndexVerifyWorkflow", + "posit_bakery.plugins.builtin.imagetools.oras.OrasIndexVerifyWorkflow", MockOrasIndexVerifyWorkflow, ) mocker.patch( - "posit_bakery.plugins.builtin.oras.oras.find_oras_bin", + "posit_bakery.plugins.builtin.imagetools.oras.find_oras_bin", return_value="/mock/oras", ) return calls diff --git a/posit-bakery/test/cli/test_ci_publish.py b/posit-bakery/test/cli/test_ci_publish.py index 9bfb9d47d..fd6249d0b 100644 --- a/posit-bakery/test/cli/test_ci_publish.py +++ b/posit-bakery/test/cli/test_ci_publish.py @@ -61,7 +61,7 @@ def fake_execute(base_path, targets, *, source_refs=None, dry_run=False, **kwarg runner = CliRunner() with ( patch("posit_bakery.cli.ci.BakeryConfig.from_context", return_value=fake_config), - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), + patch("posit_bakery.plugins.builtin.imagetools.oras.find_oras_bin", return_value="oras"), patch("posit_bakery.plugins.registry.get_plugin", return_value=fake_soci), ): result = runner.invoke( diff --git a/posit-bakery/test/parallel/test_jobs.py b/posit-bakery/test/parallel/test_jobs.py new file mode 100644 index 000000000..07365bb04 --- /dev/null +++ b/posit-bakery/test/parallel/test_jobs.py @@ -0,0 +1,227 @@ +import io +import os +import signal +import sys +import threading +import time + +import pytest +from rich.console import Console + +from posit_bakery.parallel import ( + CommandResult, + CommandRunner, + JobResult, + ParallelShellExecutor, + RetryPolicy, + ShellJob, +) + +PY = sys.executable + +pytestmark = [pytest.mark.unit] + + +# A command that fails (exit 1, "transient" stderr) for the first N invocations tracked in a +# counter file, then succeeds (exit 0, stdout "ok"). Lets us drive retry with real subprocesses. +_FLAKY_SRC = ( + "import sys\n" + "p, fail_until = sys.argv[1], int(sys.argv[2])\n" + "try:\n" + " n = int(open(p).read())\n" + "except Exception:\n" + " n = 0\n" + "open(p, 'w').write(str(n + 1))\n" + "if n < fail_until:\n" + " sys.stderr.write('error: manifest not found')\n" + " sys.exit(1)\n" + "sys.stdout.write('ok')\n" +) + + +def _flaky_cmd(counter_path, fail_until): + return [PY, "-c", _FLAKY_SRC, str(counter_path), str(fail_until)] + + +def _transient(result): + return b"not found" in result.stderr.lower() + + +class TestCommandResult: + def test_ok_true_on_zero_exit_no_exception(self): + assert CommandResult(cmd=["true"], returncode=0, stdout=b"", stderr=b"", duration=0.0).ok is True + + def test_ok_false_on_nonzero_exit(self): + assert CommandResult(cmd=["false"], returncode=1, stdout=b"", stderr=b"", duration=0.0).ok is False + + def test_ok_false_on_exception(self): + result = CommandResult( + cmd=["nope"], returncode=None, stdout=b"", stderr=b"", duration=0.0, exception=FileNotFoundError() + ) + assert result.ok is False + + +class TestCommandRunner: + def _executor(self): + return ParallelShellExecutor(max_workers=1, console=Console(file=io.StringIO()), use_live=False) + + def _runner(self): + return CommandRunner(self._executor(), key="job", label="job") + + def test_runs_command_and_captures_output(self): + result = self._runner().run([PY, "-c", "import sys; sys.stdout.write('hi')"]) + assert result.ok is True + assert result.stdout == b"hi" + + def test_no_retry_returns_first_failure(self, tmp_path, monkeypatch): + sleeps = [] + monkeypatch.setattr("posit_bakery.parallel.executor.time.sleep", lambda s: sleeps.append(s)) + counter = tmp_path / "c" + result = self._runner().run(_flaky_cmd(counter, fail_until=5)) + assert result.ok is False + assert counter.read_text() == "1" # ran exactly once, no retries + assert sleeps == [] + + def test_retries_transient_failure_then_succeeds(self, tmp_path, monkeypatch): + sleeps = [] + monkeypatch.setattr("posit_bakery.parallel.executor.time.sleep", lambda s: sleeps.append(s)) + counter = tmp_path / "c" + policy = RetryPolicy(max_attempts=5, jitter=False, retry_on=_transient) + result = self._runner().run(_flaky_cmd(counter, fail_until=2), retry=policy) + assert result.ok is True + assert result.stdout == b"ok" + assert counter.read_text() == "3" # failed twice, succeeded on the third attempt + assert sleeps == [2.0, 4.0] # backoff between the two retries + + def test_exhausts_retries_and_returns_last_failure(self, tmp_path, monkeypatch): + monkeypatch.setattr("posit_bakery.parallel.executor.time.sleep", lambda s: None) + counter = tmp_path / "c" + policy = RetryPolicy(max_attempts=3, jitter=False, retry_on=_transient) + result = self._runner().run(_flaky_cmd(counter, fail_until=99), retry=policy) + assert result.ok is False + assert counter.read_text() == "3" # tried exactly max_attempts times + + def test_does_not_retry_non_transient_failure(self, tmp_path, monkeypatch): + monkeypatch.setattr("posit_bakery.parallel.executor.time.sleep", lambda s: None) + counter = tmp_path / "c" + # Predicate only retries "not found"; this command emits a different error. + policy = RetryPolicy(max_attempts=5, jitter=False, retry_on=lambda r: b"never-matches" in r.stderr) + result = self._runner().run(_flaky_cmd(counter, fail_until=99), retry=policy) + assert result.ok is False + assert counter.read_text() == "1" + + +class TestShellJobAndResult: + def test_job_display_label_defaults_to_key(self): + job = ShellJob(key="k", run=lambda runner: None) + assert job.display_label == "k" + + def test_job_result_ok_false_with_exception(self): + job = ShellJob(key="k", run=lambda runner: None) + assert JobResult(job=job, exception=RuntimeError()).ok is False + + def test_job_result_ok_true_without_exception(self): + job = ShellJob(key="k", run=lambda runner: None) + assert JobResult(job=job, value=42).ok is True + + +class TestRunJobs: + def _executor(self, max_workers, use_live=False): + return ParallelShellExecutor(max_workers=max_workers, console=Console(file=io.StringIO()), use_live=use_live) + + def test_empty_jobs_returns_empty(self): + assert self._executor(2).run_jobs([]) == [] + + def test_results_returned_in_input_order(self): + durations = {"a": 0.30, "b": 0.05, "c": 0.15} + jobs = [ + ShellJob(key=k, run=lambda runner, d=d: runner.run([PY, "-c", f"import time; time.sleep({d})"])) + for k, d in durations.items() + ] + results = self._executor(3).run_jobs(jobs) + assert [r.job.key for r in results] == ["a", "b", "c"] + + def test_callable_return_value_captured(self): + def job_run(runner): + r1 = runner.run([PY, "-c", "import sys; sys.stdout.write('a')"]) + r2 = runner.run([PY, "-c", "import sys; sys.stdout.write('b')"]) + return (r1.stdout, r2.stdout) + + jobs = [ShellJob(key="seq", run=job_run)] + results = self._executor(1).run_jobs(jobs) + assert results[0].ok is True + assert results[0].value == (b"a", b"b") + + def test_exception_in_callable_captured_not_raised(self): + def boom(runner): + raise RuntimeError("kaboom") + + jobs = [ + ShellJob(key="ok", run=lambda runner: runner.run([PY, "-c", "pass"])), + ShellJob(key="bad", run=boom), + ] + results = self._executor(2).run_jobs(jobs) + by_key = {r.job.key: r for r in results} + assert by_key["ok"].ok is True + assert by_key["bad"].ok is False + assert isinstance(by_key["bad"].exception, RuntimeError) + + def test_on_result_called_once_per_job_on_main_thread(self): + jobs = [ShellJob(key=str(i), run=lambda runner: runner.run([PY, "-c", "pass"])) for i in range(5)] + seen_keys = [] + seen_threads = set() + + def on_result(result): + seen_keys.append(result.job.key) + seen_threads.add(threading.get_ident()) + + self._executor(3).run_jobs(jobs, on_result=on_result) + assert sorted(seen_keys) == sorted(j.key for j in jobs) + assert seen_threads == {threading.main_thread().ident} + + def test_job_retry_default_applied_to_commands(self, tmp_path, monkeypatch): + # A job declares a retry policy; commands it runs without an explicit retry inherit it. + monkeypatch.setattr("posit_bakery.parallel.executor.time.sleep", lambda s: None) + counter = tmp_path / "c" + policy = RetryPolicy(max_attempts=5, jitter=False, retry_on=lambda r: b"not found" in r.stderr.lower()) + + def job_run(runner): + return runner.run(_flaky_cmd(counter, fail_until=1)) # no explicit retry -> job default + + jobs = [ShellJob(key="j", run=job_run, retry=policy)] + results = self._executor(1).run_jobs(jobs) + assert results[0].ok is True + assert results[0].value.ok is True + assert counter.read_text() == "2" # failed once, retried, succeeded + + def test_sigint_terminates_running_job_children(self, tmp_path): + # A job runs a 3s sleep via the runner; SIGINT fires 0.3s in. Expect KeyboardInterrupt + # and prompt return (the in-flight child is terminated, not waited out). + script = "import sys,time;open(sys.argv[1],'w').close();time.sleep(3);open(sys.argv[2],'w').close()" + jobs = [ + ShellJob( + key=str(i), + run=lambda runner, i=i: runner.run( + [PY, "-c", script, str(tmp_path / f"s{i}"), str(tmp_path / f"e{i}")] + ), + ) + for i in range(4) + ] + + def fire(): + time.sleep(0.3) + os.kill(os.getpid(), signal.SIGINT) + + threading.Thread(target=fire, daemon=True).start() + ex = self._executor(2) + start = time.monotonic() + interrupted = False + try: + ex.run_jobs(jobs) + except KeyboardInterrupt: + interrupted = True + elapsed = time.monotonic() - start + + assert interrupted is True + assert not list(tmp_path.glob("e*")) # no job ran to completion + assert elapsed < 3 diff --git a/posit-bakery/test/parallel/test_retry.py b/posit-bakery/test/parallel/test_retry.py new file mode 100644 index 000000000..d331ee90f --- /dev/null +++ b/posit-bakery/test/parallel/test_retry.py @@ -0,0 +1,72 @@ +from types import SimpleNamespace + +import pytest + +from posit_bakery.parallel import RetryPolicy + +pytestmark = [pytest.mark.unit] + + +def _result(ok: bool): + """A minimal CommandResult stand-in for policy tests (only `.ok` is read).""" + return SimpleNamespace(ok=ok) + + +class TestRetryPolicyDelayFor: + def test_exponential_growth_without_jitter(self): + policy = RetryPolicy(base_delay=2.0, multiplier=2.0, max_delay=32.0, jitter=False) + assert policy.delay_for(1) == 2.0 + assert policy.delay_for(2) == 4.0 + assert policy.delay_for(3) == 8.0 + assert policy.delay_for(4) == 16.0 + assert policy.delay_for(5) == 32.0 + + def test_capped_at_max_delay(self): + policy = RetryPolicy(base_delay=2.0, multiplier=2.0, max_delay=32.0, jitter=False) + assert policy.delay_for(6) == 32.0 + assert policy.delay_for(10) == 32.0 + + def test_jitter_stays_within_zero_and_cap(self): + policy = RetryPolicy(base_delay=2.0, multiplier=2.0, max_delay=32.0, jitter=True) + for attempt in range(1, 8): + uncapped = min(2.0 * 2.0 ** (attempt - 1), 32.0) + for _ in range(50): + delay = policy.delay_for(attempt) + assert 0.0 <= delay <= uncapped + + +class TestRetryPolicyShouldRetry: + def _policy(self, **kw): + kw.setdefault("retry_on", lambda r: True) + return RetryPolicy(max_attempts=5, **kw) + + def test_retries_transient_failure_below_attempt_cap(self): + policy = self._policy(retry_on=lambda r: True) + assert policy.should_retry(_result(ok=False), attempt=1) is True + + def test_no_retry_at_attempt_cap(self): + policy = self._policy(retry_on=lambda r: True) + assert policy.should_retry(_result(ok=False), attempt=5) is False + + def test_no_retry_when_result_ok(self): + policy = self._policy(retry_on=lambda r: True) + assert policy.should_retry(_result(ok=True), attempt=1) is False + + def test_no_retry_when_predicate_rejects(self): + policy = self._policy(retry_on=lambda r: False) + assert policy.should_retry(_result(ok=False), attempt=1) is False + + def test_no_retry_when_no_predicate(self): + policy = RetryPolicy(max_attempts=5, retry_on=None) + assert policy.should_retry(_result(ok=False), attempt=1) is False + + +class TestRetryPolicyDefaults: + def test_default_field_values(self): + policy = RetryPolicy() + assert policy.max_attempts == 5 + assert policy.base_delay == 2.0 + assert policy.max_delay == 32.0 + assert policy.multiplier == 2.0 + assert policy.jitter is True + assert policy.retry_on is None diff --git a/posit-bakery/test/plugins/builtin/oras/__init__.py b/posit-bakery/test/plugins/builtin/imagetools/__init__.py similarity index 100% rename from posit-bakery/test/plugins/builtin/oras/__init__.py rename to posit-bakery/test/plugins/builtin/imagetools/__init__.py diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_cli_registration.py b/posit-bakery/test/plugins/builtin/imagetools/test_cli_registration.py new file mode 100644 index 000000000..b970b1ad8 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_cli_registration.py @@ -0,0 +1,71 @@ +"""The consolidated imagetools plugin registers the publish / oras merge / soci convert command +groups, and the merge-phase path runs end-to-end in dry-run.""" + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest +import typer + +from posit_bakery.image.image_target import ImageTarget, ImageTargetContext, ImageTargetSettings, StringableList +from posit_bakery.plugins.builtin.imagetools import ImageToolsPlugin +from posit_bakery.plugins.builtin.imagetools.publish import MERGE_PHASES +from posit_bakery.plugins.protocol import BakeryToolPlugin + +pytestmark = [pytest.mark.unit] + + +@pytest.fixture +def plugin(): + return ImageToolsPlugin() + + +@pytest.fixture +def mock_target_with_sources(): + mock_target = MagicMock(spec=ImageTarget) + mock_target.image_name = "test-image" + mock_target.uid = "test-image-1-0-0" + mock_target.temp_registry = "ghcr.io/posit-dev" + mock_target.context = MagicMock(spec=ImageTargetContext) + mock_target.context.base_path = Path("/project") + mock_target.settings = MagicMock(spec=ImageTargetSettings) + mock_target.settings.temp_registry = "ghcr.io/posit-dev" + mock_target.get_merge_sources.return_value = [ + "ghcr.io/posit-dev/test/tmp@sha256:amd64digest", + "ghcr.io/posit-dev/test/tmp@sha256:arm64digest", + ] + mock_target.labels = {"org.opencontainers.image.title": "Test Image"} + mock_target.push_sort_key = (0,) + + mock_tag = MagicMock() + mock_tag.destination = "ghcr.io/posit-dev/test-image" + mock_tag.suffix = "1.0.0" + mock_tag.__str__ = lambda self: "ghcr.io/posit-dev/test-image:1.0.0" + mock_target.tags = StringableList([mock_tag]) + return mock_target + + +class TestProtocol: + def test_implements_protocol(self, plugin): + assert isinstance(plugin, BakeryToolPlugin) + + def test_name(self, plugin): + assert plugin.name == "imagetools" + + +class TestRegisterCli: + def test_registers_publish_oras_and_soci_groups(self, plugin): + app = typer.Typer() + plugin.register_cli(app) + group_names = {g.name for g in app.registered_groups} + assert {"imagetools", "oras", "soci"} <= group_names + + +class TestMergePathDryRun: + def test_dry_run_merges_end_to_end(self, plugin, mock_target_with_sources): + # Through the real pipeline + executor; dry-run short-circuits each command before any + # subprocess, so no tools are required and only the merge phases run. + results = plugin.execute(Path("/project"), [mock_target_with_sources], phases=MERGE_PHASES, dry_run=True) + assert len(results) == 1 + assert results[0].exit_code == 0 + assert results[0].target is mock_target_with_sources diff --git a/posit-bakery/test/plugins/builtin/oras/test_oras.py b/posit-bakery/test/plugins/builtin/imagetools/test_oras.py similarity index 99% rename from posit-bakery/test/plugins/builtin/oras/test_oras.py rename to posit-bakery/test/plugins/builtin/imagetools/test_oras.py index 6cd753cf3..31473d841 100644 --- a/posit-bakery/test/plugins/builtin/oras/test_oras.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_oras.py @@ -9,7 +9,7 @@ from posit_bakery.error import BakeryToolRuntimeError from posit_bakery.image.image_target import StringableList, ImageTarget, ImageTargetContext, ImageTargetSettings -from posit_bakery.plugins.builtin.oras.oras import ( +from posit_bakery.plugins.builtin.imagetools.oras import ( find_oras_bin, get_repository_from_ref, OrasCopy, @@ -398,7 +398,7 @@ def mock_image_target(self): def test_from_image_target(self, mock_image_target): """Test creating workflow from ImageTarget.""" - with patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"): + with patch("posit_bakery.plugins.builtin.imagetools.oras.find_oras_bin", return_value="oras"): workflow = OrasMergeWorkflow.from_image_target(mock_image_target) assert workflow.oras_bin == "oras" @@ -784,7 +784,7 @@ def test_from_image_target_with_plain_http(self, mock_image_target_for_local_reg "localhost:5000/test/tmp@sha256:digest", ] - with patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"): + with patch("posit_bakery.plugins.builtin.imagetools.oras.find_oras_bin", return_value="oras"): workflow = OrasMergeWorkflow.from_image_target(mock_target, plain_http=True) assert workflow.plain_http is True diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_oras_runner.py b/posit-bakery/test/plugins/builtin/imagetools/test_oras_runner.py new file mode 100644 index 000000000..1bc591a59 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_oras_runner.py @@ -0,0 +1,129 @@ +"""The ORAS command/workflow runner seam: when a CommandRunner is supplied, commands +execute through it (gaining tracked-spawn + retry) instead of calling subprocess directly.""" + +from unittest.mock import MagicMock + +import pytest + +from posit_bakery.error import BakeryToolRuntimeError +from posit_bakery.image.image_target import ImageTarget, StringableList +from posit_bakery.parallel import CommandResult +from posit_bakery.plugins.builtin.imagetools.oras import ( + OrasIndexCopyWorkflow, + OrasIndexCreateWorkflow, + OrasIndexVerifyWorkflow, + OrasManifestIndexCreate, +) + +pytestmark = [pytest.mark.unit] + + +class StubRunner: + """Records commands and returns preset CommandResults (a CommandRunner seam stand-in).""" + + def __init__(self, results): + self.calls = [] + self._results = list(results) + + def run(self, cmd, **kwargs): + self.calls.append((cmd, kwargs)) + return self._results.pop(0) + + +def _ok(cmd): + return CommandResult(cmd=cmd, returncode=0, stdout=b"", stderr=b"", duration=0.0) + + +def _fail(cmd, stderr=b"not found"): + return CommandResult(cmd=cmd, returncode=1, stdout=b"", stderr=stderr, duration=0.0) + + +@pytest.fixture +def mock_image_target_factory(): + def _make(): + t = MagicMock(spec=ImageTarget) + t.image_name = "test-image" + t.uid = "test-image-1-0-0" + t.temp_registry = "ghcr.io/posit-dev" + t.get_merge_sources.return_value = [ + "ghcr.io/posit-dev/test/tmp@sha256:amd64digest", + "ghcr.io/posit-dev/test/tmp@sha256:arm64digest", + ] + t.labels = {"org.opencontainers.image.title": "Test Image"} + tag1 = MagicMock() + tag1.destination = "ghcr.io/posit-dev/test-image" + tag1.suffix = "1.0.0" + tag1.__str__ = lambda self: "ghcr.io/posit-dev/test-image:1.0.0" + t.tags = StringableList([tag1]) + return t + + return _make + + +class TestCommandRunnerSeam: + def test_run_routes_built_command_through_runner(self): + cmd = OrasManifestIndexCreate( + oras_bin="oras", + sources=["ghcr.io/posit-dev/test/tmp@sha256:digest"], + destination="ghcr.io/posit-dev/test/tmp:tag", + ) + runner = StubRunner([_ok(cmd.command)]) + result = cmd.run(runner=runner) + assert runner.calls[0][0] == cmd.command + assert result.returncode == 0 + + def test_run_raises_tool_error_on_runner_failure(self): + cmd = OrasManifestIndexCreate( + oras_bin="oras", + sources=["ghcr.io/posit-dev/test/tmp@sha256:digest"], + destination="ghcr.io/posit-dev/test/tmp:tag", + ) + runner = StubRunner([_fail(cmd.command, stderr=b"manifest unknown")]) + with pytest.raises(BakeryToolRuntimeError) as exc: + cmd.run(runner=runner) + assert exc.value.tool_name == "oras" + assert exc.value.exit_code == 1 + + def test_run_forwards_step_label_to_runner(self): + cmd = OrasManifestIndexCreate( + oras_bin="oras", + sources=["ghcr.io/posit-dev/test/tmp@sha256:digest"], + destination="ghcr.io/posit-dev/test/tmp:tag", + ) + runner = StubRunner([_ok(cmd.command)]) + cmd.run(runner=runner, step_label="index create") + assert runner.calls[0][1].get("step_label") == "index create" + + +class TestWorkflowsThreadRunner: + def test_index_create_workflow_uses_runner(self, mock_image_target_factory): + target = mock_image_target_factory() + wf = OrasIndexCreateWorkflow(oras_bin="oras", image_target=target, annotations={"k": "v"}) + runner = StubRunner([_ok(["oras", "manifest", "index", "create"])]) + result = wf.run(runner=runner) + assert result.success is True + assert runner.calls[0][0][:4] == ["oras", "manifest", "index", "create"] + + def test_index_copy_workflow_uses_runner(self, mock_image_target_factory): + target = mock_image_target_factory() + wf = OrasIndexCopyWorkflow(oras_bin="oras", image_target=target) + runner = StubRunner([_ok(["oras", "cp"])]) + result = wf.run(source="ghcr.io/posit-dev/test-image/tmp:src", runner=runner) + assert result.success is True + assert runner.calls[0][0][:2] == ["oras", "cp"] + + def test_index_verify_workflow_uses_runner(self, mock_image_target_factory): + target = mock_image_target_factory() + wf = OrasIndexVerifyWorkflow(oras_bin="oras", image_target=target) + runner = StubRunner([_ok(["oras", "manifest", "fetch"])]) + result = wf.run(runner=runner) + assert result.success is True + assert runner.calls[0][0][:3] == ["oras", "manifest", "fetch"] + + def test_index_create_workflow_failure_via_runner(self, mock_image_target_factory): + target = mock_image_target_factory() + wf = OrasIndexCreateWorkflow(oras_bin="oras", image_target=target) + runner = StubRunner([_fail(["oras", "manifest", "index", "create"])]) + result = wf.run(runner=runner) + assert result.success is False + assert result.error is not None diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_plugin.py b/posit-bakery/test/plugins/builtin/imagetools/test_plugin.py new file mode 100644 index 000000000..38ac154e3 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_plugin.py @@ -0,0 +1,93 @@ +"""ImageToolsPlugin.execute(): fan targets out across the parallel executor and map each +PublishResult onto a ToolCallResult; results() summarizes and exits non-zero on failure.""" + +from unittest.mock import MagicMock, patch + +import pytest +import typer + +from posit_bakery.image.image_target import ImageTarget +from posit_bakery.plugins.builtin.imagetools import ImageToolsPlugin +from posit_bakery.plugins.builtin.imagetools.publish import PublishResult, PublishWorkflow +from posit_bakery.plugins.builtin.imagetools.options import SociOptions + +pytestmark = [pytest.mark.unit] + + +def _make_target(uid, push_sort_key=0): + t = MagicMock(spec=ImageTarget) + t.uid = uid + t.image_name = "test-image" + t.push_sort_key = push_sort_key + t.__str__ = lambda self: f"ImageTarget({uid})" + return t + + +def _patches(): + """Patch bin resolution and soci-option lookup so execute() runs without real tools.""" + return ( + patch("posit_bakery.plugins.builtin.imagetools.find_oras_bin", return_value="oras"), + patch("posit_bakery.plugins.builtin.imagetools.find_soci_bin", return_value="soci"), + patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + return_value=SociOptions(enabled=False), + ), + ) + + +def _run_with(fake_run, targets, tmp_path, **kwargs): + plugin = ImageToolsPlugin() + p_oras, p_soci, p_opts = _patches() + with p_oras, p_soci, p_opts, patch.object(PublishWorkflow, "run", fake_run): + return plugin, plugin.execute(tmp_path, targets, dry_run=True, **kwargs) + + +class TestExecute: + def test_one_result_per_target_ordered_by_push_sort_key(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + return PublishResult(target=self.image_target, success=True, verified=["t"]) + + _, results = _run_with(fake_run, [_make_target("b", 2), _make_target("a", 1)], tmp_path) + assert [r.target.uid for r in results] == ["a", "b"] + assert all(r.exit_code == 0 for r in results) + + def test_skipped_target_maps_to_skipped_artifact(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + return PublishResult(target=self.image_target, skipped=True, skip_reason="no merge sources") + + _, results = _run_with(fake_run, [_make_target("a")], tmp_path) + assert results[0].exit_code == 0 + assert results[0].artifacts.get("skipped") is True + + def test_failed_target_maps_to_nonzero_exit(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + return PublishResult(target=self.image_target, success=False, error="copy boom", failed_phase="copy") + + _, results = _run_with(fake_run, [_make_target("a")], tmp_path) + assert results[0].exit_code == 1 + assert "copy boom" in results[0].stderr + + def test_exception_in_job_maps_to_nonzero_exit(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + raise RuntimeError("kaboom") + + _, results = _run_with(fake_run, [_make_target("a")], tmp_path) + assert results[0].exit_code == 1 + + +class TestResults: + def test_results_exits_nonzero_when_any_failed(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + return PublishResult(target=self.image_target, success=False, error="boom", failed_phase="create") + + plugin, results = _run_with(fake_run, [_make_target("a")], tmp_path) + with pytest.raises(typer.Exit) as exc: + plugin.results(results) + assert exc.value.exit_code == 1 + + def test_results_succeeds_when_all_ok(self, tmp_path): + def fake_run(self, runner, *, phases, dry_run): + return PublishResult(target=self.image_target, success=True) + + plugin, results = _run_with(fake_run, [_make_target("a")], tmp_path) + plugin.results(results) # no raise diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_publish.py b/posit-bakery/test/plugins/builtin/imagetools/test_publish.py new file mode 100644 index 000000000..173af756f --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_publish.py @@ -0,0 +1,215 @@ +"""PublishWorkflow: per-target create -> soci -> copy -> verify sequencing, driven through a +CommandRunner. A StubRunner stands in for the executor-bound runner and records the command +sequence each phase emits.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from posit_bakery.image.image_target import ImageTarget, StringableList +from posit_bakery.parallel import CommandResult +from posit_bakery.plugins.builtin.imagetools.publish import ( + ALL_PHASES, + MERGE_PHASES, + SOCI_PHASES, + PublishPhase, + PublishWorkflow, +) +from posit_bakery.plugins.builtin.imagetools.options import SociOptions + +pytestmark = [pytest.mark.unit] + + +class StubRunner: + """Returns ok CommandResults and records every command + step_label it runs.""" + + def __init__(self, fail_on=None): + self.calls = [] + self._fail_on = fail_on # substring to fail on (in joined cmd), else all succeed + + def run(self, cmd, *, env=None, cwd=None, timeout=None, retry=None, step_label=None): + self.calls.append({"cmd": cmd, "step_label": step_label}) + joined = " ".join(cmd) + if self._fail_on and self._fail_on in joined: + return CommandResult(cmd=cmd, returncode=1, stdout=b"", stderr=b"not found", duration=0.0) + return CommandResult(cmd=cmd, returncode=0, stdout=b"", stderr=b"", duration=0.0) + + def step_labels(self): + return [c["step_label"] for c in self.calls] + + +def _make_target(uid="img-1-0-0", image_name="test-image", with_merge_sources=True): + t = MagicMock(spec=ImageTarget) + t.uid = uid + t.image_name = image_name + t.temp_registry = "ghcr.io/posit-dev" + t.settings = MagicMock() + t.settings.temp_registry = "ghcr.io/posit-dev" + t.labels = {"org.opencontainers.image.title": "Test Image"} + t.get_merge_sources.return_value = ( + ["ghcr.io/posit-dev/test/tmp@sha256:amd64", "ghcr.io/posit-dev/test/tmp@sha256:arm64"] + if with_merge_sources + else [] + ) + tag = MagicMock() + tag.destination = "ghcr.io/posit-dev/test-image" + tag.suffix = "1.0.0" + tag.__str__ = lambda self: "ghcr.io/posit-dev/test-image:1.0.0" + t.tags = StringableList([tag]) + return t + + +def _workflow(target, source_ref=None): + return PublishWorkflow(image_target=target, oras_bin="oras", soci_bin="soci", source_ref=source_ref) + + +def _disabled_soci(target): + return SociOptions(enabled=False) + + +def _enabled_soci(target): + return SociOptions(enabled=True) + + +class TestFullPublishSequence: + def test_runs_all_phases_in_order_with_soci_disabled(self): + target = _make_target() + runner = StubRunner() + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_disabled_soci, + ): + result = _workflow(target).run(runner, phases=ALL_PHASES) + + assert result.success is True + assert result.skipped is False + assert result.soci_skipped is True + # create, copy, verify (no soci) — verify produced the verified list + assert result.verified == ["ghcr.io/posit-dev/test-image:1.0.0"] + labels = runner.step_labels() + assert labels == ["index create", "index copy", "verify"] + + def test_soci_enabled_inserts_three_steps_and_propagates_ref(self): + target = _make_target() + runner = StubRunner() + with ( + patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_enabled_soci, + ), + patch( + "posit_bakery.plugins.builtin.imagetools.soci.SociConvertWorkflow._read_converted_digest", + return_value="sha256:abc", + ), + ): + result = _workflow(target).run(runner, phases=ALL_PHASES) + + assert result.success is True + assert result.soci_skipped is False + assert result.soci_destination_ref is not None + assert result.soci_destination_ref.endswith("-soci") + labels = runner.step_labels() + assert labels == ["index create", "soci pull", "soci convert", "soci push", "index copy", "verify"] + # the index-copy source is the SOCI-converted ref + copy_cmd = next(c["cmd"] for c in runner.calls if c["step_label"] == "index copy") + assert any(arg.endswith("-soci") for arg in copy_cmd) + + +class TestPhaseSubsets: + def test_merge_phases_skip_soci_and_verify(self): + target = _make_target() + runner = StubRunner() + result = _workflow(target).run(runner, phases=MERGE_PHASES) + assert result.success is True + assert runner.step_labels() == ["index create", "index copy"] + + def test_soci_phases_only_convert_from_source_ref(self): + target = _make_target() + runner = StubRunner() + with ( + patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_enabled_soci, + ), + patch( + "posit_bakery.plugins.builtin.imagetools.soci.SociConvertWorkflow._read_converted_digest", + return_value="sha256:abc", + ), + ): + result = _workflow(target, source_ref="ghcr.io/posit-dev/test-image/tmp:merged").run( + runner, phases=SOCI_PHASES + ) + assert result.success is True + assert runner.step_labels() == ["soci pull", "soci convert", "soci push"] + + def test_soci_phases_disabled_target_is_soci_skipped_not_failed(self): + target = _make_target() + runner = StubRunner() + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_disabled_soci, + ): + result = _workflow(target, source_ref="ghcr.io/posit-dev/test-image/tmp:merged").run( + runner, phases=SOCI_PHASES + ) + assert result.success is True + assert result.soci_skipped is True + assert runner.calls == [] + + def test_soci_phases_enabled_without_source_ref_is_not_failed(self): + # Regression (#591 adjacent): a SOCI-enabled target not in this run (no source ref) + # must be skipped, not reported as a conversion failure. + target = _make_target() + runner = StubRunner() + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_enabled_soci, + ): + result = _workflow(target, source_ref=None).run(runner, phases=SOCI_PHASES) + assert result.success is True + assert result.soci_skipped is True + assert runner.calls == [] + + +class TestSkipsAndFailures: + def test_skips_target_without_merge_sources(self): + target = _make_target(with_merge_sources=False) + runner = StubRunner() + result = _workflow(target).run(runner, phases=ALL_PHASES) + assert result.skipped is True + assert result.success is True # a skip is not a failure + assert runner.calls == [] + + def test_missing_temp_registry_is_failure(self): + target = _make_target() + target.settings.temp_registry = None + runner = StubRunner() + result = _workflow(target).run(runner, phases=ALL_PHASES) + assert result.success is False + assert result.failed_phase == "create" + + def test_create_failure_aborts_remaining_phases(self): + target = _make_target() + runner = StubRunner(fail_on="manifest index create") + result = _workflow(target).run(runner, phases=ALL_PHASES) + assert result.success is False + assert result.failed_phase == "create" + # no copy/verify attempted + assert runner.step_labels() == ["index create"] + + def test_dry_run_skips_verify(self): + target = _make_target() + runner = StubRunner() + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + side_effect=_disabled_soci, + ): + result = _workflow(target).run(runner, phases=ALL_PHASES, dry_run=True) + assert result.success is True + # dry-run short-circuits each command before the runner, so nothing is recorded, + # and verify is skipped entirely. + assert result.verified == [] + + +def test_default_phase_set_is_all_four(): + assert ALL_PHASES == frozenset(PublishPhase) diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_retry.py b/posit-bakery/test/plugins/builtin/imagetools/test_retry.py new file mode 100644 index 000000000..c2004509b --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_retry.py @@ -0,0 +1,68 @@ +import pytest + +from posit_bakery.parallel import CommandResult, RetryPolicy +from posit_bakery.plugins.builtin.imagetools.retry import ( + DEFAULT_REGISTRY_RETRY, + is_transient_registry_error, +) + +pytestmark = [pytest.mark.unit] + + +def _result(stderr=b"", returncode=1, timed_out=False, exception=None): + return CommandResult( + cmd=["oras"], + returncode=returncode, + stdout=b"", + stderr=stderr, + duration=0.0, + timed_out=timed_out, + exception=exception, + ) + + +class TestIsTransientRegistryError: + @pytest.mark.parametrize( + "stderr", + [ + b"Error: ghcr.io/x/tmp@sha256:abc: not found", + b"failed to resolve: manifest unknown", + b"unexpected status: 503 Service Unavailable", + b"502 Bad Gateway", + b"500 Internal Server Error", + b"dial tcp: connection refused", + b"net/http: TLS handshake timeout", + b"unexpected EOF", + b"Too Many Requests", + ], + ) + def test_transient_errors_match(self, stderr): + assert is_transient_registry_error(_result(stderr=stderr)) is True + + @pytest.mark.parametrize( + "stderr", + [ + b"401 Unauthorized", + b"unexpected status: 403 Forbidden", + b"denied: requested access to the resource is denied", + b"invalid reference format", + b"some entirely unrelated error", + ], + ) + def test_permanent_or_unknown_errors_do_not_match(self, stderr): + assert is_transient_registry_error(_result(stderr=stderr)) is False + + def test_timeout_is_transient(self): + assert is_transient_registry_error(_result(timed_out=True)) is True + + def test_success_is_not_retryable(self): + assert is_transient_registry_error(_result(returncode=0)) is False + + def test_case_insensitive(self): + assert is_transient_registry_error(_result(stderr=b"MANIFEST NOT FOUND")) is True + + +def test_default_registry_retry_uses_classifier(): + assert isinstance(DEFAULT_REGISTRY_RETRY, RetryPolicy) + assert DEFAULT_REGISTRY_RETRY.retry_on is is_transient_registry_error + assert DEFAULT_REGISTRY_RETRY.max_attempts >= 2 diff --git a/posit-bakery/test/plugins/builtin/soci/test_cli.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_cli.py similarity index 100% rename from posit-bakery/test/plugins/builtin/soci/test_cli.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_cli.py diff --git a/posit-bakery/test/plugins/builtin/soci/test_command_base.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_command_base.py similarity index 96% rename from posit-bakery/test/plugins/builtin/soci/test_command_base.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_command_base.py index ff3a8bf87..1407f8bc3 100644 --- a/posit-bakery/test/plugins/builtin/soci/test_command_base.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_command_base.py @@ -10,7 +10,7 @@ from typing import Annotated from posit_bakery.error import BakeryToolNotFoundError, BakeryToolRuntimeError -from posit_bakery.plugins.builtin.soci.soci import SociCommand, find_soci_bin +from posit_bakery.plugins.builtin.imagetools.soci import SociCommand, find_soci_bin pytestmark = [pytest.mark.unit] diff --git a/posit-bakery/test/plugins/builtin/soci/test_convert.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_convert.py similarity index 96% rename from posit-bakery/test/plugins/builtin/soci/test_convert.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_convert.py index 96d0ffeb3..c02234f0f 100644 --- a/posit-bakery/test/plugins/builtin/soci/test_convert.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_convert.py @@ -2,7 +2,7 @@ import pytest -from posit_bakery.plugins.builtin.soci.soci import SociConvert +from posit_bakery.plugins.builtin.imagetools.soci import SociConvert pytestmark = [pytest.mark.unit] diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_soci_discovery.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_discovery.py new file mode 100644 index 000000000..d288639a6 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_discovery.py @@ -0,0 +1,31 @@ +"""Discovery of the consolidated imagetools plugin and its soci tool options.""" + +import pytest + +from posit_bakery.config.tools.registry import get_tool_options_classes +from posit_bakery.plugins.builtin.imagetools.options import SociOptions +from posit_bakery.plugins.protocol import BakeryToolPlugin +from posit_bakery.plugins.registry import discover_plugins + +pytestmark = [pytest.mark.unit] + + +def test_imagetools_plugin_is_discovered(): + plugins = discover_plugins() + assert "imagetools" in plugins + assert isinstance(plugins["imagetools"], BakeryToolPlugin) + assert plugins["imagetools"].name == "imagetools" + + +def test_no_standalone_oras_or_soci_plugins(): + # The oras/soci tooling is owned by the single imagetools plugin now. + plugins = discover_plugins() + assert "oras" not in plugins + assert "soci" not in plugins + + +def test_soci_tool_options_registered_under_soci(): + # `tool: soci` in bakery.yaml must still resolve to SociOptions even though the imagetools + # plugin (not a "soci" plugin) provides it. + discover_plugins() + assert get_tool_options_classes().get("soci") is SociOptions diff --git a/posit-bakery/test/plugins/builtin/soci/test_options.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_options.py similarity index 97% rename from posit-bakery/test/plugins/builtin/soci/test_options.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_options.py index 63d8e5405..b244d1ff6 100644 --- a/posit-bakery/test/plugins/builtin/soci/test_options.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_options.py @@ -2,7 +2,7 @@ import pytest -from posit_bakery.plugins.builtin.soci.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.options import SociOptions pytestmark = [pytest.mark.unit] diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_execute.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_execute.py new file mode 100644 index 000000000..975650bda --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_execute.py @@ -0,0 +1,71 @@ +"""ImageToolsPlugin.execute() on the SOCI phase subset (the `bakery soci convert` path). + +Detailed gating/sequencing is covered in test_publish.py; here we exercise the plugin's +dry-run integration (parallel executor, bin resolution) without requiring real tools. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +import posit_bakery.util as util +from posit_bakery.image.image_target import ImageTarget +from posit_bakery.plugins.builtin.imagetools import ImageToolsPlugin +from posit_bakery.plugins.builtin.imagetools.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.publish import SOCI_PHASES + +pytestmark = [pytest.mark.unit] + + +def _make_target(uid: str, image_name: str = "test-image") -> ImageTarget: + t = MagicMock(spec=ImageTarget) + t.uid = uid + t.image_name = image_name + t.temp_registry = "ghcr.io/posit-dev" + t.push_sort_key = (uid,) + t.__str__ = lambda self: f"ImageTarget({uid})" + return t + + +def _execute_soci(plugin, tmp_path, target): + return plugin.execute( + base_path=tmp_path, targets=[target], source_refs={target.uid: "ref"}, phases=SOCI_PHASES, dry_run=True + ) + + +def test_dry_run_disabled_target_exits_zero(tmp_path): + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + return_value=SociOptions(enabled=False), + ): + results = _execute_soci(ImageToolsPlugin(), tmp_path, _make_target("a")) + assert [r.exit_code for r in results] == [0] + + +def test_dry_run_enabled_target_exits_zero(tmp_path): + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + return_value=SociOptions(enabled=True), + ): + results = _execute_soci(ImageToolsPlugin(), tmp_path, _make_target("a")) + assert [r.exit_code for r in results] == [0] + + +@pytest.fixture +def missing_tools(monkeypatch): + """Simulate a host where soci/oras are not installed anywhere.""" + real_which = util.which + monkeypatch.setattr(util, "which", lambda name: None if name in {"soci", "oras"} else real_which(name)) + for env in ("SOCI_PATH", "ORAS_PATH"): + monkeypatch.delenv(env, raising=False) + + +def test_dry_run_does_not_require_tools_installed(tmp_path, missing_tools): + """A dry run executes nothing, so it must not abort when soci/oras are absent. Regression: + `ci publish --dry-run` raised BakeryToolNotFoundError when binaries were resolved eagerly.""" + with patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + return_value=SociOptions(enabled=True), + ): + results = _execute_soci(ImageToolsPlugin(), tmp_path, _make_target("a")) + assert [r.exit_code for r in results] == [0] diff --git a/posit-bakery/test/plugins/builtin/soci/test_plugin_results.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_results.py similarity index 77% rename from posit-bakery/test/plugins/builtin/soci/test_plugin_results.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_results.py index 37737c85f..834a9bd22 100644 --- a/posit-bakery/test/plugins/builtin/soci/test_plugin_results.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_plugin_results.py @@ -1,4 +1,4 @@ -"""Tests for SociPlugin.results().""" +"""Tests for ImageToolsPlugin.results() (shared by publish / oras merge / soci convert).""" from unittest.mock import MagicMock @@ -6,8 +6,8 @@ import typer from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.soci import SociPlugin -from posit_bakery.plugins.builtin.soci.soci import SociConvertWorkflowResult +from posit_bakery.plugins.builtin.imagetools import ImageToolsPlugin +from posit_bakery.plugins.builtin.imagetools.soci import SociConvertWorkflowResult from posit_bakery.plugins.protocol import ToolCallResult pytestmark = [pytest.mark.unit] @@ -35,12 +35,12 @@ def _result(exit_code: int, workflow_success: bool, target_uid: str = "t") -> To def test_all_success_does_not_raise(): - SociPlugin().results([_result(0, True)]) + ImageToolsPlugin().results([_result(0, True)]) def test_any_failure_raises_typer_exit(): with pytest.raises(typer.Exit) as exc: - SociPlugin().results([_result(0, True), _result(1, False, "u")]) + ImageToolsPlugin().results([_result(0, True), _result(1, False, "u")]) assert exc.value.exit_code == 1 @@ -56,4 +56,4 @@ def test_skipped_results_do_not_raise(): stderr="", artifacts={"skipped": True, "reason": "soci.enabled is false"}, ) - SociPlugin().results([skipped]) + ImageToolsPlugin().results([skipped]) diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_soci_runner.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_runner.py new file mode 100644 index 000000000..317b18f85 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_runner.py @@ -0,0 +1,81 @@ +"""The SOCI command/workflow runner seam.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from posit_bakery.error import BakeryToolRuntimeError +from posit_bakery.image.image_target import ImageTarget +from posit_bakery.parallel import CommandResult +from posit_bakery.plugins.builtin.imagetools.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.soci import SociConvert, SociConvertWorkflow + +pytestmark = [pytest.mark.unit] + + +class StubRunner: + def __init__(self, results): + self.calls = [] + self._results = list(results) + + def run(self, cmd, **kwargs): + self.calls.append((cmd, kwargs)) + return self._results.pop(0) + + +def _ok(cmd): + return CommandResult(cmd=cmd, returncode=0, stdout=b"", stderr=b"", duration=0.0) + + +def _fail(cmd, stderr=b"not found"): + return CommandResult(cmd=cmd, returncode=1, stdout=b"", stderr=stderr, duration=0.0) + + +def test_soci_command_routes_through_runner(): + cmd = SociConvert(soci_bin="soci", source="/tmp/src", destination="/tmp/out") + runner = StubRunner([_ok(cmd.command)]) + result = cmd.run(runner=runner) + assert runner.calls[0][0] == cmd.command + assert result.returncode == 0 + + +def test_soci_command_raises_on_runner_failure(): + cmd = SociConvert(soci_bin="soci", source="/tmp/src", destination="/tmp/out") + runner = StubRunner([_fail(cmd.command, stderr=b"boom")]) + with pytest.raises(BakeryToolRuntimeError) as exc: + cmd.run(runner=runner) + assert exc.value.tool_name == "soci" + + +def _convert_workflow(): + target = MagicMock(spec=ImageTarget) + target.image_name = "test-image" + target.uid = "test-image-1-0-0" + return SociConvertWorkflow( + soci_bin="soci", + oras_bin="oras", + image_target=target, + options=SociOptions(enabled=True), + source_ref="ghcr.io/posit-dev/test-image/tmp:merged", + ) + + +def test_convert_workflow_threads_runner_through_all_three_steps(): + wf = _convert_workflow() + runner = StubRunner([_ok(["oras", "cp"]), _ok(["soci", "convert"]), _ok(["oras", "cp"])]) + with patch.object(SociConvertWorkflow, "_read_converted_digest", return_value="sha256:abc123"): + result = wf.run(runner=runner) + + assert result.success is True + assert len(runner.calls) == 3 + assert runner.calls[0][0][:2] == ["oras", "cp"] # pull (registry -> layout) + assert "--standalone" in runner.calls[1][0] # convert + assert "--from-oci-layout" in runner.calls[2][0] # push (layout -> registry) + + +def test_convert_workflow_failure_via_runner(): + wf = _convert_workflow() + runner = StubRunner([_fail(["oras", "cp"], stderr=b"pull boom")]) + result = wf.run(runner=runner) + assert result.success is False + assert "pull boom" in (result.error or "") diff --git a/posit-bakery/test/plugins/builtin/soci/test_workflow.py b/posit-bakery/test/plugins/builtin/imagetools/test_soci_workflow.py similarity index 92% rename from posit-bakery/test/plugins/builtin/soci/test_workflow.py rename to posit-bakery/test/plugins/builtin/imagetools/test_soci_workflow.py index 7ad174c47..dcbed95a7 100644 --- a/posit-bakery/test/plugins/builtin/soci/test_workflow.py +++ b/posit-bakery/test/plugins/builtin/imagetools/test_soci_workflow.py @@ -6,8 +6,8 @@ import pytest from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.soci.options import SociOptions -from posit_bakery.plugins.builtin.soci.soci import SociConvertWorkflow +from posit_bakery.plugins.builtin.imagetools.options import SociOptions +from posit_bakery.plugins.builtin.imagetools.soci import SociConvertWorkflow pytestmark = [pytest.mark.unit] @@ -125,7 +125,9 @@ def test_cleans_up_scratch_dir(workflow, tmp_path): scratch = tmp_path / "soci-scratch" with ( - patch("posit_bakery.plugins.builtin.soci.soci.tempfile.mkdtemp", return_value=str(scratch)) as mock_mkdtemp, + patch( + "posit_bakery.plugins.builtin.imagetools.soci.tempfile.mkdtemp", return_value=str(scratch) + ) as mock_mkdtemp, patch("subprocess.run") as mock_run, patch.object(SociConvertWorkflow, "_read_converted_digest", return_value="sha256:abc123"), ): @@ -142,7 +144,7 @@ def test_cleans_up_scratch_dir_on_failure(workflow, tmp_path): scratch = tmp_path / "soci-scratch" with ( - patch("posit_bakery.plugins.builtin.soci.soci.tempfile.mkdtemp", return_value=str(scratch)), + patch("posit_bakery.plugins.builtin.imagetools.soci.tempfile.mkdtemp", return_value=str(scratch)), patch("subprocess.run", side_effect=lambda cmd, capture_output: _ok_proc(cmd)), patch.object(SociConvertWorkflow, "_read_converted_digest", side_effect=RuntimeError("boom")), ): diff --git a/posit-bakery/test/plugins/builtin/oras/test_oras_plugin.py b/posit-bakery/test/plugins/builtin/oras/test_oras_plugin.py deleted file mode 100644 index 87bb1c8fe..000000000 --- a/posit-bakery/test/plugins/builtin/oras/test_oras_plugin.py +++ /dev/null @@ -1,223 +0,0 @@ -"""Tests for the OrasPlugin.""" - -import logging -import subprocess -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest -import typer - -from posit_bakery.image.image_target import ImageTarget, ImageTargetContext, ImageTargetSettings, StringableList -from posit_bakery.plugins.builtin.oras import OrasPlugin -from posit_bakery.plugins.builtin.oras.oras import OrasMergeWorkflowResult -from posit_bakery.plugins.protocol import BakeryToolPlugin - -pytestmark = [pytest.mark.unit] - - -@pytest.fixture -def plugin(): - return OrasPlugin() - - -@pytest.fixture -def mock_target_with_sources(): - """Create a mock ImageTarget with merge sources.""" - mock_target = MagicMock(spec=ImageTarget) - mock_target.image_name = "test-image" - mock_target.uid = "test-image-1-0-0" - mock_target.temp_registry = "ghcr.io/posit-dev" - mock_target.context = MagicMock(spec=ImageTargetContext) - mock_target.context.base_path = Path("/project") - mock_target.settings = MagicMock(spec=ImageTargetSettings) - mock_target.settings.temp_registry = "ghcr.io/posit-dev" - mock_target.get_merge_sources.return_value = [ - "ghcr.io/posit-dev/test/tmp@sha256:amd64digest", - "ghcr.io/posit-dev/test/tmp@sha256:arm64digest", - ] - mock_target.labels = {"org.opencontainers.image.title": "Test Image"} - mock_target.push_sort_key = (0,) - - mock_tag = MagicMock() - mock_tag.destination = "ghcr.io/posit-dev/test-image" - mock_tag.suffix = "1.0.0" - mock_tag.__str__ = lambda self: "ghcr.io/posit-dev/test-image:1.0.0" - mock_target.tags = StringableList([mock_tag]) - - return mock_target - - -@pytest.fixture -def mock_target_without_sources(): - """Create a mock ImageTarget without merge sources.""" - mock_target = MagicMock(spec=ImageTarget) - mock_target.image_name = "no-sources" - mock_target.uid = "no-sources-1-0-0" - mock_target.get_merge_sources.return_value = [] - mock_target.push_sort_key = (0,) - return mock_target - - -class TestOrasPluginProtocol: - def test_implements_protocol(self, plugin): - assert isinstance(plugin, BakeryToolPlugin) - - def test_name(self, plugin): - assert plugin.name == "oras" - - def test_description(self, plugin): - assert plugin.description == "Merge multi-platform images using ORAS" - - -class TestOrasPluginExecute: - def test_execute_success(self, plugin, mock_target_with_sources): - with ( - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), - patch("subprocess.run") as mock_run, - ): - mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=b"", stderr=b"") - results = plugin.execute( - Path("/project"), - [mock_target_with_sources], - ) - - assert len(results) == 1 - assert results[0].exit_code == 0 - assert results[0].tool_name == "oras" - assert results[0].target is mock_target_with_sources - assert results[0].artifacts["workflow_result"].success is True - - def test_execute_skips_targets_without_sources(self, plugin, mock_target_without_sources): - results = plugin.execute( - Path("/project"), - [mock_target_without_sources], - ) - - assert len(results) == 0 - - def test_execute_missing_temp_registry(self, plugin, mock_target_with_sources): - mock_target_with_sources.settings.temp_registry = None - - results = plugin.execute( - Path("/project"), - [mock_target_with_sources], - ) - - assert len(results) == 1 - assert results[0].exit_code == 1 - assert "temp_registry" in results[0].stderr - - def test_execute_workflow_failure(self, plugin, mock_target_with_sources): - with ( - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), - patch("subprocess.run") as mock_run, - ): - mock_run.return_value = subprocess.CompletedProcess( - args=[], returncode=1, stdout=b"", stderr=b"create failed" - ) - results = plugin.execute( - Path("/project"), - [mock_target_with_sources], - ) - - assert len(results) == 1 - assert results[0].exit_code == 1 - assert results[0].artifacts["workflow_result"].success is False - - def test_execute_dry_run(self, plugin, mock_target_with_sources): - with ( - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), - patch("subprocess.run") as mock_run, - ): - results = plugin.execute( - Path("/project"), - [mock_target_with_sources], - dry_run=True, - ) - - mock_run.assert_not_called() - assert len(results) == 1 - assert results[0].exit_code == 0 - assert results[0].artifacts["workflow_result"].success is True - - def test_execute_mixed_targets(self, plugin, mock_target_with_sources, mock_target_without_sources): - with ( - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), - patch("subprocess.run") as mock_run, - ): - mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=b"", stderr=b"") - results = plugin.execute( - Path("/project"), - [mock_target_with_sources, mock_target_without_sources], - ) - - # Only the target with sources should produce a result - assert len(results) == 1 - assert results[0].target is mock_target_with_sources - - def test_execute_processes_targets_in_push_sort_key_order(self, plugin, caplog): - """Targets are processed in ascending push_sort_key order, regardless of input order.""" - - def make_target(name, sort_key): - t = MagicMock(spec=ImageTarget) - t.image_name = name - t.uid = f"{name}-uid" - t.context = MagicMock(spec=ImageTargetContext) - t.context.base_path = Path("/project") - t.settings = MagicMock(spec=ImageTargetSettings) - t.settings.temp_registry = "ghcr.io/posit-dev" - t.get_merge_sources.return_value = [f"ghcr.io/posit-dev/{name}/tmp@sha256:digest"] - t.labels = {} - mock_tag = MagicMock() - mock_tag.destination = f"ghcr.io/posit-dev/{name}" - mock_tag.suffix = "1.0.0" - mock_tag.__str__ = lambda self: f"ghcr.io/posit-dev/{name}:1.0.0" - t.tags = StringableList([mock_tag]) - # Override push_sort_key to a controlled tuple so the test is independent of - # ImageVersion / ImageVariant internals. - t.push_sort_key = sort_key - t.__str__ = lambda self: name - return t - - # Input order is intentionally scrambled. - targets = [ - make_target("c-second", (1,)), - make_target("a-first", (0,)), - make_target("d-last", (3,)), - make_target("b-third", (2,)), - ] - expected_order = ["a-first", "c-second", "b-third", "d-last"] - - call_order = [] - - def fake_run(self_workflow, dry_run=False): - call_order.append(self_workflow.image_target.image_name) - return OrasMergeWorkflowResult(success=True, destinations=[]) - - with ( - patch("posit_bakery.plugins.builtin.oras.oras.find_oras_bin", return_value="oras"), - patch( - "posit_bakery.plugins.builtin.oras.OrasMergeWorkflow.run", - autospec=True, - side_effect=fake_run, - ), - caplog.at_level(logging.INFO, logger="posit_bakery.plugins.builtin.oras"), - ): - plugin.execute(Path("/project"), targets) - - assert call_order == expected_order, f"got {call_order}, want {expected_order}" - order_log_lines = [r for r in caplog.records if "ORAS merge order:" in r.getMessage()] - assert len(order_log_lines) == 1, "expected exactly one ORAS merge order log line" - msg = order_log_lines[0].getMessage() - assert msg.endswith("a-first, c-second, b-third, d-last"), msg - - -class TestOrasPluginCLI: - def test_register_cli_adds_oras_command(self, plugin): - app = typer.Typer() - plugin.register_cli(app) - - # Verify the oras group was registered - group_names = [g.name for g in app.registered_groups] - assert "oras" in group_names diff --git a/posit-bakery/test/plugins/builtin/soci/__init__.py b/posit-bakery/test/plugins/builtin/soci/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/posit-bakery/test/plugins/builtin/soci/test_discovery.py b/posit-bakery/test/plugins/builtin/soci/test_discovery.py deleted file mode 100644 index 888f6097b..000000000 --- a/posit-bakery/test/plugins/builtin/soci/test_discovery.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Tests for soci plugin discovery.""" - -import pytest - -from posit_bakery.plugins.registry import discover_plugins -from posit_bakery.plugins.protocol import BakeryToolPlugin - -pytestmark = [pytest.mark.unit] - - -def test_soci_plugin_is_discovered(): - plugins = discover_plugins() - assert "soci" in plugins - assert isinstance(plugins["soci"], BakeryToolPlugin) - assert plugins["soci"].name == "soci" diff --git a/posit-bakery/test/plugins/builtin/soci/test_plugin_execute.py b/posit-bakery/test/plugins/builtin/soci/test_plugin_execute.py deleted file mode 100644 index d3f65b072..000000000 --- a/posit-bakery/test/plugins/builtin/soci/test_plugin_execute.py +++ /dev/null @@ -1,283 +0,0 @@ -"""Tests for SociPlugin.execute().""" - -import subprocess -from unittest.mock import MagicMock, patch - -import pytest - -import posit_bakery.util as util -from posit_bakery.image.image_target import ImageTarget -from posit_bakery.plugins.builtin.soci import SociPlugin -from posit_bakery.plugins.builtin.soci.options import SociOptions -from posit_bakery.plugins.builtin.soci.soci import SociConvertWorkflow, SociConvertWorkflowResult - -pytestmark = [pytest.mark.unit] - - -def _make_target(uid: str, enabled: bool, image_name: str = "test-image") -> ImageTarget: - t = MagicMock(spec=ImageTarget) - t.uid = uid - t.image_name = image_name - t.temp_registry = "ghcr.io/posit-dev" - t.__str__ = lambda self: f"ImageTarget({uid})" - # Plugin reads SociOptions from target.image_version.parent.options or - # target.image_variant.options. For unit testing the plugin's gating - # behavior we let the plugin call get_soci_options(target) which we - # patch out via the helper exposed on the plugin module. - return t - - -def test_skips_targets_without_enabled_option(tmp_path): - plugin = SociPlugin() - t_off = _make_target("a", enabled=False) - t_on = _make_target("b", enabled=True) - - def fake_options(target): - return SociOptions(enabled=(target.uid == "b")) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - side_effect=fake_options, - ), - patch( - "posit_bakery.plugins.builtin.soci.find_soci_bin", - return_value="soci", - ), - patch( - "posit_bakery.plugins.builtin.soci.find_oras_bin", - return_value="oras", - ), - patch( - "posit_bakery.plugins.builtin.soci.soci.SociConvertWorkflow._read_converted_digest", - return_value="sha256:abc", - ), - patch("subprocess.run") as mock_run, - ): - mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=b"", stderr=b"") - # source_ref is provided via kwargs from the orchestrator. For the - # test we set it explicitly via per-target kwargs map. - results = plugin.execute( - base_path=tmp_path, - targets=[t_off, t_on], - source_refs={"a": "ref-a", "b": "ref-b"}, - ) - - assert len(results) == 2 - off_result = next(r for r in results if r.target.uid == "a") - on_result = next(r for r in results if r.target.uid == "b") - assert off_result.exit_code == 0 - assert off_result.artifacts is not None - assert off_result.artifacts.get("skipped") is True - assert on_result.exit_code == 0 - assert on_result.artifacts is not None - assert on_result.artifacts["workflow_result"].success is True - - -def test_enabled_target_without_source_ref_is_skipped_not_failed(tmp_path): - """A SOCI-enabled target with no source ref for this run is not in scope - (e.g. it has no merge sources / build metadata in the provided files, like - other versions/streams when publishing one set of metadata). It must be - skipped, not reported as a conversion failure. Regression: such targets - surfaced as "SOCI convert failed: no source ref provided" and flipped the - whole `ci publish` run to a failure.""" - plugin = SociPlugin() - t_ref = _make_target("a", enabled=True) - t_noref = _make_target("b", enabled=True) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=True), - ), - patch("posit_bakery.plugins.builtin.soci.find_soci_bin", return_value="soci"), - patch("posit_bakery.plugins.builtin.soci.find_oras_bin", return_value="oras"), - patch( - "posit_bakery.plugins.builtin.soci.soci.SociConvertWorkflow._read_converted_digest", - return_value="sha256:abc", - ), - patch("subprocess.run") as mock_run, - ): - mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=b"", stderr=b"") - results = plugin.execute( - base_path=tmp_path, - targets=[t_ref, t_noref], - source_refs={"a": "ref-a"}, - ) - - noref = next(r for r in results if r.target.uid == "b") - assert noref.exit_code == 0 - assert noref.artifacts is not None - assert noref.artifacts.get("skipped") is True - # The in-scope target still converts. - ref = next(r for r in results if r.target.uid == "a") - assert ref.exit_code == 0 - assert ref.artifacts["workflow_result"].success is True - - -def test_logs_summary_when_no_enabled_targets(tmp_path, caplog): - plugin = SociPlugin() - t = _make_target("a", enabled=False) - - import logging - - caplog.set_level(logging.INFO, logger="posit_bakery.plugins.builtin.soci") - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=False), - ), - patch( - "posit_bakery.plugins.builtin.soci.find_soci_bin", - return_value="soci", - ), - patch( - "posit_bakery.plugins.builtin.soci.find_oras_bin", - return_value="oras", - ), - ): - results = plugin.execute( - base_path=tmp_path, - targets=[t], - source_refs={"a": "ref-a"}, - ) - - assert len(results) == 1 - assert results[0].artifacts.get("skipped") is True - assert "no targets have soci enabled" in caplog.text.lower() - - -def test_no_eligible_targets_does_not_invoke_binary_lookup(tmp_path): - """When all targets are disabled, execute should not require soci/oras - binaries to be installed — the lookups should be skipped.""" - plugin = SociPlugin() - t = _make_target("a", enabled=False) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=False), - ), - patch( - "posit_bakery.plugins.builtin.soci.find_soci_bin", - ) as mock_find_soci, - patch( - "posit_bakery.plugins.builtin.soci.find_oras_bin", - ) as mock_find_oras, - ): - results = plugin.execute( - base_path=tmp_path, - targets=[t], - source_refs={"a": "ref-a"}, - ) - - assert len(results) == 1 - assert results[0].artifacts.get("skipped") is True - mock_find_soci.assert_not_called() - mock_find_oras.assert_not_called() - - -@pytest.fixture -def missing_tools(monkeypatch): - """Simulate a host where soci/oras are not installed anywhere.""" - real_which = util.which - monkeypatch.setattr(util, "which", lambda name: None if name in {"soci", "oras"} else real_which(name)) - for env in ("SOCI_PATH", "ORAS_PATH"): - monkeypatch.delenv(env, raising=False) - - -def test_dry_run_does_not_require_tools_installed(tmp_path, missing_tools): - """A dry-run executes nothing, so it must not abort when soci/oras are - absent from the host. Regression: ``ci publish --dry-run`` raised - BakeryToolNotFoundError because execute() resolved the binaries eagerly, - before the dry-run-aware workflow ran.""" - plugin = SociPlugin() - t = _make_target("a", enabled=True) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=True), - ), - patch("subprocess.run") as mock_run, - patch( - "posit_bakery.plugins.builtin.soci.soci.SociConvertWorkflow._read_converted_digest", - return_value="sha256:abc", - ), - ): - results = plugin.execute( - base_path=tmp_path, - targets=[t], - source_refs={"a": "ref-a"}, - dry_run=True, - ) - - mock_run.assert_not_called() - assert [r.exit_code for r in results] == [0] - - -def test_dry_run_uses_resolved_path_when_tool_present(tmp_path, monkeypatch): - """When a tool IS resolvable, the dry-run still surfaces its real path - (env var / PATH / project tools dir) rather than a bare fallback name, so - the logged commands remain accurate.""" - monkeypatch.setenv("SOCI_PATH", "/custom/soci") - monkeypatch.setenv("ORAS_PATH", "/custom/oras") - plugin = SociPlugin() - t = _make_target("a", enabled=True) - - captured = {} - - def fake_run(self, dry_run=False): - captured["soci_bin"] = self.soci_bin - captured["oras_bin"] = self.oras_bin - return SociConvertWorkflowResult(success=True, destination_ref=self.destination_ref) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=True), - ), - patch("posit_bakery.plugins.builtin.soci.soci.SociConvertWorkflow.run", fake_run), - ): - plugin.execute( - base_path=tmp_path, - targets=[t], - source_refs={"a": "ref-a"}, - dry_run=True, - ) - - assert captured["soci_bin"] == "/custom/soci" - assert captured["oras_bin"] == "/custom/oras" - - -def test_run_does_not_require_ctr(tmp_path, monkeypatch): - """Conversion never touches containerd, so a real (non-dry-run) conversion - must not require `ctr` to be installed — only the tools it actually - executes (soci, oras).""" - monkeypatch.setenv("SOCI_PATH", "/custom/soci") - monkeypatch.setenv("ORAS_PATH", "/custom/oras") - monkeypatch.delenv("CTR_PATH", raising=False) - monkeypatch.setattr(util, "which", lambda name: None) - plugin = SociPlugin() - t = _make_target("a", enabled=True) - - with ( - patch( - "posit_bakery.plugins.builtin.soci.get_soci_options_for_target", - return_value=SociOptions(enabled=True), - ), - patch("subprocess.run") as mock_run, - patch( - "posit_bakery.plugins.builtin.soci.soci.SociConvertWorkflow._read_converted_digest", - return_value="sha256:abc", - ), - ): - mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=b"", stderr=b"") - results = plugin.execute( - base_path=tmp_path, - targets=[t], - source_refs={"a": "ref-a"}, - dry_run=False, - ) - - assert [r.exit_code for r in results] == [0] From 14736dc43210570647c4db9ba0817431ebb7aaa5 Mon Sep 17 00:00:00 2001 From: "Ian H. Pittwood" Date: Thu, 11 Jun 2026 14:26:45 -0600 Subject: [PATCH 2/2] feat(imagetools): pre-flight wait for source digests before publishing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Absorb the read-after-write wait from #598: OrasWaitForSourcesWorkflow polls every per-platform source digest with `oras manifest fetch --descriptor` until all are readable (10 min timeout), naming any laggard. ImageToolsPlugin.execute runs it once up front on create-bearing flows (publish / oras merge), aborting the publish if a digest never propagates — so GHCR eventual-consistency lag becomes condition-based waiting instead of an opaque downstream #591 failure. The soci-only path skips it. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plugins/builtin/imagetools/__init__.py | 27 ++++ .../plugins/builtin/imagetools/oras.py | 102 +++++++++++- posit-bakery/test/cli/test_ci.py | 15 ++ .../builtin/imagetools/test_oras_wait.py | 149 ++++++++++++++++++ 4 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 posit-bakery/test/plugins/builtin/imagetools/test_oras_wait.py diff --git a/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py index fc3a1b5b7..0eb9baceb 100644 --- a/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/__init__.py @@ -22,6 +22,7 @@ ShellJob, resolve_max_workers, ) +from posit_bakery.plugins.builtin.imagetools import oras as oras_mod from posit_bakery.plugins.builtin.imagetools import soci as soci_mod from posit_bakery.plugins.builtin.imagetools.options import SociOptions from posit_bakery.plugins.builtin.imagetools.oras import find_oras_bin @@ -85,6 +86,32 @@ def execute( ) soci_bin = _resolve_bin(find_soci_bin, base_path, dry_run, "soci") if needs_soci else "soci" + # Pre-flight (create-bearing flows only): wait for every per-platform source digest to + # be readable before any target references it. Those manifests are pushed by digest from + # separate build runners, and registries with read-after-write behaviour (notably GHCR) + # can briefly 404 them; polling here turns propagation lag into condition-based waiting + # and names the lagging digest, rather than failing a downstream phase opaquely (#591). + if PublishPhase.CREATE in phases: + all_sources = sorted({s for t in targets for s in t.get_merge_sources()}) + if all_sources: + log.info(f"Waiting for {len(all_sources)} source digest(s) to be readable before publishing.") + wait = oras_mod.OrasWaitForSourcesWorkflow(oras_bin=oras_bin, sources=all_sources).run(dry_run=dry_run) + if not wait.success: + log.error(f"Source digests not available: {wait.error}") + return [ + ToolCallResult( + exit_code=1, + tool_name="imagetools", + target=target, + stdout="", + stderr=wait.error or "source digests not available", + artifacts={"error": wait.error}, + ) + for target in targets + ] + if wait.ready: + log.info(f"All {len(wait.ready)} source digest(s) readable after {wait.waited_seconds:.0f}s.") + shell_jobs: list[ShellJob] = [] for target in targets: workflow = PublishWorkflow( diff --git a/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py b/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py index 9f0878b25..bb7f19aac 100644 --- a/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py +++ b/posit-bakery/posit_bakery/plugins/builtin/imagetools/oras.py @@ -2,9 +2,10 @@ import itertools import logging import subprocess +import time from abc import ABC, abstractmethod from pathlib import Path -from typing import TYPE_CHECKING, Annotated, Self +from typing import TYPE_CHECKING, Annotated, Callable, Self from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -347,6 +348,105 @@ def run( return OrasIndexVerifyResult(success=False, verified=verified, error=str(e)) +class OrasSourcesReadyResult(BaseModel): + """Result of a pre-flight source-digest availability wait.""" + + success: Annotated[bool, Field(description="Whether every source digest became readable before the timeout.")] + ready: Annotated[ + list[str], Field(default_factory=list, description="Source refs confirmed readable, in resolution order.") + ] + missing: Annotated[ + list[str], Field(default_factory=list, description="Source refs still unreadable when the wait gave up.") + ] + waited_seconds: Annotated[float, Field(default=0.0, description="Wall-clock seconds spent waiting.")] + error: Annotated[str | None, Field(default=None, description="Diagnostic message on timeout.")] + + +class OrasWaitForSourcesWorkflow(BaseModel): + """Poll source digests until they are all readable from the registry. + + Per-platform manifests are pushed *by digest* from separate build runners, and registries + with read-after-write (eventual consistency) behaviour — notably GHCR — may briefly 404 + those digests when the publish runner first asks for them. This pre-flight turns "hope it + has propagated" into condition-based waiting: each source is probed with ``oras manifest + fetch --descriptor`` (a lightweight existence check) and removed from the pending set once + it resolves. The wait succeeds as soon as every source resolves, and fails (logging exactly + which digests lagged) once ``timeout`` elapses. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + oras_bin: Annotated[str, Field(description="Path to the oras binary.")] + sources: Annotated[list[str], Field(description="Source refs (registry refs, typically by-digest) to await.")] + timeout: Annotated[float, Field(default=600.0, description="Maximum seconds to wait for all sources (10 min).")] + poll_interval: Annotated[float, Field(default=5.0, description="Seconds between polling sweeps.")] + plain_http: Annotated[bool, Field(default=False)] + + def _is_available(self, ref: str) -> bool: + try: + OrasManifestFetch( + oras_bin=self.oras_bin, + reference=ref, + descriptor=True, + plain_http=self.plain_http, + ).run(dry_run=False) + return True + except BakeryToolRuntimeError: + return False + + def run( + self, + dry_run: bool = False, + *, + sleep: Callable[[float], None] = time.sleep, + now: Callable[[], float] = time.monotonic, + ) -> OrasSourcesReadyResult: + """Probe each source until all resolve or ``timeout`` elapses. + + :param dry_run: When True, report success without contacting the registry (nothing has + been pushed to wait on). + :param sleep: Sleep function, injectable for testing. + :param now: Monotonic clock, injectable for testing. + """ + unique_sources = list(dict.fromkeys(self.sources)) + if dry_run or not unique_sources: + return OrasSourcesReadyResult(success=True, ready=unique_sources) + + start = now() + ready: list[str] = [] + pending = list(unique_sources) + while True: + still_pending: list[str] = [] + for ref in pending: + if self._is_available(ref): + ready.append(ref) + else: + still_pending.append(ref) + pending = still_pending + + if not pending: + return OrasSourcesReadyResult(success=True, ready=ready, waited_seconds=now() - start) + + elapsed = now() - start + if elapsed >= self.timeout: + return OrasSourcesReadyResult( + success=False, + ready=ready, + missing=pending, + waited_seconds=elapsed, + error=( + f"{len(pending)} source digest(s) still unreadable after {elapsed:.0f}s " + f"(timeout {self.timeout:.0f}s): {', '.join(pending)}" + ), + ) + + log.info( + f"Waiting on {len(pending)} source digest(s) to become readable " + f"({elapsed:.0f}s/{self.timeout:.0f}s elapsed); retrying in {self.poll_interval:.0f}s." + ) + sleep(self.poll_interval) + + class OrasMergeWorkflowResult(BaseModel): """Result of an ORAS merge workflow execution.""" diff --git a/posit-bakery/test/cli/test_ci.py b/posit-bakery/test/cli/test_ci.py index 2f3321113..d754b4dfb 100644 --- a/posit-bakery/test/cli/test_ci.py +++ b/posit-bakery/test/cli/test_ci.py @@ -80,7 +80,22 @@ def run(self, dry_run=False, **kwargs): result.verified = self.image_target.tags.as_strings() return result + class MockOrasWaitForSourcesWorkflow: + # Stub the pre-flight source-digest wait so the (non-dry-run) merge scenario does not + # actually poll a real registry for 10 minutes. + def __init__(self, oras_bin, sources, **kwargs): + self.sources = sources + + def run(self, dry_run=False, **kwargs): + from posit_bakery.plugins.builtin.imagetools.oras import OrasSourcesReadyResult + + return OrasSourcesReadyResult(success=True, ready=self.sources) + # Patch the imports inside the publish function + mocker.patch( + "posit_bakery.plugins.builtin.imagetools.oras.OrasWaitForSourcesWorkflow", + MockOrasWaitForSourcesWorkflow, + ) mocker.patch( "posit_bakery.plugins.builtin.imagetools.oras.OrasIndexCreateWorkflow", MockOrasIndexCreateWorkflow, diff --git a/posit-bakery/test/plugins/builtin/imagetools/test_oras_wait.py b/posit-bakery/test/plugins/builtin/imagetools/test_oras_wait.py new file mode 100644 index 000000000..b149e0b19 --- /dev/null +++ b/posit-bakery/test/plugins/builtin/imagetools/test_oras_wait.py @@ -0,0 +1,149 @@ +"""OrasWaitForSourcesWorkflow: pre-flight poll that waits for per-platform source digests to +become readable before the publish pipeline references them (GHCR read-after-write lag, #591).""" + +from unittest.mock import MagicMock, patch + +import pytest + +from posit_bakery.plugins.builtin.imagetools.oras import OrasWaitForSourcesWorkflow + +pytestmark = [pytest.mark.unit] + +_PROBE = "posit_bakery.plugins.builtin.imagetools.oras.OrasWaitForSourcesWorkflow._is_available" + + +def test_dry_run_returns_success_without_probing(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["a", "b"]) + sleep = MagicMock() + with patch(_PROBE) as probe: + result = wf.run(dry_run=True, sleep=sleep) + assert result.success is True + probe.assert_not_called() + sleep.assert_not_called() + + +def test_empty_sources_returns_success(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=[]) + result = wf.run(sleep=MagicMock(), now=lambda: 0.0) + assert result.success is True + assert result.ready == [] + + +def test_all_ready_on_first_sweep_does_not_sleep(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["a", "b"]) + sleep = MagicMock() + with patch(_PROBE, return_value=True): + result = wf.run(sleep=sleep, now=lambda: 0.0) + assert result.success is True + assert result.ready == ["a", "b"] + sleep.assert_not_called() + + +def test_waits_until_all_sources_ready(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["a"], poll_interval=5.0, timeout=600.0) + probes = {"n": 0} + + def probe(self, ref): + probes["n"] += 1 + return probes["n"] >= 3 # readable on the third probe + + times = iter([0.0, 5.0, 10.0, 15.0]) + sleep = MagicMock() + with patch(_PROBE, probe): + result = wf.run(sleep=sleep, now=lambda: next(times)) + assert result.success is True + assert result.ready == ["a"] + assert sleep.call_count == 2 # slept before the 2nd and 3rd sweeps + + +def test_timeout_reports_missing_sources(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["a", "b"], poll_interval=5.0, timeout=10.0) + times = iter([0.0, 5.0, 10.0]) + sleep = MagicMock() + with patch(_PROBE, return_value=False): + result = wf.run(sleep=sleep, now=lambda: next(times)) + assert result.success is False + assert set(result.missing) == {"a", "b"} + assert result.error is not None + assert "unreadable" in result.error + + +def test_dedups_sources(): + wf = OrasWaitForSourcesWorkflow(oras_bin="oras", sources=["a", "a", "b"]) + with patch(_PROBE, return_value=True): + result = wf.run(sleep=MagicMock(), now=lambda: 0.0) + assert result.ready == ["a", "b"] + + +class TestPublishPreflightWait: + """ImageToolsPlugin.execute runs the pre-flight wait when the create phase is in scope.""" + + def _target(self, uid="img-1-0-0"): + from posit_bakery.image.image_target import ImageTarget, StringableList + + t = MagicMock(spec=ImageTarget) + t.uid = uid + t.image_name = "test-image" + t.push_sort_key = (uid,) + t.settings = MagicMock() + t.settings.temp_registry = "ghcr.io/posit-dev" + t.labels = {} + t.get_merge_sources.return_value = ["ghcr.io/posit-dev/test/tmp@sha256:amd64"] + tag = MagicMock() + tag.destination = "ghcr.io/posit-dev/test-image" + tag.suffix = "1.0.0" + tag.__str__ = lambda self: "ghcr.io/posit-dev/test-image:1.0.0" + t.tags = StringableList([tag]) + t.__str__ = lambda self: uid + return t + + def _run(self, tmp_path, wait_result, phases): + from posit_bakery.plugins.builtin.imagetools import ImageToolsPlugin + from posit_bakery.plugins.builtin.imagetools.options import SociOptions + + wait_instance = MagicMock() + wait_instance.run.return_value = wait_result + with ( + patch( + "posit_bakery.plugins.builtin.imagetools.oras.OrasWaitForSourcesWorkflow", + return_value=wait_instance, + ) as wait_cls, + patch( + "posit_bakery.plugins.builtin.imagetools.soci.get_soci_options_for_target", + return_value=SociOptions(enabled=False), + ), + ): + results = ImageToolsPlugin().execute(tmp_path, [self._target()], phases=phases, dry_run=True) + return wait_cls, wait_instance, results + + def test_wait_invoked_with_all_sources_then_publishes(self, tmp_path): + from posit_bakery.plugins.builtin.imagetools.oras import OrasSourcesReadyResult + from posit_bakery.plugins.builtin.imagetools.publish import ALL_PHASES + + wait_cls, wait_instance, results = self._run( + tmp_path, + OrasSourcesReadyResult(success=True, ready=["ghcr.io/posit-dev/test/tmp@sha256:amd64"]), + ALL_PHASES, + ) + _, kwargs = wait_cls.call_args + assert kwargs["sources"] == ["ghcr.io/posit-dev/test/tmp@sha256:amd64"] + wait_instance.run.assert_called_once() + assert [r.exit_code for r in results] == [0] + + def test_wait_timeout_aborts_before_publishing(self, tmp_path): + from posit_bakery.plugins.builtin.imagetools.oras import OrasSourcesReadyResult + from posit_bakery.plugins.builtin.imagetools.publish import ALL_PHASES + + _, _, results = self._run( + tmp_path, + OrasSourcesReadyResult(success=False, missing=["x"], error="2 source digest(s) still unreadable"), + ALL_PHASES, + ) + assert [r.exit_code for r in results] == [1] + assert "unreadable" in results[0].stderr + + def test_soci_only_phase_skips_wait(self, tmp_path): + from posit_bakery.plugins.builtin.imagetools.publish import SOCI_PHASES + + wait_cls, wait_instance, results = self._run(tmp_path, None, SOCI_PHASES) + wait_cls.assert_not_called()