diff --git a/cc-catalog-svc/app/auth_dockerconfigjson.py b/cc-catalog-svc/app/auth_dockerconfigjson.py new file mode 100644 index 00000000000..17dc4b475ed --- /dev/null +++ b/cc-catalog-svc/app/auth_dockerconfigjson.py @@ -0,0 +1,184 @@ +""" +Resolve HTTP Basic credentials from a Docker config.json file. + +Why this exists +--------------- +The catalog talks to OCI registries via httpx (``app/sources/oci.py``) +and to git remotes via ``git`` subprocesses (``app/services/git_mirror.py``). +Neither honors the Docker keychain natively the way ``crane`` does on +the destination side. Operators running on Kubernetes already have a +``kubernetes.io/dockerconfigjson`` Secret for kubelet image pulls, and +they reasonably expect the catalog to reuse that one credential for +its upstream tag-list / git-clone calls instead of maintaining a +parallel Opaque Secret. + +This module provides a small, host-aware lookup against a Docker +config.json file. Both ``SourceAuth`` and ``GitAuth`` accept a new +``dockerconfigjson_env`` field (env var name holding the file path), +and resolve credentials through here at request time. + +Format reference +---------------- +Standard ``$HOME/.docker/config.json`` schema: + + { + "auths": { + "artifactory.example.com": { + "auth": "" + }, + "ghcr.io": { + "username": "u", + "password": "p" + } + } + } + +Both encodings are accepted; ``auth`` wins when both are present +(matches Docker CLI behavior). +""" + +from __future__ import annotations + +import base64 +import json +import logging +import os +from typing import Optional + +logger = logging.getLogger(__name__) + + +def resolve_basic_pair(file_path: str, target_host: str) -> Optional[tuple[str, str]]: + """Return ``(username, password)`` for ``target_host`` or ``None``. + + Resolution: + + 1. Open and JSON-parse ``file_path``. Any I/O / parse error logs a + warning and returns ``None`` so the caller can fall back to + anonymous instead of crashing the poll. + 2. Look up ``auths[target_host]``. **Exact match only** for now; + wildcard / fallback ``*`` entries are not implemented because + K8s ``dockerconfigjson`` Secrets in practice carry one host + per Secret (one Secret per upstream registry). + 3. Within the entry, prefer the base64 ``auth`` field. Fall back + to explicit ``username`` + ``password``. Both encodings ship in + the wild — Docker CLI writes ``auth``, ``docker-credential-*`` + helpers and some operators write the split form. + 4. A malformed entry (``auth`` not base64, or ``auth`` decoded + without a single ``:`` separator) is logged and treated as a + miss so the caller falls back rather than blowing up. + + Caller convention + ----------------- + A ``None`` return is normal and is **not** an error indicator — it + just means "no creds for this host." Callers should warn (once) if + they were configured to use this auth mode and got a miss for a + host they expected to find. + """ + if not file_path: + return None + if not os.path.exists(file_path): + logger.warning( + "dockerconfigjson: configured path %s does not exist; treating as no creds", + file_path, + ) + return None + + try: + with open(file_path) as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "dockerconfigjson: could not read/parse %s (%s); treating as no creds", + file_path, + exc, + ) + return None + + auths = (data or {}).get("auths") or {} + if not isinstance(auths, dict): + logger.warning( + "dockerconfigjson: %s has non-dict 'auths' key; treating as no creds", + file_path, + ) + return None + + entry = auths.get(target_host) + if not isinstance(entry, dict): + # Normal miss — log at debug so noisy on-purpose configs (one + # source per host) don't spam logs. + logger.debug( + "dockerconfigjson: %s has no entry for host %s", + file_path, + target_host, + ) + return None + + encoded = entry.get("auth") + if isinstance(encoded, str) and encoded: + try: + decoded = base64.b64decode(encoded, validate=True).decode("utf-8") + except (ValueError, UnicodeDecodeError) as exc: + logger.warning( + "dockerconfigjson: %s host %s has malformed 'auth' field (%s); " + "treating as no creds", + file_path, + target_host, + exc, + ) + return None + if ":" not in decoded: + logger.warning( + "dockerconfigjson: %s host %s 'auth' decoded without ':' separator; " + "treating as no creds", + file_path, + target_host, + ) + return None + user, _, pwd = decoded.partition(":") + if user and pwd: + return user, pwd + logger.warning( + "dockerconfigjson: %s host %s 'auth' decoded with empty user/password; " + "treating as no creds", + file_path, + target_host, + ) + return None + + user = entry.get("username") + pwd = entry.get("password") + if isinstance(user, str) and isinstance(pwd, str) and user and pwd: + return user, pwd + + logger.debug( + "dockerconfigjson: %s host %s entry has neither valid 'auth' nor " + "username/password pair; treating as no creds", + file_path, + target_host, + ) + return None + + +def resolve_basic_pair_from_env(env_var_name: str, target_host: str) -> Optional[tuple[str, str]]: + """Convenience wrapper: env var holds the file path. + + Matches the schema convention used elsewhere (``token_env``, + ``user_env``, ``pass_env``, ``docker_config_env`` on the JFrog + destination): the YAML stores the env-var **name**, not the value, + so the catalog config is safe to commit and the deployment + controls where the Secret is projected. + + Returns ``None`` when the env var is unset / empty (treated the + same as "no creds configured"). + """ + if not env_var_name: + return None + file_path = os.environ.get(env_var_name, "") + if not file_path: + logger.debug( + "dockerconfigjson: env var %s is unset/empty; no creds", + env_var_name, + ) + return None + return resolve_basic_pair(file_path, target_host) diff --git a/cc-catalog-svc/app/config.py b/cc-catalog-svc/app/config.py index 000da11ad5f..7b1df2bbcb7 100644 --- a/cc-catalog-svc/app/config.py +++ b/cc-catalog-svc/app/config.py @@ -112,7 +112,18 @@ def _slug_nonempty(cls, v: str) -> str: class SourceAuth(BaseModel): """Auth for an OCI-style source. - Set EITHER ``token_env`` OR ``user_env``+``pass_env`` — not both. + Set EXACTLY ONE OR NONE of: + + * ``token_env`` — Bearer token (JFrog access token, GHCR PAT) + * ``user_env`` + ``pass_env`` — HTTP Basic + * ``dockerconfigjson_env`` — env var holding the path to a Docker + config.json file (handy for reusing + an existing ``kubernetes.io/dockerconfigjson`` + pull-secret verbatim; the catalog looks + up the source's ``image_registry`` host + in the file's ``auths`` map and uses + HTTP Basic) + Unset = anonymous; the OCI source falls back to the public-GHCR bearer-realm dance so this still works against public registries. @@ -135,14 +146,31 @@ class SourceAuth(BaseModel): description="Env var name holding the HTTP Basic password " "(or an access token used as a password).", ) + dockerconfigjson_env: Optional[str] = Field( + None, + description=( + "Env var name holding the path to a Docker config.json file. " + "Catalog looks up the source's image_registry host in the " + "file's `auths` map and uses the entry's credentials as " + "HTTP Basic. Lets operators reuse an existing " + "kubernetes.io/dockerconfigjson pull-secret without " + "maintaining a parallel Opaque Secret." + ), + ) @model_validator(mode="after") def _exactly_one_or_none(self) -> "SourceAuth": token = bool(self.token_env) basic_any = bool(self.user_env) or bool(self.pass_env) basic_both = bool(self.user_env) and bool(self.pass_env) - if token and basic_any: - raise ValueError("source.auth: set EITHER token_env OR user_env+pass_env, not both") + docker = bool(self.dockerconfigjson_env) + + modes = int(token) + int(basic_any) + int(docker) + if modes > 1: + raise ValueError( + "source.auth: set AT MOST ONE of token_env, " + "user_env+pass_env, or dockerconfigjson_env" + ) if basic_any and not basic_both: raise ValueError("source.auth: user_env and pass_env must be set together") return self @@ -285,7 +313,22 @@ class SchedulerConfig(BaseModel): class GitAuth(BaseModel): - """Auth for fetching upstream git repos (GitHub PAT, etc.).""" + """Auth for fetching upstream git repos (GitHub PAT, etc.). + + Set EXACTLY ONE OR NONE of: + + * ``token_env`` — Bearer/PAT + * ``user_env`` + ``pass_env`` — HTTP Basic + * ``dockerconfigjson_env`` — env var holding the path to a Docker + config.json file; catalog looks up + each upstream ``git_url`` host in the + file's ``auths`` map and uses HTTP + Basic. Useful when the same auth + Secret covers both an OCI registry + and a colocated git host + (e.g. Artifactory's Git LFS / VCS + repos behind the same SSO). + """ token_env: Optional[str] = Field( None, @@ -299,14 +342,28 @@ class GitAuth(BaseModel): None, description="Env var holding HTTP Basic password or token-as-password.", ) + dockerconfigjson_env: Optional[str] = Field( + None, + description=( + "Env var name holding the path to a Docker config.json file. " + "Catalog looks up each upstream git_url host in the file's " + "`auths` map and uses the entry's credentials as HTTP Basic." + ), + ) @model_validator(mode="after") def _exactly_one_or_none(self) -> "GitAuth": token = bool(self.token_env) basic_any = bool(self.user_env) or bool(self.pass_env) basic_both = bool(self.user_env) and bool(self.pass_env) - if token and basic_any: - raise ValueError("git.auth: set EITHER token_env OR user_env+pass_env, not both") + docker = bool(self.dockerconfigjson_env) + + modes = int(token) + int(basic_any) + int(docker) + if modes > 1: + raise ValueError( + "git.auth: set AT MOST ONE of token_env, " + "user_env+pass_env, or dockerconfigjson_env" + ) if basic_any and not basic_both: raise ValueError("git.auth: user_env and pass_env must be set together") return self diff --git a/cc-catalog-svc/app/services/git_mirror.py b/cc-catalog-svc/app/services/git_mirror.py index 378ed81158d..2fccbde463d 100644 --- a/cc-catalog-svc/app/services/git_mirror.py +++ b/cc-catalog-svc/app/services/git_mirror.py @@ -31,7 +31,9 @@ from typing import Optional from sqlalchemy import select +from urllib.parse import urlparse +from app.auth_dockerconfigjson import resolve_basic_pair_from_env from app.config import AppConfig, GitAuth, GitServiceConfig, get_config from app.db import session_scope from app.git_http import ( @@ -293,6 +295,7 @@ def sync_one_repo(slug: str, upstream_url: str, git_cfg: GitServiceConfig) -> st ["clone", "--mirror", upstream_url, dest], git_cfg.auth, timeout=git_cfg.clone_timeout_seconds, + upstream_url=upstream_url, ) else: # If the configured upstream_url changed since the last clone @@ -303,8 +306,11 @@ def sync_one_repo(slug: str, upstream_url: str, git_cfg: GitServiceConfig) -> st ["-C", dest, "remote", "update", "--prune"], git_cfg.auth, timeout=git_cfg.fetch_timeout_seconds, + upstream_url=upstream_url, ) + # rev-parse is a local op; no upstream_url needed (no network call, + # so auth is moot and the host lookup would be a no-op). head = _run_git( ["--git-dir", dest, "rev-parse", "HEAD"], git_cfg.auth, @@ -335,9 +341,7 @@ def populate_baked_head_commits(cfg: Optional[AppConfig] = None) -> int: touched = 0 with session_scope() as db: - rows_by_slug = { - r.slug: r for r in db.execute(select(CodeCollection)).scalars().all() - } + rows_by_slug = {r.slug: r for r in db.execute(select(CodeCollection)).scalars().all()} for slug, _ in repos_to_sync(app_cfg): if slug not in on_disk: continue @@ -360,7 +364,16 @@ def populate_baked_head_commits(cfg: Optional[AppConfig] = None) -> int: return touched -def _git_auth_args(auth: GitAuth) -> list[str]: +def _git_auth_args(auth: GitAuth, upstream_url: Optional[str] = None) -> list[str]: + """Render ``-c http.extraHeader=...`` args for ``git`` to use. + + ``upstream_url`` is only consulted when ``auth.dockerconfigjson_env`` + is set — the catalog needs the host to look up in the file's + ``auths`` map. For ``token_env`` / ``user_env``+``pass_env`` the URL + is irrelevant (env-var values apply to every git call equally). + Callers that don't have a URL (e.g. local ``git rev-parse``) can + omit it; those operations don't talk to a remote so auth is moot. + """ args: list[str] = [] token = os.environ.get(auth.token_env, "") if auth.token_env else "" if token: @@ -377,6 +390,37 @@ def _git_auth_args(auth: GitAuth) -> list[str]: password = os.environ.get(auth.pass_env, "") if auth.pass_env else "" if user and password: args.extend(["-c", f"http.extraHeader=Authorization: Basic {_basic_auth(user, password)}"]) + return args + + if auth.dockerconfigjson_env and upstream_url: + try: + host = urlparse(upstream_url).hostname + except ValueError: + host = None + if host: + pair = resolve_basic_pair_from_env(auth.dockerconfigjson_env, host) + if pair is not None: + dc_user, dc_pwd = pair + args.extend( + [ + "-c", + f"http.extraHeader=Authorization: Basic {_basic_auth(dc_user, dc_pwd)}", + ] + ) + return args + logger.warning( + "git mirror: dockerconfigjson_env=%s yielded no creds for host %s " + "(upstream_url=%s); falling back to anonymous", + auth.dockerconfigjson_env, + host, + upstream_url, + ) + else: + logger.warning( + "git mirror: dockerconfigjson_env set but upstream_url=%r has no parseable host; " + "falling back to anonymous", + upstream_url, + ) return args @@ -392,8 +436,9 @@ def _run_git( auth: GitAuth, *, timeout: int, + upstream_url: Optional[str] = None, ) -> subprocess.CompletedProcess[str]: - cmd = ["git", *_git_auth_args(auth), *args] + cmd = ["git", *_git_auth_args(auth, upstream_url=upstream_url), *args] # Never log the full command — auth args may embed secrets indirectly. logger.debug("running git %s", " ".join(args)) proc = subprocess.run( diff --git a/cc-catalog-svc/app/sources/oci.py b/cc-catalog-svc/app/sources/oci.py index 2cefe01010b..abac1ae00a0 100644 --- a/cc-catalog-svc/app/sources/oci.py +++ b/cc-catalog-svc/app/sources/oci.py @@ -35,6 +35,7 @@ import httpx +from app.auth_dockerconfigjson import resolve_basic_pair_from_env from app.sources.base import DiscoveredImageRef, ImageSource logger = logging.getLogger(__name__) @@ -84,9 +85,7 @@ def discover_refs(self, cc: dict) -> list[DiscoveredImageRef]: # Single httpx.Client so the bearer dance, manifest GETs, and # config-blob fetches reuse the same TCP connection / token. with httpx.Client(timeout=self.timeout, follow_redirects=True) as client: - tags = self._list_tags( - client, host, repo, auth_header=auth_header, auth_mode=auth_mode - ) + tags = self._list_tags(client, host, repo, auth_header=auth_header, auth_mode=auth_mode) discovered: list[DiscoveredImageRef] = [] for tag in tags: @@ -147,14 +146,15 @@ def _split_registry_url(url: str) -> tuple[str, str]: host, _, repo = url.partition("/") return host, repo - @staticmethod - def _resolve_auth_header(cc: dict) -> tuple[Optional[str], str]: + @classmethod + def _resolve_auth_header(cls, cc: dict) -> tuple[Optional[str], str]: """Build the Authorization header for outbound requests. Returns ``(header_value, mode)`` where ``mode`` is one of: * ``"bearer"`` — explicit Bearer token in token_env * ``"basic"`` — explicit Basic from user_env + pass_env + OR from a dockerconfigjson lookup * ``"anonymous"`` — no creds; caller will fall back to the bearer-realm dance for public-GHCR-style reads @@ -163,6 +163,17 @@ def _resolve_auth_header(cc: dict) -> tuple[Optional[str], str]: separately so the caller knows whether a subsequent 401 is a real auth failure (explicit) or a signal to start the dance (anonymous). + + dockerconfigjson note + --------------------- + When ``dockerconfigjson_env`` is set, the catalog opens the + Docker config.json file pointed at by that env var and looks + up the source's ``image_registry`` host in the file's ``auths`` + map. A hit returns Basic (treated identically to explicit + ``user_env``+``pass_env`` from this point on — including the + bearer-realm dance behavior). A miss falls back to anonymous + — so a Secret that doesn't cover the target host degrades + gracefully against public registries instead of hard-failing. """ auth = cc.get("_source_auth") or {} slug = cc.get("slug") @@ -193,6 +204,40 @@ def _resolve_auth_header(cc: dict) -> tuple[Optional[str], str]: pass_env, ) + dockerconfigjson_env = auth.get("dockerconfigjson_env") + if dockerconfigjson_env: + registry_url = cc.get("image_registry") + if not registry_url: + logger.warning( + "oci source: %s requested dockerconfigjson_env=%s but cc has no " + "image_registry to look up; falling back to anonymous", + slug, + dockerconfigjson_env, + ) + return None, "anonymous" + try: + host, _ = cls._split_registry_url(registry_url) + except ValueError: + logger.warning( + "oci source: %s image_registry=%r could not be parsed; " + "dockerconfigjson lookup skipped, falling back to anonymous", + slug, + registry_url, + ) + return None, "anonymous" + pair = resolve_basic_pair_from_env(dockerconfigjson_env, host) + if pair is not None: + user, pwd = pair + creds = base64.b64encode(f"{user}:{pwd}".encode("utf-8")).decode("ascii") + return f"Basic {creds}", "basic" + logger.warning( + "oci source: %s dockerconfigjson_env=%s yielded no creds for host %s; " + "falling back to anonymous", + slug, + dockerconfigjson_env, + host, + ) + return None, "anonymous" def _list_tags( @@ -327,20 +372,13 @@ def _enrich_built_at_for_tiebreaks( by_ref: dict[str, list[DiscoveredImageRef]] = {} for r in refs: by_ref.setdefault(r.ref, []).append(r) - ambiguous_tags = { - r.image_tag - for group in by_ref.values() - if len(group) > 1 - for r in group - } + ambiguous_tags = {r.image_tag for group in by_ref.values() if len(group) > 1 for r in group} if not ambiguous_tags: return refs built_at_by_tag: dict[str, datetime] = {} for tag in ambiguous_tags: - built_at = self._fetch_built_at_for_tag( - client, host, repo, tag, auth_header, auth_mode - ) + built_at = self._fetch_built_at_for_tag(client, host, repo, tag, auth_header, auth_mode) if built_at is not None: built_at_by_tag[tag] = built_at @@ -348,9 +386,11 @@ def _enrich_built_at_for_tiebreaks( return refs return [ - dataclasses.replace(r, built_at=built_at_by_tag[r.image_tag]) - if r.image_tag in built_at_by_tag - else r + ( + dataclasses.replace(r, built_at=built_at_by_tag[r.image_tag]) + if r.image_tag in built_at_by_tag + else r + ) for r in refs ] diff --git a/cc-catalog-svc/config-examples/config.jfrog-remote-dockerconfigjson.yaml b/cc-catalog-svc/config-examples/config.jfrog-remote-dockerconfigjson.yaml new file mode 100644 index 00000000000..a8668b53cac --- /dev/null +++ b/cc-catalog-svc/config-examples/config.jfrog-remote-dockerconfigjson.yaml @@ -0,0 +1,123 @@ +# cc-catalog-svc — JFrog Remote (pull-only) reusing a dockerconfigjson Secret. +# +# Same shape as `config.jfrog-remote.yaml` (sources behind a JFrog Remote +# Docker repo, no destinations), but auth is resolved by looking up the +# source host inside a Docker `config.json` file rather than reading +# separate `JFROG_USER` / `JFROG_PASS` env vars. +# +# Why this exists +# --------------- +# Most clusters already have a `kubernetes.io/dockerconfigjson` Secret +# for kubelet image pulls. Maintaining a second Opaque Secret with the +# same user+password just to feed cc-catalog-svc is redundant and an +# easy place for the two to drift. When `dockerconfigjson_env` is set, +# the catalog opens the Docker config.json pointed at by that env var, +# looks up the source's `image_registry` host in the file's `auths` +# map, and uses HTTP Basic on the resulting credentials — identical to +# `user_env`+`pass_env` from that point on, including the bearer-realm +# dance against JFrog/Artifactory. +# +# Deployment expectations +# ----------------------- +# 1. Mount your existing `kubernetes.io/dockerconfigjson` Secret as +# a file in the cc-catalog-svc pod, e.g. at +# `/var/run/secrets/dockerconfigjson/.dockerconfigjson`. +# 2. Set the env var named below (e.g. `JCR_DOCKERCONFIGJSON`) to +# that path. The catalog reads the env var to find the file. +# 3. The Secret must contain an `auths` entry whose key exactly +# matches the source's image_registry host (e.g. +# `artifactory.example.com`). A miss soft-fails to anonymous — +# use this only when you've confirmed the entry is present. +# +# This file is safe to commit: nothing in it is secret. Only env var +# NAMES and paths appear; the actual config.json is mounted at runtime. + +storage: + # Leave commented to use CC_CATALOG_DB_URL env var (recommended). + # url: postgresql+psycopg2://catalog:secret@catalog-db:5432/catalog + +catalog_api: + prefix: /api/v1/catalog + +scheduler: + catalog_poll_minutes: 5 + +# ------------------------------------------------------------------------ +# Source — JFrog Remote, queried via OCI Distribution v2. Auth is the +# only difference vs config.jfrog-remote.yaml: +# +# auth: +# dockerconfigjson_env: JCR_DOCKERCONFIGJSON +# +# instead of +# +# auth: +# user_env: JFROG_USER +# pass_env: JFROG_PASS +# ------------------------------------------------------------------------ +sources: + - name: jfrog-remote-runwhen + type: oci + auth: + # Env var holding the PATH to a Docker config.json file. The + # deployment mounts the K8s pull-secret here: + # + # volumeMounts: + # - name: docker-config + # mountPath: /var/run/secrets/dockerconfigjson + # readOnly: true + # volumes: + # - name: docker-config + # secret: + # secretName: jcr-pull-secret # type: kubernetes.io/dockerconfigjson + # + # env: + # - name: JCR_DOCKERCONFIGJSON + # value: /var/run/secrets/dockerconfigjson/.dockerconfigjson + dockerconfigjson_env: JCR_DOCKERCONFIGJSON + + codecollections: + - slug: rw-cli-codecollection + name: RunWhen CLI CodeCollection + git_url: https://github.com/runwhen-contrib/rw-cli-codecollection + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/rw-cli-codecollection + + - slug: rw-public-codecollection + name: RunWhen Public CodeCollection + git_url: https://github.com/runwhen-contrib/rw-public-codecollection + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/rw-public-codecollection + + - slug: rw-generic-codecollection + name: RunWhen Generic CodeCollection + git_url: https://github.com/runwhen-contrib/rw-generic-codecollection + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/rw-generic-codecollection + + - slug: rw-workspace-utils + name: RunWhen Workspace Utilities CodeCollection + git_url: https://github.com/runwhen-contrib/rw-workspace-utils + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/rw-workspace-utils + + - slug: aws-c7n-codecollection + name: AWS CloudCustodian CodeCollection + git_url: https://github.com/runwhen-contrib/aws-c7n-codecollection + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/aws-c7n-codecollection + + - slug: azure-c7n-codecollection + name: Azure CloudCustodian CodeCollection + git_url: https://github.com/runwhen-contrib/azure-c7n-codecollection + image_registry: artifactory.example.com/docker-ghcr/runwhen-contrib/azure-c7n-codecollection + +# ------------------------------------------------------------------------ +# Optional: same dockerconfigjson can feed the git mirror too, when +# `git.enabled: true` AND `runtime_sync: true` (e.g. an internal Git +# host colocated with the JFrog Docker registry behind the same SSO). +# For air-gap with baked mirrors, leave runtime_sync=false and skip +# this block entirely. +# ------------------------------------------------------------------------ +# git: +# enabled: true +# data_dir: /opt/cc-catalog/git +# mount_path: /git +# runtime_sync: true +# auth: +# dockerconfigjson_env: JCR_DOCKERCONFIGJSON diff --git a/cc-catalog-svc/tests/test_auth.py b/cc-catalog-svc/tests/test_auth.py index 49c2b84a589..c964c62dba3 100644 --- a/cc-catalog-svc/tests/test_auth.py +++ b/cc-catalog-svc/tests/test_auth.py @@ -48,7 +48,9 @@ def test_basic_pair_ok(self): def test_token_plus_basic_rejected(self): with pytest.raises(pydantic.ValidationError) as exc: SourceAuth(token_env="T", user_env="U", pass_env="P") - assert "EITHER token_env OR user_env+pass_env" in str(exc.value) + # Validator now covers three modes (token / basic / dockerconfigjson); + # message updated to "AT MOST ONE" to match. + assert "AT MOST ONE" in str(exc.value) def test_token_plus_half_basic_rejected(self): with pytest.raises(pydantic.ValidationError): @@ -445,3 +447,396 @@ def test_http_auth_returns_none_when_nothing_set(self): d = JFrogDestination() assert d._http_auth({"name": "x", "auth": {}}) is None assert d._http_auth({"name": "x"}) is None + + +# --------------------------------------------------------------------------- +# Shared dockerconfigjson lookup helper +# --------------------------------------------------------------------------- +class TestDockerconfigjsonResolveBasicPair: + """Direct tests of app.auth_dockerconfigjson.resolve_basic_pair. + + The OCI source + git mirror service both funnel through this helper, + so per-edge-case coverage lives here rather than being duplicated in + each call site's test class. + """ + + def _write_config(self, tmp_path, payload): + from json import dumps + + p = tmp_path / ".dockerconfigjson" + p.write_text(dumps(payload)) + return str(p) + + def test_base64_auth_field_wins(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + encoded = base64.b64encode(b"alice:s3cr3t!").decode() + path = self._write_config( + tmp_path, + { + "auths": { + "artifactory.example.com": {"auth": encoded}, + } + }, + ) + assert resolve_basic_pair(path, "artifactory.example.com") == ("alice", "s3cr3t!") + + def test_explicit_username_password_fallback(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + path = self._write_config( + tmp_path, + { + "auths": { + "ghcr.io": {"username": "u", "password": "p"}, + } + }, + ) + assert resolve_basic_pair(path, "ghcr.io") == ("u", "p") + + def test_auth_field_preferred_over_username_password(self, tmp_path): + """Docker CLI writes ``auth``; if both encodings appear, base64 + is canonical. Match that behavior.""" + from app.auth_dockerconfigjson import resolve_basic_pair + + encoded = base64.b64encode(b"from-auth:pwd1").decode() + path = self._write_config( + tmp_path, + { + "auths": { + "example.com": { + "auth": encoded, + "username": "from-fields", + "password": "pwd2", + }, + } + }, + ) + assert resolve_basic_pair(path, "example.com") == ("from-auth", "pwd1") + + def test_missing_host_returns_none(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + path = self._write_config( + tmp_path, + {"auths": {"artifactory.example.com": {"username": "u", "password": "p"}}}, + ) + assert resolve_basic_pair(path, "ghcr.io") is None + + def test_missing_file_returns_none(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + assert resolve_basic_pair(str(tmp_path / "nope.json"), "x.com") is None + + def test_empty_path_returns_none(self): + from app.auth_dockerconfigjson import resolve_basic_pair + + assert resolve_basic_pair("", "x.com") is None + + def test_malformed_json_returns_none(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + p = tmp_path / ".dockerconfigjson" + p.write_text("not-valid-json{") + assert resolve_basic_pair(str(p), "x.com") is None + + def test_malformed_base64_auth_returns_none(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + path = self._write_config( + tmp_path, + {"auths": {"x.com": {"auth": "***-not-base64-***"}}}, + ) + assert resolve_basic_pair(path, "x.com") is None + + def test_auth_without_colon_returns_none(self, tmp_path): + """``auth`` must decode to ``user:password``; otherwise it's not + a Basic credential and we shouldn't pretend it is.""" + from app.auth_dockerconfigjson import resolve_basic_pair + + encoded = base64.b64encode(b"no-colon-here").decode() + path = self._write_config(tmp_path, {"auths": {"x.com": {"auth": encoded}}}) + assert resolve_basic_pair(path, "x.com") is None + + def test_empty_user_or_pwd_returns_none(self, tmp_path): + from app.auth_dockerconfigjson import resolve_basic_pair + + encoded_empty_user = base64.b64encode(b":pwd").decode() + path = self._write_config(tmp_path, {"auths": {"x.com": {"auth": encoded_empty_user}}}) + assert resolve_basic_pair(path, "x.com") is None + + def test_resolve_from_env_reads_path_from_env_var(self, tmp_path, monkeypatch): + from app.auth_dockerconfigjson import resolve_basic_pair_from_env + + path = self._write_config( + tmp_path, {"auths": {"x.com": {"username": "u", "password": "p"}}} + ) + monkeypatch.setenv("MY_DC_PATH", path) + assert resolve_basic_pair_from_env("MY_DC_PATH", "x.com") == ("u", "p") + + def test_resolve_from_env_unset_returns_none(self, monkeypatch): + from app.auth_dockerconfigjson import resolve_basic_pair_from_env + + monkeypatch.delenv("MISSING_DC", raising=False) + assert resolve_basic_pair_from_env("MISSING_DC", "x.com") is None + + def test_resolve_from_env_empty_name_returns_none(self): + from app.auth_dockerconfigjson import resolve_basic_pair_from_env + + assert resolve_basic_pair_from_env("", "x.com") is None + + +# --------------------------------------------------------------------------- +# SourceAuth + GitAuth dockerconfigjson_env validators +# --------------------------------------------------------------------------- +class TestDockerconfigjsonValidators: + def test_source_auth_dockerconfigjson_alone_ok(self): + a = SourceAuth(dockerconfigjson_env="JCR_DOCKERCONFIGJSON") + assert a.dockerconfigjson_env == "JCR_DOCKERCONFIGJSON" + + def test_source_auth_dockerconfigjson_plus_token_rejected(self): + with pytest.raises(pydantic.ValidationError) as exc: + SourceAuth(token_env="T", dockerconfigjson_env="DC") + assert "AT MOST ONE" in str(exc.value) + + def test_source_auth_dockerconfigjson_plus_basic_rejected(self): + with pytest.raises(pydantic.ValidationError) as exc: + SourceAuth(user_env="U", pass_env="P", dockerconfigjson_env="DC") + assert "AT MOST ONE" in str(exc.value) + + def test_git_auth_dockerconfigjson_alone_ok(self): + from app.config import GitAuth + + a = GitAuth(dockerconfigjson_env="GIT_DC") + assert a.dockerconfigjson_env == "GIT_DC" + + def test_git_auth_dockerconfigjson_plus_token_rejected(self): + from app.config import GitAuth + + with pytest.raises(pydantic.ValidationError): + GitAuth(token_env="T", dockerconfigjson_env="DC") + + +# --------------------------------------------------------------------------- +# OCISource._resolve_auth_header — dockerconfigjson mode +# --------------------------------------------------------------------------- +class TestOCISourceDockerconfigjsonAuth: + """OCI-source-level integration of the lookup helper. + + Coverage notes: + + - Happy path: env var points at a file, host present, returns Basic. + - Mode is "basic" — the bearer-realm dance behavior is shared with + the explicit user_env+pass_env code path. Once resolved, the + catalog can't tell the two apart, which is intentional. + - Soft-fallback on every "can't resolve" case (missing image_registry, + malformed image_registry, env unset, file missing, host not in + file). The catalog should degrade to anonymous so a partial config + against public sources still works, not hard-fail the whole poll. + """ + + def _write_config(self, tmp_path, payload): + from json import dumps + + p = tmp_path / ".dockerconfigjson" + p.write_text(dumps(payload)) + return str(p) + + def test_resolves_basic_when_host_present(self, tmp_path, monkeypatch): + encoded = base64.b64encode(b"alice:s3cr3t!").decode() + path = self._write_config( + tmp_path, + {"auths": {"artifactory.example.com": {"auth": encoded}}}, + ) + monkeypatch.setenv("JCR_DC", path) + header, mode = OCISource._resolve_auth_header( + { + "slug": "x", + "image_registry": "artifactory.example.com/docker-ghcr/rw/foo", + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + ) + assert mode == "basic" + assert header == "Basic " + encoded + + def test_host_miss_falls_back_to_anonymous(self, tmp_path, monkeypatch): + path = self._write_config( + tmp_path, + {"auths": {"some-other-host.example.com": {"username": "u", "password": "p"}}}, + ) + monkeypatch.setenv("JCR_DC", path) + header, mode = OCISource._resolve_auth_header( + { + "slug": "x", + "image_registry": "artifactory.example.com/docker-ghcr/rw/foo", + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + ) + assert mode == "anonymous" + assert header is None + + def test_env_var_unset_falls_back_to_anonymous(self, monkeypatch): + monkeypatch.delenv("JCR_DC", raising=False) + header, mode = OCISource._resolve_auth_header( + { + "slug": "x", + "image_registry": "artifactory.example.com/docker-ghcr/rw/foo", + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + ) + assert mode == "anonymous" + + def test_missing_image_registry_falls_back_to_anonymous(self, tmp_path, monkeypatch): + path = self._write_config( + tmp_path, + {"auths": {"x.com": {"username": "u", "password": "p"}}}, + ) + monkeypatch.setenv("JCR_DC", path) + header, mode = OCISource._resolve_auth_header( + { + "slug": "x", + # No image_registry — we can't derive a host to look up. + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + ) + assert mode == "anonymous" + + def test_malformed_image_registry_falls_back_to_anonymous(self, tmp_path, monkeypatch): + path = self._write_config( + tmp_path, + {"auths": {"x.com": {"username": "u", "password": "p"}}}, + ) + monkeypatch.setenv("JCR_DC", path) + header, mode = OCISource._resolve_auth_header( + { + "slug": "x", + "image_registry": "no-slash-host-only", # _split_registry_url raises + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + ) + assert mode == "anonymous" + + @respx.mock + def test_end_to_end_dockerconfigjson_sends_basic_header(self, tmp_path, monkeypatch): + """Full discover_refs path uses the resolved Basic header on the + first /tags/list request (same as explicit user_env+pass_env). + """ + encoded = base64.b64encode(b"alice:s3cr3t!").decode() + path = self._write_config( + tmp_path, + {"auths": {"artifactory.example.com": {"auth": encoded}}}, + ) + monkeypatch.setenv("JCR_DC", path) + src = OCISource() + cc = { + "slug": "rw-cli-codecollection", + "image_registry": ( + "artifactory.example.com/docker-ghcr/runwhen-contrib/rw-cli-codecollection" + ), + "_source_auth": {"dockerconfigjson_env": "JCR_DC"}, + } + route = respx.get( + "https://artifactory.example.com/v2/docker-ghcr/runwhen-contrib/" + "rw-cli-codecollection/tags/list" + ).mock(return_value=httpx.Response(200, json={"tags": ["main-c1a2b3d-e4f5a6b"]})) + + refs = src.discover_refs(cc) + + assert len(refs) == 1 + assert route.call_count == 1 + assert route.calls[0].request.headers["authorization"] == "Basic " + encoded + + +# --------------------------------------------------------------------------- +# git_mirror._git_auth_args — dockerconfigjson mode +# --------------------------------------------------------------------------- +class TestGitAuthArgsDockerconfigjson: + """``_git_auth_args`` needs an upstream_url to derive the host for + the dockerconfigjson lookup. The other auth modes (token_env, + user_env+pass_env) ignore upstream_url — covered here for regression + safety. + """ + + def _write_config(self, tmp_path, payload): + from json import dumps + + p = tmp_path / ".dockerconfigjson" + p.write_text(dumps(payload)) + return str(p) + + def test_resolves_basic_for_known_host(self, tmp_path, monkeypatch): + from app.config import GitAuth + from app.services.git_mirror import _git_auth_args + + encoded = base64.b64encode(b"alice:s3cr3t!").decode() + path = self._write_config(tmp_path, {"auths": {"git.example.com": {"auth": encoded}}}) + monkeypatch.setenv("GIT_DC", path) + args = _git_auth_args( + GitAuth(dockerconfigjson_env="GIT_DC"), + upstream_url="https://git.example.com/runwhen/x.git", + ) + assert args == [ + "-c", + f"http.extraHeader=Authorization: Basic {encoded}", + ] + + def test_unknown_host_falls_back_to_no_auth(self, tmp_path, monkeypatch): + from app.config import GitAuth + from app.services.git_mirror import _git_auth_args + + path = self._write_config( + tmp_path, {"auths": {"git.example.com": {"username": "u", "password": "p"}}} + ) + monkeypatch.setenv("GIT_DC", path) + args = _git_auth_args( + GitAuth(dockerconfigjson_env="GIT_DC"), + upstream_url="https://github.com/runwhen/x.git", + ) + assert args == [] + + def test_missing_upstream_url_falls_back_to_no_auth(self, tmp_path, monkeypatch): + """rev-parse and other local ops call without upstream_url; that + path must not crash and must not block local-only operations.""" + from app.config import GitAuth + from app.services.git_mirror import _git_auth_args + + path = self._write_config( + tmp_path, {"auths": {"git.example.com": {"username": "u", "password": "p"}}} + ) + monkeypatch.setenv("GIT_DC", path) + args = _git_auth_args(GitAuth(dockerconfigjson_env="GIT_DC")) + assert args == [] + + def test_token_env_still_works_when_upstream_url_passed(self, monkeypatch): + """Regression guard: existing token_env path must ignore the new + upstream_url parameter.""" + from app.config import GitAuth + from app.services.git_mirror import _git_auth_args + + monkeypatch.setenv("GH_TOKEN", "ghp_abc") + args = _git_auth_args( + GitAuth(token_env="GH_TOKEN"), + upstream_url="https://github.com/runwhen/x.git", + ) + # token path encodes as Basic x-access-token: + expected = base64.b64encode(b"x-access-token:ghp_abc").decode() + assert args == [ + "-c", + f"http.extraHeader=Authorization: Basic {expected}", + ] + + def test_basic_pair_still_works_when_upstream_url_passed(self, monkeypatch): + from app.config import GitAuth + from app.services.git_mirror import _git_auth_args + + monkeypatch.setenv("GIT_USER", "alice") + monkeypatch.setenv("GIT_PASS", "s3cr3t!") + args = _git_auth_args( + GitAuth(user_env="GIT_USER", pass_env="GIT_PASS"), + upstream_url="https://github.com/runwhen/x.git", + ) + expected = base64.b64encode(b"alice:s3cr3t!").decode() + assert args == [ + "-c", + f"http.extraHeader=Authorization: Basic {expected}", + ] diff --git a/cc-catalog-svc/tests/test_git_mirror.py b/cc-catalog-svc/tests/test_git_mirror.py index 6a0c1928def..743cc139f72 100644 --- a/cc-catalog-svc/tests/test_git_mirror.py +++ b/cc-catalog-svc/tests/test_git_mirror.py @@ -160,9 +160,7 @@ def test_run_git_sync_allow_runtime_sync_overrides_air_gap( "app.services.git_mirror.sync_one_repo", lambda slug, url, git_cfg: "deadbeef", ) - summary = run_git_sync( - git_enabled_config, force=True, allow_runtime_sync=True - ) + summary = run_git_sync(git_enabled_config, force=True, allow_runtime_sync=True) assert summary["repos_updated"] == 1 assert "skipped" not in summary @@ -170,7 +168,7 @@ def test_run_git_sync_allow_runtime_sync_overrides_air_gap( def test_run_git_sync_clone_and_update(git_enabled_config, monkeypatch): calls: list[list[str]] = [] - def fake_run(cmd, auth, *, timeout): + def fake_run(cmd, auth, *, timeout, upstream_url=None): calls.append(cmd) if cmd[:2] == ["clone", "--mirror"]: dest = cmd[3] @@ -186,6 +184,7 @@ def fake_run(cmd, auth, *, timeout): raise AssertionError(f"unexpected git args: {cmd}") monkeypatch.setattr("app.services.git_mirror._run_git", fake_run) + # ``sync_one_repo`` calls subprocess.run directly to peek at the # existing origin URL before deciding to fetch; stub it to a no-op # that reports the current upstream so no set-url is issued. @@ -273,9 +272,7 @@ def fake_run(**kwargs): # --------------------------------------------------------------------------- # Bugbot regressions # --------------------------------------------------------------------------- -def test_sync_one_repo_recovers_from_incomplete_bare_clone( - git_enabled_config, monkeypatch -): +def test_sync_one_repo_recovers_from_incomplete_bare_clone(git_enabled_config, monkeypatch): """A bare dir without HEAD must be removed and re-cloned.""" from app.services import git_mirror @@ -285,7 +282,7 @@ def test_sync_one_repo_recovers_from_incomplete_bare_clone( cloned: list[list[str]] = [] - def fake_run(cmd, auth, *, timeout): + def fake_run(cmd, auth, *, timeout, upstream_url=None): cloned.append(cmd) if cmd[:2] == ["clone", "--mirror"]: clone_dest = cmd[3] @@ -308,9 +305,7 @@ def fake_run(cmd, auth, *, timeout): assert cloned[0][:2] == ["clone", "--mirror"] -def test_sync_one_repo_resets_origin_when_upstream_changes( - git_enabled_config, monkeypatch -): +def test_sync_one_repo_resets_origin_when_upstream_changes(git_enabled_config, monkeypatch): """Existing mirror whose origin no longer matches config gets reset.""" from app.services import git_mirror @@ -325,14 +320,16 @@ def test_sync_one_repo_resets_origin_when_upstream_changes( def fake_run(cmd, check=False, capture_output=False, text=False, timeout=None, env=None): issued.append(cmd) if cmd[:4] == ["git", "--git-dir", dest, "remote"] and cmd[4] == "get-url": - return subprocess.CompletedProcess(cmd, 0, stdout="https://old.example.com/x\n", stderr="") + return subprocess.CompletedProcess( + cmd, 0, stdout="https://old.example.com/x\n", stderr="" + ) return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") monkeypatch.setattr(git_mirror.subprocess, "run", fake_run) monkeypatch.setattr( git_mirror, "_run_git", - lambda cmd, auth, *, timeout: subprocess.CompletedProcess( + lambda cmd, auth, *, timeout, upstream_url=None: subprocess.CompletedProcess( cmd, 0, stdout="abc1234\n", stderr="" ), ) @@ -392,16 +389,12 @@ def test_populate_baked_head_commits_backfills_from_disk( with open(os.path.join(bare, "HEAD"), "w") as f: f.write("ref: refs/heads/main\n") - monkeypatch.setattr( - git_mirror, "_head_commit_from_disk", lambda path: "feedface" - ) + monkeypatch.setattr(git_mirror, "_head_commit_from_disk", lambda path: "feedface") touched = git_mirror.populate_baked_head_commits(git_enabled_config) assert touched == 1 statuses = git_mirror.list_repo_status(git_enabled_config) - assert any( - s.slug == "demo-cc" and s.head_commit == "feedface" for s in statuses - ) + assert any(s.slug == "demo-cc" and s.head_commit == "feedface" for s in statuses) def test_init_db_adds_missing_git_columns(tmp_path, monkeypatch):