Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions cc-catalog-svc/app/auth_dockerconfigjson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""
Resolve HTTP Basic credentials from a Docker config.json file.

Why this exists
---------------
The catalog talks to OCI registries via httpx (``app/sources/oci.py``)
and to git remotes via ``git`` subprocesses (``app/services/git_mirror.py``).
Neither honors the Docker keychain natively the way ``crane`` does on
the destination side. Operators running on Kubernetes already have a
``kubernetes.io/dockerconfigjson`` Secret for kubelet image pulls, and
they reasonably expect the catalog to reuse that one credential for
its upstream tag-list / git-clone calls instead of maintaining a
parallel Opaque Secret.

This module provides a small, host-aware lookup against a Docker
config.json file. Both ``SourceAuth`` and ``GitAuth`` accept a new
``dockerconfigjson_env`` field (env var name holding the file path),
and resolve credentials through here at request time.

Format reference
----------------
Standard ``$HOME/.docker/config.json`` schema:

    {
      "auths": {
        "artifactory.example.com": {
          "auth": "<base64 of user:password>"
        },
        "ghcr.io": {
          "username": "u",
          "password": "p"
        }
      }
    }

Both encodings are accepted; ``auth`` wins when both are present
(matches Docker CLI behavior).
"""

from __future__ import annotations

import base64
import json
import logging
import os
from typing import Optional

logger = logging.getLogger(__name__)


def resolve_basic_pair(file_path: str, target_host: str) -> Optional[tuple[str, str]]:
    """Return ``(username, password)`` for ``target_host`` or ``None``.

    Resolution:

    1. Open ``file_path`` as UTF-8 and JSON-parse it. Any I/O, decoding
       or parse error logs a warning and returns ``None`` so the caller
       can fall back to anonymous instead of crashing the poll. The same
       applies when the top-level JSON value is not an object.
    2. Look up ``auths[target_host]``. **Exact match only** for now;
       wildcard / fallback ``*`` entries are not implemented because
       K8s ``dockerconfigjson`` Secrets in practice carry one host
       per Secret (one Secret per upstream registry).
    3. Within the entry, prefer the base64 ``auth`` field. Fall back
       to explicit ``username`` + ``password`` only when ``auth`` is
       absent or empty. Both encodings ship in the wild.
    4. A malformed entry (``auth`` not base64 / not UTF-8, ``auth``
       decoded without a ``:`` separator, or an empty user or password)
       is logged and treated as a miss so the caller falls back rather
       than blowing up. A present-but-malformed ``auth`` does *not*
       fall through to ``username``/``password``.

    Args:
        file_path: Path to a Docker ``config.json``. Empty means "not
            configured" and yields ``None`` without logging.
        target_host: Key to look up in the file's ``auths`` map.

    Returns:
        ``(username, password)`` on a hit, otherwise ``None``. The
        password is everything after the *first* ``:`` of the decoded
        ``auth`` value, so passwords containing ``:`` survive intact.

    Caller convention
    -----------------
    A ``None`` return is normal and is **not** an error indicator — it
    just means "no creds for this host." This function never raises for
    bad file content; every failure mode is reported through logging.
    """
    if not file_path:
        return None
    if not os.path.exists(file_path):
        logger.warning(
            "dockerconfigjson: configured path %s does not exist; treating as no creds",
            file_path,
        )
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the process locale.
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised while reading undecodable bytes.
        logger.warning(
            "dockerconfigjson: could not read/parse %s (%s); treating as no creds",
            file_path,
            exc,
        )
        return None

    if not isinstance(data, dict):
        # Valid JSON but not an object (array, string, number, null):
        # there is no 'auths' map to consult, and calling .get() on it
        # would raise instead of falling back.
        logger.warning(
            "dockerconfigjson: %s top-level JSON value is not an object; "
            "treating as no creds",
            file_path,
        )
        return None

    auths = data.get("auths") or {}
    if not isinstance(auths, dict):
        logger.warning(
            "dockerconfigjson: %s has non-dict 'auths' key; treating as no creds",
            file_path,
        )
        return None

    entry = auths.get(target_host)
    if not isinstance(entry, dict):
        # Normal miss — log at debug so noisy on-purpose configs (one
        # source per host) don't spam logs.
        logger.debug(
            "dockerconfigjson: %s has no entry for host %s",
            file_path,
            target_host,
        )
        return None

    encoded = entry.get("auth")
    if isinstance(encoded, str) and encoded:
        try:
            # validate=True rejects non-alphabet characters instead of
            # silently discarding them. binascii.Error is a ValueError.
            decoded = base64.b64decode(encoded, validate=True).decode("utf-8")
        except ValueError as exc:
            logger.warning(
                "dockerconfigjson: %s host %s has malformed 'auth' field (%s); "
                "treating as no creds",
                file_path,
                target_host,
                exc,
            )
            return None
        if ":" not in decoded:
            logger.warning(
                "dockerconfigjson: %s host %s 'auth' decoded without ':' separator; "
                "treating as no creds",
                file_path,
                target_host,
            )
            return None
        # Split on the first ':' only; the password may itself contain ':'.
        user, _, pwd = decoded.partition(":")
        if user and pwd:
            return user, pwd
        logger.warning(
            "dockerconfigjson: %s host %s 'auth' decoded with empty user/password; "
            "treating as no creds",
            file_path,
            target_host,
        )
        return None

    # No usable 'auth' field: fall back to the split username/password form.
    user = entry.get("username")
    pwd = entry.get("password")
    if isinstance(user, str) and isinstance(pwd, str) and user and pwd:
        return user, pwd

    logger.debug(
        "dockerconfigjson: %s host %s entry has neither valid 'auth' nor "
        "username/password pair; treating as no creds",
        file_path,
        target_host,
    )
    return None


def resolve_basic_pair_from_env(env_var_name: str, target_host: str) -> Optional[tuple[str, str]]:
    """Look up credentials via an env var that names the config file.

    The environment variable called ``env_var_name`` is expected to
    hold the path of a Docker config.json file. The config YAML stores
    only the variable's **name**, never its value, in line with the
    other ``*_env`` fields, so the catalog config stays safe to commit
    and the deployment decides where the Secret is projected.

    Returns whatever ``resolve_basic_pair`` returns for that path and
    ``target_host``. Returns ``None`` when ``env_var_name`` is empty or
    the variable is unset / empty (treated the same as "no creds
    configured").
    """
    if env_var_name:
        config_path = os.environ.get(env_var_name, "")
        if config_path:
            return resolve_basic_pair(config_path, target_host)
        # Variable named in config but absent from the environment.
        logger.debug(
            "dockerconfigjson: env var %s is unset/empty; no creds",
            env_var_name,
        )
    return None
69 changes: 63 additions & 6 deletions cc-catalog-svc/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,18 @@ def _slug_nonempty(cls, v: str) -> str:
class SourceAuth(BaseModel):
"""Auth for an OCI-style source.

Set EITHER ``token_env`` OR ``user_env``+``pass_env`` — not both.
Set AT MOST ONE of (leave all unset for anonymous access):

* ``token_env`` — Bearer token (JFrog access token, GHCR PAT)
* ``user_env`` + ``pass_env`` — HTTP Basic
* ``dockerconfigjson_env`` — env var holding the path to a Docker
config.json file (handy for reusing
an existing ``kubernetes.io/dockerconfigjson``
pull-secret verbatim; the catalog looks
up the source's ``image_registry`` host
in the file's ``auths`` map and uses
HTTP Basic)

Unset = anonymous; the OCI source falls back to the public-GHCR
bearer-realm dance so this still works against public registries.

Expand All @@ -135,14 +146,31 @@ class SourceAuth(BaseModel):
description="Env var name holding the HTTP Basic password "
"(or an access token used as a password).",
)
dockerconfigjson_env: Optional[str] = Field(
None,
description=(
"Env var name holding the path to a Docker config.json file. "
"Catalog looks up the source's image_registry host in the "
"file's `auths` map and uses the entry's credentials as "
"HTTP Basic. Lets operators reuse an existing "
"kubernetes.io/dockerconfigjson pull-secret without "
"maintaining a parallel Opaque Secret."
),
)

@model_validator(mode="after")
def _exactly_one_or_none(self) -> "SourceAuth":
token = bool(self.token_env)
basic_any = bool(self.user_env) or bool(self.pass_env)
basic_both = bool(self.user_env) and bool(self.pass_env)
if token and basic_any:
raise ValueError("source.auth: set EITHER token_env OR user_env+pass_env, not both")
docker = bool(self.dockerconfigjson_env)

modes = int(token) + int(basic_any) + int(docker)
if modes > 1:
raise ValueError(
"source.auth: set AT MOST ONE of token_env, "
"user_env+pass_env, or dockerconfigjson_env"
)
if basic_any and not basic_both:
raise ValueError("source.auth: user_env and pass_env must be set together")
return self
Expand Down Expand Up @@ -285,7 +313,22 @@ class SchedulerConfig(BaseModel):


class GitAuth(BaseModel):
"""Auth for fetching upstream git repos (GitHub PAT, etc.)."""
"""Auth for fetching upstream git repos (GitHub PAT, etc.).

Set AT MOST ONE of (leave all unset for unauthenticated access):

* ``token_env`` — Bearer/PAT
* ``user_env`` + ``pass_env`` — HTTP Basic
* ``dockerconfigjson_env`` — env var holding the path to a Docker
config.json file; catalog looks up
each upstream ``git_url`` host in the
file's ``auths`` map and uses HTTP
Basic. Useful when the same auth
Secret covers both an OCI registry
and a colocated git host
(e.g. Artifactory's Git LFS / VCS
repos behind the same SSO).
"""

token_env: Optional[str] = Field(
None,
Expand All @@ -299,14 +342,28 @@ class GitAuth(BaseModel):
None,
description="Env var holding HTTP Basic password or token-as-password.",
)
dockerconfigjson_env: Optional[str] = Field(
None,
description=(
"Env var name holding the path to a Docker config.json file. "
"Catalog looks up each upstream git_url host in the file's "
"`auths` map and uses the entry's credentials as HTTP Basic."
),
)

@model_validator(mode="after")
def _exactly_one_or_none(self) -> "GitAuth":
token = bool(self.token_env)
basic_any = bool(self.user_env) or bool(self.pass_env)
basic_both = bool(self.user_env) and bool(self.pass_env)
if token and basic_any:
raise ValueError("git.auth: set EITHER token_env OR user_env+pass_env, not both")
docker = bool(self.dockerconfigjson_env)

modes = int(token) + int(basic_any) + int(docker)
if modes > 1:
raise ValueError(
"git.auth: set AT MOST ONE of token_env, "
"user_env+pass_env, or dockerconfigjson_env"
)
if basic_any and not basic_both:
raise ValueError("git.auth: user_env and pass_env must be set together")
return self
Expand Down
Loading
Loading