diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 66005da..a3cf279 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -292,6 +292,7 @@ Foundation project id. - **JWT Library**: `PyJWT` with `cryptography` support - Handles token parsing, validation, and signature verification - Includes `PyJWKClient` for JWKS key fetching +- **CLI**: `click` - **Testing**: `pytest` - **Linting**: `ruff` - **Python Project Management**: `uv` @@ -310,6 +311,8 @@ Foundation project id. - Pydantic request models: `PiaUploadPayload`, `DependencyTrackUploadPayload` - `oidc.py`: OIDC token validation and signature verification using PyJWT - `dependencytrack.py`: DependencyTrack API upload client for SBOMs +- `cli.py`: Management CLI for registering Workloads and DependencyTrackProjects + (see section 5.5) ### 5.3 Error Handling @@ -340,6 +343,43 @@ Metrics to track: - DependencyTrack upload time - Total API response time +### 5.5 CLI Tool + +PIA includes a management CLI (`pia`) for registering workloads and +DependencyTrack projects. It is installed as a console entry point via +`pyproject.toml` and connects to the DB using the `PIA_DATABASE_URL` +environment variable, e.g. `postgresql://user:secret@1.2.3.4:5432/pia`. + +#### `pia add-workload <ef_project_id> <url>` + +Registers a new workload for an Eclipse Foundation project. The workload type +is determined by the URL: + +- **GitHub** (URL host is exactly `github.com`): Parses `owner` and `repo` from the + URL path. Queries the GitHub API (`GET https://api.github.com/users/{owner}`) + to fetch the numeric `repo_owner_id`. Creates a `GitHubWorkload`. +- **Jenkins** (URL starts with `https://ci.eclipse.org`): Uses the URL as + `issuer`. Creates a `JenkinsWorkload`. + +If the `EclipseFoundationProject` row for `ef_project_id` does not exist, it is +created automatically. + +#### `pia add-dt-project <ef_project_id> <dt_url> <parent_name> <project_name>` + +Registers a DependencyTrack project for an Eclipse Foundation project. Looks up +the project in DependencyTrack by name hierarchy: + +1. 
Find exactly one root project by `parent_name`. +2. Find exactly one child of that root project by `project_name`. +3. Store `DependencyTrackProject(name=project_name, parent_uuid=<child_uuid>)`. + +The `dt_url` argument is the DependencyTrack base URL (e.g. +`https://sbom.eclipse.org`). Authentication uses the `PIA_DEPENDENCY_TRACK_API_KEY` +environment variable. The API key requires at least the `VIEW_PORTFOLIO` permission. + +If the `EclipseFoundationProject` row for `ef_project_id` does not exist, it is +created automatically. + ## 6. Security Considerations ### 6.1 Token Validation diff --git a/pia/cli.py b/pia/cli.py new file mode 100644 index 0000000..edfe7da --- /dev/null +++ b/pia/cli.py @@ -0,0 +1,234 @@ +"""Management CLI for registering workloads and DependencyTrack projects. + +Subcommands +----------- +- add-workload: Register a CI/CD workload that is allowed to upload SBOMs for an + Eclipse Foundation project. + +- add-dt-project: Register a DependencyTrack project as the upload target for a + given Eclipse Foundation project. 
+ +Usage Examples +-------------- +Register GitHub Actions: + + PIA_DATABASE_URL=postgresql://user:secret@localhost:5432/pia \ + uv run pia add-workload eclipse-foo \ + https://github.com/eclipse-foo/repo + +Register Jenkins Instance: + + PIA_DATABASE_URL=postgresql://user:secret@localhost:5432/pia \ + uv run pia add-workload eclipse-bar \ + https://ci.eclipse.org/eclipse-bar/oidc + +Register DependencyTrack Project: + + PIA_DATABASE_URL=postgresql://user:secret@localhost:5432/pia \ + PIA_DEPENDENCY_TRACK_API_KEY= \ + uv run pia add-dt-project eclipse-baz \ + https://sbom.eclipse.org "Eclipse Baz" baz-server + +""" + +import logging +import os +from typing import Any +from urllib.parse import urlparse + +import click +import requests +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from .models import ( + JENKINS_ISSUER_PREFIX, + DependencyTrackProject, + EclipseFoundationProject, + GitHubWorkload, + JenkinsWorkload, + Workload, +) + +logger = logging.getLogger(__name__) + + +@click.group() +@click.option("-v", "--verbose", is_flag=True, help="Enable debug logging.") +def cli(verbose: bool) -> None: + """PIA management CLI.""" + logging.basicConfig( + level=logging.DEBUG if verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + ) + # Fail early if the DB URL is missing — both subcommands need it, and we + # don't want to discover this after the GitHub / DependencyTrack lookups. 
+ if not os.environ.get("PIA_DATABASE_URL"): + raise click.ClickException("PIA_DATABASE_URL is not set") + + +def _make_session() -> Session: + engine = create_engine(os.environ["PIA_DATABASE_URL"]) + return sessionmaker(bind=engine)() + + +def _get_dt_api_key() -> str: + key = os.environ.get("PIA_DEPENDENCY_TRACK_API_KEY") + if not key: + raise click.ClickException("PIA_DEPENDENCY_TRACK_API_KEY is not set") + return key + + +def _create_ef_project_if_needed(session: Session, ef_project_id: str) -> None: + if session.get(EclipseFoundationProject, ef_project_id) is None: + logger.info(f"Creating EclipseFoundationProject {ef_project_id!r}") + session.add(EclipseFoundationProject(id=ef_project_id)) + else: + logger.info(f"Using existing EclipseFoundationProject {ef_project_id!r}") + + +def _fetch_github_owner_id(owner: str) -> str: + url = f"https://api.github.com/users/{owner}" + logger.info(f"Fetching GitHub owner id from {url}") + response = requests.get(url, headers={"Accept": "application/vnd.github+json"}) + response.raise_for_status() + owner_id = str(response.json()["id"]) + logger.info(f"GitHub owner {owner!r} has id {owner_id}") + return owner_id + + +def _dt_find_root_project_by_name( + dt_url: str, name: str, api_key: str +) -> dict[str, Any]: + """Look up a root DependencyTrack project by name, asserting exactly one match.""" + url = f"{dt_url.rstrip('/')}/api/v1/project" + logger.info(f"Querying DependencyTrack root projects at {url} for name={name!r}") + response = requests.get( + url, + params={"name": name, "onlyRoot": "true"}, + headers={"X-Api-Key": api_key, "Accept": "application/json"}, + ) + response.raise_for_status() + projects = response.json() + if len(projects) != 1: + raise click.ClickException( + f"Expected exactly one root DependencyTrack project named {name!r}, " + f"found {len(projects)}" + ) + return projects[0] + + +@cli.command("add-workload") +@click.argument("ef_project_id") +@click.argument("url") +@click.option( + "--dry-run", + 
is_flag=True, + help="Look up data and prepare the row, but do not commit.", +) +def add_workload(ef_project_id: str, url: str, dry_run: bool) -> None: + """Register a GitHub or Jenkins workload for an Eclipse Foundation project. + + URL type is determined by its value: github.com URLs create a GitHubWorkload, + URLs starting with the Jenkins issuer prefix create a JenkinsWorkload with the + URL as issuer. Any other URL is rejected. + """ + parsed = urlparse(url) + if parsed.netloc == "github.com": + path_parts = parsed.path.strip("/").split("/") + if len(path_parts) != 2 or not all(path_parts): + raise click.ClickException(f"GitHub URL must include owner/repo: {url}") + owner, repo = path_parts[0], path_parts[1] + owner_id = _fetch_github_owner_id(owner) + workload: Workload = GitHubWorkload( + ef_project_id=ef_project_id, + repo_owner=owner, + repo_name=repo, + repo_owner_id=owner_id, + ) + logger.info( + f"Prepared GitHubWorkload(ef_project_id={ef_project_id!r}, " + f"repo_owner={owner!r}, repo_name={repo!r}, repo_owner_id={owner_id})" + ) + elif url.startswith(JENKINS_ISSUER_PREFIX): + workload = JenkinsWorkload(ef_project_id=ef_project_id, issuer=url) + logger.info( + f"Prepared JenkinsWorkload(ef_project_id={ef_project_id!r}, issuer={url!r})" + ) + else: + raise click.ClickException( + f"URL must be a GitHub repo URL or start with {JENKINS_ISSUER_PREFIX!r}: " + f"{url}" + ) + + with _make_session() as session: + _create_ef_project_if_needed(session, ef_project_id) + session.add(workload) + if dry_run: + logger.info("Dry-run: rolling back transaction") + session.rollback() + else: + session.commit() + logger.info("Committed workload") + + +@cli.command("add-dt-project") +@click.argument("ef_project_id") +@click.argument("dt_url") +@click.argument("parent_name") +@click.argument("project_name") +@click.option( + "--dry-run", + is_flag=True, + help="Look up data and prepare the row, but do not commit.", +) +def add_dt_project( + ef_project_id: str, + dt_url: 
str, + parent_name: str, + project_name: str, + dry_run: bool, +) -> None: + """Register a DependencyTrack project for an Eclipse Foundation project. + + Fetches the root project matching PARENT_NAME (asserting exactly one), then + finds PROJECT_NAME among its children (asserting exactly one), and stores + that child's UUID. + """ + api_key = _get_dt_api_key() + + parent = _dt_find_root_project_by_name(dt_url, parent_name, api_key) + logger.info(f"Resolved parent {parent_name!r} -> uuid={parent['uuid']}") + + children = [c for c in parent.get("children", []) if c.get("name") == project_name] + if len(children) != 1: + raise click.ClickException( + f"Expected exactly one child named {project_name!r} under {parent_name!r}, " + f"found {len(children)}" + ) + child_uuid = children[0]["uuid"] + logger.info(f"Resolved child {project_name!r} -> uuid={child_uuid}") + + dt_project = DependencyTrackProject( + ef_project_id=ef_project_id, + name=project_name, + parent_uuid=child_uuid, + ) + logger.info( + f"Prepared DependencyTrackProject(ef_project_id={ef_project_id!r}, " + f"name={project_name!r}, parent_uuid={child_uuid})" + ) + + with _make_session() as session: + _create_ef_project_if_needed(session, ef_project_id) + session.add(dt_project) + if dry_run: + logger.info("Dry-run: rolling back transaction") + session.rollback() + else: + session.commit() + logger.info("Committed DependencyTrack project") + + +if __name__ == "__main__": + cli() diff --git a/pyproject.toml b/pyproject.toml index 8b7b476..d5bcb5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,12 @@ dependencies = [ "sqlalchemy>=2.0.0", "psycopg2-binary>=2.9.0", "alembic>=1.15.0", + "click>=8.1.0", ] +[project.scripts] +pia = "pia.cli:cli" + [dependency-groups] dev = [ "pytest>=9.0.3", diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..39e62db --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,266 @@ +"""Tests for the management CLI.""" + +from unittest.mock 
import MagicMock + +import pytest +from click.testing import CliRunner + +from pia import cli as cli_module +from pia.models import ( + DependencyTrackProject, + EclipseFoundationProject, + GitHubWorkload, + JenkinsWorkload, +) + + +def _make_response(json_data): + r = MagicMock() + r.json.return_value = json_data + r.raise_for_status.return_value = None + return r + + +@pytest.fixture +def runner(): + return CliRunner() + + +@pytest.fixture(autouse=True) +def patch_session(session_factory, monkeypatch): + """Make CLI use the test session_factory instead of reading PIA_DATABASE_URL. + + PIA_DATABASE_URL still has to be set so the cli group's early-exit check + passes; its value is irrelevant since _make_session is patched out. + """ + monkeypatch.setenv("PIA_DATABASE_URL", "sqlite:///:memory:") + monkeypatch.setattr(cli_module, "_make_session", session_factory) + + +@pytest.fixture +def dt_api_key(monkeypatch): + monkeypatch.setenv("PIA_DEPENDENCY_TRACK_API_KEY", "test-key") + + +def test_add_github_workload(runner, session_factory, monkeypatch): + monkeypatch.setattr( + cli_module.requests, "get", lambda *a, **kw: _make_response({"id": 42}) + ) + + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-foo", "https://github.com/eclipse-foo/repo"], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + workloads = s.query(GitHubWorkload).all() + assert len(workloads) == 1 + w = workloads[0] + assert w.ef_project_id == "eclipse-foo" + assert w.repo_owner == "eclipse-foo" + assert w.repo_name == "repo" + assert w.repo_owner_id == "42" + assert s.query(EclipseFoundationProject).count() == 1 + + +def test_add_github_workload_dry_run(runner, session_factory, monkeypatch): + monkeypatch.setattr( + cli_module.requests, "get", lambda *a, **kw: _make_response({"id": 42}) + ) + + result = runner.invoke( + cli_module.cli, + [ + "add-workload", + "eclipse-foo", + "https://github.com/eclipse-foo/repo", + "--dry-run", + ], + ) + 
assert result.exit_code == 0, result.output + + with session_factory() as s: + assert s.query(GitHubWorkload).count() == 0 + assert s.query(EclipseFoundationProject).count() == 0 + + +def test_add_jenkins_workload(runner, session_factory): + issuer = "https://ci.eclipse.org/eclipse-bar/oidc" + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-bar", issuer], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + workloads = s.query(JenkinsWorkload).all() + assert len(workloads) == 1 + assert workloads[0].issuer == issuer + assert workloads[0].ef_project_id == "eclipse-bar" + + +def test_add_jenkins_workload_dry_run(runner, session_factory): + issuer = "https://ci.eclipse.org/eclipse-bar/oidc" + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-bar", issuer, "--dry-run"], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + assert s.query(JenkinsWorkload).count() == 0 + + +def test_add_workload_reuses_existing_ef_project(runner, session_factory, monkeypatch): + with session_factory() as s: + s.add(EclipseFoundationProject(id="eclipse-foo")) + s.commit() + + monkeypatch.setattr( + cli_module.requests, "get", lambda *a, **kw: _make_response({"id": 42}) + ) + + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-foo", "https://github.com/eclipse-foo/repo"], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + assert s.query(EclipseFoundationProject).count() == 1 + assert s.query(GitHubWorkload).count() == 1 + + +def test_add_workload_invalid_github_url(runner): + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-foo", "https://github.com/"], + ) + assert result.exit_code != 0 + assert "owner/repo" in result.output + + +def test_missing_database_url_fails_early(runner, monkeypatch): + """Without PIA_DATABASE_URL the cli group exits before any subcommand runs.""" + monkeypatch.delenv("PIA_DATABASE_URL", 
raising=False) + + def fail(*a, **kw): + raise AssertionError("network must not be touched when DB URL is missing") + + monkeypatch.setattr(cli_module.requests, "get", fail) + + result = runner.invoke( + cli_module.cli, + ["add-workload", "eclipse-foo", "https://github.com/eclipse-foo/repo"], + ) + assert result.exit_code != 0 + assert "PIA_DATABASE_URL is not set" in result.output + + +def test_add_dt_project(runner, session_factory, monkeypatch, dt_api_key): + monkeypatch.setattr( + cli_module.requests, + "get", + lambda *a, **kw: _make_response( + [ + { + "name": "Eclipse Foo", + "uuid": "parent-uuid", + "children": [{"name": "foo-server", "uuid": "child-uuid"}], + } + ] + ), + ) + + result = runner.invoke( + cli_module.cli, + [ + "add-dt-project", + "eclipse-foo", + "https://dt.example.com", + "Eclipse Foo", + "foo-server", + ], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + projects = s.query(DependencyTrackProject).all() + assert len(projects) == 1 + assert projects[0].name == "foo-server" + assert projects[0].parent_uuid == "child-uuid" + assert projects[0].ef_project_id == "eclipse-foo" + + +def test_add_dt_project_dry_run(runner, session_factory, monkeypatch, dt_api_key): + monkeypatch.setattr( + cli_module.requests, + "get", + lambda *a, **kw: _make_response( + [ + { + "name": "Eclipse Foo", + "uuid": "parent-uuid", + "children": [{"name": "foo-server", "uuid": "child-uuid"}], + } + ] + ), + ) + + result = runner.invoke( + cli_module.cli, + [ + "add-dt-project", + "eclipse-foo", + "https://dt.example.com", + "Eclipse Foo", + "foo-server", + "--dry-run", + ], + ) + assert result.exit_code == 0, result.output + + with session_factory() as s: + assert s.query(DependencyTrackProject).count() == 0 + assert s.query(EclipseFoundationProject).count() == 0 + + +def test_add_dt_project_no_match(runner, monkeypatch, dt_api_key): + monkeypatch.setattr(cli_module.requests, "get", lambda *a, **kw: _make_response([])) + + result = 
runner.invoke( + cli_module.cli, + [ + "add-dt-project", + "eclipse-foo", + "https://dt.example.com", + "Eclipse Foo", + "foo-server", + ], + ) + assert result.exit_code != 0 + assert "Expected exactly one" in result.output + + +def test_add_dt_project_child_not_found(runner, monkeypatch, dt_api_key): + monkeypatch.setattr( + cli_module.requests, + "get", + lambda *a, **kw: _make_response( + [{"name": "Eclipse Foo", "uuid": "parent-uuid", "children": []}] + ), + ) + + result = runner.invoke( + cli_module.cli, + [ + "add-dt-project", + "eclipse-foo", + "https://dt.example.com", + "Eclipse Foo", + "foo-server", + ], + ) + assert result.exit_code != 0 + assert "Expected exactly one child" in result.output diff --git a/uv.lock b/uv.lock index a1b74b6..96f92e5 100644 --- a/uv.lock +++ b/uv.lock @@ -542,6 +542,7 @@ version = "0.2.1" source = { editable = "." } dependencies = [ { name = "alembic" }, + { name = "click" }, { name = "fastapi" }, { name = "psycopg2-binary" }, { name = "pydantic" }, @@ -565,6 +566,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "alembic", specifier = ">=1.15.0" }, + { name = "click", specifier = ">=8.1.0" }, { name = "fastapi", specifier = ">=0.115.0" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pydantic", specifier = ">=2.10.0" },