From a8a373cbbb4d3ecd5c4a529bd73601bf54f2e1c3 Mon Sep 17 00:00:00 2001 From: soustruh Date: Wed, 3 Jun 2026 00:57:04 +0200 Subject: [PATCH 1/2] feat(auth): add `kbagent project login` using browser OAuth with PKCE Browser-based project authorization, an alternative to manual token setup. Follows the production MCP-server pattern. - oauth.py: PKCE S256, loopback callback, code exchange, refresh rotation, Storage-token minting via Bearer. - ProjectConfig gains an OAuthCredentials block; only the refresh token is persisted (0600 config.json). - BaseService.resolve_projects() renews expired minted tokens for CLI, MCP subprocess, and serve. - ErrorCode.OAUTH_ERROR maps to exit 3; project.login is admin. - Docs and plugin files synced; keboola-expert.md compressed to stay under its 62KB budget. Real stacks need a public OAuth client registered in Connection; see the PR description for the command. --- .claude-plugin/marketplace.json | 2 +- CLAUDE.md | 4 + plugins/kbagent/.claude-plugin/plugin.json | 2 +- plugins/kbagent/agents/keboola-expert.md | 33 +- plugins/kbagent/skills/kbagent/SKILL.md | 5 +- .../kbagent/references/commands-reference.md | 1 + .../skills/kbagent/references/gotchas.md | 21 + pyproject.toml | 2 +- src/keboola_agent_cli/changelog.py | 24 + src/keboola_agent_cli/cli.py | 3 + src/keboola_agent_cli/commands/_helpers.py | 4 +- src/keboola_agent_cli/commands/context.py | 10 + src/keboola_agent_cli/commands/project.py | 5 + .../commands/project_login.py | 109 +++ src/keboola_agent_cli/config_store.py | 7 +- src/keboola_agent_cli/constants.py | 26 + src/keboola_agent_cli/errors.py | 1 + src/keboola_agent_cli/models.py | 39 + src/keboola_agent_cli/oauth.py | 593 +++++++++++++ src/keboola_agent_cli/permissions.py | 1 + src/keboola_agent_cli/services/base.py | 41 +- .../services/oauth_login_service.py | 254 ++++++ tests/test_e2e.py | 49 ++ tests/test_oauth.py | 826 ++++++++++++++++++ uv.lock | 2 +- 25 files changed, 2028 insertions(+), 36 deletions(-) create mode 100644 src/keboola_agent_cli/commands/project_login.py create mode 100644 src/keboola_agent_cli/oauth.py create mode 100644 src/keboola_agent_cli/services/oauth_login_service.py create mode 100644 tests/test_oauth.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index dbaeb1f6..534bb654 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ "plugins": [ { "name": "kbagent", - "version": "0.53.0", + "version": "0.54.0", "source": "./plugins/kbagent", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "category": "development" diff --git a/CLAUDE.md b/CLAUDE.md index 6b5c228e..20f4f2e7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -297,6 +297,10 @@ plugins/kbagent/ # Headless / token-only (0.50.0+): export KBAGENT_PROJECT_FROM_ENV=1 + KBC_TOKEN + KBC_STORAGE_API_URL to synthesize an in-memory `__env__` project (no `project add`, no config.json on disk; token never persisted). Use `--project __env__`. Same env setup also powers `kbagent serve`. kbagent project add --project NAME --url URL --token TOKEN +kbagent project login [--url URL] [--project ALIAS] [--port N] [--no-browser] [--timeout SECONDS] +# login (0.54.0+): browser OAuth + PKCE against connection./oauth/authorize -- user logs in, +# picks the project, kbagent gets a refresh token + minted Storage token (silently auto-renewed at +# resolve time). INTERACTIVE only; needs the kbagent public OAuth client registered on the stack. kbagent project list kbagent project remove --project NAME kbagent project edit --project NAME [--url URL] [--token TOKEN] [--new-alias NEW] diff --git a/plugins/kbagent/.claude-plugin/plugin.json b/plugins/kbagent/.claude-plugin/plugin.json index bdaf980e..c6a15126 100644 --- a/plugins/kbagent/.claude-plugin/plugin.json +++ b/plugins/kbagent/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "kbagent", - "version": "0.53.0", + "version": "0.54.0", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "author": { "name": "Keboola", diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index 65871f6f..08c819b1 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -51,16 +51,14 @@ a critical failure. exists, use it. Only fall back to `kbagent tool call ...` (MCP) when the native command does not cover the operation. When an MCP `tool call` returns `isError: true`, DO NOT retry with reformatted - inputs. Immediately switch to `kbagent --hint client ` and - execute via direct `KeboolaClient`. (Note: `--hint` is deprecated since - 0.45.0 in favor of the `kbagent serve` REST API; it still works but warns.) + inputs; use the `kbagent serve` REST API (or the deprecated + `kbagent --hint client ` snippet generator) instead. 5. **PREFER CLI OVER REST**. NEVER write `curl`, `httpx`, or `requests` calls against `*.keboola.com` URLs. Not in shell. Not in Python - snippets. Not in plans. If the CLI lacks the command, use - `kbagent --hint client` to generate a `KeboolaClient`-based snippet. - (`--hint` is deprecated since 0.45.0; prefer `kbagent serve` REST API for - new integrations.) + snippets. Not in plans. If the CLI lacks the command, use the + `kbagent serve` REST API (`--hint client` still works but is + deprecated since 0.45.0). 6. **VERSION GATE**. On first invocation in a session, run `kbagent --json context` and inspect the version. If missing commands @@ -68,16 +66,14 @@ a critical failure. `schedule find` needs 0.23.0+, `config set-default-bucket` needs 0.26.0+, `data-app create / deploy / start / stop / delete / password` need 0.27.0+, `config update` script[] auto-normalize (#245) needs - 0.28.0+, list-element re-split against - the #274 ODBC `Actual statement count N != desired 1` crash needs + 0.28.0+, list-element re-split (ODBC statement-count crash, #274) needs 0.31.0+, `storage swap-tables` needs 0.28.0+, `storage clone-table` = 0.52.0+, + `project login` (browser OAuth + PKCE) = 0.54.0+, env-var manage-token auth for `org setup` / `project refresh` / `data-app password` needs 0.29.0+ with `--allow-env-manage-token`, - `project invite` / `project member-*` / `project invitation-*` - need 0.29.0+, - `data-app secrets-* / validate-repo` need 0.29.0+, - `search`, `project info`, `config row-create`, `config row-update`, - `config row-delete`, `config oauth-url` need 0.30.0+, + `project invite` / `project member-*` / `project invitation-*` / + `data-app secrets-*` / `validate-repo` need 0.29.0+, + `search`, `project info`, `config row-*`, `config oauth-url` need 0.30.0+, `project edit --new-alias` (cascading rename across config.json + nested sync dir; warns on lineage cache rebuild) needs 0.31.0+, `storage truncate-table` needs 0.32.0+, @@ -87,10 +83,9 @@ a critical failure. `semantic-layer` command group needs 0.41.0+: - model lifecycle: `model list / create / delete` - read: `show`, `validate [--deep]`, `export`, `diff` - - write: `add metric|dataset|relationship|constraint|glossary`, - `edit metric|dataset|constraint|relationship|glossary`, + - write/destructive: `add/edit/remove + metric|dataset|relationship|constraint|glossary`, `import`, `promote`, `build`, `token --encrypt` - - destructive: `remove metric|dataset|constraint|relationship|glossary` - alias: `kbagent sl ...` = `kbagent semantic-layer ...` - `semantic-layer build` is heuristic-only on 0.41.0+ (one dataset + one COUNT(*) metric + one glossary entry per table; not a version gate), `kbagent http get/post/patch/delete ` (self-call against the @@ -312,6 +307,10 @@ success, not a failure. stranded the edits). Safe to force-pull an unrelated config while you have un-pushed edits elsewhere. To discard a local edit on purpose, delete the file/dir then pull. See `gotchas.md`. +- **`project login` is INTERACTIVE-ONLY** (0.54.0+): browser OAuth + PKCE. + NEVER run it yourself; on `OAUTH_ERROR` (exit 3) / persistent 401 the + refresh token died -- tell the user to re-run `kbagent project login + --url `. Automation: `project add --token`. See `gotchas.md`. - **`storage truncate-table` is row-only; schema and dependents are preserved** (0.32.0+): the underlying call is diff --git a/plugins/kbagent/skills/kbagent/SKILL.md b/plugins/kbagent/skills/kbagent/SKILL.md index de76f586..6ab401ee 100644 --- a/plugins/kbagent/skills/kbagent/SKILL.md +++ b/plugins/kbagent/skills/kbagent/SKILL.md @@ -63,7 +63,9 @@ description: > developer portal, dev-portal, apps-api, register component, vendor app, portal property, ui-options, encryption portal, defaultBucket portal, app icon, configurationSchema portal, publish component, deprecate component, - kbagent dev-portal, portal identity, vendor login, service account portal. + kbagent dev-portal, portal identity, vendor login, service account portal, + project login, browser login, oauth login, OAuth project authorization, + PKCE, log into keboola, login keboola project, refresh token expired. --- # kbagent -- Keboola Agent CLI @@ -117,6 +119,7 @@ When working inside a git repository or project directory, run `kbagent init` (o | Set the permission policy (firewall rules) | `kbagent permissions set --mode MODE` | | Remove all permission restrictions | `kbagent permissions reset` | | Check if a specific operation is allowed | `kbagent permissions check ` | +| Log into a Keboola project via the browser (OAuth + PKCE) | `kbagent project login` | | Add a new Keboola project connection | `kbagent project add --project ALIAS` | | List all connected Keboola projects | `kbagent project list` | | Remove a Keboola project connection | `kbagent project remove --project ALIAS` | diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index 2072c7f3..d44120a8 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -12,6 +12,7 @@ All commands support `--json` for structured output. Multi-project flags (`--pro ## Project Management - `project add --project NAME --url URL --token TOKEN` -- connect a project (token verified via API) +- `project login [--url URL] [--project ALIAS] [--port N] [--no-browser] [--timeout SECONDS]` -- browser OAuth project authorization (since v0.54.0). Authorization Code + PKCE against `connection./oauth/authorize`; the user logs in and picks the project on the consent screen, kbagent receives the code on a `127.0.0.1` loopback callback, stores the OAuth refresh token + a minted short-lived Storage token that silently auto-renews at resolve time. **INTERACTIVE only** -- a human must complete the browser flow; agents use `project add` with a provided token instead. Requires the kbagent public OAuth client registered on the stack (override id via `KBAGENT_OAUTH_CLIENT_ID`). Re-login into a registered project updates it in place. `--no-browser` prints the URL for manual opening (remote/SSH) - `project list` -- list all connected projects (tokens masked) - `project remove --project NAME` -- disconnect a project - `project edit --project NAME [--url URL] [--token TOKEN] [--new-alias NEW] [--dry-run]` -- update connection details and/or rename the alias. `--new-alias` cascades through config.json (`projects` key + `default_project` if matched) and the nested sync directory `//` when present (-2 collision suffix, git-mv with shutil fallback). Lineage cache rebuild is manual (see gotchas, since v0.31.0). Combined with `--url` / `--token` in one call, those mutations target the new alias post-rename. `--dry-run` previews everything (collision check, planned disk-rename method, lineage-cache warning) without mutating state -- same exit codes as live for validation errors diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index 46ecc01f..e18acb47 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -11,6 +11,27 @@ Versioning convention: behavior; the inline `(updated vX.Y.Z)` records when the refinement landed. --> +## `project login` is interactive-only; its minted token self-renews but the session can still die (since v0.54.0) + +- `kbagent project login` runs a browser OAuth flow (Authorization Code + PKCE) -- it + REQUIRES a human at a browser. An AI agent must never invoke it to "fix" auth; + the fallback for automation is `project add --token` / headless env mode. +- Projects added via login carry an `oauth` block in config.json and their + `token` is a SHORT-LIVED minted Storage token (~2h). It silently renews at + project-resolve time, so commands just work -- but if the stack reports + `OAUTH_ERROR` (exit 3) or a 401 persists across retries, the refresh token + itself has expired or been revoked (~1 month idle): tell the user to re-run + `kbagent project login --url `. Do not try to repair the oauth block + by hand-editing config.json. +- The flow only works on stacks where the kbagent public OAuth client is + registered. `OAUTH_ERROR` mentioning `invalid_client` on `/oauth/authorize` + means the stack does not have the registration yet -- fall back to + `project add`. +- Concurrent kbagent processes coordinate refresh-token rotation via an + flock on `/.oauth-refresh.lock`; if a shared config dir lives on + a filesystem without POSIX locks (some network mounts), parallel refreshes + can race and kill the session (symptom: spurious re-login prompts). + ## `sync push` fresh-CREATE writeback now updates placeholders in place (since v0.47.0) Before v0.47.0, `kbagent sync push` always **appended** new `ManifestConfiguration` diff --git a/pyproject.toml b/pyproject.toml index 63a0acf7..407cdf54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "keboola-agent-cli" -version = "0.53.0" +version = "0.54.0" description = "AI-friendly CLI for managing Keboola projects" readme = "README.md" requires-python = ">=3.12" diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 14b58646..70c034b2 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -8,6 +8,30 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { + "0.54.0": [ + "New `kbagent project login` -- browser OAuth project authorization (Authorization Code + " + "PKCE, RFC 7636/8252) against the Connection OAuth server (the same League OAuth2 server " + "the remote MCP server authenticates to). The CLI opens the stack login page, the user " + "authenticates and selects a project on the consent screen, and kbagent receives the " + "authorization code on a 127.0.0.1 loopback callback -- no manual token copying. Public " + "client, NO client_secret. kbagent persists the OAuth refresh token (config.json, 0600) " + "plus a short-lived Storage API token minted from the OAuth access token via `POST " + "/v2/storage/tokens` with `Authorization: Bearer` (the production MCP-server pattern -- " + "Queue API and AI Service do not accept Bearer yet, so the minted token keeps every " + "existing command path unchanged). The minted token auto-renews silently at project " + "resolve time (`BaseService.resolve_projects()` chokepoint -- covers CLI, the MCP " + "subprocess env, and `kbagent serve`), with refresh-token rotation persisted under an " + "inter-process flock (the OAuth server revokes the old refresh token on rotation, so " + "concurrent kbagent processes must not race). Re-login into an already-registered " + "project updates the entry in place (preserves active_branch_id; never duplicates). " + "New ErrorCode `OAUTH_ERROR` (maps to exit 3, auth). PREREQUISITE: the stack's " + "Connection must have the kbagent public OAuth client registered " + "(`league:oauth2-server:create-client ... --public` with loopback redirect URIs " + "http://127.0.0.1:8765-8769/callback whitelisted, authorization_code + refresh_token " + "grants); until the cross-stack registration name is finalized, override the client id " + "via KBAGENT_OAUTH_CLIENT_ID. Tests include a full real-HTTP protocol round-trip " + "against an in-repo fake Connection OAuth server that enforces PKCE S256.", + ], "0.53.0": [ 'Fix (`sync pull --force`, silent baseline corruption -> data loss): a config with un-pushed local edits is no longer silently de-synced when a force-pull runs while the remote is unchanged. Repro (reported on v0.51.1, project 5785): pull a config, edit its `_config.yml` (`sync diff` correctly shows `1 to update`), then run `sync pull --force` -- typically to resolve an *unrelated* config\'s conflict. Pre-0.53.0, `--force` skipped the "locally modified" guard in `SyncService.pull()`, so for a config whose remote had not changed the `remote_unchanged` short-circuit re-stamped the manifest `pull_hash` from the *edited on-disk file*. Afterwards `sync diff` and `sync push` both reported "in sync" and a real `push` shipped nothing, while the live remote still held the old config -- the local edits were stranded with no visible signal. Root cause was an interaction of two individually-reasonable decisions: `--force` bypassing the overwrite guard, and the `diff` `local_override_hashes` optimization that skips re-reading a file whose hash matches `pull_hash` (so the edited content was never even compared). The fix splits `--force` behaviour by 3-way diff state, per the maintainer decision: (b) local edited + remote UNCHANGED -> the file AND its 3-way base (`pull_hash` + `pull_config_hash`) are preserved, so the pending delta stays visible to `sync push` (no data loss, no silent revert); (a) local edited + remote ALSO changed since the last pull (a true merge conflict) -> the pull aborts before writing anything with the new `SyncConflictError` (exit 1, error code `SYNC_CONFLICT`), listing every conflicting config/row so the user resolves it (`sync diff`, then `push` or discard, then pull again). A no-conflict force-pull (remote changed, local untouched) still takes remote as before. Applies at config and row granularity. Note: `--force` no longer discards un-pushed non-conflicting edits -- that was the dangerous behaviour; to intentionally drop local edits, delete the file (or the config dir) and pull. New: `errors.SyncConflictError` + `ErrorCode.SYNC_CONFLICT`; `SyncService._detect_force_pull_conflicts` / `_is_conflict` (read-only pre-pass that runs before any write); `commands/sync.py` catches the error and prints a red per-config conflict block (human) or a `SYNC_CONFLICT` envelope with a `details.conflicts` array (`--json`). The pull `--force` help now documents the preserve/abort semantics. `--all-projects` surfaces a per-project conflict as a structured entry (`error_code: SYNC_CONFLICT` + the `conflicts` list, matching the single-project envelope) without aborting the batch. Tests: `tests/test_sync_force_pull_baseline.py` (config + row, preserve case b, abort case a, remote-only-changed takes remote, `--all-projects` structured conflict), `tests/test_sync_cli.py` (exit 1 + human/JSON conflict envelope), and `tests/test_e2e.py::TestE2ESyncWorkflow::test_sync_force_pull_conflict_aware` (real Storage: preserve when remote unchanged, then `SYNC_CONFLICT` after a remote mutation).', ], diff --git a/src/keboola_agent_cli/cli.py b/src/keboola_agent_cli/cli.py index 9b74f9f6..ead64b20 100644 --- a/src/keboola_agent_cli/cli.py +++ b/src/keboola_agent_cli/cli.py @@ -60,6 +60,7 @@ from .services.lineage_service import LineageService from .services.mcp_service import McpService from .services.member_service import MemberService +from .services.oauth_login_service import OAuthLoginService from .services.org_service import OrgService from .services.project_service import ProjectService from .services.repo_validate_service import RepoValidateService @@ -339,6 +340,7 @@ def main( config_store = ConfigStore(config_dir=resolved_dir, source=source) project_service = ProjectService(config_store=config_store) + oauth_login_service = OAuthLoginService(config_store=config_store) component_service = ComponentService(config_store=config_store) config_service = ConfigService(config_store=config_store) job_service = JobService(config_store=config_store) @@ -401,6 +403,7 @@ def main( ctx.obj["allow_env_manage_token"] = allow_env_manage_token ctx.obj["config_store"] = config_store ctx.obj["project_service"] = project_service + ctx.obj["oauth_login_service"] = oauth_login_service ctx.obj["component_service"] = component_service ctx.obj["config_service"] = config_service ctx.obj["job_service"] = job_service diff --git a/src/keboola_agent_cli/commands/_helpers.py b/src/keboola_agent_cli/commands/_helpers.py index d1d21c4e..0aa961c8 100644 --- a/src/keboola_agent_cli/commands/_helpers.py +++ b/src/keboola_agent_cli/commands/_helpers.py @@ -86,7 +86,7 @@ def get_service(ctx: typer.Context, key: str) -> Any: def map_error_to_exit_code(exc: KeboolaApiError) -> int: """Map a KeboolaApiError to a CLI exit code. - - INVALID_TOKEN -> 3 (authentication error) + - INVALID_TOKEN / MISSING_MASTER_TOKEN / OAUTH_ERROR -> 3 (authentication error) - TIMEOUT / CONNECTION_ERROR / RETRY_EXHAUSTED / QUEUE_JOB_TIMEOUT -> 4 (network/retryable; QUEUE_JOB_TIMEOUT means local gave up AND the remote-kill attempt also failed, so the job may still be running) @@ -95,7 +95,7 @@ def map_error_to_exit_code(exc: KeboolaApiError) -> int: job; scripts can distinguish "we killed it" from "it failed on its own") - Everything else -> 1 (general error) """ - if exc.error_code in ("INVALID_TOKEN", "MISSING_MASTER_TOKEN"): + if exc.error_code in ("INVALID_TOKEN", "MISSING_MASTER_TOKEN", "OAUTH_ERROR"): return 3 if exc.error_code in ( "TIMEOUT", diff --git a/src/keboola_agent_cli/commands/context.py b/src/keboola_agent_cli/commands/context.py index 3ca08567..68267b76 100644 --- a/src/keboola_agent_cli/commands/context.py +++ b/src/keboola_agent_cli/commands/context.py @@ -60,6 +60,16 @@ kbagent project add --project NAME --url URL --token TOKEN Add a new project connection. Token verified against API. + kbagent project login [--url URL] [--project ALIAS] [--port N] [--no-browser] [--timeout SECONDS] + Browser OAuth login (PKCE, since 0.54.0): opens the stack login page, the + user picks the project, kbagent receives credentials on a localhost + callback -- no manual token copying. Stores a refresh token and a + short-lived minted Storage token that auto-renews silently; when the + refresh token eventually expires (~1 month idle), re-run login. + INTERACTIVE: requires a human at a browser -- agents must never drive it; + fall back to `project add` with a provided token in automation. Requires + the stack to have the kbagent public OAuth client registered. + kbagent project list List all connected projects (tokens always masked). diff --git a/src/keboola_agent_cli/commands/project.py b/src/keboola_agent_cli/commands/project.py index 57959d4f..83b6a277 100644 --- a/src/keboola_agent_cli/commands/project.py +++ b/src/keboola_agent_cli/commands/project.py @@ -32,9 +32,14 @@ should_hint, ) from ._metadata_input import resolve_text_input +from .project_login import project_login project_app = typer.Typer(help="Manage connected Keboola projects") +# Registered here (defined in project_login.py) because this file is over its +# size budget; the command still lives under `kbagent project ...`. +project_app.command("login")(project_login) + @project_app.callback(invoke_without_command=True) def _project_permission_check(ctx: typer.Context) -> None: diff --git a/src/keboola_agent_cli/commands/project_login.py b/src/keboola_agent_cli/commands/project_login.py new file mode 100644 index 00000000..738994b4 --- /dev/null +++ b/src/keboola_agent_cli/commands/project_login.py @@ -0,0 +1,109 @@ +"""`kbagent project login` -- browser OAuth (PKCE) project authorization. + +Lives in its own module because commands/project.py is over its file-size +budget; the command is registered onto ``project_app`` from there. +Thin CLI layer per conventions: argument parsing, formatter, error mapping. +""" + +import typer + +from ..constants import DEFAULT_STACK_URL, ENV_KBC_STORAGE_API_URL +from ..errors import ConfigError, ErrorCode, KeboolaApiError +from ._helpers import get_formatter, get_service, map_error_to_exit_code + + +def project_login( + ctx: typer.Context, + url: str = typer.Option( + DEFAULT_STACK_URL, + help="Keboola stack URL to log into (e.g. connection.keboola.com)", + envvar=ENV_KBC_STORAGE_API_URL, + ), + alias: str | None = typer.Option( + None, + "--project", + help=( + "Alias to register the project under. Defaults to the slugified " + "project name. Re-login into an already-registered project " + "updates it in place." + ), + ), + port: int | None = typer.Option( + None, + "--port", + min=1, + max=65535, + help=( + "Explicit loopback callback port. Default: first free port from " + "the whitelisted set (8765-8769)." + ), + ), + no_browser: bool = typer.Option( + False, + "--no-browser", + help=( + "Do not launch a browser; print the login URL to open manually " + "(e.g. when kbagent runs on a remote host)." + ), + ), + timeout: float = typer.Option( + 300.0, + "--timeout", + min=10, + help="Seconds to wait for the browser login to complete.", + ), +) -> None: + """Log into a Keboola project via the browser (OAuth + PKCE). + + Opens the stack's login page; you authenticate and pick the project, and + kbagent receives the credentials on a localhost callback -- no manual + token copying. The session auto-renews in the background; when it + eventually expires, re-run this command. + """ + formatter = get_formatter(ctx) + service = get_service(ctx, "oauth_login_service") + + def show_authorize_url(authorize_url: str) -> None: + """Print the login URL before blocking on the callback (stderr, + so --json stdout stays a single parseable document).""" + if no_browser: + formatter.err_console.print( + f"Open this URL in your browser to log in:\n [bold]{authorize_url}[/bold]" + ) + else: + formatter.err_console.print( + "Opening your browser to complete the Keboola login... " + f"(or open manually: {authorize_url})" + ) + formatter.err_console.print("Waiting for the browser login to finish...") + + try: + result = service.login( + url, + alias=alias, + port=port, + open_browser=not no_browser, + timeout=timeout, + on_authorize_url=show_authorize_url, + ) + formatter.output( + result, + lambda c, d: c.print( + f"[bold green]Success:[/bold green] Logged into project " + f"[bold]{d['project_name']}[/bold] (id: {d['project_id']}) " + f"as [bold]{d['alias']}[/bold]" + + (" [dim](re-authenticated)[/dim]" if d.get("re_authenticated") else "") + + "\nThe session renews automatically; re-run " + "[bold]kbagent project login[/bold] if it ever expires." + ), + ) + except KeboolaApiError as exc: + formatter.error( + message=exc.message, + error_code=exc.error_code, + retryable=exc.retryable, + ) + raise typer.Exit(code=map_error_to_exit_code(exc)) from None + except ConfigError as exc: + formatter.error(message=exc.message, error_code=ErrorCode.CONFIG_ERROR) + raise typer.Exit(code=5) from None diff --git a/src/keboola_agent_cli/config_store.py b/src/keboola_agent_cli/config_store.py index c169781b..1b531ef6 100644 --- a/src/keboola_agent_cli/config_store.py +++ b/src/keboola_agent_cli/config_store.py @@ -24,7 +24,7 @@ LOCAL_CONFIG_DIR_NAME, ) from .errors import ConfigError -from .models import AppConfig, DeveloperPortalIdentity, ProjectConfig +from .models import AppConfig, DeveloperPortalIdentity, OAuthCredentials, ProjectConfig logger = logging.getLogger(__name__) @@ -436,14 +436,15 @@ def set_project_branch(self, alias: str, branch_id: int | None) -> None: config.projects[alias].active_branch_id = branch_id self.save(config) - def edit_project(self, alias: str, **kwargs: str | int | None) -> None: + def edit_project(self, alias: str, **kwargs: str | int | OAuthCredentials | None) -> None: """Update fields on an existing project. Only non-None keyword arguments are applied. Args: alias: The project alias to edit. - **kwargs: Fields to update (stack_url, token, project_name, project_id). + **kwargs: Fields to update (stack_url, token, project_name, + project_id, oauth). Raises: ConfigError: If the alias does not exist. diff --git a/src/keboola_agent_cli/constants.py b/src/keboola_agent_cli/constants.py index f6c8e725..5343a3eb 100644 --- a/src/keboola_agent_cli/constants.py +++ b/src/keboola_agent_cli/constants.py @@ -324,6 +324,32 @@ OAUTH_HOST: str = "external.keboola.com" OAUTH_PATH: str = "/oauth/index.html" +# --- OAuth project login (`kbagent project login`, since 0.54.0) --- +# Browser-based Authorization Code + PKCE login against the Connection OAuth +# server (the same League OAuth2 server the remote MCP server authenticates +# to). Distinct from OAUTH_HOST/OAUTH_PATH above, which point at the +# *component-credentials* wizard. +OAUTH_LOGIN_AUTHORIZE_PATH: str = "/oauth/authorize" +OAUTH_LOGIN_TOKEN_PATH: str = "/oauth/token" +# Public-client identifier. Must match the client registered per stack via +# `league:oauth2-server:create-client ... --public`. Overridable until the +# cross-stack registration name is finalized (and for fake-server tests). +DEFAULT_OAUTH_CLIENT_ID: str = "kbagent-cli" +ENV_OAUTH_CLIENT_ID: str = "KBAGENT_OAUTH_CLIENT_ID" +# Loopback callback (RFC 8252). The League server validates redirect URIs by +# EXACT match, so each `http://127.0.0.1:/callback` candidate below must +# be whitelisted on the registered client -- keep this tuple in sync with the +# registration request. +OAUTH_CALLBACK_PATH: str = "/callback" +OAUTH_CALLBACK_PORTS: tuple[int, ...] = (8765, 8766, 8767, 8768, 8769) +# How long `project login` waits for the user to finish the browser flow. +OAUTH_LOGIN_TIMEOUT_SECONDS: float = 300.0 +# Lifetime of the Storage token minted from the OAuth access token (mirrors +# the MCP server's ~2h mint), and how close to expiry we proactively refresh. +OAUTH_SAPI_TOKEN_LIFETIME_SECONDS: int = 7200 +OAUTH_REFRESH_MARGIN_SECONDS: int = 300 +OAUTH_SAPI_TOKEN_DESCRIPTION: str = "Created by kbagent OAuth login" + # --- Kai (Keboola AI Assistant) --- KAI_FEATURE_FLAG: str = "agent-chat" KAI_REQUEST_TIMEOUT: float = 300.0 # 5 min for non-streaming requests diff --git a/src/keboola_agent_cli/errors.py b/src/keboola_agent_cli/errors.py index ea5c9881..e5ae8838 100644 --- a/src/keboola_agent_cli/errors.py +++ b/src/keboola_agent_cli/errors.py @@ -18,6 +18,7 @@ class ErrorCode(StrEnum): PERMISSION_DENIED = "PERMISSION_DENIED" MISSING_MASTER_TOKEN = "MISSING_MASTER_TOKEN" UNAUTHORIZED = "UNAUTHORIZED" # Bearer-auth rejection by `kbagent serve` (0.40.0+) + OAUTH_ERROR = "OAUTH_ERROR" # `project login` / silent OAuth refresh failure (0.54.0+) # Network / transport TIMEOUT = "TIMEOUT" diff --git a/src/keboola_agent_cli/models.py b/src/keboola_agent_cli/models.py index 79679669..d239a471 100644 --- a/src/keboola_agent_cli/models.py +++ b/src/keboola_agent_cli/models.py @@ -45,6 +45,36 @@ def normalize_stack_url(value: str) -> str: return f"https://{parsed.netloc}" +class OAuthCredentials(BaseModel): + """OAuth session persisted for a project added via ``kbagent project login``. + + Only the refresh token is stored -- the short-lived OAuth access token is + used transiently to mint a Storage API token (kept in + ``ProjectConfig.token`` like any other project) and then discarded. The + refresh token has the same risk profile as a Storage token and lives + under the same config.json protections (0600, atomic write, flock). + + The Connection OAuth server ROTATES the refresh token on every refresh + (the old one is revoked), so this model is rewritten by the silent + refresh in ``oauth.ensure_fresh_oauth_token()``. + """ + + client_id: str = Field( + description="Public OAuth client id registered on the stack (no secret -- PKCE)" + ) + refresh_token: str = Field( + description="Long-lived OAuth refresh token (rotated on every refresh)" + ) + token_expires_at: float | None = Field( + default=None, + description=( + "Unix timestamp when the minted Storage token in ProjectConfig.token " + "expires; the silent refresh fires within OAUTH_REFRESH_MARGIN_SECONDS " + "of this. None means unknown -- treated as expired (refresh eagerly)." + ), + ) + + class ProjectConfig(BaseModel): """Configuration for a single Keboola project connection.""" @@ -78,6 +108,15 @@ class ProjectConfig(BaseModel): "token is never written to disk." ), ) + oauth: OAuthCredentials | None = Field( + default=None, + description=( + "OAuth session for projects added via `kbagent project login` " + "(browser PKCE flow). None for classic token-based projects. " + "When set, ProjectConfig.token holds a short-lived minted Storage " + "token that is silently refreshed at resolve time." + ), + ) @field_validator("stack_url") @classmethod diff --git a/src/keboola_agent_cli/oauth.py b/src/keboola_agent_cli/oauth.py new file mode 100644 index 00000000..a47fa7a0 --- /dev/null +++ b/src/keboola_agent_cli/oauth.py @@ -0,0 +1,593 @@ +"""OAuth 2.0 Authorization Code + PKCE flow against the Keboola Connection OAuth server. + +Implements browser-based project login (`kbagent project login`): + +- PKCE pair generation (RFC 7636, S256) -- public client, NO client_secret. +- Authorization URL building for ``https://connection./oauth/authorize``. +- Loopback callback server (RFC 8252) bound to 127.0.0.1 that receives the + authorization code redirect. +- Code -> token exchange and refresh-token rotation against ``/oauth/token``. +- Minting a short-lived Storage API token from the OAuth access token via + ``POST /v2/storage/tokens`` with ``Authorization: Bearer`` -- the same + pattern the Keboola MCP server uses in production. The minted token is what + every downstream kbagent client uses (Queue API and AI Service do not accept + Bearer tokens yet), so the rest of the CLI is untouched by OAuth. + +This module is LAYER 3 (HTTP + protocol) like ``http_base.py``: no Typer, no +Rich, no service imports. The login orchestration lives in +``services/oauth_login_service.py``; the silent-refresh chokepoint +(`ensure_fresh_oauth_token`) is called from ``BaseService.resolve_projects()``. + +Security notes: +- The OAuth refresh token is persisted in config.json under the existing + 0600 + atomic-write + flock protections (same risk class as Storage tokens). +- The OAuth access token is NEVER persisted -- it is used transiently to mint + a Storage token and discarded. With the refresh token we can always obtain + a fresh access token. +- The callback server binds 127.0.0.1 only, validates the CSRF ``state`` + parameter, and accepts exactly one code. +""" + +import base64 +import contextlib +import hashlib +import json +import logging +import os +import secrets +import threading +import time +from dataclasses import dataclass +from http.server import BaseHTTPRequestHandler, HTTPServer +from types import TracebackType +from urllib.parse import parse_qs, urlencode, urlparse + +import httpx + +from .constants import ( + DEFAULT_OAUTH_CLIENT_ID, + ENV_OAUTH_CLIENT_ID, + OAUTH_CALLBACK_PATH, + OAUTH_CALLBACK_PORTS, + OAUTH_LOGIN_AUTHORIZE_PATH, + OAUTH_LOGIN_TIMEOUT_SECONDS, + OAUTH_LOGIN_TOKEN_PATH, + OAUTH_REFRESH_MARGIN_SECONDS, + OAUTH_SAPI_TOKEN_DESCRIPTION, + OAUTH_SAPI_TOKEN_LIFETIME_SECONDS, +) +from .errors import ErrorCode, KeboolaApiError, mask_token + +logger = logging.getLogger(__name__) + +_HTTP_TIMEOUT = httpx.Timeout(30.0) + + +def resolve_oauth_client_id() -> str: + """Resolve the OAuth client_id for the CLI. + + The client must be registered in the stack's Connection OAuth server + (``league:oauth2-server:create-client ... --public``). Until the + registration name is finalized across stacks, the id can be overridden + via the ``KBAGENT_OAUTH_CLIENT_ID`` env var (also what the fake-server + tests use). + """ + return os.environ.get(ENV_OAUTH_CLIENT_ID, "").strip() or DEFAULT_OAUTH_CLIENT_ID + + +# ── PKCE (RFC 7636) ───────────────────────────────────────────────── + + +@dataclass(frozen=True) +class PkcePair: + """A PKCE code_verifier and its S256 code_challenge.""" + + verifier: str + challenge: str + + +def generate_pkce_pair() -> PkcePair: + """Generate a PKCE verifier/challenge pair (S256 method). + + The verifier is 43-128 chars of [A-Za-z0-9-._~] per RFC 7636; + ``secrets.token_urlsafe(64)`` yields ~86 chars from that alphabet. + The challenge is BASE64URL(SHA256(verifier)) without padding. + """ + verifier = secrets.token_urlsafe(64) + digest = hashlib.sha256(verifier.encode("ascii")).digest() + challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") + return PkcePair(verifier=verifier, challenge=challenge) + + +def generate_state() -> str: + """Generate a CSRF state nonce for the authorization request.""" + return secrets.token_urlsafe(32) + + +# ── Authorization URL ─────────────────────────────────────────────── + + +def build_authorize_url( + stack_url: str, + *, + client_id: str, + redirect_uri: str, + state: str, + code_challenge: str, +) -> str: + """Build the ``/oauth/authorize`` URL the browser is sent to. + + No ``scope`` is sent -- the Keboola OAuth server applies its own default + scope (mirrors the MCP server's authorize request). The user logs in and + selects the project on the Connection consent screen; the issued tokens + are tied to that selection. + """ + params = { + "client_id": client_id, + "response_type": "code", + "redirect_uri": redirect_uri, + "state": state, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + } + return f"{stack_url.rstrip('/')}{OAUTH_LOGIN_AUTHORIZE_PATH}?{urlencode(params)}" + + +# ── Token endpoint (exchange + refresh) ───────────────────────────── + + +@dataclass(frozen=True) +class OAuthTokens: + """Response from the OAuth token endpoint.""" + + access_token: str + refresh_token: str + expires_in: int + + +def _post_token_request(stack_url: str, data: dict[str, str], operation: str) -> OAuthTokens: + """POST to ``/oauth/token`` and parse the token response. + + Shared by code exchange and refresh. Raises ``KeboolaApiError`` with + ``ErrorCode.OAUTH_ERROR`` on any failure; never echoes token material + into error messages. + """ + url = f"{stack_url.rstrip('/')}{OAUTH_LOGIN_TOKEN_PATH}" + try: + response = httpx.post( + url, + data=data, + headers={"Accept": "application/json"}, + timeout=_HTTP_TIMEOUT, + follow_redirects=True, + ) + except httpx.TimeoutException as exc: + raise KeboolaApiError( + f"OAuth {operation} timed out against {url}", + error_code=ErrorCode.TIMEOUT, + retryable=True, + ) from exc + except httpx.HTTPError as exc: + raise KeboolaApiError( + f"OAuth {operation} failed: cannot reach {url}: {exc}", + error_code=ErrorCode.CONNECTION_ERROR, + retryable=True, + ) from exc + + try: + payload = response.json() + except (json.JSONDecodeError, ValueError): + payload = {} + + if response.status_code != 200 or "error" in payload: + # League returns RFC 6749 error JSON ({"error": ..., "error_description"/"hint": ...}). + detail = ( + payload.get("error_description") + or payload.get("hint") + or payload.get("message") + or payload.get("error") + or f"HTTP {response.status_code}" + ) + raise KeboolaApiError( + f"OAuth {operation} rejected by {url}: {detail}", + error_code=ErrorCode.OAUTH_ERROR, + status_code=response.status_code, + ) + + access_token = payload.get("access_token") or "" + refresh_token = payload.get("refresh_token") or "" + if not access_token or not refresh_token: + raise KeboolaApiError( + f"OAuth {operation} response from {url} is missing access_token/refresh_token fields.", + error_code=ErrorCode.OAUTH_ERROR, + ) + return OAuthTokens( + access_token=access_token, + refresh_token=refresh_token, + expires_in=int(payload.get("expires_in") or 3600), + ) + + +def exchange_code( + stack_url: str, + *, + client_id: str, + code: str, + code_verifier: str, + redirect_uri: str, +) -> OAuthTokens: + """Exchange an authorization code for access + refresh tokens. + + Public client: ``code_verifier`` proves possession (PKCE), no secret. + The Keboola OAuth server requires ``redirect_uri`` in the exchange. + """ + return _post_token_request( + stack_url, + { + "client_id": client_id, + "grant_type": "authorization_code", + "code": code, + "code_verifier": code_verifier, + "redirect_uri": redirect_uri, + }, + operation="code exchange", + ) + + +def refresh_oauth_tokens(stack_url: str, *, client_id: str, refresh_token: str) -> OAuthTokens: + """Rotate the refresh token for a fresh access + refresh token pair. + + The League OAuth server issues a NEW refresh token on every refresh and + revokes the old one -- callers MUST persist the returned refresh token + immediately or the session is lost. + """ + return _post_token_request( + stack_url, + { + "client_id": client_id, + "grant_type": "refresh_token", + "refresh_token": refresh_token, + }, + operation="token refresh", + ) + + +# ── Storage token minting ─────────────────────────────────────────── + + +def mint_storage_token( + stack_url: str, + *, + access_token: str, + expires_in: int = OAUTH_SAPI_TOKEN_LIFETIME_SECONDS, + description: str = OAUTH_SAPI_TOKEN_DESCRIPTION, +) -> str: + """Create a short-lived Storage API token using the OAuth access token. + + ``POST /v2/storage/tokens`` accepts ``Authorization: Bearer + `` (verified against the production MCP server + implementation). The minted token carries the same capability flags + kbagent's `project refresh` uses via the Manage API, so every existing + command keeps working unchanged. + """ + url = f"{stack_url.rstrip('/')}/v2/storage/tokens" + try: + response = httpx.post( + url, + json={ + "description": description, + "expiresIn": expires_in, + "canManageBuckets": True, + "canReadAllFileUploads": True, + "canReadAllProjectEvents": True, + "canManageDevBranches": True, + "canManageTokens": True, + }, + headers={ + "Accept": "application/json", + "Authorization": f"Bearer {access_token}", + }, + timeout=_HTTP_TIMEOUT, + ) + except httpx.TimeoutException as exc: + raise KeboolaApiError( + f"Storage token creation timed out against {url}", + error_code=ErrorCode.TIMEOUT, + retryable=True, + ) from exc + except httpx.HTTPError as exc: + raise KeboolaApiError( + f"Storage token creation failed: cannot reach {url}: {exc}", + error_code=ErrorCode.CONNECTION_ERROR, + retryable=True, + ) from exc + + if response.status_code != 200: + raise KeboolaApiError( + f"Storage token creation rejected ({response.status_code}) by {url}. " + "The OAuth access token may lack project access.", + error_code=ErrorCode.OAUTH_ERROR, + status_code=response.status_code, + ) + token = response.json().get("token") or "" + if not token: + raise KeboolaApiError( + f"Storage token response from {url} is missing the 'token' field.", + error_code=ErrorCode.OAUTH_ERROR, + ) + return token + + +# ── Loopback callback server (RFC 8252) ───────────────────────────── + + +@dataclass(frozen=True) +class CallbackResult: + """Outcome of the browser redirect to the loopback callback.""" + + code: str = "" + state: str = "" + error: str = "" + + +_SUCCESS_PAGE = ( + "kbagent login' + "

✓ Login complete

" + "

You can close this tab and return to your terminal.

" + "" +) +_ERROR_PAGE = ( + "kbagent login' + "

✗ Login failed

" + "

{reason}

Return to your terminal for details.

" + "" +) + + +class _CallbackHandler(BaseHTTPRequestHandler): + """Captures the single OAuth redirect; everything else is a 404.""" + + server: "OAuthCallbackServer" # narrowed for type-checkers + + def do_GET(self) -> None: + parsed = urlparse(self.path) + if parsed.path != OAUTH_CALLBACK_PATH: + self._respond(404, _ERROR_PAGE.format(reason="Unknown callback path.")) + return + + params = parse_qs(parsed.query) + error = (params.get("error") or [""])[0] + code = (params.get("code") or [""])[0] + state = (params.get("state") or [""])[0] + + if error: + description = (params.get("error_description") or [error])[0] + self.server.deliver(CallbackResult(error=description)) + self._respond(200, _ERROR_PAGE.format(reason=description)) + return + if not code or not state: + self.server.deliver( + CallbackResult(error="Callback is missing 'code' or 'state' parameter.") + ) + self._respond(400, _ERROR_PAGE.format(reason="Missing code/state parameter.")) + return + + self.server.deliver(CallbackResult(code=code, state=state)) + self._respond(200, _SUCCESS_PAGE) + + def _respond(self, status: int, body: str) -> None: + payload = body.encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(payload))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(payload) + + def log_message(self, format: str, *args: object) -> None: + # Route http.server's stderr chatter to the debug logger; the query + # string contains the authorization code, so never log it verbatim. + logger.debug("OAuth callback request: %s", self.path.split("?")[0]) + + +class OAuthCallbackServer(HTTPServer): + """Single-use loopback HTTP server that waits for the OAuth redirect. + + Binds 127.0.0.1 on the first free port from ``ports`` (the registered + client must whitelist ``http://127.0.0.1:/callback`` for each). + Use as a context manager; call :meth:`wait_for_code` after opening the + browser. + """ + + def __init__(self, ports: tuple[int, ...] = OAUTH_CALLBACK_PORTS) -> None: + self._result: CallbackResult | None = None + self._received = threading.Event() + bound_error: OSError | None = None + for port in ports: + try: + super().__init__(("127.0.0.1", port), _CallbackHandler) + bound_error = None + break + except OSError as exc: + bound_error = exc + if bound_error is not None: + raise KeboolaApiError( + f"No free OAuth callback port among {ports}. " + "Close the process occupying them or pass --port.", + error_code=ErrorCode.OAUTH_ERROR, + ) + self._thread = threading.Thread(target=self.serve_forever, daemon=True) + self._thread.start() + + @property + def redirect_uri(self) -> str: + """The redirect URI this server listens on.""" + return f"http://127.0.0.1:{self.server_address[1]}{OAUTH_CALLBACK_PATH}" + + def deliver(self, result: CallbackResult) -> None: + """Record the first callback result and wake the waiter (later ones ignored).""" + if self._result is None: + self._result = result + self._received.set() + + def wait_for_code( + self, + expected_state: str, + timeout: float = OAUTH_LOGIN_TIMEOUT_SECONDS, + ) -> str: + """Block until the browser redirect arrives; return the authorization code. + + Raises: + KeboolaApiError: On timeout, an OAuth error redirect (e.g. the + user denied consent), or a CSRF state mismatch. + """ + if not self._received.wait(timeout): + raise KeboolaApiError( + f"Timed out after {int(timeout)}s waiting for the browser login. " + "Re-run `kbagent project login` and complete the login in the browser.", + error_code=ErrorCode.OAUTH_ERROR, + ) + result = self._result or CallbackResult(error="No callback received.") + if result.error: + raise KeboolaApiError( + f"OAuth login failed in the browser: {result.error}", + error_code=ErrorCode.OAUTH_ERROR, + ) + if not secrets.compare_digest(result.state, expected_state): + raise KeboolaApiError( + "OAuth state mismatch on the callback (possible CSRF or a " + "stale browser tab). Re-run `kbagent project login`.", + error_code=ErrorCode.OAUTH_ERROR, + ) + return result.code + + def close(self) -> None: + """Stop the server thread and release the port.""" + self.shutdown() + self.server_close() + self._thread.join(timeout=5) + + def __enter__(self) -> "OAuthCallbackServer": + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + +# ── Silent refresh chokepoint ─────────────────────────────────────── + +# fcntl is POSIX-only (mirrors config_store.py); on Windows we skip locking. +try: + import fcntl + + _HAS_FCNTL = True +except ImportError: + _HAS_FCNTL = False + + +@contextlib.contextmanager +def _refresh_lock(lock_path: str): + """Inter-process lock serializing refresh-token rotation. + + The League OAuth server REVOKES the old refresh token on rotation, so two + kbagent processes refreshing concurrently would race: the loser persists + a revoked token and the session dies. An exclusive flock on a sidecar + file in the config dir serializes them; the winner persists the rotated + token and the loser re-reads config and sees the fresh one. + """ + fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o600) + try: + if _HAS_FCNTL: + with contextlib.suppress(OSError): + fcntl.flock(fd, fcntl.LOCK_EX) + yield + finally: + if _HAS_FCNTL: + with contextlib.suppress(OSError): + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + +def ensure_fresh_oauth_token(config_store, alias: str, project): + """Return ``project`` with a valid Storage token, refreshing if needed. + + No-op (and zero network calls) unless the project was added via + ``kbagent project login`` AND its minted Storage token is expired or + inside the refresh margin. Called from + ``BaseService.resolve_projects()`` so every service, the MCP subprocess + env, and the `kbagent serve` routers all receive a fresh token without + any per-call-site changes. + + On refresh failure the stale project is returned unchanged with a + warning logged -- a single stale OAuth project must not torpedo a + multi-project fan-out; the downstream 401 is reported per-project via + the existing error-accumulation machinery. + + Args: + config_store: ConfigStore for persisting the rotated credentials. + project: ProjectConfig (typed loosely to avoid an import cycle with + models via services.base). + + Returns: + The fresh ProjectConfig (the same instance when no refresh was needed). + """ + from .models import OAuthCredentials # local import: avoid models<->oauth cycle risk + + oauth = getattr(project, "oauth", None) + # isinstance (not a None check) so mock ProjectConfig objects used across + # the test suite never trip the refresh path. + if not isinstance(oauth, OAuthCredentials): + return project + expires_at = oauth.token_expires_at + if expires_at is not None and expires_at - time.time() > OAUTH_REFRESH_MARGIN_SECONDS: + return project + + lock_path = str(config_store.config_dir / ".oauth-refresh.lock") + with _refresh_lock(lock_path): + # Another process may have rotated the credentials while we waited + # on the lock -- re-read and bail out if the token is fresh now. + latest = config_store.get_project(alias) + if latest is not None and latest.oauth is not None: + latest_expiry = latest.oauth.token_expires_at + if ( + latest_expiry is not None + and latest_expiry - time.time() > OAUTH_REFRESH_MARGIN_SECONDS + ): + return latest + oauth = latest.oauth + project = latest + + logger.debug("Refreshing OAuth session for project '%s'", alias) + try: + tokens = refresh_oauth_tokens( + project.stack_url, + client_id=oauth.client_id, + refresh_token=oauth.refresh_token, + ) + sapi_token = mint_storage_token(project.stack_url, access_token=tokens.access_token) + except KeboolaApiError as exc: + logger.warning( + "OAuth refresh failed for project '%s' (%s): %s -- run " + "`kbagent project login --url %s` to re-authenticate.", + alias, + mask_token(oauth.refresh_token), + exc.message, + project.stack_url, + ) + return project + + new_creds = OAuthCredentials( + client_id=oauth.client_id, + refresh_token=tokens.refresh_token, + token_expires_at=time.time() + OAUTH_SAPI_TOKEN_LIFETIME_SECONDS, + ) + config_store.edit_project(alias, token=sapi_token, oauth=new_creds) + updated = config_store.get_project(alias) + return updated if updated is not None else project diff --git a/src/keboola_agent_cli/permissions.py b/src/keboola_agent_cli/permissions.py index 530ca6c1..7fe8e903 100644 --- a/src/keboola_agent_cli/permissions.py +++ b/src/keboola_agent_cli/permissions.py @@ -15,6 +15,7 @@ OPERATION_REGISTRY: dict[str, str] = { # Project management "project.add": "admin", + "project.login": "admin", "project.list": "read", "project.remove": "admin", "project.edit": "admin", diff --git a/src/keboola_agent_cli/services/base.py b/src/keboola_agent_cli/services/base.py index 6cd648d9..e748b2b3 100644 --- a/src/keboola_agent_cli/services/base.py +++ b/src/keboola_agent_cli/services/base.py @@ -14,7 +14,7 @@ from ..config_store import ConfigStore from ..constants import ENV_MAX_PARALLEL_WORKERS, UNEXPECTED_ERROR_MAX_MESSAGE_LEN from ..errors import ConfigError -from ..models import ProjectConfig +from ..models import OAuthCredentials, ProjectConfig logger = logging.getLogger(__name__) @@ -68,6 +68,13 @@ def __init__( def resolve_projects(self, aliases: list[str] | None = None) -> dict[str, ProjectConfig]: """Resolve project aliases to ProjectConfig instances. + Projects added via ``kbagent project login`` (OAuth) get their minted + Storage token silently refreshed here when it is expired or near + expiry -- this is the single chokepoint through which every service, + the MCP subprocess env, and the `kbagent serve` routers receive + tokens, so no per-call-site changes are needed. Classic token + projects pass through untouched (zero network calls). + Args: aliases: Specific project aliases. If None or empty, returns all. @@ -80,15 +87,31 @@ def resolve_projects(self, aliases: list[str] | None = None) -> dict[str, Projec config = self._config_store.load() if not aliases: - return dict(config.projects) - - resolved: dict[str, ProjectConfig] = {} - for alias in aliases: - if alias not in config.projects: - raise ConfigError(f"Project '{alias}' not found.") - resolved[alias] = config.projects[alias] + resolved = dict(config.projects) + else: + resolved = {} + for alias in aliases: + if alias not in config.projects: + raise ConfigError(f"Project '{alias}' not found.") + resolved[alias] = config.projects[alias] + + return { + alias: self._ensure_fresh_token(alias, project) for alias, project in resolved.items() + } + + def _ensure_fresh_token(self, alias: str, project: ProjectConfig) -> ProjectConfig: + """Silently refresh an OAuth project's minted Storage token if stale. + + Lazy import keeps the OAuth/httpx machinery off the hot path for the + common case (no OAuth projects configured). The isinstance gate (not + a None check) keeps mock ProjectConfig objects used across the test + suite from tripping the refresh path. + """ + if not isinstance(getattr(project, "oauth", None), OAuthCredentials): + return project + from ..oauth import ensure_fresh_oauth_token - return resolved + return ensure_fresh_oauth_token(self._config_store, alias, project) def _resolve_max_workers(self) -> int: """Resolve max parallel workers: env var > config.json > default (10). diff --git a/src/keboola_agent_cli/services/oauth_login_service.py b/src/keboola_agent_cli/services/oauth_login_service.py new file mode 100644 index 00000000..c3ead162 --- /dev/null +++ b/src/keboola_agent_cli/services/oauth_login_service.py @@ -0,0 +1,254 @@ +"""Business logic for `kbagent project login` (browser OAuth + PKCE). + +Orchestrates the interactive login: loopback callback server, browser +hand-off, code exchange, Storage-token minting, token verification, and +config persistence. The protocol-level pieces live in ``oauth.py``; the +silent refresh of an already-logged-in project happens in +``BaseService.resolve_projects()`` via ``oauth.ensure_fresh_oauth_token``. +""" + +import logging +import time +import webbrowser +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any + +from ..constants import ( + OAUTH_CALLBACK_PORTS, + OAUTH_LOGIN_TIMEOUT_SECONDS, + OAUTH_SAPI_TOKEN_LIFETIME_SECONDS, +) +from ..errors import ConfigError, mask_token +from ..models import OAuthCredentials, ProjectConfig, normalize_stack_url +from ..oauth import ( + OAuthCallbackServer, + build_authorize_url, + exchange_code, + generate_pkce_pair, + generate_state, + mint_storage_token, + resolve_oauth_client_id, +) +from .base import BaseService +from .org_service import slugify + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class LoginOutcome: + """Result of a completed browser login.""" + + alias: str + project_name: str + project_id: int | None + stack_url: str + masked_token: str + token_expires_at: float + re_authenticated: bool + + def as_dict(self) -> dict[str, Any]: + """Shape consumed by OutputFormatter (JSON mode emits this verbatim).""" + return { + "alias": self.alias, + "project_name": self.project_name, + "project_id": self.project_id, + "stack_url": self.stack_url, + "token": self.masked_token, + "token_expires_at": self.token_expires_at, + "re_authenticated": self.re_authenticated, + "auth_type": "oauth", + } + + +class OAuthLoginService(BaseService): + """Browser-based OAuth login for Keboola projects. + + Inherits config_store + client_factory injection from BaseService; + the client_factory is used to verify the freshly minted Storage token + and read the project identity (name/id/org) for persistence. + """ + + def login( + self, + stack_url: str, + *, + alias: str | None = None, + port: int | None = None, + open_browser: bool = True, + timeout: float = OAUTH_LOGIN_TIMEOUT_SECONDS, + on_authorize_url: Callable[[str], None] | None = None, + ) -> dict[str, Any]: + """Run the full Authorization Code + PKCE login flow. + + Blocks until the user completes (or abandons) the browser login. + + Args: + stack_url: Keboola stack URL (bare host / base URL / deep-link + are all accepted, same forgiveness as `project add`). + alias: Alias to register the project under. Defaults to the + slugified project name reported by the stack. Re-login into + a project already registered for this stack UPDATES that + entry instead of creating a duplicate. + port: Explicit loopback callback port. Defaults to the first + free port from OAUTH_CALLBACK_PORTS (all of which must be + whitelisted on the registered OAuth client). + open_browser: When False, skip launching the browser -- the + user opens the authorize URL manually (SSH/headless-with- + local-browser scenarios). + timeout: Seconds to wait for the browser redirect. + on_authorize_url: Callback invoked with the authorize URL right + before waiting, so the command layer can display it. + + Returns: + ``LoginOutcome.as_dict()`` payload. + + Raises: + KeboolaApiError: OAuth/token-mint/verification failures + (ErrorCode.OAUTH_ERROR for protocol failures). + ConfigError: Alias collision with a different project. + """ + stack_url = normalize_stack_url(stack_url) + client_id = resolve_oauth_client_id() + pkce = generate_pkce_pair() + state = generate_state() + candidate_ports = (port,) if port is not None else OAUTH_CALLBACK_PORTS + + with OAuthCallbackServer(candidate_ports) as server: + authorize_url = build_authorize_url( + stack_url, + client_id=client_id, + redirect_uri=server.redirect_uri, + state=state, + code_challenge=pkce.challenge, + ) + if on_authorize_url is not None: + on_authorize_url(authorize_url) + if open_browser: + # Best-effort: a False return (no browser available) is fine, + # the URL was already printed via on_authorize_url. + webbrowser.open(authorize_url) + code = server.wait_for_code(state, timeout) + redirect_uri = server.redirect_uri + + tokens = exchange_code( + stack_url, + client_id=client_id, + code=code, + code_verifier=pkce.verifier, + redirect_uri=redirect_uri, + ) + sapi_token = mint_storage_token(stack_url, access_token=tokens.access_token) + token_expires_at = time.time() + OAUTH_SAPI_TOKEN_LIFETIME_SECONDS + + # Verify the minted token and learn which project the user selected + # on the Connection consent screen. + client = self._client_factory(stack_url, sapi_token) + try: + token_info = client.verify_token() + finally: + client.close() + + credentials = OAuthCredentials( + client_id=client_id, + refresh_token=tokens.refresh_token, + token_expires_at=token_expires_at, + ) + outcome = self._persist( + alias=alias, + stack_url=stack_url, + sapi_token=sapi_token, + credentials=credentials, + token_info=token_info, + ) + return outcome.as_dict() + + def _persist( + self, + *, + alias: str | None, + stack_url: str, + sapi_token: str, + credentials: OAuthCredentials, + token_info: Any, + ) -> LoginOutcome: + """Save (or update on re-login) the project entry. + + Idempotency: logging into a project that is already registered for + the same stack updates that entry in place (rotated refresh token, + fresh minted token) -- a second login must never error with + "alias already exists" or create a duplicate. + """ + config = self._config_store.load() + + # Re-login detection: same stack + same project id -> update in place. + existing_alias = next( + ( + existing + for existing, proj in config.projects.items() + if not proj.ephemeral + and proj.stack_url == stack_url + and proj.project_id is not None + and proj.project_id == token_info.project_id + ), + None, + ) + + project = ProjectConfig( + stack_url=stack_url, + token=sapi_token, + project_name=token_info.project_name, + project_id=token_info.project_id, + org_id=token_info.org_id, + org_name=token_info.org_name, + oauth=credentials, + ) + + if existing_alias is not None and (alias is None or alias == existing_alias): + # Preserve fields login does not own (e.g. active_branch_id). + project.active_branch_id = config.projects[existing_alias].active_branch_id + self._config_store.edit_project( + existing_alias, + token=sapi_token, + oauth=credentials, + project_name=token_info.project_name, + project_id=token_info.project_id, + org_id=token_info.org_id, + org_name=token_info.org_name, + ) + chosen_alias = existing_alias + re_authenticated = True + else: + chosen_alias = alias or slugify(token_info.project_name) + collision = config.projects.get(chosen_alias) + if collision is not None and ( + collision.stack_url != stack_url or collision.project_id != token_info.project_id + ): + raise ConfigError( + f"Alias '{chosen_alias}' already points to a different project " + f"({collision.project_name or collision.stack_url}). Pass " + "--project to register this login separately." + ) + if collision is not None: + self._config_store.edit_project(chosen_alias, token=sapi_token, oauth=credentials) + re_authenticated = True + else: + self._config_store.add_project(chosen_alias, project) + re_authenticated = False + + logger.debug( + "OAuth login persisted for project '%s' (id=%s, token=%s)", + chosen_alias, + token_info.project_id, + mask_token(sapi_token), + ) + return LoginOutcome( + alias=chosen_alias, + project_name=token_info.project_name, + project_id=token_info.project_id, + stack_url=stack_url, + masked_token=mask_token(sapi_token), + token_expires_at=credentials.token_expires_at or 0.0, + re_authenticated=re_authenticated, + ) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index fca70ba1..56dde1fb 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -49,6 +49,7 @@ import csv import json import os +import re import shutil import subprocess import time @@ -11097,3 +11098,51 @@ def test_headless_requires_opt_in_flag(self) -> None: result = _invoke(self.config_dir, ["--json", "project", "list"]) data = _json_ok(result) assert data["data"] == [], data + + +@pytest.mark.e2e +class TestE2EOAuthLogin: + """E2E for `kbagent project login` (browser OAuth + PKCE). + + The real-stack flow needs a PUBLIC OAuth client registered in the + stack's Connection OAuth server (`league:oauth2-server:create-client + ... --public` with the loopback redirect URIs whitelisted) AND a human + in front of a browser -- neither is available in CI. Until the client + registration lands, this suite covers the full protocol against the + in-repo fake Connection server (see tests/test_oauth.py + TestFullProtocolRoundTrip for the real-HTTP PKCE round-trip) and + asserts the --help flags here. + + To run the interactive real-stack check manually once the client exists: + + KBAGENT_OAUTH_CLIENT_ID= E2E_OAUTH_INTERACTIVE=1 \ + uv run pytest tests/test_e2e.py::TestE2EOAuthLogin -v -s + """ + + def test_login_help_flags(self, tmp_path: Path) -> None: + _step("OAUTH-1", "project login --help exposes the documented flags") + result = _invoke(tmp_path, ["project", "login", "--help"]) + assert result.exit_code == 0 + # CI renders help with ANSI colors; Rich then emits `--url` as + # `--url`, so the literal flag never appears in the raw bytes. + plain_output = re.sub(r"\x1b\[[0-9;]*m", "", result.output) + for flag in ("--url", "--project", "--port", "--no-browser", "--timeout"): + assert flag in plain_output, f"missing {flag} in --help" + + @pytest.mark.skipif( + not os.environ.get("E2E_OAUTH_INTERACTIVE"), + reason=( + "Interactive browser login: needs the registered public OAuth " + "client (KBAGENT_OAUTH_CLIENT_ID) and a human at a browser. " + "Set E2E_OAUTH_INTERACTIVE=1 to run." + ), + ) + def test_interactive_browser_login(self, tmp_path: Path) -> None: + _step("OAUTH-2", "full browser login against the real stack") + url = os.environ.get(ENV_URL, "connection.keboola.com") + result = _invoke(tmp_path, ["--json", "project", "login", "--url", url]) + data = _json_ok(result) + assert data["data"]["auth_type"] == "oauth" + # The minted token must verify against the real API. + status = _invoke(tmp_path, ["--json", "project", "status"]) + assert _json_ok(status) diff --git a/tests/test_oauth.py b/tests/test_oauth.py new file mode 100644 index 00000000..9983dbb6 --- /dev/null +++ b/tests/test_oauth.py @@ -0,0 +1,826 @@ +"""Tests for the OAuth project login flow (oauth.py + OAuthLoginService). + +Layers covered: +- PKCE pair generation and authorize-URL building (pure unit tests). +- Loopback callback server (real HTTP against 127.0.0.1). +- Token exchange / refresh / Storage-token minting (pytest-httpx mocks). +- Silent refresh chokepoint ``ensure_fresh_oauth_token`` incl. rotation + persistence and graceful degradation on failure. +- Full protocol round-trip against a fake Connection OAuth server that + enforces PKCE (real HTTP, no mocks) -- the closest local approximation of + the real stack until the public OAuth client is registered. +""" + +import base64 +import hashlib +import json +import threading +import time +import urllib.error +import urllib.parse +import urllib.request +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from keboola_agent_cli.config_store import ConfigStore +from keboola_agent_cli.errors import ErrorCode, KeboolaApiError +from keboola_agent_cli.models import OAuthCredentials, ProjectConfig, TokenVerifyResponse +from keboola_agent_cli.oauth import ( + OAuthCallbackServer, + build_authorize_url, + ensure_fresh_oauth_token, + exchange_code, + generate_pkce_pair, + generate_state, + mint_storage_token, + refresh_oauth_tokens, +) +from keboola_agent_cli.services.base import BaseService +from keboola_agent_cli.services.oauth_login_service import OAuthLoginService + +STACK_URL = "https://connection.test.keboola.com" +TEST_SAPI_TOKEN = "901-55555-fakeMintedTokenXXXXXXXXXXXXX" + + +def _b64url_sha256(value: str) -> str: + return ( + base64.urlsafe_b64encode(hashlib.sha256(value.encode("ascii")).digest()) + .rstrip(b"=") + .decode("ascii") + ) + + +# ── PKCE + authorize URL ──────────────────────────────────────────── + + +class TestPkce: + def test_verifier_charset_and_length(self) -> None: + pair = generate_pkce_pair() + assert 43 <= len(pair.verifier) <= 128 + allowed = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~") + assert set(pair.verifier) <= allowed + + def test_challenge_is_s256_of_verifier(self) -> None: + pair = generate_pkce_pair() + assert pair.challenge == _b64url_sha256(pair.verifier) + assert "=" not in pair.challenge # no base64 padding + + def test_pairs_are_unique(self) -> None: + assert generate_pkce_pair().verifier != generate_pkce_pair().verifier + assert generate_state() != generate_state() + + def test_authorize_url_params(self) -> None: + url = build_authorize_url( + STACK_URL + "/", # trailing slash must not double up + client_id="kbagent-cli", + redirect_uri="http://127.0.0.1:8765/callback", + state="state-123", + code_challenge="challenge-456", + ) + parsed = urllib.parse.urlparse(url) + assert parsed.scheme == "https" + assert parsed.path == "/oauth/authorize" + params = dict(urllib.parse.parse_qsl(parsed.query)) + assert params == { + "client_id": "kbagent-cli", + "response_type": "code", + "redirect_uri": "http://127.0.0.1:8765/callback", + "state": "state-123", + "code_challenge": "challenge-456", + "code_challenge_method": "S256", + } + # Public client: never a secret; server-default scope: never a scope. + assert "client_secret" not in params + assert "scope" not in params + + +# ── Loopback callback server ──────────────────────────────────────── + + +def _browser_get(url: str) -> int: + """Simulate the browser redirect with stdlib urllib (NOT httpx, so + pytest-httpx interception never swallows it).""" + with urllib.request.urlopen(url, timeout=5) as response: + return response.status + + +class TestCallbackServer: + def test_receives_code(self) -> None: + with OAuthCallbackServer(ports=(0,)) as server: # port 0 = ephemeral + status = _browser_get(f"{server.redirect_uri}?code=abc123&state=st1") + assert status == 200 + assert server.wait_for_code("st1", timeout=5) == "abc123" + + def test_state_mismatch_raises(self) -> None: + with OAuthCallbackServer(ports=(0,)) as server: + _browser_get(f"{server.redirect_uri}?code=abc123&state=WRONG") + with pytest.raises(KeboolaApiError) as exc_info: + server.wait_for_code("st1", timeout=5) + assert exc_info.value.error_code == ErrorCode.OAUTH_ERROR + assert "state mismatch" in exc_info.value.message.lower() + + def test_error_redirect_raises(self) -> None: + with OAuthCallbackServer(ports=(0,)) as server: + _browser_get( + f"{server.redirect_uri}?error=access_denied&error_description=User+denied+consent" + ) + with pytest.raises(KeboolaApiError) as exc_info: + server.wait_for_code("st1", timeout=5) + assert "User denied consent" in exc_info.value.message + + def test_timeout_raises(self) -> None: + with OAuthCallbackServer(ports=(0,)) as server: + with pytest.raises(KeboolaApiError) as exc_info: + server.wait_for_code("st1", timeout=0.1) + assert "Timed out" in exc_info.value.message + + def test_unknown_path_is_404_and_does_not_consume_the_wait(self) -> None: + with OAuthCallbackServer(ports=(0,)) as server: + base = server.redirect_uri.rsplit("/callback", 1)[0] + with pytest.raises(urllib.error.HTTPError) as http_err: + _browser_get(f"{base}/favicon.ico") + assert http_err.value.code == 404 + _browser_get(f"{server.redirect_uri}?code=late&state=st1") + assert server.wait_for_code("st1", timeout=5) == "late" + + def test_port_fallback_when_first_port_taken(self) -> None: + with OAuthCallbackServer(ports=(0,)) as first: + taken_port = first.server_address[1] + with OAuthCallbackServer(ports=(taken_port, 0)) as second: + assert second.server_address[1] != taken_port + + def test_no_free_port_raises(self) -> None: + with OAuthCallbackServer(ports=(0,)) as first: + taken_port = first.server_address[1] + with pytest.raises(KeboolaApiError) as exc_info: + OAuthCallbackServer(ports=(taken_port,)) + assert "callback port" in exc_info.value.message + + +# ── Token endpoint (mocked HTTP) ──────────────────────────────────── + + +def _token_response(access: str = "at-1", refresh: str = "rt-1") -> dict: + return { + "token_type": "Bearer", + "expires_in": 3600, + "access_token": access, + "refresh_token": refresh, + } + + +class TestTokenEndpoint: + def test_exchange_code_success_posts_pkce_form(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + json=_token_response(), + ) + tokens = exchange_code( + STACK_URL, + client_id="kbagent-cli", + code="auth-code-1", + code_verifier="verifier-1", + redirect_uri="http://127.0.0.1:8765/callback", + ) + assert tokens.access_token == "at-1" + assert tokens.refresh_token == "rt-1" + assert tokens.expires_in == 3600 + + form = dict(urllib.parse.parse_qsl(httpx_mock.get_requests()[0].read().decode())) + assert form["grant_type"] == "authorization_code" + assert form["code"] == "auth-code-1" + assert form["code_verifier"] == "verifier-1" + assert form["redirect_uri"] == "http://127.0.0.1:8765/callback" + assert "client_secret" not in form # public client, PKCE only + + def test_exchange_error_payload_raises_oauth_error(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + status_code=400, + json={"error": "invalid_grant", "error_description": "Code expired"}, + ) + with pytest.raises(KeboolaApiError) as exc_info: + exchange_code( + STACK_URL, + client_id="kbagent-cli", + code="stale", + code_verifier="v", + redirect_uri="http://127.0.0.1:8765/callback", + ) + assert exc_info.value.error_code == ErrorCode.OAUTH_ERROR + assert "Code expired" in exc_info.value.message + + def test_missing_refresh_token_raises(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + json={"access_token": "at", "expires_in": 3600}, + ) + with pytest.raises(KeboolaApiError) as exc_info: + refresh_oauth_tokens(STACK_URL, client_id="c", refresh_token="rt") + assert "missing" in exc_info.value.message + + def test_refresh_posts_refresh_grant(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + json=_token_response(access="at-2", refresh="rt-2"), + ) + tokens = refresh_oauth_tokens(STACK_URL, client_id="kbagent-cli", refresh_token="rt-1") + assert tokens.refresh_token == "rt-2" + form = dict(urllib.parse.parse_qsl(httpx_mock.get_requests()[0].read().decode())) + assert form == { + "client_id": "kbagent-cli", + "grant_type": "refresh_token", + "refresh_token": "rt-1", + } + + +class TestMintStorageToken: + def test_success_sends_bearer_and_capability_flags(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/v2/storage/tokens", + method="POST", + json={"token": TEST_SAPI_TOKEN}, + ) + token = mint_storage_token(STACK_URL, access_token="at-1") + assert token == TEST_SAPI_TOKEN + + request = httpx_mock.get_requests()[0] + assert request.headers["Authorization"] == "Bearer at-1" + payload = json.loads(request.read()) + assert payload["canManageBuckets"] is True + assert payload["canManageTokens"] is True + assert payload["expiresIn"] == 7200 + + def test_rejection_raises_oauth_error(self, httpx_mock) -> None: + httpx_mock.add_response( + url=f"{STACK_URL}/v2/storage/tokens", + method="POST", + status_code=401, + json={"error": "Invalid access token"}, + ) + with pytest.raises(KeboolaApiError) as exc_info: + mint_storage_token(STACK_URL, access_token="bad") + assert exc_info.value.error_code == ErrorCode.OAUTH_ERROR + + +# ── Config round-trip ─────────────────────────────────────────────── + + +class TestOAuthCredentialsPersistence: + def test_round_trip_through_config_store(self, config_store: ConfigStore) -> None: + project = ProjectConfig( + stack_url=STACK_URL, + token=TEST_SAPI_TOKEN, + project_name="OAuth Proj", + project_id=901, + oauth=OAuthCredentials( + client_id="kbagent-cli", + refresh_token="rt-persisted", + token_expires_at=1234567890.0, + ), + ) + config_store.add_project("oauth-proj", project) + loaded = config_store.get_project("oauth-proj") + assert loaded is not None + assert loaded.oauth is not None + assert loaded.oauth.refresh_token == "rt-persisted" + assert loaded.oauth.token_expires_at == 1234567890.0 + + def test_classic_project_has_no_oauth_block(self, config_store: ConfigStore) -> None: + config_store.add_project( + "classic", ProjectConfig(stack_url=STACK_URL, token=TEST_SAPI_TOKEN) + ) + loaded = config_store.get_project("classic") + assert loaded is not None + assert loaded.oauth is None + + +# ── Silent refresh chokepoint ─────────────────────────────────────── + + +def _add_oauth_project( + config_store: ConfigStore, + alias: str = "oauth-proj", + expires_in: float = -10.0, # default: already expired +) -> ProjectConfig: + project = ProjectConfig( + stack_url=STACK_URL, + token="901-1-staleMintedToken", + project_name="OAuth Proj", + project_id=901, + oauth=OAuthCredentials( + client_id="kbagent-cli", + refresh_token="rt-old", + token_expires_at=time.time() + expires_in, + ), + ) + config_store.add_project(alias, project) + return project + + +class TestEnsureFreshOAuthToken: + def test_classic_project_passthrough_no_network(self, config_store: ConfigStore) -> None: + project = ProjectConfig(stack_url=STACK_URL, token=TEST_SAPI_TOKEN) + assert ensure_fresh_oauth_token(config_store, "p", project) is project + + def test_fresh_token_passthrough_no_network(self, config_store: ConfigStore) -> None: + project = _add_oauth_project(config_store, expires_in=3600.0) + result = ensure_fresh_oauth_token(config_store, "oauth-proj", project) + assert result.token == project.token + assert result.oauth is not None + assert result.oauth.refresh_token == "rt-old" + + def test_expired_token_refreshes_and_persists_rotation( + self, config_store: ConfigStore, httpx_mock + ) -> None: + _add_oauth_project(config_store) + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + json=_token_response(access="at-new", refresh="rt-new"), + ) + httpx_mock.add_response( + url=f"{STACK_URL}/v2/storage/tokens", + method="POST", + json={"token": TEST_SAPI_TOKEN}, + ) + + stale = config_store.get_project("oauth-proj") + assert stale is not None + fresh = ensure_fresh_oauth_token(config_store, "oauth-proj", stale) + + assert fresh.token == TEST_SAPI_TOKEN + assert fresh.oauth is not None + assert fresh.oauth.refresh_token == "rt-new" # rotation persisted + assert fresh.oauth.token_expires_at is not None + assert fresh.oauth.token_expires_at > time.time() + + # The rotated credentials must be ON DISK, not only in memory. + persisted = config_store.get_project("oauth-proj") + assert persisted is not None + assert persisted.token == TEST_SAPI_TOKEN + assert persisted.oauth is not None + assert persisted.oauth.refresh_token == "rt-new" + + def test_none_expiry_treated_as_expired(self, config_store: ConfigStore, httpx_mock) -> None: + project = ProjectConfig( + stack_url=STACK_URL, + token="901-1-stale", + project_id=901, + oauth=OAuthCredentials(client_id="c", refresh_token="rt-old"), + ) + config_store.add_project("oauth-proj", project) + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", method="POST", json=_token_response() + ) + httpx_mock.add_response( + url=f"{STACK_URL}/v2/storage/tokens", + method="POST", + json={"token": TEST_SAPI_TOKEN}, + ) + fresh = ensure_fresh_oauth_token(config_store, "oauth-proj", project) + assert fresh.token == TEST_SAPI_TOKEN + + def test_refresh_failure_returns_stale_project( + self, config_store: ConfigStore, httpx_mock + ) -> None: + _add_oauth_project(config_store) + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + status_code=400, + json={"error": "invalid_grant", "error_description": "Refresh token revoked"}, + ) + stale = config_store.get_project("oauth-proj") + assert stale is not None + result = ensure_fresh_oauth_token(config_store, "oauth-proj", stale) + # Graceful degradation: stale project returned, no exception -- the + # downstream 401 is reported per-project in multi-project fan-outs. + assert result.token == stale.token + assert result.oauth is not None + assert result.oauth.refresh_token == "rt-old" + + def test_resolve_projects_triggers_refresh(self, config_store: ConfigStore, httpx_mock) -> None: + """BaseService.resolve_projects() is the chokepoint -- an expired + OAuth project resolved through ANY service comes back fresh.""" + _add_oauth_project(config_store) + config_store.add_project( + "classic", ProjectConfig(stack_url=STACK_URL, token=TEST_SAPI_TOKEN) + ) + httpx_mock.add_response( + url=f"{STACK_URL}/oauth/token", + method="POST", + json=_token_response(access="at-new", refresh="rt-new"), + ) + httpx_mock.add_response( + url=f"{STACK_URL}/v2/storage/tokens", + method="POST", + json={"token": TEST_SAPI_TOKEN}, + ) + + service = BaseService(config_store=config_store) + resolved = service.resolve_projects() + assert resolved["oauth-proj"].token == TEST_SAPI_TOKEN + assert resolved["classic"].token == TEST_SAPI_TOKEN # untouched + + +# ── Login service orchestration (mocked protocol) ─────────────────── + + +def _verify_response(project_id: int = 901, name: str = "OAuth Proj") -> TokenVerifyResponse: + return TokenVerifyResponse( + token_id="t-1", + token_description="desc", + project_id=project_id, + project_name=name, + owner_name="Owner", + ) + + +def _make_login_service(config_store: ConfigStore) -> tuple[OAuthLoginService, MagicMock]: + client = MagicMock() + client.verify_token.return_value = _verify_response() + + def client_factory(stack_url: str, token: str): + return client + + return OAuthLoginService(config_store=config_store, client_factory=client_factory), client + + +@pytest.fixture +def patched_protocol(monkeypatch: pytest.MonkeyPatch) -> dict: + """Patch the oauth protocol functions the service imported by name.""" + calls: dict = {} + base = "keboola_agent_cli.services.oauth_login_service" + + class FakeServer: + redirect_uri = "http://127.0.0.1:8765/callback" + + def __enter__(self): + return self + + def __exit__(self, *args): + return None + + def wait_for_code(self, state: str, timeout: float) -> str: + calls["waited_state"] = state + return "auth-code-1" + + def fake_exchange(stack_url, *, client_id, code, code_verifier, redirect_uri): + calls["exchange"] = { + "code": code, + "code_verifier": code_verifier, + "redirect_uri": redirect_uri, + } + from keboola_agent_cli.oauth import OAuthTokens + + return OAuthTokens(access_token="at-1", refresh_token="rt-1", expires_in=3600) + + def fake_mint(stack_url, *, access_token, **kwargs): + calls["minted_with"] = access_token + return TEST_SAPI_TOKEN + + monkeypatch.setattr(base + ".OAuthCallbackServer", lambda ports: FakeServer()) + monkeypatch.setattr(base + ".exchange_code", fake_exchange) + monkeypatch.setattr(base + ".mint_storage_token", fake_mint) + monkeypatch.setattr(base + ".webbrowser", MagicMock()) + return calls + + +class TestOAuthLoginService: + def test_login_persists_project_with_oauth_credentials( + self, config_store: ConfigStore, patched_protocol: dict + ) -> None: + service, client = _make_login_service(config_store) + shown_urls: list[str] = [] + + result = service.login( + "connection.test.keboola.com", # bare host accepted like project add + on_authorize_url=shown_urls.append, + ) + + assert result["alias"] == "oauth-proj" # slugified project name + assert result["project_id"] == 901 + assert result["re_authenticated"] is False + assert result["auth_type"] == "oauth" + assert TEST_SAPI_TOKEN not in json.dumps(result) # masked in output + + assert shown_urls and shown_urls[0].startswith(f"{STACK_URL}/oauth/authorize?") + assert patched_protocol["minted_with"] == "at-1" + client.verify_token.assert_called_once() + + saved = config_store.get_project("oauth-proj") + assert saved is not None + assert saved.token == TEST_SAPI_TOKEN + assert saved.oauth is not None + assert saved.oauth.refresh_token == "rt-1" + + def test_relogin_updates_existing_entry_in_place( + self, config_store: ConfigStore, patched_protocol: dict + ) -> None: + _add_oauth_project(config_store, alias="myproj") + config_store.set_project_branch("myproj", 4242) # must survive re-login + service, _client = _make_login_service(config_store) + + result = service.login(STACK_URL) + + assert result["alias"] == "myproj" + assert result["re_authenticated"] is True + saved = config_store.get_project("myproj") + assert saved is not None + assert saved.oauth is not None + assert saved.oauth.refresh_token == "rt-1" + assert saved.active_branch_id == 4242 + # No duplicate created under the slugified name. + assert config_store.get_project("oauth-proj") is None + + def test_alias_collision_with_different_project_raises( + self, config_store: ConfigStore, patched_protocol: dict + ) -> None: + config_store.add_project( + "oauth-proj", + ProjectConfig(stack_url=STACK_URL, token="901-2-other", project_id=999), + ) + service, _client = _make_login_service(config_store) + from keboola_agent_cli.errors import ConfigError + + with pytest.raises(ConfigError, match="different project"): + service.login(STACK_URL) + + def test_explicit_alias_wins(self, config_store: ConfigStore, patched_protocol: dict) -> None: + service, _client = _make_login_service(config_store) + result = service.login(STACK_URL, alias="prod") + assert result["alias"] == "prod" + assert config_store.get_project("prod") is not None + + +# ── Full protocol round-trip against a fake Connection server ─────── + + +class _FakeConnectionHandler(BaseHTTPRequestHandler): + """Minimal Connection stand-in: /oauth/authorize, /oauth/token, + /v2/storage/tokens, /v2/storage/tokens/verify. Enforces PKCE S256.""" + + server: "_FakeConnectionServer" + + def do_GET(self) -> None: + parsed = urllib.parse.urlparse(self.path) + if parsed.path == "/oauth/authorize": + params = dict(urllib.parse.parse_qsl(parsed.query)) + self.server.challenge = params["code_challenge"] + redirect = f"{params['redirect_uri']}?code=fake-auth-code&state={params['state']}" + self.send_response(302) + self.send_header("Location", redirect) + self.end_headers() + elif parsed.path == "/v2/storage/tokens/verify": + self._json( + 200, + { + "id": "t-1", + "description": "minted", + "owner": {"id": 901, "name": "Fake Project"}, + }, + ) + else: + self._json(404, {"error": "not found"}) + + def do_POST(self) -> None: + length = int(self.headers.get("Content-Length", "0")) + body = self.rfile.read(length).decode() + if self.path == "/oauth/token": + form = dict(urllib.parse.parse_qsl(body)) + if form.get("grant_type") == "refresh_token": + # League-style rotation: only the CURRENT refresh token works, + # and a NEW one is issued (the old one is revoked). + if form.get("refresh_token") != self.server.current_refresh_token: + self._json(400, {"error": "invalid_grant"}) + return + self.server.refresh_count += 1 + self.server.current_refresh_token = ( + f"fake-refresh-token-r{self.server.refresh_count}" + ) + self._json( + 200, + { + "token_type": "Bearer", + "expires_in": 3600, + "access_token": "fake-access-token", + "refresh_token": self.server.current_refresh_token, + }, + ) + return + # Real PKCE enforcement: S256(verifier) must equal the challenge + # captured during /oauth/authorize. + if form.get("code") != "fake-auth-code" or ( + _b64url_sha256(form.get("code_verifier", "")) != self.server.challenge + ): + self._json(400, {"error": "invalid_grant"}) + return + self._json( + 200, + { + "token_type": "Bearer", + "expires_in": 3600, + "access_token": "fake-access-token", + "refresh_token": self.server.current_refresh_token, + }, + ) + elif self.path == "/v2/storage/tokens": + if self.headers.get("Authorization") != "Bearer fake-access-token": + self._json(401, {"error": "bad bearer"}) + return + self._json(200, {"token": TEST_SAPI_TOKEN}) + else: + self._json(404, {"error": "not found"}) + + def _json(self, status: int, payload: dict) -> None: + data = json.dumps(payload).encode() + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + + def log_message(self, format: str, *args: object) -> None: + pass + + +class _FakeConnectionServer(HTTPServer): + challenge: str = "" + + def __init__(self) -> None: + super().__init__(("127.0.0.1", 0), _FakeConnectionHandler) + self.current_refresh_token = "fake-refresh-token" + self.refresh_count = 0 + self._thread = threading.Thread(target=self.serve_forever, daemon=True) + self._thread.start() + + @property + def url(self) -> str: + return f"http://127.0.0.1:{self.server_address[1]}" + + def close(self) -> None: + self.shutdown() + self.server_close() + self._thread.join(timeout=5) + + +class TestFullProtocolRoundTrip: + """Real-HTTP end-to-end: authorize redirect -> loopback callback -> + PKCE-verified code exchange -> Bearer-minted Storage token. + + No pytest-httpx fixture here on purpose -- every hop is a real request. + """ + + def test_oauth_dance_against_fake_server(self) -> None: + fake = _FakeConnectionServer() + try: + pkce = generate_pkce_pair() + state = generate_state() + with OAuthCallbackServer(ports=(0,)) as callback: + authorize_url = build_authorize_url( + fake.url, + client_id="kbagent-cli", + redirect_uri=callback.redirect_uri, + state=state, + code_challenge=pkce.challenge, + ) + # "Browser": follows the 302 from /oauth/authorize to the + # loopback callback automatically. + assert _browser_get(authorize_url) == 200 + code = callback.wait_for_code(state, timeout=10) + redirect_uri = callback.redirect_uri + + tokens = exchange_code( + fake.url, + client_id="kbagent-cli", + code=code, + code_verifier=pkce.verifier, + redirect_uri=redirect_uri, + ) + assert tokens.refresh_token == "fake-refresh-token" + + sapi_token = mint_storage_token(fake.url, access_token=tokens.access_token) + assert sapi_token == TEST_SAPI_TOKEN + finally: + fake.close() + + def test_refresh_rotation_against_fake_server(self) -> None: + """Refresh works only with the CURRENT refresh token (League rotates + and revokes), so a stale token must be rejected.""" + fake = _FakeConnectionServer() + try: + first = refresh_oauth_tokens( + fake.url, client_id="kbagent-cli", refresh_token="fake-refresh-token" + ) + assert first.refresh_token == "fake-refresh-token-r1" + second = refresh_oauth_tokens( + fake.url, client_id="kbagent-cli", refresh_token=first.refresh_token + ) + assert second.refresh_token == "fake-refresh-token-r2" + # The original (revoked) token no longer works. + with pytest.raises(KeboolaApiError) as exc_info: + refresh_oauth_tokens( + fake.url, client_id="kbagent-cli", refresh_token="fake-refresh-token" + ) + assert exc_info.value.error_code == ErrorCode.OAUTH_ERROR + finally: + fake.close() + + def test_wrong_verifier_is_rejected_by_pkce(self) -> None: + fake = _FakeConnectionServer() + try: + pkce = generate_pkce_pair() + state = generate_state() + with OAuthCallbackServer(ports=(0,)) as callback: + authorize_url = build_authorize_url( + fake.url, + client_id="kbagent-cli", + redirect_uri=callback.redirect_uri, + state=state, + code_challenge=pkce.challenge, + ) + _browser_get(authorize_url) + code = callback.wait_for_code(state, timeout=10) + redirect_uri = callback.redirect_uri + + with pytest.raises(KeboolaApiError) as exc_info: + exchange_code( + fake.url, + client_id="kbagent-cli", + code=code, + code_verifier="not-the-right-verifier-at-all-padpadpadpadpad", + redirect_uri=redirect_uri, + ) + assert exc_info.value.error_code == ErrorCode.OAUTH_ERROR + finally: + fake.close() + + +# ── CLI layer ─────────────────────────────────────────────────────── + + +class TestProjectLoginCli: + def _invoke(self, tmp_config_dir: Path, args: list[str], login_result=None, login_error=None): + from typer.testing import CliRunner + + from keboola_agent_cli.cli import app + + runner = CliRunner() + with patch.object(OAuthLoginService, "login") as mock_login: + if login_error is not None: + mock_login.side_effect = login_error + else: + mock_login.return_value = login_result or { + "alias": "oauth-proj", + "project_name": "OAuth Proj", + "project_id": 901, + "stack_url": STACK_URL, + "token": "901-55...XXXX", + "token_expires_at": time.time() + 7200, + "re_authenticated": False, + "auth_type": "oauth", + } + result = runner.invoke( + app, + ["--json", "--config-dir", str(tmp_config_dir), "project", "login", *args], + ) + return result, mock_login + + def test_login_success_json(self, tmp_config_dir: Path) -> None: + result, mock_login = self._invoke(tmp_config_dir, ["--url", STACK_URL]) + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert payload["status"] == "ok" + assert payload["data"]["alias"] == "oauth-proj" + kwargs = mock_login.call_args.kwargs + assert kwargs["open_browser"] is True + assert kwargs["port"] is None + + def test_no_browser_and_port_flags_forwarded(self, tmp_config_dir: Path) -> None: + result, mock_login = self._invoke( + tmp_config_dir, + ["--url", STACK_URL, "--no-browser", "--port", "9000", "--project", "prod"], + ) + assert result.exit_code == 0, result.output + kwargs = mock_login.call_args.kwargs + assert kwargs["open_browser"] is False + assert kwargs["port"] == 9000 + assert kwargs["alias"] == "prod" + + def test_oauth_error_maps_to_exit_3(self, tmp_config_dir: Path) -> None: + error = KeboolaApiError( + "OAuth login failed in the browser: access_denied", + error_code=ErrorCode.OAUTH_ERROR, + ) + result, _mock = self._invoke(tmp_config_dir, ["--url", STACK_URL], login_error=error) + assert result.exit_code == 3 + payload = json.loads(result.output) + assert payload["error"]["code"] == "OAUTH_ERROR" diff --git a/uv.lock b/uv.lock index d9a35754..9e457970 100644 --- a/uv.lock +++ b/uv.lock @@ -496,7 +496,7 @@ wheels = [ [[package]] name = "keboola-agent-cli" -version = "0.53.0" +version = "0.54.0" source = { editable = "." } dependencies = [ { name = "croniter" }, From 6e2195b4cc21b8d016ea3d665b60b0ba74ba4776 Mon Sep 17 00:00:00 2001 From: soustruh Date: Wed, 3 Jun 2026 00:57:19 +0200 Subject: [PATCH 2/2] test: fix two pre-existing ty errors The errors exist on main; CI never sees them because it runs only ruff and pytest, and the pre-commit hook checks staged files only. --- tests/test_config_store.py | 4 +++- tests/test_services.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_config_store.py b/tests/test_config_store.py index 74abb084..05de684d 100644 --- a/tests/test_config_store.py +++ b/tests/test_config_store.py @@ -962,7 +962,9 @@ def test_real_persisted_env_alias_still_mutable( ) # No opt-in -> the persisted entry is the only one; editing must work. store.edit_project("__env__", project_name="Renamed") - assert store.get_project("__env__").project_name == "Renamed" + renamed = store.get_project("__env__") + assert renamed is not None + assert renamed.project_name == "Renamed" def test_default_blanked_when_ephemeral_stripped( self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch diff --git a/tests/test_services.py b/tests/test_services.py index 578c9b80..6542061b 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -67,7 +67,9 @@ def factory(url: str, token: str): # Verification client and the stored/returned URL all use the clean base. assert captured["url"] == "https://connection.keboola.com" assert result["stack_url"] == "https://connection.keboola.com" - assert store.get_project("prod").stack_url == "https://connection.keboola.com" + saved = store.get_project("prod") + assert saved is not None + assert saved.stack_url == "https://connection.keboola.com" def test_add_project_invalid_token(self, tmp_config_dir: Path) -> None: """add_project raises KeboolaApiError when token verification fails."""