diff --git a/roar/application/publish/service.py b/roar/application/publish/service.py index 05ed555..8c3a09f 100644 --- a/roar/application/publish/service.py +++ b/roar/application/publish/service.py @@ -192,16 +192,20 @@ def build_register_preview_runtime( from ...publish_auth import PublishAuthContext from .lineage import LineageCollector - glaas_client = GlaasClient( - "", - start_dir=start_dir, - publish_auth=PublishAuthContext( + publish_auth = None + if not allow_public_without_binding: + publish_auth = PublishAuthContext( access_token=None, scope_request=None, auth_provider=None, user_sub=None, db_user_id=None, - ), + ) + + glaas_client = GlaasClient( + None, + start_dir=start_dir, + publish_auth=publish_auth, allow_public_without_binding=allow_public_without_binding, ) return _RegisterPreviewRuntime( diff --git a/roar/publish_auth.py b/roar/publish_auth.py index 0a25192..ec158f5 100644 --- a/roar/publish_auth.py +++ b/roar/publish_auth.py @@ -1,7 +1,11 @@ from __future__ import annotations +import json +import urllib.error +import urllib.request from dataclasses import dataclass from pathlib import Path +from typing import Any try: import tomllib @@ -18,6 +22,7 @@ class PublishAuthContext: auth_provider: str | None = None user_sub: str | None = None db_user_id: str | None = None + creator_identity: str | None = None def load_publish_auth_context( @@ -36,7 +41,7 @@ def load_publish_auth_context( user_sub = auth_state.user.sub or None db_user_id = auth_state.user.db_user_id - binding = _load_repo_binding(start_dir) + binding = None if allow_public_without_binding else _load_repo_binding(start_dir) if binding and not access_token: raise RuntimeError( "Repo is linked to GLaaS but no global auth state is available. Run `roar login`." @@ -46,6 +51,12 @@ def load_publish_auth_context( "No GLaaS repo binding found for this publish. Link the repo to a TReqs owner/project first, or rerun with --public to publish publicly." ) + creator_identity = None + if not access_token and allow_public_without_binding: + creator_identity, resolved_db_user_id = _load_authenticated_creator_identity() + if resolved_db_user_id and not db_user_id: + db_user_id = resolved_db_user_id + scope_request = None if binding: scope_request = { @@ -63,10 +74,15 @@ def load_publish_auth_context( auth_provider=auth_provider, user_sub=user_sub, db_user_id=db_user_id, + creator_identity=creator_identity, ) def resolve_publish_creator_identity(context: PublishAuthContext) -> str: + explicit_identity = _optional_string(context.creator_identity) + if explicit_identity is not None: + return explicit_identity + provider = (context.auth_provider or "").strip().lower() if provider.startswith("treqs") and context.user_sub: return f"treqs:user:{context.user_sub}" @@ -75,6 +91,51 @@ def resolve_publish_creator_identity(context: PublishAuthContext) -> str: return "anonymous" +def _load_authenticated_creator_identity() -> tuple[str | None, str | None]: + from .integrations.glaas import get_glaas_url, make_auth_header + + base_url = _optional_string(get_glaas_url()) + if base_url is None: + return None, None + + path = "/api/v1/auth/me" + auth_header = make_auth_header("GET", path, None) + if not auth_header: + return None, None + + request = urllib.request.Request(f"{base_url.rstrip('/')}{path}") + request.add_header("Authorization", auth_header) + request.add_header("Accept", "application/json") + + try: + with urllib.request.urlopen(request, timeout=10) as response: + payload = json.loads(response.read().decode("utf-8") or "{}") + except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError): + return None, None + + data = payload.get("data") if isinstance(payload, dict) else None + if not isinstance(data, dict): + return None, None + + creator_identity = _optional_string( + data.get("creatorIdentity") if isinstance(data, dict) else None + ) or _optional_string(data.get("creator_identity") if isinstance(data, dict) else None) + + user = data.get("user") + db_user_id = _optional_string(user.get("id")) if isinstance(user, dict) else None + if creator_identity is None and db_user_id is not None: + creator_identity = f"glaas:user:{db_user_id}" + + return creator_identity, db_user_id + + +def _optional_string(value: Any) -> str | None: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + def _load_repo_binding(start_dir: str | Path | None = None) -> dict[str, str] | None: config_path = _find_repo_config(start_dir) if config_path is None or not config_path.exists(): diff --git a/tests/integration/fake_glaas.py b/tests/integration/fake_glaas.py index b38dd5e..e43e56f 100644 --- a/tests/integration/fake_glaas.py +++ b/tests/integration/fake_glaas.py @@ -56,6 +56,23 @@ def _write_json(self, status_code: int, payload: dict[str, Any]) -> None: self.end_headers() self.wfile.write(body) + def _resolve_authenticated_user(self, authorization: str | None) -> dict[str, str] | None: + if authorization and authorization.startswith("Bearer "): + return { + "id": "user-123", + "email": "trevor@example.com", + "username": "trevor", + "auth_mode": "bearer", + } + if authorization and authorization.startswith("Signature "): + return { + "id": "ssh-user-123", + "email": "ssh-user@example.com", + "username": "ssh-user", + "auth_mode": "ssh", + } + return None + def do_GET(self) -> None: authorization = self.headers.get("Authorization") if self.path == "/api/v1/auth/access-context": @@ -98,6 +115,30 @@ def do_GET(self) -> None: ) return + if self.path == "/api/v1/auth/me": + self.server.auth_headers.append({"path": self.path, "authorization": authorization}) + user = self._resolve_authenticated_user(authorization) + if user is None: + self._write_json(401, {"error": "Missing or invalid auth"}) + return + self._write_json( + 200, + { + "success": True, + "data": { + "user": { + "id": user["id"], + "email": user["email"], + "githubUsername": user["username"], + "treqsUserId": None, + }, + "creatorIdentity": f"glaas:user:{user['id']}", + }, + "meta": {"authMode": user["auth_mode"]}, + }, + ) + return + if self.path == "/api/v1/health": self.server.health_checks += 1 self._write_json(200, {"success": True, "status": "healthy"}) @@ -129,12 +170,19 @@ def do_POST(self) -> None: payload = self._read_json() authorization = self.headers.get("Authorization") self.server.auth_headers.append({"path": self.path, "authorization": authorization}) - if not authorization or not authorization.startswith("Bearer "): - self._write_json(401, {"error": "Missing or invalid bearer auth"}) + authenticated_user = self._resolve_authenticated_user(authorization) + if authenticated_user is None: + self._write_json(401, {"error": "Missing or invalid auth"}) return if self.path == "/api/v1/sessions": - self.server.session_registrations.append(payload) + self.server.session_registrations.append( + { + **payload, + "_authenticated_user_id": authenticated_user["id"], + "_auth_mode": authenticated_user["auth_mode"], + } + ) session_hash = str(payload.get("hash", "")) self._write_json( 200, diff --git a/tests/integration/test_public_publish_intent_cli.py b/tests/integration/test_public_publish_intent_cli.py index 3b82c58..f8d0585 100644 --- a/tests/integration/test_public_publish_intent_cli.py +++ b/tests/integration/test_public_publish_intent_cli.py @@ -3,6 +3,8 @@ from __future__ import annotations import json +import re +import shutil import subprocess from pathlib import Path @@ -19,6 +21,20 @@ def fake_glaas_publish_server() -> FakeGlaasServer: yield server +@pytest.fixture +def ssh_keypair(tmp_path: Path) -> Path: + if shutil.which("ssh-keygen") is None: + pytest.skip("ssh-keygen is required for SSH public publish tests") + + key_path = tmp_path / "id_ed25519" + subprocess.run( + ["ssh-keygen", "-q", "-t", "ed25519", "-N", "", "-f", str(key_path), "-C", "roar-test"], + check=True, + capture_output=True, + ) + return key_path + + def _configure_unbound_repo(repo: Path, roar_cli, fake_glaas_url: str) -> dict[str, str]: subprocess.run( ["git", "remote", "add", "origin", "https://github.com/test/repo.git"], @@ -55,6 +71,41 @@ def _configure_unbound_repo(repo: Path, roar_cli, fake_glaas_url: str) -> dict[s return env +def _configure_public_repo( + repo: Path, roar_cli, fake_glaas_url: str, *, bind_repo: bool +) -> dict[str, str]: + subprocess.run( + ["git", "remote", "add", "origin", "https://github.com/test/repo.git"], + cwd=repo, + capture_output=True, + check=True, + ) + home_dir = repo / ".home" + home_dir.mkdir(exist_ok=True) + env = { + "HOME": str(home_dir), + "XDG_CONFIG_HOME": str(repo / ".xdg"), + "GLAAS_API_URL": fake_glaas_url, + "ROAR_ENABLE_EXPERIMENTAL_ACCOUNT_COMMANDS": "1", + } + roar_cli("config", "set", "glaas.url", fake_glaas_url, env_overrides=env) + roar_cli("config", "set", "glaas.web_url", fake_glaas_url, env_overrides=env) + if bind_repo: + config_path = repo / ".roar" / "config.toml" + with config_path.open("a", encoding="utf-8") as handle: + handle.write("\n[treqs]\n") + handle.write('owner_id = "owner-test"\n') + handle.write('owner_type = "organization"\n') + handle.write('project_id = "proj-test"\n') + return env + + +def _parse_session_hash(output: str) -> str: + match = re.search(r"/dag/([0-9a-f]{64})", output) + assert match is not None, f"Missing session URL in output: {output}" + return match.group(1) + + def _create_register_fixture( repo: Path, roar_cli, git_commit, python_exe: str, env: dict[str, str] ) -> None: @@ -177,3 +228,90 @@ def test_put_public_succeeds_without_repo_binding_when_public_flag_is_set( assert result.returncode == 0 assert len(fake_glaas_publish_server.session_registrations) == 1 assert "scope_request" not in fake_glaas_publish_server.session_registrations[0] + + +def test_register_public_with_valid_ssh_uses_authenticated_creator_identity_for_hash_and_registration( + temp_git_repo: Path, + roar_cli, + git_commit, + python_exe: str, + fake_glaas_publish_server: FakeGlaasServer, + ssh_keypair: Path, +) -> None: + env = _configure_public_repo( + temp_git_repo, + roar_cli, + fake_glaas_publish_server.base_url, + bind_repo=False, + ) + _create_register_fixture(temp_git_repo, roar_cli, git_commit, python_exe, env) + + anonymous_preview = roar_cli( + "register", + "report.txt", + "--dry-run", + "--yes", + "--public", + env_overrides=env, + ) + anonymous_hash = _parse_session_hash(anonymous_preview.stdout) + + ssh_env = {**env, "ROAR_SSH_KEY": str(ssh_keypair)} + ssh_preview = roar_cli( + "register", + "report.txt", + "--dry-run", + "--yes", + "--public", + env_overrides=ssh_env, + ) + ssh_hash = _parse_session_hash(ssh_preview.stdout) + + assert ssh_hash != anonymous_hash + + result = roar_cli("register", "report.txt", "--yes", "--public", env_overrides=ssh_env) + + assert result.returncode == 0 + assert len(fake_glaas_publish_server.session_registrations) == 1 + registration = fake_glaas_publish_server.session_registrations[0] + assert registration["hash"] == ssh_hash + assert registration["_authenticated_user_id"] == "ssh-user-123" + assert registration["_auth_mode"] == "ssh" + assert "scope_request" not in registration + assert any( + entry["path"] == "/api/v1/auth/me" + and str(entry.get("authorization") or "").startswith("Signature ") + for entry in fake_glaas_publish_server.auth_headers + ) + + +def test_register_public_with_valid_ssh_ignores_existing_repo_binding( + temp_git_repo: Path, + roar_cli, + git_commit, + python_exe: str, + fake_glaas_publish_server: FakeGlaasServer, + ssh_keypair: Path, +) -> None: + env = _configure_public_repo( + temp_git_repo, + roar_cli, + fake_glaas_publish_server.base_url, + bind_repo=True, + ) + _create_register_fixture(temp_git_repo, roar_cli, git_commit, python_exe, env) + + result = roar_cli( + "register", + "report.txt", + "--yes", + "--public", + env_overrides={**env, "ROAR_SSH_KEY": str(ssh_keypair)}, + ) + + assert result.returncode == 0 + assert len(fake_glaas_publish_server.session_registrations) == 1 + registration = fake_glaas_publish_server.session_registrations[0] + assert registration["_authenticated_user_id"] == "ssh-user-123" + assert registration["_auth_mode"] == "ssh" + assert "scope_request" not in registration diff --git a/tests/live_glaas/test_register_live.py b/tests/live_glaas/test_register_live.py index 80007a6..17263dd 100644 --- a/tests/live_glaas/test_register_live.py +++ b/tests/live_glaas/test_register_live.py @@ -14,14 +14,18 @@ import json import os +import re import subprocess import sys +import urllib.error import urllib.request from collections.abc import Callable from pathlib import Path import pytest +from roar.integrations.glaas import make_auth_header + @pytest.fixture def glaas_url(): @@ -72,6 +76,30 @@ def python_exe() -> str: return sys.executable +def _parse_session_hash(output: str) -> str: + match = re.search(r"/dag/([0-9a-f]{64})", output) + assert match is not None, f"Missing session URL in output: {output}" + return match.group(1) + + +def _request_live_json(glaas_url: str, path: str) -> dict: + auth_header = make_auth_header("GET", path, None) + if not auth_header: + pytest.skip("SSH auth is not configured for live GLaaS validation") + + request = urllib.request.Request(f"{glaas_url.rstrip('/')}{path}") + request.add_header("Authorization", auth_header) + request.add_header("Accept", "application/json") + + try: + with urllib.request.urlopen(request, timeout=10) as response: + return json.loads(response.read().decode("utf-8") or "{}") + except urllib.error.HTTPError as exc: + if exc.code in {401, 403}: + pytest.skip("SSH auth is not valid for the configured live GLaaS server") + raise + + @pytest.fixture def sample_scripts(temp_git_repo: Path, git_commit: Callable) -> dict[str, Path]: """Create sample Python scripts for testing.""" @@ -328,3 +356,37 @@ def test_register_dry_run( output = result.stdout.lower() # Should indicate it's a dry run assert "dry" in output or "would" in output or "preview" in output + + def test_register_public_with_valid_ssh_attributes_session_to_authenticated_user( + self, + glaas_configured, + glaas_available, + glaas_url, + roar_cli, + git_commit, + python_exe, + sample_scripts, + sample_data, + ): + """Public register with valid SSH auth should preserve authenticated attribution.""" + if not glaas_available: + pytest.skip("GLaaS server not available") + + auth_test = roar_cli("auth", "test", check=False) + if auth_test.returncode != 0: + pytest.skip("SSH auth is not configured for the live GLaaS server") + + me_payload = _request_live_json(glaas_url, "/api/v1/auth/me") + expected_user_id = me_payload["data"]["user"]["id"] + + run_result = roar_cli("run", python_exe, "preprocess.py", "input.csv", "processed.csv") + assert run_result.returncode == 0 + git_commit("After preprocess") + + register_result = roar_cli("register", "processed.csv", "--yes", "--public") + assert register_result.returncode == 0 + + session_hash = _parse_session_hash(register_result.stdout) + public_session = _request_live_json(glaas_url, f"/api/v1/public/sessions/{session_hash}") + + assert public_session["data"]["createdBy"] == expected_user_id diff --git a/tests/unit/test_publish_auth_context.py b/tests/unit/test_publish_auth_context.py new file mode 100644 index 0000000..1ed3098 --- /dev/null +++ b/tests/unit/test_publish_auth_context.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +from roar.publish_auth import load_publish_auth_context + + +def test_public_publish_ignores_repo_binding_and_uses_authenticated_creator_identity( + tmp_path: Path, +) -> None: + config_dir = tmp_path / ".roar" + config_dir.mkdir(parents=True) + (config_dir / "config.toml").write_text( + '[treqs]\nowner_id = "owner-123"\nowner_type = "organization"\nproject_id = "proj-456"\n', + encoding="utf-8", + ) + + with ( + patch("roar.publish_auth.load_auth_state", return_value=None), + patch( + "roar.publish_auth._load_authenticated_creator_identity", + return_value=("glaas:user:ssh-user-123", "ssh-user-123"), + ), + ): + context = load_publish_auth_context( + start_dir=tmp_path, + allow_public_without_binding=True, + ) + + assert context.scope_request is None + assert context.creator_identity == "glaas:user:ssh-user-123" + assert context.db_user_id == "ssh-user-123" + + +def test_private_publish_still_requires_repo_binding_or_public_flag(tmp_path: Path) -> None: + with patch("roar.publish_auth.load_auth_state", return_value=None): + try: + load_publish_auth_context(start_dir=tmp_path, allow_public_without_binding=False) + except RuntimeError as exc: + assert "--public" in str(exc) + else: # pragma: no cover - defensive guard + raise AssertionError("expected missing binding to raise") diff --git a/tests/unit/test_publish_creator_identity.py b/tests/unit/test_publish_creator_identity.py index cd4461d..05cbb93 100644 --- a/tests/unit/test_publish_creator_identity.py +++ b/tests/unit/test_publish_creator_identity.py @@ -31,6 +31,19 @@ def test_glaas_user_id_is_used_when_no_treqs_subject_is_available() -> None: assert resolve_publish_creator_identity(context) == "glaas:user:glaas-user-123" +def test_explicit_creator_identity_override_is_preferred() -> None: + context = PublishAuthContext( + access_token=None, + scope_request=None, + auth_provider=None, + user_sub=None, + db_user_id="glaas-user-123", + creator_identity="glaas:user:ssh-user-456", + ) + + assert resolve_publish_creator_identity(context) == "glaas:user:ssh-user-456" + + def test_missing_authenticated_identity_resolves_to_anonymous() -> None: context = PublishAuthContext( access_token=None,