diff --git a/PRIVACY.md b/PRIVACY.md index 468fbc0..9768d63 100644 --- a/PRIVACY.md +++ b/PRIVACY.md @@ -58,7 +58,7 @@ For air-gapped or egress-restricted environments, see [`docs/admin-guide.md`](do > Treat `~/.jupyter/nbi/config.json` and `~/.jupyter/nbi/user-data.json` as secrets. They contain your API keys and (encrypted) GitHub token. Do not commit them to git, share them, or sync them across users. If a key leaks, rotate it at the provider immediately. -The encrypted GitHub token uses a default password (`nbi-access-token-password`) unless you set `NBI_GH_ACCESS_TOKEN_PASSWORD`. The default is **shared across installs** and provides obfuscation, not real protection. Set a custom password before enabling "remember login" on any shared or multi-tenant system. +The encrypted GitHub token uses a default password (`nbi-access-token-password`) unless you set `NBI_GH_ACCESS_TOKEN_PASSWORD`. The default is **shared across installs** and provides obfuscation, not real protection. Set a custom password before enabling "remember login" on any shared or multi-tenant system. NBI logs a per-process WARNING when the default is in use and escalates the message when `~/.jupyter/nbi/` is group/other-accessible. Operators on shared filesystems can set `NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS=1` to refuse the write entirely until a per-user password is configured, with `NBI_ALLOW_DEFAULT_TOKEN_PASSWORD=1` available as an explicit per-pod opt-out during a rollout. ## Telemetry diff --git a/docs/admin-guide.md b/docs/admin-guide.md index 774ca69..75f4e42 100644 --- a/docs/admin-guide.md +++ b/docs/admin-guide.md @@ -66,7 +66,7 @@ For Kubeflow or KubeSpawner: mount the user's home directory on a PVC and ensure If users share a home directory across nodes (NFS-backed shared HPC, classroom labs): - **Race conditions in `~/.jupyter/nbi/`.** Concurrent writes from two login nodes can corrupt `config.json`. NBI does not file-lock. Pin each user to one node, or use a per-node config prefix. -- **`NBI_GH_ACCESS_TOKEN_PASSWORD` default is unsafe.** The default password (`nbi-access-token-password`) is shared across installs. On a multi-tenant cluster, anyone with read access to another user's `~/.jupyter/nbi/user-data.json` can decrypt their Copilot token. Set a per-user password (e.g., derived from the Hub user secret), or disable "remember login" entirely (see [Restricting features](#restricting-features-for-managed-deployments)). +- **`NBI_GH_ACCESS_TOKEN_PASSWORD` default is unsafe on shared hosts.** The default password (`nbi-access-token-password`) is shared across installs. On a multi-tenant cluster, anyone with read access to another user's `~/.jupyter/nbi/user-data.json` can decrypt their Copilot token. NBI now logs a per-process WARNING on the first read or write of the stored token when the default password is in use, escalated when `~/.jupyter/nbi/` is readable by group or other. Set `NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS=1` to upgrade the warning to a hard refusal of the write; admins who knowingly accept the risk can opt out per pod with `NBI_ALLOW_DEFAULT_TOKEN_PASSWORD=1`. The hardening is opt-in to preserve backwards compatibility for single-user deployments where the directory mode is incidental. Set a per-user password (e.g., derived from the Hub user secret), or disable "remember login" entirely (see [Restricting features](#restricting-features-for-managed-deployments)). - **Skill collisions.** Two users sharing `~/.claude/skills/` will see each other's skills. Make sure each user has a unique home. --- @@ -108,6 +108,8 @@ The full surface, in one table. | `tour_config_path` | str | `""` | traitlet | Filesystem path to a YAML/JSON file with admin overrides for the first-run sidebar tour copy. See [`docs/admin-tour-config.md`](admin-tour-config.md). | | `NBI_TOUR_CONFIG_PATH` | str | unset | env (overrides traitlet) | Same as above; env takes precedence. | | `NBI_GH_ACCESS_TOKEN_PASSWORD` | str | `nbi-access-token-password` | env | Password used to encrypt the stored Copilot token in `user-data.json`. **Change in multi-tenant deployments.** | +| `NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS` | bool | unset | env | When set, refuse to write `user-data.json` if the default `NBI_GH_ACCESS_TOKEN_PASSWORD` is still in use AND `~/.jupyter/nbi/` is readable by group or other. Opt-in to preserve backwards compatibility on single-user deployments where the directory mode is incidental. | +| `NBI_ALLOW_DEFAULT_TOKEN_PASSWORD` | bool | unset | env | Per-pod opt-out that disengages the refuse-on-shared-fs guard above. Admins who knowingly accept the risk (e.g., during a transition before rolling out a per-user password) set this so writes continue. | | `NBI_RULES_AUTO_RELOAD` | bool | `true` | env | When `false`, ruleset edits require a JupyterLab restart to take effect. | | `NBI_CLAUDE_CLI_PATH` | str | unset | env | Absolute path to the Claude Code CLI binary. When unset, NBI looks up `claude` on `PATH`. | | `NBI_OPENCODE_CLI_PATH` | str | unset | env | Absolute path to the opencode CLI. When unset, NBI looks up `opencode` on `PATH`. Gates the opencode launcher tile. | diff --git a/notebook_intelligence/github_copilot.py b/notebook_intelligence/github_copilot.py index 608fb7f..c2b61c5 100644 --- a/notebook_intelligence/github_copilot.py +++ b/notebook_intelligence/github_copilot.py @@ -5,7 +5,7 @@ import base64 from dataclasses import dataclass from enum import Enum -import os, json, time, requests, threading +import os, json, stat, time, requests, threading from typing import Any import uuid import secrets @@ -104,9 +104,98 @@ def get_login_status(): deprecated_user_data_file = os.path.join(os.path.expanduser('~'), ".jupyter", "nbi-data.json") user_data_file = os.path.join(os.path.expanduser('~'), ".jupyter", "nbi", "user-data.json") -access_token_password = os.getenv("NBI_GH_ACCESS_TOKEN_PASSWORD", "nbi-access-token-password") +DEFAULT_ACCESS_TOKEN_PASSWORD = "nbi-access-token-password" +access_token_password = os.getenv("NBI_GH_ACCESS_TOKEN_PASSWORD", DEFAULT_ACCESS_TOKEN_PASSWORD) + + +def _is_default_token_password() -> bool: + return access_token_password == DEFAULT_ACCESS_TOKEN_PASSWORD + + +# Shared vocab matches _resolve_bool_with_env in extension.py so admins +# don't have to remember a different set of "yes" tokens for this env. +_BOOL_TRUE_VALUES = frozenset({"true", "1", "yes", "on"}) + + +def _env_truthy(name: str) -> bool: + return os.environ.get(name, "").strip().lower() in _BOOL_TRUE_VALUES + + +def _user_data_dir_is_shared() -> bool: + """Return True when the directory that holds user-data.json is + readable or writable by group or other. + + POSIX-only; on Windows the helper returns False (ACL semantics + don't map onto S_IRGRP/S_IROTH and the deployment threat model is + different). A True return means at least one read or write bit + beyond owner is set, so another tenant on a shared filesystem + could read the file once written, or plant a token by replacing + the file via a writable parent dir. + """ + if os.name != "posix": + return False + target_dir = os.path.dirname(user_data_file) + try: + st = os.stat(target_dir) + except OSError: + return False + risky_bits = stat.S_IRGRP | stat.S_IROTH | stat.S_IWGRP | stat.S_IWOTH + return bool(st.st_mode & risky_bits) + + +_default_password_warned = False + + +def _warn_default_password_once() -> None: + """Emit the default-password WARNING at most once per process. + + The audit recommends a startup warning on every server boot until + the password is overridden, but a hot endpoint that calls the + read/write helpers on every request would spam the log. Once per + process is the right cadence; operators see it on boot and on + log rotation. + """ + global _default_password_warned + if _default_password_warned or not _is_default_token_password(): + return + _default_password_warned = True + if _user_data_dir_is_shared(): + log.warning( + "Storing the GitHub Copilot token under the default " + "NBI_GH_ACCESS_TOKEN_PASSWORD on a directory that is " + "readable by group or other (%s). Set a per-user " + "NBI_GH_ACCESS_TOKEN_PASSWORD, or set " + "NBI_ALLOW_DEFAULT_TOKEN_PASSWORD=1 to acknowledge the " + "risk explicitly.", + os.path.dirname(user_data_file), + ) + else: + log.warning( + "Storing the GitHub Copilot token under the default " + "NBI_GH_ACCESS_TOKEN_PASSWORD. Set a per-user password " + "in multi-tenant deployments." + ) + + +def _refuse_write_on_shared_default() -> bool: + """Return True when the write should be refused. + + Refusal is opt-in (``NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS=1``) + because flipping the default to refuse would break existing + single-user deployments where the directory's mode is incidental. + Once a deployment opts in, the refusal triggers only when both the + password is the documented default AND the target directory is + group/other accessible; an admin who knowingly relaxed perms can + still opt out with ``NBI_ALLOW_DEFAULT_TOKEN_PASSWORD=1``. + """ + if _env_truthy("NBI_ALLOW_DEFAULT_TOKEN_PASSWORD"): + return False + if not _env_truthy("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS"): + return False + return _is_default_token_password() and _user_data_dir_is_shared() def read_stored_github_access_token() -> str: + _warn_default_password_once() try: if os.path.exists(user_data_file): with open(user_data_file, 'r') as file: @@ -141,6 +230,18 @@ def _save_user_data(user_data: dict) -> None: def write_github_access_token(access_token: str) -> bool: + _warn_default_password_once() + if _refuse_write_on_shared_default(): + log.error( + "Refusing to write %s: the default NBI_GH_ACCESS_TOKEN_PASSWORD " + "is in use on a directory that is readable by group or other, " + "and NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS is set. " + "Set NBI_GH_ACCESS_TOKEN_PASSWORD to a per-user secret, " + "tighten the directory mode, or set " + "NBI_ALLOW_DEFAULT_TOKEN_PASSWORD=1 to opt out.", + user_data_file, + ) + return False try: encrypted_access_token = encrypt_with_password(access_token_password, access_token.encode()) base64_bytes = base64.b64encode(encrypted_access_token) diff --git a/tests/test_default_password_refusal.py b/tests/test_default_password_refusal.py new file mode 100644 index 0000000..d5cd020 --- /dev/null +++ b/tests/test_default_password_refusal.py @@ -0,0 +1,205 @@ +# Copyright (c) Mehmet Bektas + +"""Tests for the default-token-password guardrails on multi-tenant deployments. + +The Copilot token's encryption password is configurable via +``NBI_GH_ACCESS_TOKEN_PASSWORD`` but defaults to a public literal. On a +shared-home cluster, an admin who leaves the default in place exposes +every user's token to anyone with a read primitive against +``user-data.json``. These tests pin two behaviors: + +1. A one-shot WARNING fires whenever the default password is in use, + escalated in tone when the directory holding ``user-data.json`` is + readable beyond the owner. +2. When ``NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS`` is set, the + write is refused entirely under those conditions, unless the admin + opts back in via ``NBI_ALLOW_DEFAULT_TOKEN_PASSWORD``. +""" + +from __future__ import annotations + +import json +import logging +import os +import stat +import sys + +import pytest + +POSIX_ONLY = pytest.mark.skipif( + sys.platform == "win32", + reason="POSIX mode bits are meaningless under Windows ACLs", +) + + +@pytest.fixture +def gh(monkeypatch, tmp_path): + """Reload github_copilot pointed at a tmp_path user-data dir. + + The module captures ``access_token_password`` and ``user_data_file`` + at import time, so the fixture re-imports with a controlled + environment + HOME to give each test a clean slate. + """ + import notebook_intelligence.github_copilot as github_copilot + + monkeypatch.delenv("NBI_GH_ACCESS_TOKEN_PASSWORD", raising=False) + monkeypatch.delenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", raising=False) + monkeypatch.delenv("NBI_ALLOW_DEFAULT_TOKEN_PASSWORD", raising=False) + target_dir = tmp_path / ".jupyter" / "nbi" + target_dir.mkdir(parents=True) + monkeypatch.setattr( + github_copilot, "user_data_file", str(target_dir / "user-data.json") + ) + # Reset the per-process warn-once flag so each test gets a fresh log. + monkeypatch.setattr(github_copilot, "_default_password_warned", False) + return github_copilot + + +class TestDefaultPasswordWarning: + def test_warns_when_default_password_in_use(self, gh, caplog): + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + gh._warn_default_password_once() + assert any( + "default NBI_GH_ACCESS_TOKEN_PASSWORD" in r.message for r in caplog.records + ) + + def test_warns_only_once_per_process(self, gh, caplog): + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + gh._warn_default_password_once() + gh._warn_default_password_once() + warnings = [ + r for r in caplog.records if "NBI_GH_ACCESS_TOKEN_PASSWORD" in r.message + ] + assert len(warnings) == 1 + + def test_no_warning_when_password_overridden(self, gh, monkeypatch, caplog): + monkeypatch.setattr(gh, "access_token_password", "per-user-secret") + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + gh._warn_default_password_once() + assert all( + "NBI_GH_ACCESS_TOKEN_PASSWORD" not in r.message for r in caplog.records + ) + + @POSIX_ONLY + def test_shared_dir_escalates_warning_message(self, gh, monkeypatch, caplog): + # Loosen the dir mode and confirm the warning text changes to + # call out the shared-host posture AND names the actual + # directory so an operator triaging logs can find the file. + target_dir = os.path.dirname(gh.user_data_file) + os.chmod(target_dir, 0o755) + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + gh._warn_default_password_once() + msg = " ".join(r.message for r in caplog.records) + assert "readable by group or other" in msg + assert target_dir in msg + + def test_read_path_also_emits_warning_and_returns_plaintext( + self, gh, caplog, tmp_path, monkeypatch + ): + # The read-side warn covers an exposed-state audit: an admin + # who rotated to a per-user password but left an old + # user-data.json on disk still hears the warning until the + # process restarts AND the read path is exercised. + # + # The contract is "warn AND return the plaintext"; without the + # second half a regression that aborts the read on warn would + # silently log the user out. Write a token, then assert both. + import base64 + import json + + from notebook_intelligence.util import encrypt_with_password + + ciphertext = encrypt_with_password(gh.access_token_password, b"ghu_token") + b64 = base64.b64encode(ciphertext).decode("utf-8") + with open(gh.user_data_file, "w") as f: + json.dump({"github_access_token": b64}, f) + + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + plaintext = gh.read_stored_github_access_token() + assert plaintext == "ghu_token" + assert any( + "NBI_GH_ACCESS_TOKEN_PASSWORD" in r.message for r in caplog.records + ) + + @POSIX_ONLY + def test_warning_text_omits_dir_path_in_non_shared_branch(self, gh, caplog): + # The escalated warning includes the directory; the non-shared + # branch should NOT, otherwise log analysis can't distinguish + # the two postures by message content alone. + target_dir = os.path.dirname(gh.user_data_file) + os.chmod(target_dir, 0o700) + with caplog.at_level(logging.WARNING, logger="notebook_intelligence.github_copilot"): + gh._warn_default_password_once() + msg = " ".join(r.message for r in caplog.records) + assert "NBI_GH_ACCESS_TOKEN_PASSWORD" in msg + assert target_dir not in msg + + +@POSIX_ONLY +class TestRefuseWriteOnSharedFs: + def _make_dir_shared(self, gh): + target_dir = os.path.dirname(gh.user_data_file) + os.chmod(target_dir, 0o755) + return target_dir + + def test_refuse_disabled_by_default(self, gh): + # Default behavior is unchanged: even with default password and + # a permissive directory mode, the helper returns False so the + # write proceeds. + self._make_dir_shared(gh) + assert gh._refuse_write_on_shared_default() is False + + def test_refuses_when_opted_in_and_dir_is_shared(self, gh, monkeypatch): + self._make_dir_shared(gh) + monkeypatch.setenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", "1") + assert gh._refuse_write_on_shared_default() is True + + def test_does_not_refuse_when_password_overridden(self, gh, monkeypatch): + self._make_dir_shared(gh) + monkeypatch.setenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", "1") + monkeypatch.setattr(gh, "access_token_password", "per-user-secret") + assert gh._refuse_write_on_shared_default() is False + + def test_does_not_refuse_when_dir_is_owner_only(self, gh, monkeypatch): + # 0o700 is the owner-only mode; the refusal is gated on + # group/other readability so a single-user pod with a tightly- + # scoped home is unaffected. + target_dir = os.path.dirname(gh.user_data_file) + os.chmod(target_dir, 0o700) + monkeypatch.setenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", "1") + assert gh._refuse_write_on_shared_default() is False + + def test_admin_can_opt_back_in(self, gh, monkeypatch): + # Admin acknowledges the risk: refusal disengages even with the + # refuse env set, so a deployment that knowingly relaxed perms + # can keep working through a transition. + self._make_dir_shared(gh) + monkeypatch.setenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", "1") + monkeypatch.setenv("NBI_ALLOW_DEFAULT_TOKEN_PASSWORD", "1") + assert gh._refuse_write_on_shared_default() is False + + +@POSIX_ONLY +class TestWriteRefusalEndToEnd: + def test_write_returns_false_when_refused(self, gh, monkeypatch, caplog): + target_dir = os.path.dirname(gh.user_data_file) + os.chmod(target_dir, 0o755) + monkeypatch.setenv("NBI_REFUSE_DEFAULT_TOKEN_PASSWORD_ON_SHARED_FS", "1") + with caplog.at_level(logging.ERROR, logger="notebook_intelligence.github_copilot"): + ok = gh.write_github_access_token("ghu_test_token") + assert ok is False + # Nothing was written; the file should not exist. + assert not os.path.exists(gh.user_data_file) + # The error message must name the file so an operator can find it. + assert any( + "Refusing to write" in r.message and gh.user_data_file in r.message + for r in caplog.records + ) + + def test_write_succeeds_when_refusal_not_engaged(self, gh, monkeypatch): + # Sanity: with no refusal opt-in, the write goes through as + # before. Pinning this prevents a regression where the guard + # accidentally short-circuits the happy path. + ok = gh.write_github_access_token("ghu_test_token") + assert ok is True + assert os.path.exists(gh.user_data_file)