diff --git a/src/chat-sidebar.tsx b/src/chat-sidebar.tsx index 1e2b2ff..3ededa1 100644 --- a/src/chat-sidebar.tsx +++ b/src/chat-sidebar.tsx @@ -72,10 +72,10 @@ import type { Contents } from '@jupyterlab/services'; import { extractLLMGeneratedCode, isDarkTheme, - safeAnchorUri, writeTextToClipboard } from './utils'; import { CheckBoxItem } from './components/checkbox'; +import { SafeAnchor } from './components/safe-anchor'; import { mcpServerSettingsToEnabledState } from './components/mcp-util'; import claudeSvgStr from '../style/icons/claude.svg'; import { AskUserQuestion } from './components/ask-user-question'; @@ -836,23 +836,11 @@ function ChatResponse(props: any) { ); case ResponseStreamDataType.Anchor: { - const safeUri = safeAnchorUri(item.content.uri); - if (!safeUri) { - return ( -
- - {item.content.title} - (link blocked) - -
- ); - } return (
- + {item.content.title} - (opens in new tab) - +
); } @@ -4956,48 +4944,28 @@ function GitHubCopilotLoginDialogBodyComponent(props: any) { memory.
- + GitHub Copilot - (opens in new tab) - {' '} + {' '} requires a subscription and it has a free tier. GitHub Copilot is subject to the{' '} - + GitHub Terms for Additional Products and Features - (opens in new tab) - + .

Privacy and terms

By using Notebook Intelligence with GitHub Copilot subscription you agree to{' '} - + GitHub Copilot chat terms - (opens in new tab) - + . Review the terms to understand about usage, limitations and ways to improve GitHub Copilot. Please review{' '} - + Privacy Statement - (opens in new tab) - + .
@@ -5046,13 +5014,9 @@ function GitHubCopilotLoginDialogBodyComponent(props: any) { {' '} and enter at{' '} - + {deviceActivationURL} - {' '} + {' '} to allow access to GitHub Copilot from this app. Activation could take up to a minute after you enter the code.
diff --git a/src/components/markdown-link.tsx b/src/components/markdown-link.tsx new file mode 100644 index 0000000..d9de333 --- /dev/null +++ b/src/components/markdown-link.tsx @@ -0,0 +1,161 @@ +// Copyright (c) Mehmet Bektas + +import React from 'react'; +import { JupyterFrontEnd } from '@jupyterlab/application'; +import { PathExt } from '@jupyterlab/coreutils'; +import { SafeAnchor } from './safe-anchor'; +import { hasDangerousTextCodepoints } from '../utils'; + +// Match an absolute URI by its scheme prefix so a workspace-relative path +// (`README.md`) is distinguished from a protocol-rooted URL (`http://...`). +// Mirrors the SCHEME_RE in utils.ts; kept local because this discriminant +// answers a different question (presence vs. allowlist). +const SCHEME_PREFIX_RE = /^[A-Za-z][A-Za-z0-9+.-]*:/; + +/** + * True when a freshly-joined workspace path is *not* safe to hand to + * `docmanager:open` or expose on a rendered anchor. Rejects: + * + * - leading `..` segments or absolute paths: the join didn't anchor and + * the path escapes the Jupyter root (ContentsManager rejects too, but + * we want to fail closed visually as well so the status bar/title + * never previews a traversal target), + * - any embedded scheme: `PathExt.join('', 'java\tscript:alert(1)')` + * returns the input verbatim, so a path that looks workspace-relative + * pre-join can unmask into a `javascript:` href when `baseDir` is + * empty (any active doc at server root), + * - dangerous codepoints (bidi-override, zero-width, C0/C1/DEL, etc.): + * the WHATWG URL parser strips these from the scheme during + * recognition, and they also visually impersonate the link target on + * hover / in dev-tools logs. + */ +function isUnsafeWorkspacePath(path: string): boolean { + // Empty / cwd-only paths reach here when react-markdown's built-in + // `urlTransform` strips an unsafe scheme (`javascript:`, `data:`, ...) + // to an empty string before our override runs: the result joins to + // either `""` or `"."`, both of which would render as a dead + // `` that 404s on click. Surface them as blocked-link + // spans so the user sees why nothing happened. + if (path === '' || path === '.' || path === './') { + return true; + } + if (path.startsWith('/') || path === '..' || path.startsWith('../')) { + return true; + } + if (SCHEME_PREFIX_RE.test(path)) { + return true; + } + if (hasDangerousTextCodepoints(path)) { + return true; + } + return false; +} + +type MarkdownLinkProps = { + app: JupyterFrontEnd; + // Directory the LLM-emitted relative link should resolve against. The + // active document's directory matches the user's mental model: a + // workspace-relative link like `[file](README.md)` in a chat scoped to + // `notebooks/proj/work.ipynb` lands at `notebooks/proj/README.md`, not + // at the server-root README. Empty string is treated as "server root". + baseDir: string; + href: unknown; + title?: unknown; + children?: React.ReactNode; +}; + +/** + * Render an anchor node coming out of `react-markdown` so chat-sidebar + * links can never replace the JupyterLab shell or pivot through the + * lab origin. + * + * Three branches: + * - Fragment-only (`#section`): inert plain text. A new-tab open would + * navigate to `about:blank#section`, and a same-tab open would scroll + * the wrong document; neither matches what the LLM meant. + * - Workspace-relative (no scheme, no leading `/`, no `//` prefix): + * resolved against the active document's directory, re-validated, + * and routed through JupyterLab's `docmanager:open` command so a + * `.ipynb` opens with the notebook factory and a `.md` opens in the + * editor. The anchor's `href` stays `"#"` because a populated `href` + * bypasses React's onClick on middle/Cmd-click, letting the browser + * navigate `/lab/` with session cookies attached; the hover + * preview moves to `title` so the user still sees the intended + * target. + * - Everything else: handed to `SafeAnchor`, which enforces the + * `safeAnchorUri` scheme allowlist and emits a `_blank` anchor with + * `rel="noopener noreferrer"`. + */ +export function MarkdownLink({ + app, + baseDir, + href, + title, + children +}: MarkdownLinkProps): React.ReactElement { + if (typeof href === 'string') { + if (href.startsWith('#')) { + return {children}; + } + if ( + !SCHEME_PREFIX_RE.test(href) && + !href.startsWith('/') && + !href.startsWith('//') + ) { + // PathExt.join: plain concatenation + normalization. Resolve() + // would fall back to the browser process cwd when `baseDir` is + // relative, which gives nonsense like `/Users/.../notebooks/...`. + const resolvedPath = PathExt.join(baseDir, href); + // Re-validate post-join. Two attack/confusion shapes the pre-check + // alone misses: `[x](java\tscript:alert(1))` survives the scheme + // sniff because `\t` isn't a scheme char, then unmasks once the + // WHATWG parser sees the joined href; `[x](../../../etc/passwd)` + // looks workspace-relative but escapes the workspace root. + if (isUnsafeWorkspacePath(resolvedPath)) { + return ( + + {children} + + ); + } + // href="#" rather than href={resolvedPath}: a modifier-click on a + // populated href bypasses the React onClick, lets the browser + // navigate the chat sidebar to /lab/ in a new tab, and would + // ride along the user's Jupyter session cookies. The hover preview + // moves to `title` so the user still sees the intended target. + const safeTitleFromMd = + typeof title === 'string' && !hasDangerousTextCodepoints(title) + ? title + : undefined; + const hoverTitle = safeTitleFromMd ?? resolvedPath; + const onClick = (e: React.MouseEvent) => { + e.preventDefault(); + // ContentsManager rejects paths outside the Jupyter root with a + // promise rejection. Catch so the failure surfaces in logs instead + // of an unhandled rejection, and the user can see the rendered + // anchor was attempted even when the target doesn't exist. + Promise.resolve( + app.commands.execute('docmanager:open', { path: resolvedPath }) + ).catch(err => { + console.warn( + `NBI: failed to open workspace path "${resolvedPath}":`, + err + ); + }); + }; + return ( + + {children} + + ); + } + } + return ( + + {children} + + ); +} diff --git a/src/components/safe-anchor.tsx b/src/components/safe-anchor.tsx new file mode 100644 index 0000000..0676b72 --- /dev/null +++ b/src/components/safe-anchor.tsx @@ -0,0 +1,60 @@ +// Copyright (c) Mehmet Bektas + +import React from 'react'; +import { hasDangerousTextCodepoints, safeAnchorUri } from '../utils'; + +type SafeAnchorProps = { + href: string | undefined | null; + children: React.ReactNode; + title?: string; + className?: string; +}; + +/** + * The single render path for anchor elements driven by LLM / tool output. + * + * Runs `href` through `safeAnchorUri`, which mirrors the server-side + * `safe_anchor_uri` allowlist (`http` / `https` / `mailto`) and rejects + * dangerous codepoints. On accept it renders a `_blank` anchor with + * `rel="noopener noreferrer"` and an SR-only "(opens in new tab)" suffix; + * on reject it falls through to plain text plus an SR-only "(link + * blocked)" note so screen readers can tell why the link disappeared. + * + * The `title` attribute is scrubbed for the same dangerous codepoints + * the URI check rejects, since react-markdown forwards CommonMark + * `[text](url "title")` titles to the rendered anchor and an LLM can + * smuggle bidi-override or zero-width characters there to visually + * impersonate the link target on hover. + */ +export function SafeAnchor({ + href, + children, + title, + className +}: SafeAnchorProps): React.ReactElement { + const safeUri = safeAnchorUri(href ?? ''); + if (!safeUri) { + return ( + + {children} + (link blocked) + + ); + } + const safeTitle = + typeof title === 'string' && !hasDangerousTextCodepoints(title) + ? title + : undefined; + return ( + + {children} + (opens in new tab) + + ); +} diff --git a/src/markdown-renderer.tsx b/src/markdown-renderer.tsx index d764699..e39f64a 100644 --- a/src/markdown-renderer.tsx +++ b/src/markdown-renderer.tsx @@ -12,6 +12,8 @@ import { } from 'react-syntax-highlighter/dist/cjs/styles/prism'; import { VscNewFile, VscInsert, VscCopy, VscNotebook, VscAdd } from './icons'; import { JupyterFrontEnd } from '@jupyterlab/application'; +import { PathExt } from '@jupyterlab/coreutils'; +import { MarkdownLink } from './components/markdown-link'; import { isDarkTheme, writeTextToClipboard } from './utils'; import { IActiveDocumentInfo } from './tokens'; @@ -29,11 +31,39 @@ export function MarkdownRenderer({ const app = getApp(); const activeDocumentInfo = getActiveDocumentInfo(); const isNotebook = activeDocumentInfo.filename.endsWith('.ipynb'); + // Resolve workspace-relative LLM links against the active document's + // directory so `[file](README.md)` from a chat scoped to + // `notebooks/proj/work.ipynb` lands at `notebooks/proj/README.md` (the + // user's mental model) rather than the server-root README. + const linkBaseDir = activeDocumentInfo.filePath + ? PathExt.dirname(activeDocumentInfo.filePath) + : ''; return ( + // No `rehype-raw` plugin: raw HTML in chat markdown (e.g. an LLM + // emitting ``) renders as literal text, not + // a DOM anchor, so the only anchor sink is the CommonMark/GFM `a` + // node handled by `SafeAnchor` below. Any future change that enables + // raw HTML needs to add a rehype-sanitize pass alongside. ` autolinks, `[text](url)`, and + // reference-style links all normalize to the same `a` node. + // `MarkdownLink` routes fragment-only and workspace-relative + // hrefs through Lab's docmanager so an LLM-emitted link can't + // replace the JupyterLab shell, and hands everything else to + // SafeAnchor for the `_blank` + scheme-allowlist treatment. + a: ({ href, title, children }: any) => ( + + {children} + + ), code({ node, inline, className, children, getApp, ...props }: any) { const match = /language-(\w+)/.exec(className || ''); const codeString = String(children).replace(/\n$/, ''); diff --git a/src/utils.ts b/src/utils.ts index 935586e..2641729 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -339,6 +339,28 @@ function isDisallowedUriCodepoint(code: number): boolean { return false; } +/** + * True when `s` contains any codepoint in the same set `safeAnchorUri` + * rejects (C0/DEL/C1, NEL/NBSP/LS/PS/BOM, ZWSP, bidi-override controls). + * Mirrors the Python `has_dangerous_text_codepoints`. Used to scrub the + * `title` attribute on rendered anchors so an LLM-emitted hover tooltip + * can't visually impersonate the link via bidi-reorder or zero-width + * tricks. + */ +export function hasDangerousTextCodepoints( + s: string | undefined | null +): boolean { + if (typeof s !== 'string') { + return false; + } + for (let i = 0; i < s.length; i++) { + if (isDisallowedUriCodepoint(s.charCodeAt(i))) { + return true; + } + } + return false; +} + /** * Return `uri` if its scheme is in the chat-anchor allowlist, else null. * Mirrors the server-side `safe_anchor_uri` check so that anchor parts diff --git a/tests/ts/markdown-renderer.test.tsx b/tests/ts/markdown-renderer.test.tsx new file mode 100644 index 0000000..17b4e22 --- /dev/null +++ b/tests/ts/markdown-renderer.test.tsx @@ -0,0 +1,247 @@ +// Copyright (c) Mehmet Bektas + +import React from 'react'; +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; + +import { MarkdownLink } from '../../src/components/markdown-link'; + +function fakeApp(execute: jest.Mock) { + return { + commands: { execute } + } as any; +} + +describe('MarkdownLink (issue #344)', () => { + it('renders an http link via SafeAnchor with target=_blank', () => { + const execute = jest.fn(); + render( + + the docs + + ); + const anchor = screen.getByRole('link', { name: /the docs/ }); + expect(anchor).toHaveAttribute('href', 'https://example.com/docs'); + expect(anchor).toHaveAttribute('target', '_blank'); + expect(anchor).toHaveAttribute('rel', 'noopener noreferrer'); + expect(execute).not.toHaveBeenCalled(); + }); + + it('blocks javascript: hrefs', () => { + const execute = jest.fn(); + render( + + click me + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(screen.getByText(/link blocked/)).toHaveClass('nbi-sr-only'); + }); + + it('routes workspace-relative paths through docmanager:open', () => { + const execute = jest.fn().mockResolvedValue(undefined); + render( + + the README + + ); + const anchor = screen.getByRole('link', { name: /the README/ }); + expect(anchor).not.toHaveAttribute('target', '_blank'); + fireEvent.click(anchor); + expect(execute).toHaveBeenCalledWith('docmanager:open', { + path: 'README.md' + }); + }); + + it('resolves workspace-relative paths against baseDir and previews on hover', () => { + const execute = jest.fn().mockResolvedValue(undefined); + render( + + the README + + ); + const anchor = screen.getByRole('link', { name: /the README/ }); + // href is intentionally `#` so a modifier-click can't bypass the + // onClick and let the browser navigate /lab/ in a new tab. + // The resolved path is exposed via `title` for hover preview. + expect(anchor).toHaveAttribute('href', '#'); + expect(anchor).toHaveAttribute('title', 'notebooks/proj/README.md'); + fireEvent.click(anchor); + expect(execute).toHaveBeenCalledWith('docmanager:open', { + path: 'notebooks/proj/README.md' + }); + }); + + it('resolves nested workspace-relative paths', () => { + const execute = jest.fn().mockResolvedValue(undefined); + render( + + intro notebook + + ); + fireEvent.click(screen.getByRole('link', { name: /intro notebook/ })); + expect(execute).toHaveBeenCalledWith('docmanager:open', { + path: 'notebooks/proj/data/intro.ipynb' + }); + }); + + it('swallows docmanager:open rejections without surfacing as unhandled', async () => { + const execute = jest.fn().mockRejectedValue(new Error('not found')); + const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); + try { + render( + + missing + + ); + fireEvent.click(screen.getByRole('link')); + await waitFor(() => expect(warn).toHaveBeenCalled()); + } finally { + warn.mockRestore(); + } + }); + + it('renders fragment-only links as plain text', () => { + const execute = jest.fn(); + render( + + section + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(screen.getByText('section')).toBeInTheDocument(); + }); + + it('treats scheme-relative URLs as external (SafeAnchor handles the block)', () => { + const execute = jest.fn(); + render( + + click + + ); + expect(screen.queryByRole('link')).toBeNull(); + }); + + it('falls back to the resolved path when the LLM-supplied title carries bidi-override codepoints', () => { + const execute = jest.fn(); + render( + + the README + + ); + // The dangerous title is dropped; hover preview defaults to the + // (already-validated) resolved path so the user still sees the + // intended target. + expect(screen.getByRole('link')).toHaveAttribute('title', 'README.md'); + }); + + it('blocks workspace-relative paths that resolve outside the workspace', () => { + const execute = jest.fn(); + render( + + click + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(screen.getByText(/link blocked/)).toHaveClass('nbi-sr-only'); + expect(execute).not.toHaveBeenCalled(); + }); + + it('blocks absolute paths', () => { + const execute = jest.fn(); + render( + + click + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(execute).not.toHaveBeenCalled(); + }); + + it('blocks scheme-unmask via C0 controls in href when baseDir is empty', () => { + // PathExt.join('', 'java\tscript:alert(1)') returns the input + // unchanged; the WHATWG URL parser strips the tab during scheme + // recognition and ends up interpreting the result as javascript:. + const execute = jest.fn(); + render( + + click + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(execute).not.toHaveBeenCalled(); + }); + + it('blocks a resolved path that carries dangerous codepoints', () => { + const execute = jest.fn(); + render( + + the README + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(execute).not.toHaveBeenCalled(); + }); + + it('blocks empty href (react-markdown strips javascript: to empty)', () => { + // react-markdown's built-in urlTransform replaces unsafe schemes with + // an empty string before our override sees them; without this case + // the workspace-relative branch would render an inert `` + // that confuses the user. + const execute = jest.fn(); + render( + + click + + ); + expect(screen.queryByRole('link')).toBeNull(); + expect(screen.getByText(/link blocked/)).toHaveClass('nbi-sr-only'); + expect(execute).not.toHaveBeenCalled(); + }); + + it('passes through a clean workspace-relative title', () => { + const execute = jest.fn(); + render( + + the README + + ); + expect(screen.getByRole('link')).toHaveAttribute( + 'title', + 'See the project README' + ); + }); +}); diff --git a/tests/ts/safe-anchor.test.tsx b/tests/ts/safe-anchor.test.tsx new file mode 100644 index 0000000..93355a5 --- /dev/null +++ b/tests/ts/safe-anchor.test.tsx @@ -0,0 +1,96 @@ +// Copyright (c) Mehmet Bektas + +import React from 'react'; +import { render, screen } from '@testing-library/react'; + +import { SafeAnchor } from '../../src/components/safe-anchor'; + +describe('SafeAnchor', () => { + it('renders an anchor with target=_blank and rel=noopener noreferrer for https hrefs', () => { + render(click); + const anchor = screen.getByRole('link', { name: /click/ }); + expect(anchor).toHaveAttribute('href', 'https://example.com/page'); + expect(anchor).toHaveAttribute('target', '_blank'); + expect(anchor).toHaveAttribute('rel', 'noopener noreferrer'); + }); + + it('appends an "opens in new tab" SR-only suffix so SR users are warned', () => { + render(docs); + expect(screen.getByText(/opens in new tab/)).toHaveClass('nbi-sr-only'); + }); + + it.each([ + ['javascript', 'javascript:alert(1)'], + ['data', 'data:text/html,'], + ['vbscript', 'vbscript:msgbox(1)'], + ['blob', 'blob:https://example.com/abc'], + ['file', 'file:///etc/passwd'], + ['scheme with C0 unmask', 'java\tscript:alert(1)'] + ])('blocks %s schemes by falling back to a non-anchor span', (_, href) => { + render(click); + expect(screen.queryByRole('link')).toBeNull(); + expect(screen.getByText(/link blocked/)).toHaveClass('nbi-sr-only'); + }); + + it.each([ + ['empty', ''], + ['whitespace-only', ' '], + ['null', null], + ['undefined', undefined], + ['no scheme', 'just/some/path'], + ['scheme-relative', '//example.com/path'] + ])('blocks %s hrefs', (_, href) => { + render(click); + expect(screen.queryByRole('link')).toBeNull(); + }); + + it('renders children verbatim on both success and block branches', () => { + const { rerender } = render( + + Important + + ); + expect(screen.getByText('Important').tagName).toBe('STRONG'); + rerender( + + Important + + ); + expect(screen.getByText('Important').tagName).toBe('STRONG'); + }); + + it('forwards a clean title attribute', () => { + render( + + click + + ); + expect(screen.getByRole('link')).toHaveAttribute('title', 'hover text'); + }); + + it('drops a title containing bidi-override codepoints', () => { + // U+202E RIGHT-TO-LEFT OVERRIDE can visually impersonate the link. + render( + + click + + ); + expect(screen.getByRole('link')).not.toHaveAttribute('title'); + }); + + it('accepts mailto: as a safe scheme', () => { + render(email); + expect(screen.getByRole('link')).toHaveAttribute( + 'href', + 'mailto:foo@example.com' + ); + }); + + it('trims surrounding whitespace from an accepted href', () => { + render(click); + expect(screen.getByRole('link')).toHaveAttribute( + 'href', + 'https://example.com' + ); + }); +}); diff --git a/tests/ts/utils.test.ts b/tests/ts/utils.test.ts index 276b510..85f4bec 100644 --- a/tests/ts/utils.test.ts +++ b/tests/ts/utils.test.ts @@ -13,6 +13,7 @@ import { cellOutputAsText, applyCodeToSelectionInEditor, buildResumeCommand, + hasDangerousTextCodepoints, safeAnchorUri, shellSingleQuote, writeTextToClipboard @@ -594,3 +595,32 @@ describe('safeAnchorUri', () => { expect(safeAnchorUri(uri)).toBe(uri); }); }); + +describe('hasDangerousTextCodepoints', () => { + it('returns false for ordinary text', () => { + expect(hasDangerousTextCodepoints('hello world')).toBe(false); + expect(hasDangerousTextCodepoints('a.b/c?d=1')).toBe(false); + }); + + it('returns false for empty / non-string input', () => { + expect(hasDangerousTextCodepoints('')).toBe(false); + expect(hasDangerousTextCodepoints(null)).toBe(false); + expect(hasDangerousTextCodepoints(undefined)).toBe(false); + }); + + it.each([ + ['C0 control (tab)', '\t'], + ['C0 control (LF)', '\n'], + ['DEL', '\x7f'], + ['C1 control', '\x85'], + ['NBSP', '\u00A0'], + ['line separator', '\u2028'], + ['paragraph separator', '\u2029'], + ['BOM', '\uFEFF'], + ['zero-width space', '\u200B'], + ['RTL override', '\u202E'], + ['LRI', '\u2066'] + ])('returns true for %s', (_, s) => { + expect(hasDangerousTextCodepoints(`safe${s}text`)).toBe(true); + }); +});