From d5d6168922165d674c1947b08907ab67110556c6 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Sat, 27 Jun 2026 08:39:25 -0400 Subject: [PATCH] SMOODEV: Sanitized markdown rendering + smooth streaming reveal in chat-widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assistant replies and citation snippets rendered as raw plain text, so **bold**, numbered lists, and [links](url) showed literally. Render them through a tiny safe-by-default markdown renderer (markdown.ts) that escapes all text (raw ![pixel](http://evil/x.png)', +].join('\n'); + +const MOCK_WS_MD = ` +(() => { + class MockWS { + constructor(url) { + this.url = url; + this.readyState = 0; + this._listeners = { open: [], message: [], close: [], error: [] }; + setTimeout(() => { this.readyState = 1; this._emit('open', {}); }, 5); + } + addEventListener(type, fn) { (this._listeners[type] ||= []).push(fn); } + removeEventListener(type, fn) { + const a = this._listeners[type]; if (!a) return; + const i = a.indexOf(fn); if (i >= 0) a.splice(i, 1); + } + _emit(type, ev) { for (const fn of (this._listeners[type] || []).slice()) fn(ev); } + _msg(obj) { this._emit('message', { data: JSON.stringify(obj) }); } + send(raw) { + let frame; try { frame = JSON.parse(raw); } catch { return; } + const requestId = frame.requestId; + if (frame.action === 'create_conversation_session') { + this._msg({ type: 'immediate_response', requestId, status: 202, + data: { sessionId: 'sess-mock-md', agentId: frame.agentId } }); + return; + } + if (frame.action === 'send_message') { + this._msg({ type: 'immediate_response', requestId, status: 202, data: {} }); + const reply = ${JSON.stringify(MARKDOWN_REPLY)}; + // Stream the reply in a couple of chunks, then finalize. + setTimeout(() => { + this._msg({ type: 'stream_token', requestId, token: reply.slice(0, 20) }); + setTimeout(() => { + this._msg({ type: 'stream_token', requestId, token: reply.slice(20) }); + setTimeout(() => { + this._msg({ type: 'eventual_response', requestId, status: 200, data: { data: { + response: { responseParts: [reply] }, + citations: [{ id: 'c1', title: 'Our Work', score: 0.9, url: 'https://smoo.ai/work', + snippet: '[![Logo](https://x/logo.png)](https://x/) # Our Work We build **great** things for clients across many industries.' }], + } } }); + }, 5); + }, 5); + }, 5); + return; + } + } + close() { this.readyState = 3; this._emit('close', { code: 1000, reason: '' }); } + } + MockWS.CONNECTING = 0; MockWS.OPEN = 1; MockWS.CLOSING = 2; MockWS.CLOSED = 3; + window.WebSocket = MockWS; +})(); +`; + +test('GLOBAL bundle renders sanitized markdown for the final assistant turn (real UI)', async ({ page }) => { + const pageErrors: string[] = []; + page.on('pageerror', (e) => pageErrors.push(`${e.name}: ${e.message}`)); + page.on('console', (m) => { + if (m.type() === 'error') pageErrors.push(`console.error: ${m.text()}`); + }); + + await page.addInitScript(MOCK_WS_MD); + await page.goto('about:blank'); + await page.addScriptTag({ content: GLOBAL_BUNDLE }); + + const result = await page.evaluate( + async ({ endpoint, agentId }) => { + const out: Record = {}; + // @ts-expect-error injected global + const el = window.SmoothAgentChat.mount({ endpoint, agentId, greeting: '' }); + const root = (el as any).shadowRoot as ShadowRoot; + const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + + (root.querySelector('.launcher') as HTMLElement | null)?.click(); + for (let i = 0; i < 100; i++) { + const status = (root.querySelector('.status-text') as HTMLElement | null)?.textContent ?? ''; + if (/ready|online/i.test(status)) break; + await sleep(50); + } + const input = root.querySelector('textarea') as HTMLTextAreaElement; + input.value = 'hi'; + input.dispatchEvent(new Event('input', { bubbles: true })); + (root.querySelector('.send') as HTMLElement | null)?.click(); + + // Wait until the final (non-streaming, markdown) bubble has settled. + for (let i = 0; i < 120; i++) { + if (root.querySelector('.bubble.assistant.md strong')) break; + await sleep(50); + } + + const md = root.querySelector('.bubble.assistant.md'); + out.hasStrong = !!md?.querySelector('strong'); + out.hasListItems = (md?.querySelectorAll('li').length ?? 0) >= 2; + const a = md?.querySelector('a') as HTMLAnchorElement | null; + out.linkHref = a?.getAttribute('href') ?? null; + out.linkRel = a?.getAttribute('rel') ?? null; + out.linkTarget = a?.getAttribute('target') ?? null; + + // XSS: no live '); + assertInert(html); + expect(html).toContain('<script>'); + }); + + it('escapes a raw payload', () => { + const html = renderMarkdown(''); + assertInert(html); + expect(html).toContain('<img'); + }); + + it('renders a markdown image as alt text only — never an ', () => { + const html = renderMarkdown('![y](http://evil/x.png)'); + assertInert(html); + expect(html).not.toContain('evil'); + expect(html).toContain('y'); + }); + + it('strips a javascript: link to plain text (no anchor, no scheme)', () => { + // eslint-disable-next-line no-script-url + const html = renderMarkdown('[x](javascript:alert(1))'); + assertInert(html); + expect(html).not.toContain(' payload', () => { + const html = renderMarkdown('click'); + assertInert(html); + expect(html).toContain('<a'); + }); + + it('rejects a data: URL link', () => { + const html = renderMarkdown('[x](data:text/html,)'); + assertInert(html); + expect(html).not.toContain(' { + assertInert(renderMarkdown('[](https://ok.com)')); + assertInert(renderMarkdown('``')); + }); + + it('escapes HTML inside list items and headings', () => { + assertInert(renderMarkdown('- ')); + assertInert(renderMarkdown('# ')); + }); +}); + +describe('cleanCitationSnippet', () => { + it('strips a leading logo link + image and trailing boilerplate', () => { + const raw = '[![Logo](https://x/logo.png)](https://x/) # Our Work We build great things for clients.'; + const out = cleanCitationSnippet(raw); + expect(out).not.toContain('Logo'); + expect(out).not.toContain('logo.png'); + expect(out).not.toMatch(/^#/); + expect(out.startsWith('Our Work') || out.startsWith('We build')).toBe(true); + }); + + it('strips a bare leading image', () => { + expect(cleanCitationSnippet('![hero](https://x/h.png) Welcome to the site')).toBe('Welcome to the site'); + }); + + it('collapses whitespace', () => { + expect(cleanCitationSnippet('a b\n\n c')).toBe('a b c'); + }); + + it('truncates long text at a word boundary with an ellipsis', () => { + const long = 'word '.repeat(120).trim(); + const out = cleanCitationSnippet(long); + expect(out.length).toBeLessThanOrEqual(262); + expect(out.endsWith('…')).toBe(true); + expect(out).not.toMatch(/\Sword…$/); // ended on a boundary, not mid-word + }); + + it('leaves an already-clean short snippet intact', () => { + expect(cleanCitationSnippet('A clean excerpt.')).toBe('A clean excerpt.'); + }); +}); diff --git a/src/markdown.ts b/src/markdown.ts new file mode 100644 index 0000000..22b923b --- /dev/null +++ b/src/markdown.ts @@ -0,0 +1,368 @@ +/** + * A tiny, safe-by-default Markdown → HTML renderer for the chat widget. + * + * ## Why a hand-rolled renderer (and not markdown-it / snarkdown)? + * + * The widget renders **untrusted** text in two places: the assistant's reply + * (LLM output, which can echo attacker-supplied content) and citation snippets + * (raw scraped page chunks). Today both are written via `textContent`, so + * `**bold**`, numbered lists, and `[links](url)` show up literally. We want + * them rendered — without re-opening the XSS hole that `textContent` was + * guarding against. + * + * markdown-it with `html:false` is safe-by-default but ships ~30 kB min into + * what is an embeddable **global** bundle, where every kilobyte is on the host + * page's critical path. snarkdown is ~1 kB but emits raw HTML, so it would + * require bolting on a separate sanitizer. Instead, this renderer is + * **safe-by-construction**: + * + * 1. It is a *tokenizer*, not an HTML passthrough. It only ever emits a + * fixed allowlist of tags (`p`, `br`, `strong`, `em`, `ul`/`ol`/`li`, + * `code`/`pre`, `a`, `blockquote`). There is no code path that copies a + * tag out of the input — a literal `