Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .changeset/sanitized-markdown-and-smooth-streaming.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
'@smooai/chat-widget': minor
---

Render sanitized Markdown in assistant replies and citation snippets, and smooth out the streaming reveal.

**Sanitized Markdown rendering.** Assistant responses and citation snippets previously showed Markdown literally (`**bold**`, numbered lists, `[links](url)` rendered as raw text). They now render to formatted HTML through a tiny, safe-by-default renderer (`markdown.ts`) that:

- escapes **all** text — raw `<script>`, `<img onerror=…>`, `<iframe>` etc. render as inert text, never markup;
- **drops images entirely** (a scraped tracking pixel can't load) — `![alt](src)` becomes its alt text;
- allows **only `http(s)` links** (`javascript:`, `data:`, and relative URLs fall back to plain text) with `target="_blank"` + `rel="noopener noreferrer nofollow"`;
- emits only an allowlisted tag set (`p`, `br`, `strong`, `em`, `ul`/`ol`/`li`, `code`/`pre`, `a`, `blockquote`) and **downgrades headings** to bold lines so they fit a chat bubble.

User bubbles stay plain text. Mid-stream text also stays plain, because partial Markdown renders poorly; only the final assistant turn is rendered as Markdown. Citation snippets are also cleaned first — leading page boilerplate (logo image/link, nav, whitespace) is trimmed and the excerpt is truncated to ~260 chars at a word boundary.

**Smooth streaming reveal.** The assistant bubble no longer jumps in jerky, uneven chunks as `stream_token` bursts arrive. Incoming token text is buffered and revealed via a `requestAnimationFrame` typewriter at an adaptive rate (chars-per-frame scales with the pending backlog, so it never falls behind the network); only the single streaming bubble is updated per frame (no full list rebuild), and the final turn snaps to the full Markdown render. Respects `prefers-reduced-motion` (snaps instantly) and keeps auto-scroll without fighting a visitor who has scrolled up.
153 changes: 153 additions & 0 deletions e2e/repro-stream-mock.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,156 @@ test('GLOBAL bundle streams a grounded turn end-to-end (real UI)', async ({ page
expect(result.error, `controller error: ${result.error}`).toBeUndefined();
expect(result.text ?? '').toContain('streamed reply');
});

// Fixture reply for the sanitized-markdown test. It mixes legitimate Markdown
// (bold, a link, a two-item bullet list) with hostile input (a `<script>` tag,
// an `<img onerror>` tag and a Markdown image). The test below expects the
// former to become real elements and the latter to leave no live
// `<script>`/`<img>` nodes in the widget's shadow tree.
const MARKDOWN_REPLY = (() => {
  const formattedIntro = 'Here is **bold** text and a [link](https://smoo.ai/docs).';
  const bulletList = ['- first item', '- second item'];
  const hostilePayload =
    'And an inert payload: <script>alert(1)</script> <img src=x onerror=alert(1)> ![pixel](http://evil/x.png)';
  // An empty entry yields a blank line once everything is joined with '\n'.
  const blankLine = '';
  return [formattedIntro, blankLine, ...bulletList, blankLine, hostilePayload].join('\n');
})();

// Source text of a script that replaces `window.WebSocket` with an in-page
// mock (`MockWS`). It is a string because the test below hands it to
// `page.addInitScript`; everything between the backticks is therefore runtime
// content, not TypeScript that is compiled with this spec.
//
// Behavior of the mock, as written in the string:
//   - the constructor fires an `open` event 5 ms after construction;
//   - `create_conversation_session` is answered with an `immediate_response`
//     (status 202) carrying sessionId 'sess-mock-md' and the echoed agentId;
//   - `send_message` is answered with an `immediate_response` (status 202),
//     then — through three nested 5 ms timers — a `stream_token` with the first
//     20 characters of MARKDOWN_REPLY, a `stream_token` with the remainder, and
//     an `eventual_response` (status 200) whose `responseParts` is the full
//     reply plus one citation whose snippet starts with a logo image-link and
//     a heading and contains bold text;
//   - frames that fail `JSON.parse` are ignored.
// MARKDOWN_REPLY is embedded with `JSON.stringify` so it becomes a valid
// string literal inside the generated script.
const MOCK_WS_MD = `
(() => {
class MockWS {
constructor(url) {
this.url = url;
this.readyState = 0;
this._listeners = { open: [], message: [], close: [], error: [] };
setTimeout(() => { this.readyState = 1; this._emit('open', {}); }, 5);
}
addEventListener(type, fn) { (this._listeners[type] ||= []).push(fn); }
removeEventListener(type, fn) {
const a = this._listeners[type]; if (!a) return;
const i = a.indexOf(fn); if (i >= 0) a.splice(i, 1);
}
_emit(type, ev) { for (const fn of (this._listeners[type] || []).slice()) fn(ev); }
_msg(obj) { this._emit('message', { data: JSON.stringify(obj) }); }
send(raw) {
let frame; try { frame = JSON.parse(raw); } catch { return; }
const requestId = frame.requestId;
if (frame.action === 'create_conversation_session') {
this._msg({ type: 'immediate_response', requestId, status: 202,
data: { sessionId: 'sess-mock-md', agentId: frame.agentId } });
return;
}
if (frame.action === 'send_message') {
this._msg({ type: 'immediate_response', requestId, status: 202, data: {} });
const reply = ${JSON.stringify(MARKDOWN_REPLY)};
// Stream the reply in a couple of chunks, then finalize.
setTimeout(() => {
this._msg({ type: 'stream_token', requestId, token: reply.slice(0, 20) });
setTimeout(() => {
this._msg({ type: 'stream_token', requestId, token: reply.slice(20) });
setTimeout(() => {
this._msg({ type: 'eventual_response', requestId, status: 200, data: { data: {
response: { responseParts: [reply] },
citations: [{ id: 'c1', title: 'Our Work', score: 0.9, url: 'https://smoo.ai/work',
snippet: '[![Logo](https://x/logo.png)](https://x/) # Our Work We build **great** things for clients across many industries.' }],
} } });
}, 5);
}, 5);
}, 5);
return;
}
}
close() { this.readyState = 3; this._emit('close', { code: 1000, reason: '' }); }
}
MockWS.CONNECTING = 0; MockWS.OPEN = 1; MockWS.CLOSING = 2; MockWS.CLOSED = 3;
window.WebSocket = MockWS;
})();
`;

/**
 * End-to-end check of the final assistant turn against the mocked WebSocket:
 * mounts the widget from the global bundle, sends one message, waits for the
 * Markdown bubble, then asserts that (a) Markdown became real elements,
 * (b) no `<script>`/`<img>`/`<iframe>` nodes or `on*` attributes exist in the
 * shadow tree, and (c) the citation snippet has no image, no "Logo" text and
 * contains a `<strong>`.
 */
test('GLOBAL bundle renders sanitized markdown for the final assistant turn (real UI)', async ({ page }) => {
  // Uncaught page exceptions and console.error output are collected here and
  // asserted to be empty once the scenario has run.
  const pageErrors: string[] = [];
  page.on('pageerror', (err) => {
    pageErrors.push(`${err.name}: ${err.message}`);
  });
  page.on('console', (msg) => {
    if (msg.type() !== 'error') return;
    pageErrors.push(`console.error: ${msg.text()}`);
  });

  // Register the mock WebSocket script, open a blank page, then load the bundle.
  await page.addInitScript(MOCK_WS_MD);
  await page.goto('about:blank');
  await page.addScriptTag({ content: GLOBAL_BUNDLE });

  const result = await page.evaluate(
    async ({ endpoint, agentId }) => {
      // Everything in this callback runs in the page, so helpers are declared
      // locally rather than captured from the spec's scope.
      const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
      // Re-checks `isDone` every 50 ms for at most `attempts` tries. It gives up
      // silently on timeout; the assertions after `evaluate` report the failure.
      const pollUntil = async (isDone: () => boolean, attempts: number) => {
        for (let attempt = 0; attempt < attempts; attempt++) {
          if (isDone()) return;
          await pause(50);
        }
      };

      // @ts-expect-error injected global
      const widget = window.SmoothAgentChat.mount({ endpoint, agentId, greeting: '' });
      const shadow = (widget as any).shadowRoot as ShadowRoot;

      // Open the chat panel and wait for the status line to report a connection.
      shadow.querySelector<HTMLElement>('.launcher')?.click();
      await pollUntil(() => {
        const statusText = shadow.querySelector<HTMLElement>('.status-text')?.textContent ?? '';
        return /ready|online/i.test(statusText);
      }, 100);

      // Type a message and submit it.
      const composer = shadow.querySelector('textarea') as HTMLTextAreaElement;
      composer.value = 'hi';
      composer.dispatchEvent(new Event('input', { bubbles: true }));
      shadow.querySelector<HTMLElement>('.send')?.click();

      // Wait until the final (non-streaming, markdown) bubble has settled.
      await pollUntil(() => shadow.querySelector('.bubble.assistant.md strong') !== null, 120);

      const bubble = shadow.querySelector('.bubble.assistant.md');
      const anchor = bubble?.querySelector('a');
      const snippet = shadow.querySelector('.src-snippet');

      // Count attributes whose name starts with "on" across the whole shadow tree.
      const handlerName = /^on/i;
      let handlerAttrs = 0;
      shadow.querySelectorAll('*').forEach((node) => {
        for (const attr of Array.from(node.attributes)) {
          if (handlerName.test(attr.name)) handlerAttrs += 1;
        }
      });

      const summary: Record<string, unknown> = {
        // Markdown → elements.
        hasStrong: Boolean(bubble?.querySelector('strong')),
        hasListItems: (bubble?.querySelectorAll('li').length ?? 0) >= 2,
        linkHref: anchor?.getAttribute('href') ?? null,
        linkRel: anchor?.getAttribute('rel') ?? null,
        linkTarget: anchor?.getAttribute('target') ?? null,
        // XSS: no live <script>/<img>/<iframe> anywhere in the shadow tree.
        scriptCount: shadow.querySelectorAll('script').length,
        imgCount: shadow.querySelectorAll('img').length,
        iframeCount: shadow.querySelectorAll('iframe').length,
        onHandlerAttrs: handlerAttrs,
        // Citation snippet: cleaned (no logo image/link), rendered markdown.
        snippetText: snippet?.textContent ?? '',
        snippetHasImg: Boolean(snippet?.querySelector('img')),
        snippetHasStrong: Boolean(snippet?.querySelector('strong')),
      };
      return summary;
    },
    { endpoint: ENDPOINT, agentId: AGENT_ID },
  );

  console.log('MD result:', JSON.stringify(result, null, 2));
  expect(pageErrors, `page errors:\n${pageErrors.join('\n---\n')}`).toEqual([]);

  // Markdown rendered to real elements.
  expect(result.hasStrong, 'expected **bold** → <strong>').toBe(true);
  expect(result.hasListItems, 'expected list → ≥2 <li>').toBe(true);
  expect(result.linkHref).toBe('https://smoo.ai/docs');
  expect(result.linkTarget).toBe('_blank');
  const rel = String(result.linkRel);
  expect(rel).toContain('noopener');
  expect(rel).toContain('nofollow');

  // XSS payloads neutralized — no live nodes, no handler attributes.
  expect(result.scriptCount, 'no <script> may exist in the shadow tree').toBe(0);
  expect(result.imgCount, 'no <img> may exist in the shadow tree').toBe(0);
  expect(result.iframeCount).toBe(0);
  expect(result.onHandlerAttrs, 'no on* handler attributes may survive').toBe(0);

  // Citation snippet cleaned + rendered.
  expect(result.snippetHasImg, 'snippet must drop the logo image').toBe(false);
  expect(String(result.snippetText)).not.toContain('Logo');
  expect(result.snippetHasStrong, 'snippet markdown should render').toBe(true);
});
Loading
Loading