/**
 * @vitest-environment node
 */
import { describe, expect, it } from 'vitest'
import { parseVtt } from '@/connectors/zoom/zoom'

/** WebVTT file signature followed by the blank line that precedes the first cue. */
const HEADER = 'WEBVTT\n\n'

/** Timing line used by every single-cue fixture in this suite. */
const TIMING = '00:00:00.000 --> 00:00:02.000'

/** Header plus timing line; append the cue payload and a trailing newline. */
const SINGLE_CUE_PREFIX = `${HEADER}${TIMING}\n`

/**
 * Matches any complete `<...>` or `</...>` tag left in the output.
 * It has no `g` flag, so it carries no `lastIndex` state and is safe to share
 * between the concurrently running tests below.
 */
const TAG_PATTERN = /<\/?[^>]+>/

/**
 * Suite for `parseVtt`. Each case feeds a small WebVTT document and asserts on
 * the plain-text string that comes back.
 */
describe('parseVtt', () => {
  it.concurrent('returns empty string for input with no cues', () => {
    expect(parseVtt(HEADER)).toBe('')
  })

  it.concurrent('extracts plain spoken text from a single cue', () => {
    const vtt = `${SINGLE_CUE_PREFIX}Hello world\n`
    expect(parseVtt(vtt)).toBe('Hello world')
  })

  it.concurrent('preserves WebVTT voice tags as "Speaker: text"', () => {
    const vtt = `${SINGLE_CUE_PREFIX}<v Alice>hello there</v>\n`
    expect(parseVtt(vtt)).toBe('Alice: hello there')
  })

  it.concurrent('preserves voice tags with class suffix', () => {
    const vtt = `${SINGLE_CUE_PREFIX}<v.host Bob>welcome</v>\n`
    expect(parseVtt(vtt)).toBe('Bob: welcome')
  })

  it.concurrent('strips inline formatting tags but keeps text', () => {
    const vtt = `${SINGLE_CUE_PREFIX}<b>bold</b> and <i>italic</i>\n`
    expect(parseVtt(vtt)).toBe('bold and italic')
  })

  it.concurrent('strips karaoke timestamp tags', () => {
    const vtt = `${SINGLE_CUE_PREFIX}hello <00:00:01.000>world\n`
    expect(parseVtt(vtt)).toBe('hello world')
  })

  it.concurrent('strips class spans', () => {
    const vtt = `${SINGLE_CUE_PREFIX}<c.loud>SHOUT</c>\n`
    expect(parseVtt(vtt)).toBe('SHOUT')
  })

  it.concurrent('skips cue identifier lines before timing', () => {
    // The identifier sits between the header and the timing line, so the
    // shared single-cue prefix cannot be used here.
    const vtt = `${HEADER}cue-1\n${TIMING}\nhello\n`
    expect(parseVtt(vtt)).toBe('hello')
  })

  it.concurrent('joins multiple cues with newlines', () => {
    const vtt = `${SINGLE_CUE_PREFIX}first\n\n00:00:02.000 --> 00:00:04.000\nsecond\n`
    expect(parseVtt(vtt)).toBe('first\nsecond')
  })

  it.concurrent('collapses repeated whitespace within a cue', () => {
    // Build the run of spaces explicitly: a literal run is easy to lose to
    // formatters or renderers, which would leave this test asserting nothing.
    const vtt = `${SINGLE_CUE_PREFIX}hello${' '.repeat(4)}world\n`
    expect(parseVtt(vtt)).toBe('hello world')
  })

  it.concurrent('iteratively strips overlapping tags that reconstruct after one pass', () => {
    // Removing the inner `<b>` once leaves `<b>injected</b>`, so a single
    // stripping pass would still emit a tag.
    const crafted = '<<b>b>injected</<b>b>'
    const result = parseVtt(`${SINGLE_CUE_PREFIX}${crafted}\n`)
    expect(result).not.toMatch(TAG_PATTERN)
    expect(result).toContain('injected')
  })

  it.concurrent('iteratively strips nested script-like tag fragments', () => {
    // Removing the inner `<script>` once re-forms `<script>` from the outer
    // fragments.
    const crafted = '<scr<script>ipt>alert(1)</scr</script>ipt>'
    const result = parseVtt(`${SINGLE_CUE_PREFIX}${crafted}\n`)
    expect(result).not.toMatch(TAG_PATTERN)
    expect(result.toLowerCase()).not.toContain('script')
  })

  it.concurrent('sanitizes crafted speaker names that embed tag fragments', () => {
    const vtt = `${SINGLE_CUE_PREFIX}<v <b>Evil</b>>payload</v>\n`
    const result = parseVtt(vtt)
    expect(result).not.toMatch(TAG_PATTERN)
  })

  it.concurrent('terminates on adversarial deeply-nested input', () => {
    // 50 nested angle brackets on each side of the tag name; the test passes
    // only if parsing finishes and no complete tag survives.
    const open = '<'.repeat(50)
    const close = '>'.repeat(50)
    const crafted = `${open}b${close}text${open}/b${close}`
    const result = parseVtt(`${SINGLE_CUE_PREFIX}${crafted}\n`)
    expect(result).not.toMatch(TAG_PATTERN)
  })
})