Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/clis/youtube/transcript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* --mode raw: every caption segment as-is with precise timestamps
*/
import { cli, Strategy } from '../../registry.js';
import { parseVideoId } from './utils.js';
import { parseVideoId, prepareYoutubeApiPage } from './utils.js';
import {
groupTranscriptSegments,
formatGroupedTranscript,
Expand All @@ -34,9 +34,7 @@ cli({
// so we let the renderer auto-detect columns from the data keys.
func: async (page, kwargs) => {
const videoId = parseVideoId(kwargs.url);
const videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
await page.goto(videoUrl);
await page.wait(3);
await prepareYoutubeApiPage(page);

const lang = kwargs.lang || '';
const mode = kwargs.mode || 'grouped';
Expand Down
43 changes: 43 additions & 0 deletions src/clis/youtube/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, it, vi } from 'vitest';
import { extractJsonAssignmentFromHtml, prepareYoutubeApiPage } from './utils.js';

// Unit tests for the shared YouTube helpers: the HTML bootstrap-JSON extractor
// and the page warm-up routine.
describe('youtube utils', () => {
  // Braces and escaped quotes inside JSON string values must not confuse the
  // brace-depth scanner.
  it('extractJsonAssignmentFromHtml parses bootstrap objects with nested braces in strings', () => {
    const markup = `
<script>
var ytInitialPlayerResponse = {
"title": "brace { inside } string",
"nested": { "count": 2, "text": "quote \\"value\\"" }
};
</script>
`;
    const parsed = extractJsonAssignmentFromHtml(markup, 'ytInitialPlayerResponse');

    expect(parsed).toEqual({
      title: 'brace { inside } string',
      nested: { count: 2, text: 'quote "value"' },
    });
  });

  // The `window["KEY"] = ` assignment form is recognised as well as `var KEY = `.
  it('extractJsonAssignmentFromHtml supports window assignments', () => {
    const markup = `
<script>
window["ytInitialData"] = {"contents":{"items":[1,2,3]}};
</script>
`;
    const parsed = extractJsonAssignmentFromHtml(markup, 'ytInitialData');

    expect(parsed).toEqual({
      contents: { items: [1, 2, 3] },
    });
  });

  // The warm-up helper should navigate to the YouTube home URL without waiting
  // on navigation, then pause via page.wait(2).
  it('prepareYoutubeApiPage loads the quiet API bootstrap page', async () => {
    const goto = vi.fn().mockResolvedValue(undefined);
    const wait = vi.fn().mockResolvedValue(undefined);
    const page = { goto, wait };

    await expect(prepareYoutubeApiPage(page as any)).resolves.toBeUndefined();
    expect(goto).toHaveBeenCalledWith('https://www.youtube.com', { waitUntil: 'none' });
    expect(wait).toHaveBeenCalledWith(2);
  });
});
69 changes: 69 additions & 0 deletions src/clis/youtube/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Shared YouTube utilities — URL parsing, video ID extraction, etc.
*/
import type { IPage } from '../../types.js';

/**
* Extract a YouTube video ID from a URL or bare video ID string.
Expand All @@ -26,3 +27,71 @@ export function parseVideoId(input: string): string {

return input;
}

/**
 * Extract a JSON object assigned to a known bootstrap variable inside YouTube HTML.
 *
 * For each key, in order, the assignment forms `var KEY = `, `window["KEY"] = `,
 * `window.KEY = ` and bare `KEY = ` are searched. Every occurrence of a form is
 * examined (not just the first), and a candidate is accepted only when an object
 * literal starts directly after the marker (optional whitespace allowed). This
 * prevents an assignment such as `var KEY = null;` from picking up an unrelated
 * `{...}` that happens to appear later in the document.
 *
 * The object's extent is found by counting braces while skipping over
 * double-quoted strings (honouring backslash escapes); the slice is then handed
 * to `JSON.parse`. A candidate that fails to parse is skipped and the search
 * continues with the next occurrence / marker / key.
 *
 * NOTE: this function must stay self-contained (no references to module-level
 * helpers): video.ts injects its `toString()` source into the browser page.
 *
 * @param html - Raw HTML text to scan.
 * @param keys - One variable name, or several to try in priority order.
 * @returns The first successfully parsed object, or `null` when none is found.
 */
export function extractJsonAssignmentFromHtml(html: string, keys: string | string[]): Record<string, unknown> | null {
  const candidates = Array.isArray(keys) ? keys : [keys];
  // Kept local so the stringified function carries it along when injected.
  const isWhitespace = (ch: string): boolean => ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r';

  for (const key of candidates) {
    const markers = [
      `var ${key} = `,
      `window["${key}"] = `,
      `window.${key} = `,
      `${key} = `,
    ];
    for (const marker of markers) {
      // Walk every occurrence of the marker so an early bad match cannot hide a later good one.
      for (
        let markerIndex = html.indexOf(marker);
        markerIndex !== -1;
        markerIndex = html.indexOf(marker, markerIndex + 1)
      ) {
        let jsonStart = markerIndex + marker.length;
        while (jsonStart < html.length && isWhitespace(html.charAt(jsonStart))) {
          jsonStart += 1;
        }
        // The assigned value must itself be an object literal.
        if (html.charAt(jsonStart) !== '{') continue;

        let depth = 0;
        let inString = false;
        let escaping = false;
        for (let i = jsonStart; i < html.length; i += 1) {
          const ch = html.charAt(i);
          if (inString) {
            if (escaping) {
              escaping = false;
            } else if (ch === '\\') {
              escaping = true;
            } else if (ch === '"') {
              inString = false;
            }
            continue;
          }

          if (ch === '"') {
            inString = true;
            continue;
          }
          if (ch === '{') {
            depth += 1;
            continue;
          }
          if (ch !== '}') continue;

          depth -= 1;
          if (depth !== 0) continue;

          // Balanced object found: parse it, or give up on this occurrence.
          try {
            return JSON.parse(html.slice(jsonStart, i + 1)) as Record<string, unknown>;
          } catch {
            break;
          }
        }
      }
    }
  }

  return null;
}

/**
 * Prepare a quiet YouTube API-capable page without opening the watch UI.
 *
 * Navigates `page` to the YouTube home URL with `waitUntil: 'none'`
 * (presumably this skips waiting for load events — confirm against IPage),
 * then pauses via `page.wait(2)` before returning.
 *
 * @param page - Page handle to navigate.
 * @returns A promise that resolves once the navigation call and the pause complete.
 */
export async function prepareYoutubeApiPage(page: IPage): Promise<void> {
  const homeUrl = 'https://www.youtube.com';
  const settleDelay = 2;

  await page.goto(homeUrl, { waitUntil: 'none' });
  await page.wait(settleDelay);
}
31 changes: 16 additions & 15 deletions src/clis/youtube/video.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/**
* YouTube video metadata — read ytInitialPlayerResponse + ytInitialData from video page.
* YouTube video metadata — fetch watch HTML and parse bootstrap data without opening the watch UI.
*/
import { cli, Strategy } from '../../registry.js';
import { parseVideoId } from './utils.js';
import { extractJsonAssignmentFromHtml, parseVideoId, prepareYoutubeApiPage } from './utils.js';
import { CommandExecutionError } from '../../errors.js';

cli({
Expand All @@ -17,24 +17,29 @@ cli({
columns: ['field', 'value'],
func: async (page, kwargs) => {
const videoId = parseVideoId(kwargs.url);
const videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
await page.goto(videoUrl);
await page.wait(3);
await prepareYoutubeApiPage(page);

const data = await page.evaluate(`
(async () => {
const player = window.ytInitialPlayerResponse;
const yt = window.ytInitialData;
if (!player) return { error: 'ytInitialPlayerResponse not found' };
const extractJsonAssignmentFromHtml = ${extractJsonAssignmentFromHtml.toString()};

const watchResp = await fetch('/watch?v=' + encodeURIComponent(${JSON.stringify(videoId)}), {
credentials: 'include',
});
if (!watchResp.ok) return { error: 'Watch HTML returned HTTP ' + watchResp.status };

const html = await watchResp.text();
const player = extractJsonAssignmentFromHtml(html, 'ytInitialPlayerResponse');
const yt = extractJsonAssignmentFromHtml(html, 'ytInitialData');
if (!player) return { error: 'ytInitialPlayerResponse not found in watch HTML' };

const details = player.videoDetails || {};
const microformat = player.microformat?.playerMicroformatRenderer || {};
const contents = yt?.contents?.twoColumnWatchNextResults?.results?.results?.contents || [];

// Try to get full description from ytInitialData
// Try to get full description from watch bootstrap data
let fullDescription = details.shortDescription || '';
try {
const contents = yt?.contents?.twoColumnWatchNextResults
?.results?.results?.contents;
if (contents) {
for (const c of contents) {
const desc = c.videoSecondaryInfoRenderer?.attributedDescription?.content;
Expand All @@ -46,8 +51,6 @@ cli({
// Get like count if available
let likes = '';
try {
const contents = yt?.contents?.twoColumnWatchNextResults
?.results?.results?.contents;
if (contents) {
for (const c of contents) {
const buttons = c.videoPrimaryInfoRenderer?.videoActions
Expand Down Expand Up @@ -75,8 +78,6 @@ cli({
// Get channel subscriber count if available
let subscribers = '';
try {
const contents = yt?.contents?.twoColumnWatchNextResults
?.results?.results?.contents;
if (contents) {
for (const c of contents) {
const owner = c.videoSecondaryInfoRenderer?.owner
Expand Down