From 92040e3b9e5313fb639c33cfc2b0a60fe8819f32 Mon Sep 17 00:00:00 2001 From: jackwener Date: Sun, 29 Mar 2026 17:47:05 +0800 Subject: [PATCH] refactor(douyin): share user video public api --- src/clis/douyin/_shared/public-api.ts | 84 +++++++++++++ src/clis/douyin/user-videos.test.ts | 165 +++++++++++++++++--------- src/clis/douyin/user-videos.ts | 141 ++++++++++++---------- 3 files changed, 273 insertions(+), 117 deletions(-) create mode 100644 src/clis/douyin/_shared/public-api.ts diff --git a/src/clis/douyin/_shared/public-api.ts b/src/clis/douyin/_shared/public-api.ts new file mode 100644 index 00000000..aff381a1 --- /dev/null +++ b/src/clis/douyin/_shared/public-api.ts @@ -0,0 +1,84 @@ +import type { IPage } from '../../../types.js'; +import { browserFetch } from './browser-fetch.js'; + +export interface DouyinComment { + text?: string; + digg_count?: number; + user?: { + nickname?: string; + }; +} + +export interface DouyinVideo { + aweme_id: string; + desc?: string; + video?: { + duration?: number; + play_addr?: { + url_list?: string[]; + }; + }; + statistics?: { + digg_count?: number; + }; +} + +export interface DouyinVideoListResponse { + aweme_list?: DouyinVideo[]; +} + +export interface DouyinCommentListResponse { + comments?: DouyinComment[]; +} + +export async function fetchDouyinUserVideos( + page: IPage, + secUid: string, + count: number, +): Promise { + const params = new URLSearchParams({ + sec_user_id: secUid, + max_cursor: '0', + count: String(count), + aid: '6383', + }); + + const data = await browserFetch( + page, + 'GET', + `https://www.douyin.com/aweme/v1/web/aweme/post/?${params.toString()}`, + { + headers: { referer: 'https://www.douyin.com/' }, + }, + ) as DouyinVideoListResponse; + + return data.aweme_list || []; +} + +export async function fetchDouyinComments( + page: IPage, + awemeId: string, + count: number, +): Promise> { + const params = new URLSearchParams({ + aweme_id: awemeId, + count: String(count), + cursor: '0', + aid: '6383', + }); + + const data = await browserFetch( + page, + 'GET', + `https://www.douyin.com/aweme/v1/web/comment/list/?${params.toString()}`, + { + headers: { referer: 'https://www.douyin.com/' }, + }, + ) as DouyinCommentListResponse; + + return (data.comments || []).slice(0, count).map((comment) => ({ + text: comment.text || '', + digg_count: comment.digg_count ?? 0, + nickname: comment.user?.nickname || '', + })); +} diff --git a/src/clis/douyin/user-videos.test.ts b/src/clis/douyin/user-videos.test.ts index 8acd9093..b9f814b6 100644 --- a/src/clis/douyin/user-videos.test.ts +++ b/src/clis/douyin/user-videos.test.ts @@ -1,63 +1,122 @@ -import { describe, expect, it, vi } from 'vitest'; -import { ArgumentError, CommandExecutionError } from '../../errors.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { fetchDouyinUserVideosMock, fetchDouyinCommentsMock } = vi.hoisted(() => ({ + fetchDouyinUserVideosMock: vi.fn(), + fetchDouyinCommentsMock: vi.fn(), +})); + +vi.mock('./_shared/public-api.js', () => ({ + fetchDouyinUserVideos: fetchDouyinUserVideosMock, + fetchDouyinComments: fetchDouyinCommentsMock, +})); + import { getRegistry } from '../../registry.js'; -import './user-videos.js'; - -function makePage(...evaluateResults: unknown[]) { - return { - goto: vi.fn().mockResolvedValue(undefined), - wait: vi.fn().mockResolvedValue(undefined), - evaluate: vi.fn() - .mockImplementation(() => Promise.resolve(evaluateResults.shift())), - } as any; -} - -describe('douyin user-videos command', () => { - it('throws ArgumentError when limit is not a positive integer', async () => { - const cmd = getRegistry().get('douyin/user-videos'); - const page = makePage(); - - await expect(cmd!.func!(page, { sec_uid: 'test', limit: 0 })).rejects.toThrow(ArgumentError); - expect(page.goto).not.toHaveBeenCalled(); +import { DEFAULT_COMMENT_LIMIT, MAX_USER_VIDEOS_LIMIT, normalizeCommentLimit, normalizeUserVideosLimit } from './user-videos.js'; + +describe('douyin user-videos', () => { + beforeEach(() => { + fetchDouyinUserVideosMock.mockReset(); + fetchDouyinCommentsMock.mockReset(); }); - it('surfaces top-level Douyin API errors through browserFetch semantics', async () => { - const cmd = getRegistry().get('douyin/user-videos'); - const page = makePage({ status_code: 8, status_msg: 'bad uid' }); + it('registers the command', () => { + const registry = getRegistry(); + const values = [...registry.values()]; + const command = values.find((cmd) => cmd.site === 'douyin' && cmd.name === 'user-videos'); + expect(command).toBeDefined(); + }); - await expect(cmd!.func!(page, { sec_uid: 'bad', limit: 3 })).rejects.toThrow(CommandExecutionError); - expect(page.goto).toHaveBeenCalledWith('https://www.douyin.com/user/bad'); - expect(page.evaluate).toHaveBeenCalledTimes(1); + it('clamps limit to a safe maximum', () => { + expect(normalizeUserVideosLimit(100)).toBe(MAX_USER_VIDEOS_LIMIT); + expect(normalizeUserVideosLimit(0)).toBe(1); + expect(normalizeCommentLimit(99)).toBe(DEFAULT_COMMENT_LIMIT); }); - it('passes normalized limit to the API and preserves mapped rows', async () => { - const cmd = getRegistry().get('douyin/user-videos'); - const page = makePage( + it('uses shared public-api helpers and applies clamped limits', async () => { + const registry = getRegistry(); + const command = [...registry.values()].find((cmd) => cmd.site === 'douyin' && cmd.name === 'user-videos'); + expect(command?.func).toBeDefined(); + if (!command?.func) throw new Error('douyin user-videos command not registered'); + + fetchDouyinUserVideosMock.mockResolvedValueOnce([ + { + aweme_id: '1', + desc: 'test video', + video: { duration: 1234, play_addr: { url_list: ['https://example.com/video.mp4'] } }, + statistics: { digg_count: 9 }, + }, + ]); + fetchDouyinCommentsMock.mockResolvedValueOnce([ + { text: 'nice', digg_count: 3, nickname: 'alice' }, + ]); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + }; + + const rows = await command.func(page as any, { + sec_uid: 'MS4w-test', + limit: 100, + comment_limit: 99, + with_comments: true, + }); + + expect(fetchDouyinUserVideosMock).toHaveBeenCalledWith(page, 'MS4w-test', MAX_USER_VIDEOS_LIMIT); + expect(fetchDouyinCommentsMock).toHaveBeenCalledWith(page, '1', DEFAULT_COMMENT_LIMIT); + expect(rows).toEqual([ + { + index: 1, + aweme_id: '1', + title: 'test video', + duration: 1, + digg_count: 9, + play_url: 'https://example.com/video.mp4', + top_comments: [ + { text: 'nice', digg_count: 3, nickname: 'alice' }, + ], + }, + ]); + }); + + it('skips comment enrichment when with_comments is false', async () => { + const registry = getRegistry(); + const command = [...registry.values()].find((cmd) => cmd.site === 'douyin' && cmd.name === 'user-videos'); + expect(command?.func).toBeDefined(); + if (!command?.func) throw new Error('douyin user-videos command not registered'); + + fetchDouyinUserVideosMock.mockResolvedValueOnce([ + { + aweme_id: '2', + desc: 'plain video', + video: { duration: 2000, play_addr: { url_list: ['https://example.com/plain.mp4'] } }, + statistics: { digg_count: 1 }, + }, + ]); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + }; + + const rows = await command.func(page as any, { + sec_uid: 'MS4w-test', + limit: 3, + with_comments: false, + comment_limit: 5, + }); + + expect(fetchDouyinCommentsMock).not.toHaveBeenCalled(); + expect(rows).toEqual([ { - aweme_list: [{ - aweme_id: '1', - desc: 'Video 1', - video: { duration: 2300, play_addr: { url_list: ['https://video.example/1.mp4'] } }, - statistics: { digg_count: 12 }, - }], + index: 1, + aweme_id: '2', + title: 'plain video', + duration: 2, + digg_count: 1, + play_url: 'https://example.com/plain.mp4', + top_comments: [], }, - [{ aweme_id: '1', desc: 'Video 1', video: { duration: 2300, play_addr: { url_list: ['https://video.example/1.mp4'] } }, statistics: { digg_count: 12 }, top_comments: [] }], - ); - - const rows = await cmd!.func!(page, { sec_uid: 'good', limit: 1 }); - - expect(page.evaluate).toHaveBeenNthCalledWith( - 1, - expect.stringContaining('count=1'), - ); - expect(rows).toEqual([{ - index: 1, - aweme_id: '1', - title: 'Video 1', - duration: 2, - digg_count: 12, - play_url: 'https://video.example/1.mp4', - top_comments: [], - }]); + ]); }); }); diff --git a/src/clis/douyin/user-videos.ts b/src/clis/douyin/user-videos.ts index 14047e2c..1402b0ff 100644 --- a/src/clis/douyin/user-videos.ts +++ b/src/clis/douyin/user-videos.ts @@ -1,7 +1,55 @@ import { cli, Strategy } from '../../registry.js'; -import { ArgumentError } from '../../errors.js'; import type { IPage } from '../../types.js'; -import { browserFetch } from './_shared/browser-fetch.js'; +import { fetchDouyinComments, fetchDouyinUserVideos, type DouyinVideo } from './_shared/public-api.js'; + +export const MAX_USER_VIDEOS_LIMIT = 20; +export const USER_VIDEO_COMMENT_CONCURRENCY = 4; +export const DEFAULT_COMMENT_LIMIT = 10; + +type EnrichedDouyinVideo = DouyinVideo & { + top_comments?: Array<{ + text: string; + digg_count: number; + nickname: string; + }>; +}; + +export function normalizeUserVideosLimit(limit: unknown): number { + const numeric = Number(limit); + if (!Number.isFinite(numeric)) return MAX_USER_VIDEOS_LIMIT; + return Math.min(MAX_USER_VIDEOS_LIMIT, Math.max(1, Math.round(numeric))); +} + +export function normalizeCommentLimit(limit: unknown): number { + const numeric = Number(limit); + if (!Number.isFinite(numeric)) return DEFAULT_COMMENT_LIMIT; + return Math.min(DEFAULT_COMMENT_LIMIT, Math.max(1, Math.round(numeric))); +} + +async function mapInBatches( + items: T[], + concurrency: number, + mapper: (item: T) => Promise, +): Promise { + const results: R[] = []; + for (let index = 0; index < items.length; index += concurrency) { + const chunk = items.slice(index, index + concurrency); + results.push(...(await Promise.all(chunk.map(mapper)))); + } + return results; +} + +async function fetchTopComments( + page: IPage, + awemeId: string, + count: number, +): Promise> { + try { + return await fetchDouyinComments(page, awemeId, count); + } catch { + return []; + } +} cli({ site: 'douyin', @@ -11,77 +59,42 @@ cli({ strategy: Strategy.COOKIE, args: [ { name: 'sec_uid', type: 'string', required: true, positional: true, help: '用户 sec_uid(URL 末尾部分)' }, - { name: 'limit', type: 'int', default: 20, help: '获取数量' }, + { name: 'limit', type: 'int', default: 20, help: '获取数量(最大 20)' }, + { name: 'with_comments', type: 'bool', default: true, help: '包含热门评论(默认: true)' }, + { name: 'comment_limit', type: 'int', default: 10, help: '每个视频获取多少条评论(最大 10)' }, ], columns: ['index', 'aweme_id', 'title', 'duration', 'digg_count', 'play_url', 'top_comments'], func: async (page: IPage, kwargs) => { - const limit = Number(kwargs.limit); - if (!Number.isInteger(limit) || limit <= 0) { - throw new ArgumentError('limit must be a positive integer'); - } + const secUid = kwargs.sec_uid as string; + const limit = normalizeUserVideosLimit(kwargs.limit); + const withComments = kwargs.with_comments !== false; + const commentLimit = normalizeCommentLimit(kwargs.comment_limit); - await page.goto(`https://www.douyin.com/user/${kwargs.sec_uid as string}`); + await page.goto(`https://www.douyin.com/user/${secUid}`); await page.wait(3); - const params = new URLSearchParams({ - sec_user_id: String(kwargs.sec_uid), - max_cursor: '0', - count: String(limit), - aid: '6383', - }); - const data = await browserFetch( - page, - 'GET', - `https://www.douyin.com/aweme/v1/web/aweme/post/?${params.toString()}`, - ) as { aweme_list?: Array> }; - const awemeList = (data.aweme_list || []).slice(0, limit); - - const result = await page.evaluate(` - (async () => { - const awemeList = ${JSON.stringify(awemeList)}; - - const withComments = await Promise.all(awemeList.map(async (v) => { - try { - const cp = new URLSearchParams({ - aweme_id: String(v.aweme_id), - count: '10', - cursor: '0', - aid: '6383', - }); - const cr = await fetch('/aweme/v1/web/comment/list/?' + cp.toString(), { - credentials: 'include', - headers: { referer: 'https://www.douyin.com/' }, - }); - const cd = await cr.json(); - const comments = (cd.comments || []).slice(0, 10).map((c) => ({ - text: c.text, - digg_count: c.digg_count, - nickname: c.user && c.user.nickname, - })); - return { ...v, top_comments: comments }; - } catch { - return { ...v, top_comments: [] }; - } - })); - - return withComments; - })() - `) as Array>; + const awemeList = (await fetchDouyinUserVideos(page, secUid, limit)).slice(0, limit); + const videos: EnrichedDouyinVideo[] = withComments + ? await mapInBatches( + awemeList, + USER_VIDEO_COMMENT_CONCURRENCY, + async (video) => ({ + ...video, + top_comments: await fetchTopComments(page, video.aweme_id, commentLimit), + }), + ) + : awemeList.map((video) => ({ ...video, top_comments: [] })); - return (result || []).map((v, i) => { - const video = v.video as Record | undefined; - const playAddr = video?.play_addr as Record | undefined; - const urlList = playAddr?.url_list as string[] | undefined; - const playUrl = urlList?.[0] ?? ''; - const statistics = v.statistics as Record | undefined; + return videos.map((video, index) => { + const playUrl = video.video?.play_addr?.url_list?.[0] ?? ''; return { - index: i + 1, - aweme_id: v.aweme_id as string, - title: v.desc as string, - duration: Math.round(((video?.duration as number) ?? 0) / 1000), - digg_count: (statistics?.digg_count as number) ?? 0, + index: index + 1, + aweme_id: video.aweme_id, + title: video.desc ?? '', + duration: Math.round((video.video?.duration ?? 0) / 1000), + digg_count: video.statistics?.digg_count ?? 0, play_url: playUrl, - top_comments: v.top_comments as unknown[], + top_comments: video.top_comments ?? [], }; }); },