diff --git a/README.md b/README.md index 2355dcef..7722dbee 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ git clone git@github.com:jackwener/opencli.git && cd opencli && npm install && n |------|----------| | **xiaohongshu** | `search` `feed` `user` `download` `publish` `comments` `notifications` `creator-notes` `creator-notes-summary` `creator-note-detail` `creator-profile` `creator-stats` | | **bilibili** | `hot` `search` `history` `feed` `ranking` `download` `comments` `dynamic` `favorite` `following` `me` `subtitle` `user-videos` | +| **tieba** | `hot` `posts` `search` `read` | | **twitter** | `trending` `search` `timeline` `bookmarks` `post` `download` `profile` `article` `like` `likes` `notifications` `reply` `reply-dm` `thread` `follow` `unfollow` `followers` `following` `block` `unblock` `bookmark` `unbookmark` `delete` `hide-reply` `accept` | | **reddit** | `hot` `frontpage` `popular` `search` `subreddit` `user` `user-posts` `user-comments` `read` `save` `saved` `subscribe` `upvote` `upvoted` `comment` | diff --git a/README.zh-CN.md b/README.zh-CN.md index 624760a7..61511c6e 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -122,6 +122,7 @@ npm install -g @jackwener/opencli@latest |------|------|------| | **twitter** | `trending` `bookmarks` `profile` `search` `timeline` `thread` `following` `followers` `notifications` `post` `reply` `delete` `like` `article` `follow` `unfollow` `bookmark` `unbookmark` `download` `accept` `reply-dm` `block` `unblock` `hide-reply` | 浏览器 | | **reddit** | `hot` `frontpage` `popular` `search` `subreddit` `read` `user` `user-posts` `user-comments` `upvote` `save` `comment` `subscribe` `saved` `upvoted` | 浏览器 | +| **tieba** | `hot` `posts` `search` `read` | 浏览器 | | **cursor** | `status` `send` `read` `new` `dump` `composer` `model` `extract-code` `ask` `screenshot` `history` `export` | 桌面端 | | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `subtitle` `dynamic` `ranking` `following` `user-videos` `download` | 浏览器 | | **codex** | `status` `send` `read` `new` `dump` `extract-diff` `model` `ask` `screenshot` `history` `export` | 桌面端 | diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 5b4a58cd..cf3b76f9 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -50,6 +50,7 @@ export default defineConfig({ items: [ { text: 'Twitter / X', link: '/adapters/browser/twitter' }, { text: 'Reddit', link: '/adapters/browser/reddit' }, + { text: 'Tieba', link: '/adapters/browser/tieba' }, { text: 'Bilibili', link: '/adapters/browser/bilibili' }, { text: 'Zhihu', link: '/adapters/browser/zhihu' }, { text: 'Xiaohongshu', link: '/adapters/browser/xiaohongshu' }, diff --git a/docs/adapters/browser/tieba.md b/docs/adapters/browser/tieba.md new file mode 100644 index 00000000..fd6a784e --- /dev/null +++ b/docs/adapters/browser/tieba.md @@ -0,0 +1,45 @@ +# Tieba + +**Mode**: 🔐 Browser · **Domain**: `tieba.baidu.com` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli tieba hot` | Read Tieba trending topics | +| `opencli tieba posts ` | List threads in one forum | +| `opencli tieba search ` | Search threads across Tieba | +| `opencli tieba read ` | Read one thread page | + +## Usage Examples + +```bash +# Trending topics +opencli tieba hot --limit 5 + +# List forum threads +opencli tieba posts 李毅 --limit 10 + +# Search Tieba +opencli tieba search 编程 --limit 10 + +# Read one thread +opencli tieba read 10163164720 --limit 10 + +# Read page 2 of a thread +opencli tieba read 10163164720 --page 2 --limit 10 + +# JSON output +opencli tieba hot -f json +``` + +## Notes + +- `tieba search` currently supports only `--page 1` +- `tieba read --limit` counts reply rows; page 1 may also include the main post + +## Prerequisites + +- Chrome running and able to open `tieba.baidu.com` +- [Browser Bridge extension](/guide/browser-bridge) installed +- For `posts`, `search`, and `read`, a valid Tieba login session in Chrome is recommended diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 7a82f177..23563877 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -8,6 +8,7 @@ Run `opencli list` for the live registry. |------|----------|------| | **[twitter](/adapters/browser/twitter)** | `trending` `bookmarks` `profile` `search` `timeline` `thread` `following` `followers` `notifications` `post` `reply` `delete` `like` `article` `follow` `unfollow` `bookmark` `unbookmark` `download` `accept` `reply-dm` `block` `unblock` `hide-reply` | 🔐 Browser | | **[reddit](/adapters/browser/reddit)** | `hot` `frontpage` `popular` `search` `subreddit` `read` `user` `user-posts` `user-comments` `upvote` `save` `comment` `subscribe` `saved` `upvoted` | 🔐 Browser | +| **[tieba](/adapters/browser/tieba)** | `hot` `posts` `search` `read` | 🔐 Browser | | **[bilibili](/adapters/browser/bilibili)** | `hot` `search` `me` `favorite` `history` `feed` `subtitle` `dynamic` `ranking` `following` `user-videos` `download` | 🔐 Browser | | **[zhihu](/adapters/browser/zhihu)** | `hot` `search` `question` `download` | 🔐 Browser | | **[xiaohongshu](/adapters/browser/xiaohongshu)** | `search` `notifications` `feed` `user` `download` `publish` `creator-notes` `creator-note-detail` `creator-notes-summary` `creator-profile` `creator-stats` | 🔐 Browser | diff --git a/src/clis/tieba/commands.test.ts b/src/clis/tieba/commands.test.ts new file mode 100644 index 00000000..0f7626f2 --- /dev/null +++ b/src/clis/tieba/commands.test.ts @@ -0,0 +1,86 @@ +import { describe, expect, it } from 'vitest'; +import { Strategy, getRegistry } from '../../registry.js'; +import './hot.js'; +import './posts.js'; +import './read.js'; +import './search.js'; + +describe('tieba commands', () => { + it('registers all tieba commands as TypeScript adapters', () => { + const hot = getRegistry().get('tieba/hot'); + const posts = getRegistry().get('tieba/posts'); + const search = getRegistry().get('tieba/search'); + const read = getRegistry().get('tieba/read'); + + expect(hot).toBeDefined(); + expect(posts).toBeDefined(); + expect(search).toBeDefined(); + expect(read).toBeDefined(); + expect(typeof hot?.func).toBe('function'); + expect(typeof posts?.func).toBe('function'); + expect(typeof search?.func).toBe('function'); + expect(typeof read?.func).toBe('function'); + }); + + it('keeps the intended browser strategies', () => { + const hot = getRegistry().get('tieba/hot'); + const posts = getRegistry().get('tieba/posts'); + const search = getRegistry().get('tieba/search'); + const read = getRegistry().get('tieba/read'); + + expect(hot?.strategy).toBe(Strategy.PUBLIC); + expect(posts?.strategy).toBe(Strategy.COOKIE); + expect(search?.strategy).toBe(Strategy.COOKIE); + expect(read?.strategy).toBe(Strategy.COOKIE); + expect(hot?.browser).toBe(true); + expect(posts?.browser).toBe(true); + expect(search?.browser).toBe(true); + expect(read?.browser).toBe(true); + }); + + it('keeps the public limit contract at 20 items for list commands', () => { + const hot = getRegistry().get('tieba/hot'); + const posts = getRegistry().get('tieba/posts'); + const search = getRegistry().get('tieba/search'); + + expect(hot?.args.find((arg) => arg.name === 'limit')?.default).toBe(20); + expect(posts?.args.find((arg) => arg.name === 'limit')?.default).toBe(20); + expect(search?.args.find((arg) => arg.name === 'limit')?.default).toBe(20); + }); + + it('rejects tieba read results when navigation lands on the wrong page number', async () => { + const read = getRegistry().get('tieba/read'); + expect(read).toBeDefined(); + expect(typeof read?.func).toBe('function'); + const run = read?.func; + if (!run) throw new Error('tieba/read did not register a handler'); + const page = { + goto: async () => undefined, + evaluate: async () => ({ + pageMeta: { + pathname: '/p/10163164720', + pn: '1', + }, + mainPost: { + title: '测试帖子', + author: '作者', + contentText: '正文', + structuredText: '', + visibleTime: '2026-03-29 12:00', + structuredTime: 0, + hasMedia: false, + }, + replies: [], + }), + }; + + await expect(run(page as never, { + id: '10163164720', + page: 2, + limit: 5, + })).rejects.toMatchObject({ + code: 'EMPTY_RESULT', + hint: expect.stringMatching(/requested page/i), + }); + }); +}); diff --git a/src/clis/tieba/hot.ts b/src/clis/tieba/hot.ts new file mode 100644 index 00000000..6c44266e --- /dev/null +++ b/src/clis/tieba/hot.ts @@ -0,0 +1,52 @@ +import { EmptyResultError } from '../../errors.js'; +import { cli, Strategy } from '../../registry.js'; +import { normalizeTiebaLimit } from './utils.js'; + +cli({ + site: 'tieba', + name: 'hot', + description: 'Tieba hot topics', + domain: 'tieba.baidu.com', + strategy: Strategy.PUBLIC, + browser: true, + navigateBefore: false, + args: [ + { name: 'limit', type: 'int', default: 20, help: 'Number of items to return' }, + ], + columns: ['rank', 'title', 'discussions', 'description'], + func: async (page, kwargs) => { + const limit = normalizeTiebaLimit(kwargs.limit); + // Use the default browser settle path so we do not scrape the previous page. + await page.goto('https://tieba.baidu.com/hottopic/browse/topicList?res_type=1'); + + const raw = await page.evaluate(`(() => { + const items = document.querySelectorAll('li.topic-top-item'); + return Array.from(items).map((item) => { + const titleEl = item.querySelector('a.topic-text'); + const numEl = item.querySelector('span.topic-num'); + const descEl = item.querySelector('p.topic-top-item-desc'); + const href = titleEl?.getAttribute('href') || ''; + + return { + title: titleEl?.textContent?.trim() || '', + discussions: numEl?.textContent?.trim() || '', + description: descEl?.textContent?.trim() || '', + url: href.startsWith('http') ? href : 'https://tieba.baidu.com' + href, + }; + }).filter((item) => item.title).slice(0, ${limit}); + })()`); + + const items = Array.isArray(raw) ? raw as Array> : []; + if (!items.length) { + throw new EmptyResultError('tieba hot', 'Tieba may have blocked the hot page, or the DOM structure may have changed'); + } + + return items.map((item, index) => ({ + rank: index + 1, + title: item.title || '', + discussions: item.discussions || '', + description: item.description || '', + url: item.url || '', + })); + }, +}); diff --git a/src/clis/tieba/posts.ts b/src/clis/tieba/posts.ts new file mode 100644 index 00000000..9679d5f6 --- /dev/null +++ b/src/clis/tieba/posts.ts @@ -0,0 +1,108 @@ +import { EmptyResultError } from '../../errors.js'; +import { cli, Strategy, type CommandArgs } from '../../registry.js'; +import type { IPage } from '../../types.js'; +import { + buildTiebaPostCardsFromPagePc, + buildTiebaPostItems, + normalizeTiebaLimit, + signTiebaPcParams, + type RawTiebaPagePcFeedEntry, +} from './utils.js'; + +interface TiebaPagePcResponse { + error_code?: number; + page_data?: { + feed_list?: RawTiebaPagePcFeedEntry[]; + }; +} + +function getForumPageNumber(kwargs: CommandArgs): number { + return Math.max(1, Number(kwargs.page || 1)); +} + +function getForumUrl(kwargs: CommandArgs): string { + const forum = String(kwargs.forum || ''); + return `https://tieba.baidu.com/f?kw=${encodeURIComponent(forum)}&ie=utf-8&pn=${(getForumPageNumber(kwargs) - 1) * 50}`; +} + +/** + * Rebuild the signed page_pc request instead of scraping only the visible thread cards. + */ +function buildTiebaPagePcParams(kwargs: CommandArgs, limit: number): Record { + return { + kw: encodeURIComponent(String(kwargs.forum || '')), + pn: String(getForumPageNumber(kwargs)), + sort_type: '-1', + is_newfrs: '1', + is_newfeed: '1', + rn: '30', + rn_need: String(Math.min(Math.max(limit + 10, 10), 30)), + tbs: '', + subapp_type: 'pc', + _client_type: '20', + }; +} + +/** + * Tieba expects the signed forum-list request to be replayed with the browser's cookies. + */ +async function fetchTiebaPagePc(page: IPage, kwargs: CommandArgs, limit: number): Promise { + await page.goto(getForumUrl(kwargs), { waitUntil: 'none' }); + await page.wait(2); + + const params = buildTiebaPagePcParams(kwargs, limit); + const cookies = await page.getCookies({ domain: 'tieba.baidu.com' }); + const cookieHeader = cookies.map((item) => `${item.name}=${item.value}`).join('; '); + const body = new URLSearchParams({ + ...params, + sign: signTiebaPcParams(params), + }).toString(); + + const response = await fetch('https://tieba.baidu.com/c/f/frs/page_pc', { + method: 'POST', + headers: { + 'content-type': 'application/x-www-form-urlencoded;charset=UTF-8', + cookie: cookieHeader, + 'x-requested-with': 'XMLHttpRequest', + referer: getForumUrl(kwargs), + 'user-agent': 'Mozilla/5.0', + }, + body, + }); + + const text = await response.text(); + try { + return JSON.parse(text) as TiebaPagePcResponse; + } catch { + return {}; + } +} + +cli({ + site: 'tieba', + name: 'posts', + description: 'Browse posts in a tieba forum', + domain: 'tieba.baidu.com', + strategy: Strategy.COOKIE, + browser: true, + navigateBefore: false, + args: [ + { name: 'forum', positional: true, required: true, type: 'string', help: 'Forum name in Chinese' }, + { name: 'page', type: 'int', default: 1, help: 'Page number' }, + { name: 'limit', type: 'int', default: 20, help: 'Number of items to return' }, + ], + columns: ['rank', 'title', 'author', 'replies'], + func: async (page, kwargs) => { + const limit = normalizeTiebaLimit(kwargs.limit); + const payload = await fetchTiebaPagePc(page, kwargs, limit); + const rawFeeds = Array.isArray(payload.page_data?.feed_list) ? payload.page_data.feed_list : []; + const rawCards = buildTiebaPostCardsFromPagePc(rawFeeds); + const items = buildTiebaPostItems(rawCards, limit); + + if (!items.length || payload.error_code) { + throw new EmptyResultError('tieba posts', 'Tieba may have blocked the forum page, or the DOM structure may have changed'); + } + + return items; + }, +}); diff --git a/src/clis/tieba/read.ts b/src/clis/tieba/read.ts new file mode 100644 index 00000000..35bae997 --- /dev/null +++ b/src/clis/tieba/read.ts @@ -0,0 +1,158 @@ +import { EmptyResultError } from '../../errors.js'; +import { cli, Strategy, type CommandArgs } from '../../registry.js'; +import { buildTiebaReadItems, type RawTiebaReadPayload } from './utils.js'; + +type TiebaReadPageMeta = { + pathname?: string; + pn?: string; +}; + +type RawTiebaReadPagePayload = RawTiebaReadPayload & { + pageMeta?: TiebaReadPageMeta; +}; + +function getThreadUrl(kwargs: CommandArgs): string { + const threadId = String(kwargs.id || ''); + const pageNumber = Math.max(1, Number(kwargs.page || 1)); + return `https://tieba.baidu.com/p/${encodeURIComponent(threadId)}?pn=${pageNumber}`; +} + +/** + * Ensure the browser actually landed on the requested thread page before we trust the DOM. + */ +function assertTiebaReadTargetPage(raw: RawTiebaReadPagePayload, kwargs: CommandArgs): void { + const expectedThreadId = String(kwargs.id || '').trim(); + const expectedPageNumber = Math.max(1, Number(kwargs.page || 1)); + const pathname = String(raw.pageMeta?.pathname || '').trim(); + const actualThreadId = pathname.match(/^\/p\/(\d+)/)?.[1] || ''; + const actualPn = String(raw.pageMeta?.pn || '').trim(); + + if (!actualThreadId || actualThreadId !== expectedThreadId) { + throw new EmptyResultError('tieba read', 'Tieba did not land on the requested thread page'); + } + + if (expectedPageNumber > 1 && actualPn !== String(expectedPageNumber)) { + throw new EmptyResultError('tieba read', 'Tieba did not land on the requested page'); + } +} + +function buildExtractReadEvaluate(): string { + return ` + (async () => { + const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const waitFor = async (predicate, timeoutMs = 4000) => { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + if (predicate()) return true; + await wait(100); + } + return false; + }; + const normalizeText = (value) => (value || '').replace(/\\s+/g, ' ').trim(); + const getVueProps = (element) => { + const vue = element && element.__vue__ ? element.__vue__ : null; + return vue ? (vue._props || vue.$props || {}) : {}; + }; + const extractStructuredText = (content) => { + if (!Array.isArray(content)) return ''; + return content + .map((part) => (part && typeof part === 'object' && typeof part.text === 'string') ? part.text : '') + .join('') + .replace(/\\s+/g, ' ') + .trim(); + }; + const parseFloor = (text) => { + const match = (text || '').match(/第(\\d+)楼/); + return match ? parseInt(match[1], 10) : 0; + }; + + await waitFor(() => { + const hasMainTree = document.querySelector('.pb-title-wrap.pc-pb-title') || document.querySelector('.pb-content-wrap'); + return Boolean(hasMainTree || document.querySelector('.pb-comment-item')); + }); + + const titleNode = document.querySelector('.pb-title-wrap.pc-pb-title'); + const titleProps = getVueProps(titleNode); + const mainUser = document.querySelector('.head-line.user-info:not(.no-extra-margin)'); + const mainUserProps = getVueProps(mainUser); + const contentWrap = document.querySelector('.pb-content-wrap'); + const contentProps = getVueProps(contentWrap); + const structuredContent = Array.isArray(contentProps.content) ? contentProps.content : []; + const visibleContent = normalizeText( + contentWrap?.querySelector('.pb-content-item .text')?.textContent + || contentWrap?.querySelector('.text')?.textContent + || contentWrap?.textContent + ); + + return { + pageMeta: { + pathname: window.location.pathname || '', + pn: new URLSearchParams(window.location.search).get('pn') || '', + }, + mainPost: { + title: typeof titleProps.title === 'string' && titleProps.title.trim() + ? titleProps.title.trim() + : normalizeText(titleNode?.textContent).replace(/-百度贴吧$/, '').trim(), + author: normalizeText( + mainUser?.querySelector('.head-name')?.textContent + || mainUser?.querySelector('.name-info .head-name')?.textContent + || '' + ), + fallbackAuthor: mainUserProps?.userShowInfo?.[0]?.text?.text || '', + contentText: visibleContent, + structuredText: extractStructuredText(structuredContent), + visibleTime: (() => { + const userText = normalizeText(mainUser?.textContent); + const match = userText.match(/(刚刚|昨天|前天|\\d+\\s*(?:分钟|小时|天)前|\\d{2}-\\d{2}(?:\\s+\\d{2}:\\d{2})?|\\d{4}-\\d{2}-\\d{2}(?:\\s+\\d{2}:\\d{2})?)/); + return match ? match[1].trim() : ''; + })(), + structuredTime: mainUserProps?.descInfo?.time || 0, + hasMedia: structuredContent.length > 0 && !extractStructuredText(structuredContent), + }, + replies: Array.from(document.querySelectorAll('.pb-comment-item')).map((item) => { + const meta = item.querySelector('.comment-desc-left')?.textContent?.replace(/\\s+/g, ' ').trim() || ''; + return { + floor: parseFloor(meta), + author: item.querySelector('.head-name')?.textContent?.trim() || '', + content: item.querySelector('.comment-content .pb-content-item .text')?.textContent?.replace(/\\s+/g, ' ').trim() || '', + time: meta, + }; + }), + }; + })() + `; +} + +cli({ + site: 'tieba', + name: 'read', + description: 'Read a tieba thread', + domain: 'tieba.baidu.com', + strategy: Strategy.COOKIE, + browser: true, + navigateBefore: false, + args: [ + { name: 'id', positional: true, required: true, type: 'string', help: 'Thread ID' }, + { name: 'page', type: 'int', default: 1, help: 'Page number' }, + { name: 'limit', type: 'int', default: 30, help: 'Number of replies to return' }, + ], + columns: ['floor', 'author', 'content', 'time'], + func: async (page, kwargs) => { + const pageNumber = Math.max(1, Number(kwargs.page || 1)); + // Use the browser's normal settle path so we do not scrape stale DOM from the previous tab state. + await page.goto(getThreadUrl(kwargs)); + + const raw = (await page.evaluate(buildExtractReadEvaluate()) || {}) as RawTiebaReadPagePayload; + assertTiebaReadTargetPage(raw, kwargs); + + const items = buildTiebaReadItems(raw, { + limit: kwargs.limit, + includeMainPost: pageNumber === 1, + }); + if (!items.length) { + throw new EmptyResultError('tieba read', 'Tieba may have blocked the thread page, or the DOM structure may have changed'); + } + + return items; + }, +}); diff --git a/src/clis/tieba/search.ts b/src/clis/tieba/search.ts new file mode 100644 index 00000000..9d4e4dcb --- /dev/null +++ b/src/clis/tieba/search.ts @@ -0,0 +1,119 @@ +import { ArgumentError, EmptyResultError } from '../../errors.js'; +import { cli, Strategy, type CommandArgs } from '../../registry.js'; +import { buildTiebaSearchItems, type RawTiebaSearchItem, normalizeTiebaLimit } from './utils.js'; + +const MAX_SUPPORTED_PAGE = '1'; + +/** + * Extract search result cards from tieba's current desktop search page. + */ +function buildExtractSearchResultsEvaluate(limit: number): string { + return ` + (async () => { + const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const waitFor = async (predicate, timeoutMs = 5000) => { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + if (predicate()) return true; + await wait(100); + } + return false; + }; + const getVueProps = (element) => { + const vue = element && element.__vue__ ? element.__vue__ : null; + return vue ? (vue._props || vue.$props || {}) : {}; + }; + await waitFor(() => { + const bodyText = document.body?.innerText || ''; + return Boolean( + document.querySelector('.threadcardclass.thread-new3.index-feed-cards') + || document.querySelector('.search-no-result, .search-nodata, .no-result') + || /百度安全验证|安全验证|请完成验证/.test(bodyText) + ); + }); + const items = document.querySelectorAll('.threadcardclass.thread-new3.index-feed-cards'); + return Array.from(items).slice(0, ${limit}).map((item) => { + const forum = item.querySelector('.forum-name-text, .forum-name')?.textContent?.trim() || ''; + const meta = item.querySelector('.user-forum-info')?.textContent?.replace(/\\s+/g, ' ').trim() || ''; + const metaWithoutForum = forum && meta.startsWith(forum) + ? meta.slice(forum.length).trim() + : meta; + const metaMatch = metaWithoutForum.match(/^(.*?)\\s*发布于\\s*(.+)$/); + const actionBar = item.querySelector('.action-bar-container.search-action-bar'); + const businessInfo = getVueProps(actionBar).businessInfo || {}; + const href = item.querySelector('a[href*="/p/"]')?.href || ''; + const threadId = String(businessInfo.thread_id || '').trim(); + const title = item.querySelector('.title-wrap')?.textContent?.trim() + || item.querySelector('.title-content-wrap')?.textContent?.trim() + || ''; + const snippet = item.querySelector('.title-content-wrap')?.textContent?.trim() + || item.querySelector('.abstract-wrap')?.textContent?.trim() + || ''; + + return { + title, + forum, + author: metaMatch ? metaMatch[1].trim() : metaWithoutForum, + time: metaMatch ? metaMatch[2].trim() : '', + snippet: snippet.substring(0, 200), + id: threadId, + url: href || (threadId ? 'https://tieba.baidu.com/p/' + threadId : ''), + }; + }).filter((item) => item.title); + })() + `; +} + +/** + * Normalize CLI args into the concrete search page URL. + */ +function getSearchUrl(kwargs: CommandArgs): string { + const keyword = String(kwargs.keyword || ''); + const pageNumber = Number(kwargs.page || 1); + return `https://tieba.baidu.com/f/search/res?qw=${encodeURIComponent(keyword)}&ie=utf-8&pn=${pageNumber}`; +} + +/** + * Tieba's current desktop search UI no longer exposes a reliable browser-page transition. + */ +function assertSupportedPage(kwargs: CommandArgs): void { + const pageNumber = String(kwargs.page || 1); + if (pageNumber === MAX_SUPPORTED_PAGE) return; + + throw new ArgumentError( + `tieba search currently only supports --page ${MAX_SUPPORTED_PAGE}`, + `Baidu Tieba search no longer exposes stable browser pagination; omit --page or use --page ${MAX_SUPPORTED_PAGE}`, + ); +} + +cli({ + site: 'tieba', + name: 'search', + description: 'Search posts across tieba', + domain: 'tieba.baidu.com', + strategy: Strategy.COOKIE, + browser: true, + navigateBefore: false, + args: [ + { name: 'keyword', positional: true, required: true, type: 'string', help: 'Search keyword' }, + // Restrict unsupported pages before the browser session starts. + { name: 'page', type: 'int', default: 1, choices: ['1'], help: 'Page number (currently only 1 is supported)' }, + { name: 'limit', type: 'int', default: 20, help: 'Number of items to return' }, + ], + columns: ['rank', 'title', 'forum', 'author', 'time'], + func: async (page, kwargs) => { + assertSupportedPage(kwargs); + + const limit = normalizeTiebaLimit(kwargs.limit); + // Use the default browser settle path so we do not read a stale page. + await page.goto(getSearchUrl(kwargs)); + + const raw = await page.evaluate(buildExtractSearchResultsEvaluate(limit)); + const items = buildTiebaSearchItems(Array.isArray(raw) ? raw as RawTiebaSearchItem[] : [], limit); + if (!items.length) { + throw new EmptyResultError('tieba search', 'Tieba may have blocked the result page, or the DOM structure may have changed'); + } + + return items; + }, +}); diff --git a/src/clis/tieba/utils.test.ts b/src/clis/tieba/utils.test.ts new file mode 100644 index 00000000..a9257149 --- /dev/null +++ b/src/clis/tieba/utils.test.ts @@ -0,0 +1,322 @@ +import { describe, expect, it } from 'vitest'; +import { + MAX_TIEBA_LIMIT, + buildTiebaPostCardsFromPagePc, + buildTiebaPostItems, + buildTiebaSearchItems, + buildTiebaReadItems, + normalizeTiebaLimit, + signTiebaPcParams, +} from './utils.js'; + +describe('normalizeTiebaLimit', () => { + it('caps list commands at the declared tieba maximum', () => { + expect(MAX_TIEBA_LIMIT).toBe(20); + expect(normalizeTiebaLimit(undefined)).toBe(20); + expect(normalizeTiebaLimit(25)).toBe(20); + expect(normalizeTiebaLimit(7)).toBe(7); + }); +}); + +describe('signTiebaPcParams', () => { + it('matches Tieba PC forum-list signing for stable page_pc requests', () => { + expect(signTiebaPcParams({ + kw: encodeURIComponent('李毅'), + pn: '1', + sort_type: '-1', + is_newfrs: '1', + is_newfeed: '1', + rn: '30', + rn_need: '20', + tbs: '', + subapp_type: 'pc', + _client_type: '20', + })).toBe('466f2e091dd4ed17c6661a842b5ec342'); + }); +}); + +describe('buildTiebaPostCardsFromPagePc', () => { + it('extracts thread cards from signed page_pc feed payloads', () => { + const cards = buildTiebaPostCardsFromPagePc([ + { + layout: 'feed', + feed: { + schema: 'tiebaapp://router/portal?params=%7B%22pageParams%22%3A%7B%22tid%22%3A10596901456%7D%7D', + log_param: [ + { key: 'tid', value: '10596901456' }, + ], + business_info_map: { + thread_id: '10596901456', + title: '崇拜希特勒的人都是日本的汉奸走狗', + }, + components: [ + { + component: 'feed_head', + feed_head: { + extra_data: [ + { + business_info_map: { time_prefix: '回复于' }, + text: { text: '1774343231' }, + }, + ], + main_data: [ + { + text: { text: '上帝的子民º♬' }, + }, + ], + }, + }, + { + component: 'feed_title', + feed_title: { + data: [{ text_info: { text: '崇拜希特勒的人都是日本的汉奸走狗' } }], + }, + }, + { + component: 'feed_social', + feed_social: { + comment_num: 12, + }, + }, + ], + }, + }, + ]); + + expect(cards).toEqual([ + { + title: '崇拜希特勒的人都是日本的汉奸走狗', + author: '上帝的子民º♬', + descInfo: '回复于2026-03-24 17:07', + commentCount: 12, + actionTexts: [], + threadId: '10596901456', + url: 'https://tieba.baidu.com/p/10596901456', + }, + ]); + }); +}); + +describe('buildTiebaPostItems', () => { + it('builds stable thread ids and urls from card props without page hops', () => { + const items = buildTiebaPostItems([ + { + title: '我来说个事', + author: '暴躁的小伙子', + descInfo: '回复于2分钟前', + actionTexts: ['分享', '评论 5', '点赞 2'], + threadId: '10590564788', + }, + ], 5); + + expect(items).toEqual([ + { + rank: 1, + title: '我来说个事', + author: '暴躁的小伙子', + replies: 5, + last_reply: '2分钟前', + id: '10590564788', + url: 'https://tieba.baidu.com/p/10590564788', + }, + ]); + }); + + it('honors the public 20-item limit contract', () => { + const raw = Array.from({ length: 25 }, (_, index) => ({ + title: `帖子 ${index + 1}`, + author: `作者 ${index + 1}`, + descInfo: '回复于刚刚', + actionTexts: ['分享', `评论 ${index + 1}`], + threadId: String(1000 + index), + })); + + const items = buildTiebaPostItems(raw, 25); + expect(items).toHaveLength(20); + expect(items[19]).toMatchObject({ + rank: 20, + id: '1019', + url: 'https://tieba.baidu.com/p/1019', + }); + }); + + it('parses Chinese count units and keeps date-time last-reply text intact', () => { + const items = buildTiebaPostItems([ + { + title: '复杂格式帖子', + author: '作者', + descInfo: '回复于03-29 11:35', + actionTexts: ['分享', '评论 1.2万'], + url: 'https://tieba.baidu.com/p/123456', + }, + ], 5); + + expect(items[0]).toMatchObject({ + replies: 12000, + last_reply: '03-29 11:35', + id: '123456', + url: 'https://tieba.baidu.com/p/123456', + }); + }); +}); + +describe('buildTiebaSearchItems', () => { + it('keeps up to 20 search results when the page provides more than 10 cards', () => { + const raw = Array.from({ length: 25 }, (_, index) => ({ + title: `结果 ${index + 1}`, + forum: '编程吧', + author: `作者 ${index + 1}`, + time: '2026-03-29', + snippet: `摘要 ${index + 1}`, + id: String(2000 + index), + url: `https://tieba.baidu.com/p/${2000 + index}`, + })); + + const items = buildTiebaSearchItems(raw, 25); + expect(items).toHaveLength(20); + expect(items[19]).toMatchObject({ + rank: 20, + id: '2019', + url: 'https://tieba.baidu.com/p/2019', + }); + }); + + it('fills missing search ids from stable thread urls', () => { + const items = buildTiebaSearchItems([ + { + title: '搜索结果', + forum: '编程吧', + author: '作者', + time: '2026-03-29 11:35', + snippet: '摘要', + id: '', + url: 'https://tieba.baidu.com/p/654321', + }, + ], 5); + + expect(items[0]).toMatchObject({ + id: '654321', + url: 'https://tieba.baidu.com/p/654321', + }); + }); +}); + +describe('buildTiebaReadItems', () => { + it('prefers visible main-post fields and still keeps floor 1 for media-only threads', () => { + const items = buildTiebaReadItems({ + mainPost: { + title: '刚开始读博士的人据说都这样', + author: '湖水之岸', + contentText: '', + structuredText: '', + visibleTime: '03-24', + structuredTime: 1774343231, + hasMedia: true, + }, + replies: [], + }, { limit: 5, includeMainPost: true }); + + expect(items).toEqual([ + { + floor: 1, + author: '湖水之岸', + content: '刚开始读博士的人据说都这样 [media]', + time: '03-24', + }, + ]); + }); + + it('falls back to structured main-post data when visible text is missing', () => { + const items = buildTiebaReadItems({ + mainPost: { + title: '标题', + author: '', + fallbackAuthor: '结构化作者', + contentText: '', + structuredText: '结构化正文', + visibleTime: '', + structuredTime: 1774343231, + hasMedia: false, + }, + replies: [ + { floor: 2, author: '回复者', content: '二楼内容', time: '第2楼 2026-03-25 12:34 广东' }, + ], + }, { limit: 5, includeMainPost: true }); + + expect(items[0]).toMatchObject({ + floor: 1, + author: '结构化作者', + content: '标题 结构化正文', + time: '2026-03-24 17:07', + }); + expect(items[1]).toMatchObject({ + floor: 2, + author: '回复者', + content: '二楼内容', + time: '2026-03-25 12:34', + }); + }); + + it('strips trailing location metadata from reply times', () => { + const items = buildTiebaReadItems({ + mainPost: { + title: '主楼', + author: '楼主', + contentText: '正文', + visibleTime: '03-24', + }, + replies: [ + { floor: 2, author: '二楼', content: '二楼内容', time: '第2楼 3小时前 福建' }, + { floor: 3, author: '三楼', content: '三楼内容', time: '第3楼 刚刚 江苏' }, + ], + }, { limit: 5, includeMainPost: false }); + + expect(items).toEqual([ + { + floor: 2, + author: '二楼', + content: '二楼内容', + time: '3小时前', + }, + { + floor: 3, + author: '三楼', + content: '三楼内容', + time: '刚刚', + }, + ]); + }); + + it('counts limit as replies and skips main post on later pages', () => { + const items = buildTiebaReadItems({ + mainPost: { + title: '主楼', + author: '楼主', + contentText: '正文', + visibleTime: '03-24', + }, + replies: [ + { floor: 2, author: '二楼', content: '二楼内容', time: '第2楼 03-25' }, + { floor: 3, author: '三楼', content: '三楼内容', time: '第3楼 03-26' }, + { floor: 4, author: '四楼', content: '四楼内容', time: '第4楼 03-27' }, + ], + }, { limit: 2, includeMainPost: true }); + + expect(items).toHaveLength(3); + expect(items.map((item) => item.floor)).toEqual([1, 2, 3]); + + const page2 = buildTiebaReadItems({ + mainPost: { + title: '主楼', + author: '楼主', + contentText: '正文', + visibleTime: '03-24', + }, + replies: [ + { floor: 26, author: '二十六楼', content: '二十六楼内容', time: '第26楼 03-29' }, + ], + }, { limit: 2, includeMainPost: false }); + + expect(page2.map((item) => item.floor)).toEqual([26]); + }); +}); diff --git a/src/clis/tieba/utils.ts b/src/clis/tieba/utils.ts new file mode 100644 index 00000000..182a524c --- /dev/null +++ b/src/clis/tieba/utils.ts @@ -0,0 +1,348 @@ +import { createHash } from 'node:crypto'; + +/** + * Shared Tieba parsing helpers used by the browser adapters. + */ + +export const MAX_TIEBA_LIMIT = 20; +const TIEBA_PC_SIGN_SALT = '36770b1f34c9bbf2e7d1a99d2b82fa9e'; +const TIEBA_TIME_ZONE = 'Asia/Shanghai'; + +export interface RawTiebaPostCard { + title?: string; + author?: string; + descInfo?: string; + actionTexts?: string[]; + commentCount?: unknown; + threadId?: unknown; + url?: unknown; +} + +export interface RawTiebaPagePcFeedEntry { + layout?: string; + feed?: { + schema?: unknown; + log_param?: Array<{ key?: unknown; value?: unknown }>; + business_info_map?: Record; + components?: Array>; + }; +} + +export interface TiebaPostItem { + rank: number; + title: string; + author: string; + replies: number; + last_reply: string; + id: string; + url: string; +} + +export interface RawTiebaSearchItem { + title?: string; + forum?: string; + author?: string; + time?: string; + snippet?: string; + id?: string; + url?: string; +} + +export interface TiebaSearchItem { + rank: number; + title: string; + forum: string; + author: string; + time: string; + snippet: string; + id: string; + url: string; +} + +export interface RawTiebaMainPost { + title?: string; + author?: string; + fallbackAuthor?: string; + contentText?: string; + structuredText?: string; + visibleTime?: string; + structuredTime?: unknown; + hasMedia?: boolean; +} + +export interface RawTiebaReply { + floor?: unknown; + author?: string; + content?: string; + time?: string; +} + +export interface RawTiebaReadPayload { + mainPost?: RawTiebaMainPost | null; + replies?: RawTiebaReply[]; +} + +export interface TiebaReadItem { + floor: number; + author: string; + content: string; + time: string; +} + +export interface TiebaReadBuildOptions { + limit?: unknown; + includeMainPost?: boolean; +} + +/** + * Keep the public CLI limit contract aligned with the real implementation. + */ +export function normalizeTiebaLimit(value: unknown, fallback: number = MAX_TIEBA_LIMIT): number { + const parsed = Number(value ?? fallback); + if (!Number.isFinite(parsed) || parsed < 1) return fallback; + return Math.min(Math.trunc(parsed), MAX_TIEBA_LIMIT); +} + +export function normalizeText(value: unknown): string { + return typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : ''; +} + +/** + * Match Tieba PC's signed request contract so forum list fetching stays stable. + */ +export function signTiebaPcParams(params: Record): string { + const payload = Object.keys(params) + .sort((left, right) => left.localeCompare(right)) + .map((key) => `${key}=${params[key]}`) + .join('') + TIEBA_PC_SIGN_SALT; + return createHash('md5').update(payload).digest('hex'); +} + +export function parseTiebaCount(text: string): number { + const value = normalizeText(text).toUpperCase(); + if (!value) return 0; + const compact = value.replace(/[^\d.W万]/g, ''); + if (compact.endsWith('万')) { + return Math.round(parseFloat(compact.slice(0, -1)) * 10000); + } + if (compact.endsWith('W')) { + return Math.round(parseFloat(compact.slice(0, -1)) * 10000); + } + return parseInt(compact.replace(/[^\d]/g, ''), 10) || 0; +} + +export function parseTiebaLastReply(text: string): string { + const normalized = normalizeText(text).replace(/^回复于/, '').trim(); + const match = normalized.match(/(刚刚|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/); + return match ? match[1].trim() : normalized; +} + +function buildTiebaThreadUrl(id: string, rawUrl?: unknown): string { + const explicitUrl = normalizeText(rawUrl); + if (explicitUrl) return explicitUrl; + return id ? `https://tieba.baidu.com/p/${id}` : ''; +} + +function resolveTiebaThreadId(raw: RawTiebaPostCard): string { + const direct = normalizeText(raw.threadId); + if (direct) return direct; + + const fromUrl = normalizeText(raw.url).match(/\/p\/(\d+)/); + return fromUrl ? fromUrl[1] : ''; +} + +function getTiebaFeedComponent(feed: RawTiebaPagePcFeedEntry['feed'], name: string): Record { + const components = Array.isArray(feed?.components) ? feed.components : []; + const match = components.find((entry) => normalizeText((entry as Record).component) === name); + if (!match) return {}; + const payload = (match as Record)[name]; + return payload && typeof payload === 'object' ? payload as Record : {}; +} + +function extractTiebaFeedAuthor(feed: RawTiebaPagePcFeedEntry['feed']): string { + const head = getTiebaFeedComponent(feed, 'feed_head'); + const mainData = Array.isArray(head.main_data) ? head.main_data : []; + for (const item of mainData) { + const textRecord = (item as Record).text as Record | undefined; + const author = normalizeText(textRecord?.text); + if (author) return author; + } + return ''; +} + +function extractTiebaFeedTitle(feed: RawTiebaPagePcFeedEntry['feed']): string { + const title = getTiebaFeedComponent(feed, 'feed_title'); + const titleData = Array.isArray(title.data) ? title.data : []; + const firstTitle = titleData[0] as Record | undefined; + const textInfo = firstTitle?.text_info as Record | undefined; + return normalizeText(textInfo?.text) || normalizeText(feed?.business_info_map?.title); +} + +function extractTiebaFeedCommentCount(feed: RawTiebaPagePcFeedEntry['feed']): number { + const social = getTiebaFeedComponent(feed, 'feed_social'); + const commentCount = Number(social.comment_num ?? feed?.business_info_map?.comment_num ?? 0); + return Number.isFinite(commentCount) ? commentCount : 0; +} + +function extractTiebaFeedThreadId(feed: RawTiebaPagePcFeedEntry['feed']): string { + const direct = normalizeText(feed?.business_info_map?.thread_id); + if (direct) return direct; + + const logParams = Array.isArray(feed?.log_param) ? feed.log_param : []; + const fromLog = normalizeText(logParams.find((item) => normalizeText(item?.key) === 'tid')?.value); + if (fromLog) return fromLog; + + const fromSchema = normalizeText(feed?.schema).match(/[?&]tid=(\d+)/); + return fromSchema ? fromSchema[1] : ''; +} + +function extractTiebaFeedLastReply(feed: RawTiebaPagePcFeedEntry['feed']): string { + const head = getTiebaFeedComponent(feed, 'feed_head'); + const extraData = Array.isArray(head.extra_data) ? head.extra_data : []; + const first = extraData[0] as Record | undefined; + const prefix = normalizeText((first?.business_info_map as Record | undefined)?.time_prefix); + const textRecord = first?.text as Record | undefined; + const rawTime = normalizeText(textRecord?.text); + const formattedTime = /^\d+$/.test(rawTime) ? formatTiebaUnixTime(rawTime) : rawTime; + return [prefix, formattedTime].filter(Boolean).join(''); +} + +/** + * Convert Tieba's signed `page_pc` feed entries into the stable card shape used by the CLI. + */ +export function buildTiebaPostCardsFromPagePc(rawFeeds: RawTiebaPagePcFeedEntry[]): RawTiebaPostCard[] { + return rawFeeds + .filter((entry) => normalizeText(entry.layout) === 'feed' && entry.feed) + .map((entry) => { + const feed = entry.feed; + const threadId = extractTiebaFeedThreadId(feed); + return { + title: extractTiebaFeedTitle(feed), + author: extractTiebaFeedAuthor(feed), + descInfo: extractTiebaFeedLastReply(feed), + commentCount: extractTiebaFeedCommentCount(feed), + actionTexts: [], + threadId, + url: buildTiebaThreadUrl(threadId), + }; + }) + .filter((entry) => normalizeText(entry.title)); +} + +export function buildTiebaPostItems(rawCards: RawTiebaPostCard[], requestedLimit: unknown): TiebaPostItem[] { + const limit = normalizeTiebaLimit(requestedLimit); + + return rawCards + .map((raw) => { + const title = normalizeText(raw.title); + const id = resolveTiebaThreadId(raw); + const actionTexts = Array.isArray(raw.actionTexts) ? raw.actionTexts.map(normalizeText).filter(Boolean) : []; + const commentText = actionTexts.find((text) => /评论/.test(text)) || actionTexts[actionTexts.length - 1] || ''; + + return { + title, + author: normalizeText(raw.author), + replies: Number.isFinite(Number(raw.commentCount)) + ? Number(raw.commentCount) + : parseTiebaCount(commentText), + last_reply: parseTiebaLastReply(String(raw.descInfo ?? '')), + id, + url: buildTiebaThreadUrl(id, raw.url), + }; + }) + .filter((item) => item.title) + .slice(0, limit) + .map((item, index) => ({ rank: index + 1, ...item })); +} + +export function buildTiebaSearchItems(rawItems: RawTiebaSearchItem[], requestedLimit: unknown): TiebaSearchItem[] { + const limit = normalizeTiebaLimit(requestedLimit); + + return rawItems + .map((raw) => { + const url = normalizeText(raw.url); + const directId = normalizeText(raw.id); + const idFromUrl = url.match(/\/p\/(\d+)/)?.[1] || ''; + + return { + title: normalizeText(raw.title), + forum: normalizeText(raw.forum), + author: normalizeText(raw.author), + time: normalizeText(raw.time), + snippet: normalizeText(raw.snippet).slice(0, 200), + id: directId || idFromUrl, + url, + }; + }) + .filter((item) => item.title) + .slice(0, limit) + .map((item, index) => ({ rank: index + 1, ...item })); +} + +function formatTiebaUnixTime(value: unknown): string { + const ts = Number(value || 0); + if (!Number.isFinite(ts) || ts <= 0) return ''; + const parts = new Intl.DateTimeFormat('sv-SE', { + timeZone: TIEBA_TIME_ZONE, + year: 'numeric', + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + hour12: false, + }).formatToParts(new Date(ts * 1000)); + const values = Object.fromEntries(parts.map((part) => [part.type, part.value])); + return `${values.year}-${values.month}-${values.day} ${values.hour}:${values.minute}`; +} + +function parseTiebaReplyTime(text: string): string { + const normalized = normalizeText(text); + const withoutFloor = normalized.replace(/^第\d+楼\s+/, '').trim(); + const match = withoutFloor.match(/^(刚刚|昨天|前天|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/); + return match ? match[1].trim() : withoutFloor; +} + +function buildMainPostItem(mainPost?: RawTiebaMainPost | null): TiebaReadItem | null { + if (!mainPost) return null; + + const title = normalizeText(mainPost.title); + const author = normalizeText(mainPost.author) || normalizeText(mainPost.fallbackAuthor); + const body = normalizeText(mainPost.contentText) || normalizeText(mainPost.structuredText); + const hasMedia = Boolean(mainPost.hasMedia); + const content = [title, body || (hasMedia ? '[media]' : '')].filter(Boolean).join(' ').trim(); + + if (!content) return null; + + return { + floor: 1, + author, + content, + time: normalizeText(mainPost.visibleTime) || formatTiebaUnixTime(mainPost.structuredTime), + }; +} + +export function buildTiebaReadItems(payload: RawTiebaReadPayload, options: TiebaReadBuildOptions = {}): TiebaReadItem[] { + const fallback = Number.isFinite(Number(options.limit)) ? Number(options.limit) : 30; + const limit = Math.max(1, Math.trunc(fallback)); + const includeMainPost = options.includeMainPost !== false; + const items: TiebaReadItem[] = []; + const mainPost = buildMainPostItem(payload.mainPost); + + if (includeMainPost && mainPost) items.push(mainPost); + + const replies = Array.isArray(payload.replies) ? payload.replies : []; + const replyItems: TiebaReadItem[] = []; + for (const reply of replies) { + const floor = Number(reply.floor || 0); + const content = normalizeText(reply.content); + if (!Number.isFinite(floor) || floor < 1 || !content) continue; + replyItems.push({ + floor, + author: normalizeText(reply.author), + content, + time: parseTiebaReplyTime(String(reply.time ?? '')), + }); + } + + return items.concat(replyItems.slice(0, limit)); +} diff --git a/tests/e2e/browser-public.test.ts b/tests/e2e/browser-public.test.ts index 73ff2c49..81732866 100644 --- a/tests/e2e/browser-public.test.ts +++ b/tests/e2e/browser-public.test.ts @@ -38,6 +38,201 @@ function isBrowserBridgeUnavailable(result: CliResult): boolean { return /Browser Bridge.*not connected|Extension.*not connected/i.test(text); } +function isBaiduChallengeText(text: string): boolean { + return /百度安全验证|安全验证|请完成验证|captcha/i.test(text); +} + +function isBaiduChallenge(result: CliResult): boolean { + const text = `${result.stderr}\n${result.stdout}`; + return isBaiduChallengeText(text); +} + +function isTransientBrowserDetach(result: CliResult): boolean { + const text = `${result.stderr}\n${result.stdout}`; + return /Detached while handling command|No tab with id|Debugger is not attached to the tab/i.test(text); +} + +async function runCliWithTransientRetry(args: string[], timeout: number): Promise { + let result = await runCli(args, { timeout }); + if (result.code !== 0 && isTransientBrowserDetach(result)) { + result = await runCli(args, { timeout }); + } + return result; +} + +async function runJsonCliOrThrow(args: string[], label: string, timeout: number, opts: { retryTransient?: boolean } = {}): Promise { + const result = opts.retryTransient + ? await runCliWithTransientRetry(args, timeout) + : await runCli(args, { timeout }); + if (result.code !== 0) { + if (isBrowserBridgeUnavailable(result)) { + console.warn(`${label}: skipped — Browser Bridge extension is unavailable in this environment`); + return null; + } + if (isBaiduChallenge(result)) { + console.warn(`${label}: skipped — Baidu challenge page detected`); + return null; + } + throw new Error(`${label} failed:\n${result.stderr || result.stdout}`); + } + + const data = parseJsonOutput(result.stdout); + if (!Array.isArray(data)) { + throw new Error(`${label} returned non-array JSON:\n${result.stdout.slice(0, 500)}`); + } + return data; +} + +function normalizeTiebaTitle(value: string): string { + return value.replace(/\s+/g, ' ').trim(); +} + +function hasTiebaMainPost(data: any[] | null): boolean { + return Array.isArray(data) && data.some((item: any) => Number(item.floor) === 1); +} + +function expectNonEmptyDataOrSkipEnv(data: any[] | null, label: string): data is any[] { + if (data === null) { + console.warn(`${label}: skipped — environment is unavailable for browser assertions`); + return false; + } + expect(data.length).toBeGreaterThanOrEqual(1); + return true; +} + +function countTiebaReplies(data: any[] | null): number { + if (!Array.isArray(data)) return 0; + return data.filter((item: any) => Number(item.floor) > 1).length; +} + +function maxTiebaFloor(data: any[] | null): number { + if (!Array.isArray(data) || !data.length) return 0; + return Math.max(...data.map((item: any) => Number(item.floor) || 0)); +} + +function getTiebaReplyFloors(data: any[] | null): number[] { + if (!Array.isArray(data)) return []; + return data + .map((item: any) => Number(item.floor) || 0) + .filter((floor) => floor > 1); +} + +function countTiebaReplyFloorOverlap(left: any[] | null, right: any[] | null): number { + const rightFloors = new Set(getTiebaReplyFloors(right)); + return getTiebaReplyFloors(left).filter((floor) => rightFloors.has(floor)).length; +} + +function pickTiebaReadCandidates( + posts: any[] | null, + minReplies: number, +): Array<{ threadId: string; title: string; replies: number }> { + if (!Array.isArray(posts) || !posts.length) return []; + + return [...posts] + .filter((item: any) => item?.id) + .map((item: any) => ({ + threadId: String(item.id || '').trim(), + title: normalizeTiebaTitle(String(item.title || '')), + replies: Number(item.replies) || 0, + })) + .filter((item) => item.threadId && item.title && item.replies >= minReplies) + .sort((left, right) => right.replies - left.replies); +} + +/** + * Pick a live thread that actually exposes enough visible replies for the read assertions. + */ +async function getTiebaReadCandidateOrSkip( + label: string, + options: { minRepliesOnPage1?: number; requirePage2?: boolean } = {}, +): Promise<{ threadId: string; title: string; replies: number } | null> { + const minRepliesOnPage1 = Math.max(1, Number(options.minRepliesOnPage1 || 1)); + const requirePage2 = options.requirePage2 === true; + const posts = await runJsonCliOrThrow(['tieba', 'posts', '李毅', '--limit', '10', '-f', 'json'], `${label} setup`, 90_000, { + retryTransient: true, + }); + if (posts === null) { + return null; + } + if (!Array.isArray(posts) || !posts.length) { + console.warn(`${label}: skipped — could not resolve Tieba posts for setup`); + return null; + } + + const minReplies = requirePage2 ? Math.max(minRepliesOnPage1, 50) : minRepliesOnPage1; + const candidates = pickTiebaReadCandidates(posts, minReplies).slice(0, 5); + if (!candidates.length) { + console.warn(`${label}: skipped — could not find a Tieba thread with enough replies from posts metadata`); + return null; + } + + for (const candidate of candidates) { + const page1Preview = await runJsonCliOrThrow( + ['tieba', 'read', candidate.threadId, '--page', '1', '--limit', String(Math.max(minRepliesOnPage1, 2)), '-f', 'json'], + `${label} preview page 1`, + 90_000, + { retryTransient: true }, + ); + if (page1Preview === null) { + return null; + } + if (!hasTiebaMainPost(page1Preview) || countTiebaReplies(page1Preview) < minRepliesOnPage1) { + continue; + } + + if (requirePage2) { + const page2Preview = await runJsonCliOrThrow( + ['tieba', 'read', candidate.threadId, '--page', '2', '--limit', '1', '-f', 'json'], + `${label} preview page 2`, + 90_000, + { retryTransient: true }, + ); + if (page2Preview === null) { + return null; + } + if (hasTiebaMainPost(page2Preview) || countTiebaReplies(page2Preview) < 1) { + continue; + } + } + + return candidate; + } + + console.warn(`${label}: skipped — could not find a Tieba thread with enough visible replies`); + return null; +} + +describe('tieba e2e helper guards', () => { + it('does not treat generic empty-result errors as a Baidu challenge', () => { + expect(isBaiduChallengeText('tieba posts returned no data\n→ The page structure may have changed — this adapter may be outdated.')).toBe(false); + }); + + it('still recognizes actual Baidu challenge text', () => { + expect(isBaiduChallengeText('百度安全验证,请完成验证后继续')).toBe(true); + }); + + it('counts partial overlap between read pages', () => { + expect(countTiebaReplyFloorOverlap( + [{ floor: 1 }, { floor: 23 }, { floor: 27 }, { floor: 28 }, { floor: 29 }, { floor: 30 }], + [{ floor: 27 }, { floor: 28 }, { floor: 31 }], + )).toBe(2); + }); + + it('picks read fixtures from posts metadata in descending reply order', () => { + expect(pickTiebaReadCandidates([ + { id: '1', title: '普通帖', replies: 2 }, + { id: '2', title: '大帖', replies: 120 }, + { id: '', title: '无效帖', replies: 999 }, + ], 50)).toEqual([{ + threadId: '2', + title: '大帖', + replies: 120, + }]); + + expect(pickTiebaReadCandidates([{ id: '1', title: '普通帖', replies: 2 }], 50)).toEqual([]); + }); +}); + async function expectImdbDataOrChallengeSkip(args: string[], label: string): Promise { const result = await runCli(args, { timeout: 60_000 }); if (result.code !== 0) { @@ -103,6 +298,99 @@ describe('browser public-data commands E2E', () => { expectDataOrSkip(data, 'v2ex daily'); }, 60_000); + // ── tieba ── + it('tieba hot returns trending topics', async () => { + const data = await runJsonCliOrThrow(['tieba', 'hot', '--limit', '5', '-f', 'json'], 'tieba hot', 60_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data, 'tieba hot')) { + expect(data[0]).toHaveProperty('title'); + expect(data[0]).toHaveProperty('discussions'); + } + }, 60_000); + + it('tieba posts returns forum threads', async () => { + const data = await runJsonCliOrThrow(['tieba', 'posts', '李毅', '--limit', '20', '-f', 'json'], 'tieba posts', 90_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data, 'tieba posts')) { + expect(data[0]).toHaveProperty('title'); + expect(String(data[0].id || '')).toMatch(/^\d+$/); + expect(String(data[0].url || '')).toContain('/p/'); + expect(Number.isFinite(Number(data[0].replies))).toBe(true); + expect(data.length).toBeLessThanOrEqual(20); + } + }, 90_000); + + it('tieba posts page 2 returns a different forum slice', async () => { + const data1 = await runJsonCliOrThrow(['tieba', 'posts', '李毅', '--page', '1', '--limit', '5', '-f', 'json'], 'tieba posts page 1', 60_000, { retryTransient: true }); + const data2 = await runJsonCliOrThrow(['tieba', 'posts', '李毅', '--page', '2', '--limit', '5', '-f', 'json'], 'tieba posts page 2', 60_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data1, 'tieba posts page 1') && expectNonEmptyDataOrSkipEnv(data2, 'tieba posts page 2')) { + const ids1 = data1.map((item: any) => String(item.id || '')).filter(Boolean); + const ids2 = data2.map((item: any) => String(item.id || '')).filter(Boolean); + const newIds = ids2.filter((id) => !ids1.includes(id)); + expect(newIds.length).toBeGreaterThan(0); + } + }, 90_000); + + it('tieba search returns results', async () => { + const data = await runJsonCliOrThrow(['tieba', 'search', '编程', '--limit', '20', '-f', 'json'], 'tieba search', 90_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data, 'tieba search')) { + expect(data[0]).toHaveProperty('title'); + expect(String(data[0].id || '')).toMatch(/^\d+$/); + expect(String(data[0].url || '')).toContain('/p/'); + expect(data.length).toBeLessThanOrEqual(20); + } + }, 90_000); + + it('tieba search rejects unsupported pages above 1', async () => { + const result = await runCli(['tieba', 'search', '编程', '--page', '2', '--limit', '3', '-f', 'json'], { + timeout: 60_000, + }); + expect(result.code).toBe(2); + expect(`${result.stderr}\n${result.stdout}`).toContain('Argument "page" must be one of: 1'); + }, 60_000); + + it('tieba read returns thread content', async () => { + const fixture = await getTiebaReadCandidateOrSkip('tieba read'); + if (!fixture) { + return; + } + const data = await runJsonCliOrThrow(['tieba', 'read', fixture.threadId, '--limit', '5', '-f', 'json'], 'tieba read', 90_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data, 'tieba read')) { + expect(data[0]).toHaveProperty('floor'); + expect(data[0]).toHaveProperty('content'); + expect(data.some((item: any) => Number(item.floor) === 1)).toBe(true); + expect(normalizeTiebaTitle(String(data[0].content || ''))).toContain(fixture.title); + } + }, 90_000); + + it('tieba read page 2 omits the main post', async () => { + const fixture = await getTiebaReadCandidateOrSkip('tieba read page', { requirePage2: true }); + if (!fixture) { + return; + } + const data1 = await runJsonCliOrThrow(['tieba', 'read', fixture.threadId, '--page', '1', '--limit', '5', '-f', 'json'], 'tieba read page 1', 90_000, { retryTransient: true }); + const data2 = await runJsonCliOrThrow(['tieba', 'read', fixture.threadId, '--page', '2', '--limit', '5', '-f', 'json'], 'tieba read page 2', 90_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data1, 'tieba read page 1') && expectNonEmptyDataOrSkipEnv(data2, 'tieba read page 2')) { + const overlap = countTiebaReplyFloorOverlap(data1, data2); + expect(normalizeTiebaTitle(String(data1[0].content || ''))).toContain(fixture.title); + expect(hasTiebaMainPost(data1)).toBe(true); + expect(hasTiebaMainPost(data2)).toBe(false); + expect(overlap).toBe(0); + expect(maxTiebaFloor(data2)).toBeGreaterThan(maxTiebaFloor(data1)); + } + }, 90_000); + + it('tieba read limit counts replies instead of consuming the main post slot', async () => { + const fixture = await getTiebaReadCandidateOrSkip('tieba read limit semantics', { minRepliesOnPage1: 2 }); + if (!fixture) { + return; + } + const data = await runJsonCliOrThrow(['tieba', 'read', fixture.threadId, '--page', '1', '--limit', '2', '-f', 'json'], 'tieba read limit semantics', 90_000, { retryTransient: true }); + if (expectNonEmptyDataOrSkipEnv(data, 'tieba read limit semantics')) { + expect(normalizeTiebaTitle(String(data[0].content || ''))).toContain(fixture.title); + expect(hasTiebaMainPost(data)).toBe(true); + expect(countTiebaReplies(data)).toBe(2); + } + }, 90_000); + // ── imdb ── it('imdb top returns chart data', async () => { const data = await expectImdbDataOrChallengeSkip(['imdb', 'top', '--limit', '3', '-f', 'json'], 'imdb top');