diff --git a/README.zh-CN.md b/README.zh-CN.md index 624760a7..413429fc 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -131,7 +131,7 @@ npm install -g @jackwener/opencli@latest | **notion** | `status` `search` `read` `new` `write` `sidebar` `favorites` `export` | 桌面端 | | **discord-app** | `status` `send` `read` `channels` `servers` `search` `members` | 桌面端 | | **v2ex** | `hot` `latest` `topic` `node` `user` `member` `replies` `nodes` `daily` `me` `notifications` | 公开 / 浏览器 | -| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` `earnings-date` `fund-holdings` `fund-snapshot` | 浏览器 | +| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `comments` `watchlist` `earnings-date` `fund-holdings` `fund-snapshot` | 浏览器 | | **antigravity** | `status` `send` `read` `new` `dump` `extract-code` `model` `watch` | 桌面端 | | **chatgpt** | `status` `new` `send` `read` `ask` `model` | 桌面端 | | **xiaohongshu** | `search` `notifications` `feed` `user` `download` `publish` `creator-notes` `creator-note-detail` `creator-notes-summary` `creator-profile` `creator-stats` | 浏览器 | diff --git a/docs/adapters/browser/xueqiu.md b/docs/adapters/browser/xueqiu.md index a96b4e68..d9a5d366 100644 --- a/docs/adapters/browser/xueqiu.md +++ b/docs/adapters/browser/xueqiu.md @@ -12,6 +12,7 @@ | `opencli xueqiu hot` | 获取雪球热门动态 | | `opencli xueqiu search` | 搜索雪球股票(代码或名称) | | `opencli xueqiu stock` | 获取雪球股票实时行情 | +| `opencli xueqiu comments` | 获取单只股票的讨论动态(按时间排序) | | `opencli xueqiu watchlist` | 获取雪球自选股列表 | | `opencli xueqiu fund-holdings` | 获取蛋卷基金持仓明细(可用 `--account` 按子账户过滤) | | `opencli xueqiu fund-snapshot` | 获取蛋卷基金快照(总资产、子账户、持仓,推荐 `-f json`) | @@ -28,6 +29,9 @@ opencli xueqiu search 茅台 # View one stock opencli xueqiu stock SH600519 +# View recent discussions for one stock +opencli xueqiu comments SH600519 --limit 5 + # Upcoming earnings dates opencli xueqiu earnings-date SH600519 --next @@ -57,4 +61,5 @@ opencli xueqiu feed -v - `fund-holdings` exposes both market value and 
share fields (`volume`, `usableRemainShare`) - `fund-snapshot -f json` is the easiest way to persist a full account snapshot for later analysis or diffing +- `comments` returns stock-scoped discussion posts from the symbol page, not reply threads under one parent post - If the commands return empty data, first confirm the logged-in browser can directly see the Danjuan asset page diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 7a82f177..3e71dab4 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -11,7 +11,7 @@ Run `opencli list` for the live registry. | **[bilibili](/adapters/browser/bilibili)** | `hot` `search` `me` `favorite` `history` `feed` `subtitle` `dynamic` `ranking` `following` `user-videos` `download` | 🔐 Browser | | **[zhihu](/adapters/browser/zhihu)** | `hot` `search` `question` `download` | 🔐 Browser | | **[xiaohongshu](/adapters/browser/xiaohongshu)** | `search` `notifications` `feed` `user` `download` `publish` `creator-notes` `creator-note-detail` `creator-notes-summary` `creator-profile` `creator-stats` | 🔐 Browser | -| **[xueqiu](/adapters/browser/xueqiu)** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` `earnings-date` `fund-holdings` `fund-snapshot` | 🔐 Browser | +| **[xueqiu](/adapters/browser/xueqiu)** | `feed` `hot-stock` `hot` `search` `stock` `comments` `watchlist` `earnings-date` `fund-holdings` `fund-snapshot` | 🔐 Browser | | **[youtube](/adapters/browser/youtube)** | `search` `video` `transcript` | 🔐 Browser | | **[v2ex](/adapters/browser/v2ex)** | `hot` `latest` `topic` `node` `user` `member` `replies` `nodes` `daily` `me` `notifications` | 🌐 / 🔐 | | **[bloomberg](/adapters/browser/bloomberg)** | `main` `markets` `economics` `industries` `tech` `politics` `businessweek` `opinions` `feeds` `news` | 🌐 / 🔐 | diff --git a/src/clis/xueqiu/comments.test.ts b/src/clis/xueqiu/comments.test.ts new file mode 100644 index 00000000..e261e892 --- /dev/null +++ b/src/clis/xueqiu/comments.test.ts @@ -0,0 
// src/clis/xueqiu/comments.test.ts
import { beforeEach, describe, expect, it, vi } from 'vitest';

const { mockWarn } = vi.hoisted(() => ({
  mockWarn: vi.fn(),
}));

// Replace the shared logger so warnings emitted through `log.warn` are observable.
vi.mock('../../logger.js', () => ({
  log: {
    info: vi.fn(),
    warn: mockWarn,
    error: vi.fn(),
    verbose: vi.fn(),
    debug: vi.fn(),
    step: vi.fn(),
    stepResult: vi.fn(),
  },
}));

import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '../../errors.js';
import { getRegistry } from '../../registry.js';
import {
  classifyXueqiuCommentsResponse,
  collectCommentRows,
  mergeUniqueCommentRows,
  normalizeCommentItem,
  normalizeSymbolInput,
} from './comments.js';

const command = getRegistry().get('xueqiu/comments');

/**
 * Synthetic challenge-page body. It only needs to carry one of the markers the
 * classifier looks for in `text/html` responses (`renderData` / `_waf_`).
 */
const WAF_CHALLENGE_HTML = '<textarea id="renderData">{"_waf_token":"x"}</textarea>';

/** Response reported when the in-page fetch itself throws. */
const NETWORK_FAILURE = {
  status: 0,
  contentType: 'text/plain',
  json: null,
  textSnippet: 'network failed',
};

/** Build a 200 JSON response carrying `json` as the parsed body. */
function jsonResponse(json: unknown) {
  return { status: 200, contentType: 'application/json', json, textSnippet: '' };
}

/** Build an HTML response with no parsed JSON body. */
function htmlResponse(status: number, textSnippet: string) {
  return { status, contentType: 'text/html', json: null, textSnippet };
}

/** Build one raw discussion item; pass `undefined` as id to model a row without an id. */
function post(id: number | undefined, description: string, screenName: string, userId: number) {
  const user = { screen_name: screenName, id: userId };
  return id === undefined ? { description, user } : { id, description, user };
}

/** First page shared by the pagination tests: two valid rows, exactly one full page. */
const FIRST_PAGE = jsonResponse({
  list: [post(1, 'alpha', 'alice', 10), post(2, 'beta', 'bob', 11)],
});

/** Rows expected from `FIRST_PAGE` alone. */
const FIRST_PAGE_ROWS = [
  { id: '1', text: 'alpha' },
  { id: '2', text: 'beta' },
];

/** Page stub whose `evaluate` always resolves to `response`. */
function createCommandPage(response: unknown) {
  return {
    goto: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn().mockResolvedValue(response),
  } as any;
}

/** Page stub for argument-validation tests, where no navigation may happen. */
function createIdlePage() {
  return {
    goto: vi.fn(),
  } as any;
}

/** Page-fetch stub that resolves to the given responses in order. */
function pagedFetch(...responses: unknown[]) {
  const fetchPage = vi.fn();
  for (const response of responses) {
    fetchPage.mockResolvedValueOnce(response);
  }
  return fetchPage;
}

/** Run the collector with the page size and request cap used throughout this suite. */
function collect(fetchPage: ReturnType<typeof vi.fn>, limit = 3) {
  return collectCommentRows({
    symbol: 'SH600519',
    limit,
    pageSize: 2,
    maxRequests: 5,
    fetchPage,
    warn: mockWarn,
  });
}

describe('xueqiu comments', () => {
  beforeEach(() => {
    mockWarn.mockReset();
  });

  // ---- symbol normalization -------------------------------------------------

  it('rejects blank symbol before any request is made', () => {
    expect(() => normalizeSymbolInput(' ')).toThrow(ArgumentError);
  });

  it('rejects URL-like input before any request is made', () => {
    expect(() => normalizeSymbolInput('https://xueqiu.com/S/SH600519')).toThrow(ArgumentError);
  });

  it('normalizes symbol by trimming and upper-casing it', () => {
    expect(normalizeSymbolInput(' sh600519 ')).toBe('SH600519');
  });

  it('accepts supported US and HK-style symbols', () => {
    expect(normalizeSymbolInput('aapl')).toBe('AAPL');
    expect(normalizeSymbolInput('00700')).toBe('00700');
  });

  it('rejects obviously invalid symbols before any request is made', () => {
    expect(() => normalizeSymbolInput('INVALID')).toThrow(ArgumentError);
  });

  // ---- response classification ----------------------------------------------

  it.each([
    {
      name: 'classifies 401 responses as auth failures',
      response: { status: 401, contentType: 'application/json', json: null, textSnippet: '' },
      kind: 'auth',
    },
    {
      name: 'classifies html challenge pages as anti-bot failures',
      response: htmlResponse(200, WAF_CHALLENGE_HTML),
      kind: 'anti-bot',
    },
    {
      name: 'classifies 403 html challenge pages as anti-bot failures',
      response: htmlResponse(403, WAF_CHALLENGE_HTML),
      kind: 'anti-bot',
    },
    {
      name: 'classifies 403 html challenge pages without waf markers as anti-bot failures',
      response: htmlResponse(403, 'security challenge required'),
      kind: 'anti-bot',
    },
    {
      name: 'does not misclassify generic html error pages as anti-bot failures',
      response: htmlResponse(500, 'server error'),
      kind: 'unknown',
    },
    {
      name: 'classifies html login pages as auth failures',
      response: htmlResponse(200, 'login required'),
      kind: 'auth',
    },
    {
      name: 'classifies invalid-symbol json envelopes as argument failures',
      response: jsonResponse({ success: false, error: 'invalid symbol format' }),
      kind: 'argument',
    },
    {
      name: 'does not misclassify required-field backend errors as auth failures',
      response: jsonResponse({ success: false, message: 'symbol is required' }),
      kind: 'incompatible',
    },
    {
      name: 'classifies json responses without a usable list as incompatible',
      response: jsonResponse({ success: true, data: { next_max_id: 1 } }),
      kind: 'incompatible',
    },
    {
      name: 'classifies empty discussion lists as empty results',
      response: jsonResponse({ list: [] }),
      kind: 'empty',
    },
    {
      name: 'classifies unclear json error envelopes as incompatible',
      response: jsonResponse({ success: false, message: 'unexpected backend state' }),
      kind: 'incompatible',
    },
  ])('$name', ({ response, kind }) => {
    expect(classifyXueqiuCommentsResponse(response)).toMatchObject({ kind });
  });

  // ---- row helpers ----------------------------------------------------------

  it('deduplicates rows by stable id while preserving order', () => {
    expect(
      mergeUniqueCommentRows(
        [],
        [
          { id: 'a', author: 'alice' },
          { id: 'b', author: 'bob' },
          { id: 'a', author: 'alice-duplicate' },
        ],
      ),
    ).toEqual([
      { id: 'a', author: 'alice' },
      { id: 'b', author: 'bob' },
    ]);
  });

  it('normalizes one raw discussion item into a cleaned row', () => {
    expect(
      normalizeCommentItem({
        id: 123,
        description: '<p>hello world</p>',
        created_at: 1700000000000,
        user: { screen_name: 'alice', id: 99 },
        reply_count: 2,
        retweet_count: 3,
        fav_count: 4,
      }),
    ).toEqual({
      id: '123',
      author: 'alice',
      text: 'hello world',
      likes: 4,
      replies: 2,
      retweets: 3,
      created_at: new Date(1700000000000).toISOString(),
      url: 'https://xueqiu.com/99/123',
    });
  });

  it('drops invalid created_at values instead of throwing', () => {
    expect(
      normalizeCommentItem({
        id: 456,
        description: 'hello',
        created_at: 'not-a-date',
        user: { screen_name: 'bob', id: 100 },
      }),
    ).toEqual({
      id: '456',
      author: 'bob',
      text: 'hello',
      likes: 0,
      replies: 0,
      retweets: 0,
      created_at: null,
      url: 'https://xueqiu.com/100/456',
    });
  });

  it('drops object-like ids instead of turning them into fake identifiers', () => {
    expect(
      normalizeCommentItem({
        id: { broken: true },
        description: 'hello',
        created_at: 1700000000000,
        user: { screen_name: 'eve', id: { broken: true } },
      }),
    ).toEqual({
      id: '',
      author: 'eve',
      text: 'hello',
      likes: 0,
      replies: 0,
      retweets: 0,
      created_at: new Date(1700000000000).toISOString(),
      url: null,
    });
  });

  it('normalizes invalid count fields to zero', () => {
    expect(
      normalizeCommentItem({
        id: 789,
        description: 'hello',
        created_at: 1700000000000,
        user: { screen_name: 'carol', id: 101 },
        reply_count: 'oops',
        retweet_count: Infinity,
        fav_count: '',
      }),
    ).toEqual({
      id: '789',
      author: 'carol',
      text: 'hello',
      likes: 0,
      replies: 0,
      retweets: 0,
      created_at: new Date(1700000000000).toISOString(),
      url: 'https://xueqiu.com/101/789',
    });
  });

  // ---- registered command ---------------------------------------------------

  it('registers the xueqiu comments command', () => {
    expect(command).toMatchObject({
      site: 'xueqiu',
      name: 'comments',
    });
  });

  it('rejects blank symbol before navigating the page', async () => {
    const page = createIdlePage();

    await expect(command!.func!(page, { symbol: ' ', limit: 5 })).rejects.toThrow(ArgumentError);
    expect(page.goto).not.toHaveBeenCalled();
  });

  it('throws auth error when the first page responds with 401', async () => {
    const page = createCommandPage({
      status: 401,
      contentType: 'application/json',
      json: null,
      textSnippet: '',
    });

    await expect(command!.func!(page, { symbol: 'sh600519', limit: 5 })).rejects.toThrow(AuthRequiredError);
    expect(page.goto).toHaveBeenCalledWith('https://xueqiu.com');
  });

  it('rejects invalid symbols before navigating the page', async () => {
    const page = createIdlePage();

    await expect(command!.func!(page, { symbol: 'INVALID', limit: 5 })).rejects.toThrow(ArgumentError);
    expect(page.goto).not.toHaveBeenCalled();
  });

  it('rejects non-positive limit before navigating the page', async () => {
    const page = createIdlePage();

    await expect(command!.func!(page, { symbol: 'SH600519', limit: 0 })).rejects.toThrow(ArgumentError);
    await expect(command!.func!(page, { symbol: 'SH600519', limit: -1 })).rejects.toThrow(ArgumentError);
    expect(page.goto).not.toHaveBeenCalled();
  });

  it('rejects limits above the supported maximum before navigating the page', async () => {
    const page = createIdlePage();

    await expect(command!.func!(page, { symbol: 'SH600519', limit: 101 })).rejects.toThrow(ArgumentError);
    expect(page.goto).not.toHaveBeenCalled();
  });

  it('throws empty-result error with normalized symbol when the first page is empty', async () => {
    const page = createCommandPage(jsonResponse({ list: [] }));
    const rejection = command!.func!(page, { symbol: 'sh600519', limit: 5 });

    await expect(rejection).rejects.toThrow(EmptyResultError);
    await expect(rejection).rejects.toThrow('SH600519');
  });

  it('throws argument error when the first page reports an invalid symbol', async () => {
    const page = createCommandPage(jsonResponse({ success: false, error: 'invalid symbol format' }));
    const rejection = command!.func!(page, { symbol: 'sh600519', limit: 5 });

    await expect(rejection).rejects.toThrow(ArgumentError);
    await expect(rejection).rejects.toThrow('SH600519');
  });

  it('throws a compact incompatible-response error when json shape is unusable', async () => {
    const page = createCommandPage(jsonResponse({ success: true, data: { next_max_id: 1 } }));
    const rejection = command!.func!(page, { symbol: 'sh600519', limit: 5 });

    await expect(rejection).rejects.toThrow(CommandExecutionError);
    await expect(rejection).rejects.toThrow('Unexpected response');
  });

  it('throws auth-required error when the first page is an html challenge', async () => {
    const page = createCommandPage(htmlResponse(200, WAF_CHALLENGE_HTML));

    await expect(command!.func!(page, { symbol: 'sh600519', limit: 5 })).rejects.toThrow(AuthRequiredError);
  });

  it('throws command-execution error when the first page fetch fails before any rows are available', async () => {
    const page = createCommandPage(NETWORK_FAILURE);
    const rejection = command!.func!(page, { symbol: 'sh600519', limit: 5 });

    await expect(rejection).rejects.toThrow(CommandExecutionError);
    await expect(rejection).rejects.toThrow('Unexpected response');
  });

  it('returns normalized rows when the first page includes discussion items', async () => {
    const page = createCommandPage(
      jsonResponse({
        list: [
          {
            id: 123,
            description: '<p>hello world</p>',
            created_at: 1700000000000,
            user: { screen_name: 'alice', id: 99 },
            reply_count: 2,
            retweet_count: 3,
            fav_count: 4,
          },
        ],
      }),
    );

    const result = await command!.func!(page, { symbol: 'sh600519', limit: 5 });

    expect(result).toEqual([
      {
        author: 'alice',
        text: 'hello world',
        likes: 4,
        replies: 2,
        retweets: 3,
        created_at: new Date(1700000000000).toISOString(),
        url: 'https://xueqiu.com/99/123',
      },
    ]);
    // The internal dedup id must not leak into the public row.
    expect(Object.keys((result as Array<Record<string, unknown>>)[0]).sort()).toEqual([
      'author',
      'created_at',
      'likes',
      'replies',
      'retweets',
      'text',
      'url',
    ]);
  });

  // ---- pagination -----------------------------------------------------------

  it('collects later pages, deduplicates rows, and trims to limit', async () => {
    const fetchPage = pagedFetch(
      FIRST_PAGE,
      jsonResponse({
        list: [post(2, 'beta-duplicate', 'bob', 11), post(3, 'gamma', 'carol', 12)],
      }),
    );

    await expect(collect(fetchPage)).resolves.toMatchObject([
      { id: '1', text: 'alpha' },
      { id: '2', text: 'beta' },
      { id: '3', text: 'gamma' },
    ]);

    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).not.toHaveBeenCalled();
  });

  it('returns partial rows and emits warning when a later page fails', async () => {
    const fetchPage = pagedFetch(FIRST_PAGE, htmlResponse(200, WAF_CHALLENGE_HTML));

    await expect(collect(fetchPage)).resolves.toMatchObject(FIRST_PAGE_ROWS);

    expect(mockWarn).toHaveBeenCalledTimes(1);
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('2/3'));
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('anti-bot'));
  });

  it('returns partial rows and emits warning when a later page has an unknown fetch failure', async () => {
    const fetchPage = pagedFetch(FIRST_PAGE, NETWORK_FAILURE);

    await expect(collect(fetchPage)).resolves.toMatchObject(FIRST_PAGE_ROWS);

    expect(mockWarn).toHaveBeenCalledTimes(1);
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('2/3'));
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('unknown request failure'));
  });

  it('ends pagination quietly when a later page returns an empty list', async () => {
    const fetchPage = pagedFetch(FIRST_PAGE, jsonResponse({ list: [] }));

    const result = await collect(fetchPage);

    expect(result).toMatchObject(FIRST_PAGE_ROWS);
    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).not.toHaveBeenCalled();
  });

  it('returns partial rows and emits warning when a later page does not advance pagination', async () => {
    const fetchPage = pagedFetch(
      FIRST_PAGE,
      jsonResponse({
        list: [post(1, 'alpha-duplicate', 'alice', 10), post(2, 'beta-duplicate', 'bob', 11)],
      }),
    );

    const result = await collect(fetchPage);

    expect(result).toMatchObject(FIRST_PAGE_ROWS);
    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).toHaveBeenCalledTimes(1);
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('2/3'));
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('pagination did not advance'));
  });

  it('drops rows without ids and warns when pagination cannot advance', async () => {
    const fetchPage = pagedFetch(
      FIRST_PAGE,
      jsonResponse({
        list: [post(undefined, 'missing-id-a', 'carol', 12), post(undefined, 'missing-id-b', 'dave', 13)],
      }),
    );

    const result = await collect(fetchPage);

    expect(result).toMatchObject(FIRST_PAGE_ROWS);
    expect(result).toHaveLength(2);
    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).toHaveBeenCalledTimes(1);
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('2/3'));
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('unknown request failure'));
  });

  it('continues pagination when a full page contains both valid rows and missing-id rows', async () => {
    const fetchPage = pagedFetch(
      jsonResponse({
        list: [post(1, 'alpha', 'alice', 10), post(undefined, 'missing-id', 'carol', 12)],
      }),
      jsonResponse({
        list: [post(2, 'beta', 'bob', 11), post(3, 'gamma', 'dave', 13)],
      }),
    );

    const result = await collect(fetchPage);

    expect(result).toMatchObject([
      { id: '1', text: 'alpha' },
      { id: '2', text: 'beta' },
      { id: '3', text: 'gamma' },
    ]);
    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).not.toHaveBeenCalled();
  });

  it('does not warn when a short final page contains only duplicate rows', async () => {
    const fetchPage = pagedFetch(
      FIRST_PAGE,
      jsonResponse({
        list: [post(2, 'beta-duplicate', 'bob', 11)],
      }),
    );

    const result = await collect(fetchPage, 5);

    expect(result).toMatchObject(FIRST_PAGE_ROWS);
    expect(fetchPage).toHaveBeenCalledTimes(2);
    expect(mockWarn).not.toHaveBeenCalled();
  });

  it('emits warning when pagination stops at the safety cap', async () => {
    let nextId = 1;
    // Every request yields a full page of fresh ids, so only the request cap can stop the loop.
    const fetchPage = vi.fn().mockImplementation(async () =>
      jsonResponse({
        list: [post(nextId++, 'alpha', 'alice', 10), post(nextId++, 'beta', 'bob', 11)],
      }),
    );

    const result = await collect(fetchPage, 12);

    expect(result).toHaveLength(10);
    expect(fetchPage).toHaveBeenCalledTimes(5);
    expect(mockWarn).toHaveBeenCalledTimes(1);
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('10/12'));
    expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('reached safety cap'));
  });
});
00000000..8e3831a8 --- /dev/null +++ b/src/clis/xueqiu/comments.ts @@ -0,0 +1,461 @@ +import type { IPage } from '../../types.js'; +import { cli, Strategy } from '../../registry.js'; +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '../../errors.js'; +import { log } from '../../logger.js'; +import { isRecord } from '../../utils.js'; + +/** + * Minimal browser-response shape used by the classifier. + */ +export interface XueqiuCommentsResponse { + status: number; + contentType: string; + json: unknown; + textSnippet: string; +} + +/** + * Minimal normalized row shape used during pagination and deduplication. + */ +export interface XueqiuCommentRow { + id: string; + author: string; + text?: string; + likes?: number; + replies?: number; + retweets?: number; + created_at?: string | null; + url?: string | null; +} + +/** + * Public CLI row shape. This intentionally omits the internal stable ID used + * only for deduplication, so machine-readable output matches the command + * contract and table columns. + */ +export type XueqiuCommentOutputRow = Omit; + +/** + * Pagination options for collecting enough rows to satisfy `--limit`. 
+ */ +export interface CollectCommentRowsOptions { + symbol: string; + limit: number; + pageSize: number; + maxRequests: number; + fetchPage: (pageNumber: number, pageSize: number) => Promise; + warn?: (message: string) => void; +} + +type XueqiuCommentsKind = 'auth' | 'anti-bot' | 'argument' | 'empty' | 'incompatible' | 'unknown'; + +const XUEQIU_SYMBOL_PATTERN = /^(?:[A-Z]{2}\d{5,6}|\d{4,6}|[A-Z]{1,5}(?:[.-][A-Z]{1,2})?)$/; + +const FAILURE_REASON_BY_KIND: Record = { + auth: 'auth failure', + 'anti-bot': 'anti-bot challenge', + argument: 'invalid symbol', + empty: 'no more discussion data', + incompatible: 'unexpected response shape', + unknown: 'unknown request failure', +}; + +function getCommentList(json: Record): unknown[] | null { + if (Array.isArray(json.list)) return json.list; + if (isRecord(json.data) && Array.isArray(json.data.list)) return json.data.list; + return null; +} + +function isAntiBotHtml(response: XueqiuCommentsResponse, envelopeText: string): boolean { + const htmlText = `${envelopeText} ${response.textSnippet}`.toLowerCase(); + return response.contentType.includes('text/html') + && ( + /captcha|challenge|aliyun_waf|risk/i.test(htmlText) + || /_WAF_|_waf_|renderData|aliyun_waf/i.test(response.textSnippet) + ); +} + +function toFiniteCount(value: unknown): number { + const count = Number(value ?? 0); + return Number.isFinite(count) ? 
count : 0; +} + +function normalizeIdentifier(value: unknown): string { + if (typeof value === 'string') return value.trim(); + if (typeof value === 'number' && Number.isFinite(value)) return String(value); + return ''; +} + +function buildPaginationStopMessage( + requestNumber: number, + collected: number, + target: number, + reason: string, +): string { + return `xueqiu comments pagination stopped after request ${requestNumber}, ` + + `collected ${collected}/${target} items, ` + + `reason: ${reason}`; +} + +function throwFirstPageFailure(kind: XueqiuCommentsKind, symbol: string): never { + if (kind === 'auth' || kind === 'anti-bot') { + throw new AuthRequiredError('xueqiu.com', 'Stock discussions require login or challenge clearance'); + } + if (kind === 'argument') { + throw new ArgumentError(`xueqiu comments received an invalid symbol: ${symbol}`); + } + if (kind === 'empty') { + throw new EmptyResultError( + `xueqiu/comments ${symbol}`, + `No discussion data found for ${symbol}`, + ); + } + throw new CommandExecutionError( + `Unexpected response while loading xueqiu comments for ${symbol}`, + 'Run the command again with --verbose to inspect the raw site response.', + ); +} + +/** + * Extract the raw item list from one classified JSON payload. + * + * @param json Raw parsed JSON payload from browser fetch. + * @returns Discussion items when the response shape is usable. + */ +export function getCommentItems(json: unknown): Record[] { + if (!isRecord(json)) return []; + + const list = getCommentList(json) ?? []; + + return list.filter((item): item is Record => !!item && typeof item === 'object'); +} + +/** + * Classify one raw browser response before command-level error handling. + * + * @param response Structured browser response payload. + * @returns Tagged result describing the response class. + */ +export function classifyXueqiuCommentsResponse(response: XueqiuCommentsResponse): { kind: XueqiuCommentsKind } { + const jsonRecord = isRecord(response.json) ? 
response.json : null; + const commentList = jsonRecord ? getCommentList(jsonRecord) : null; + const envelopeText = [ + jsonRecord?.error, + jsonRecord?.errors, + jsonRecord?.code, + jsonRecord?.message, + jsonRecord?.msg, + ].filter(Boolean).join(' ').toLowerCase(); + const responseText = `${envelopeText} ${response.textSnippet}`.toLowerCase(); + + if (isAntiBotHtml(response, envelopeText)) { + return { kind: 'anti-bot' }; + } + if (response.status === 401 || response.status === 403) { + return { kind: 'auth' }; + } + if (/login required|unauthorized|unauthorised|forbidden|not logged in|need login/.test(responseText)) { + return { kind: 'auth' }; + } + if (/invalid symbol|invalid code|bad symbol/.test(envelopeText)) { + return { kind: 'argument' }; + } + if (/no data|no result|not found|no matching/.test(envelopeText)) { + return { kind: 'empty' }; + } + if (commentList && commentList.length === 0) { + return { kind: 'empty' }; + } + if (response.contentType.includes('application/json') && jsonRecord && commentList === null) { + return { kind: 'incompatible' }; + } + return { kind: 'unknown' }; +} + +/** + * Merge one new page of rows while preserving the first occurrence of each ID. + * + * @param current Rows already collected. + * @param incoming Rows from the next page. + * @returns Deduplicated merged rows. + */ +export function mergeUniqueCommentRows( + current: XueqiuCommentRow[], + incoming: XueqiuCommentRow[], +): XueqiuCommentRow[] { + const merged = [...current]; + const seen = new Set(current.map(item => item.id)); + + for (const row of incoming) { + if (seen.has(row.id)) continue; + seen.add(row.id); + merged.push(row); + } + return merged; +} + +/** + * Normalize one raw xueqiu discussion item into the CLI row shape. + * + * Returned rows represent stock-scoped discussion posts, not replies under + * one parent post. + * + * @param item Raw API item. + * @returns Cleaned CLI row. 
+ */ +export function normalizeCommentItem(item: Record): XueqiuCommentRow { + const text = String(item.description ?? '') + .replace(/<[^>]+>/g, ' ') + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/\s+/g, ' ') + .trim(); + + const id = normalizeIdentifier(item.id); + const userId = normalizeIdentifier(item.user?.id); + + const createdAtDate = item.created_at ? new Date(item.created_at) : null; + const createdAt = createdAtDate && !Number.isNaN(createdAtDate.getTime()) + ? createdAtDate.toISOString() + : null; + + return { + id, + author: String(item.user?.screen_name ?? ''), + text, + likes: toFiniteCount(item.fav_count), + replies: toFiniteCount(item.reply_count), + retweets: toFiniteCount(item.retweet_count), + created_at: createdAt, + url: userId && id ? `https://xueqiu.com/${userId}/${id}` : null, + }; +} + +/** + * Remove internal-only fields before returning rows to the CLI renderer. + * + * @param row Internal row shape used during pagination. + * @returns Public output row that matches the documented command contract. + */ +export function toCommentOutputRow(row: XueqiuCommentRow): XueqiuCommentOutputRow { + const { id: _id, ...outputRow } = row; + return outputRow; +} + +/** + * Convert response classification into a compact warning phrase. + * + * @param kind Classifier result kind. + * @returns Human-readable reason fragment for stderr warnings. + */ +export function describeFailureKind(kind: XueqiuCommentsKind): string { + return FAILURE_REASON_BY_KIND[kind]; +} + +/** + * Fetch one discussion page from inside the browser context so cookies and + * any site-side request state stay attached to the request. + * + * @param page Active browser page. + * @param symbol Normalized stock symbol. + * @param pageNumber Internal page counter, starting from 1. + * @param pageSize Item count per internal request. + * @returns Structured response for command-side classification. 
/**
 * Shape of the object produced by the in-page script of `fetchCommentsPage`.
 *
 * NOTE(review): spelled out structurally from the script below; confirm it
 * stays assignable to the response type the classifier expects.
 */
type FetchedCommentsPage = {
  /** HTTP status, or 0 when `fetch` itself threw. */
  status: number;
  /** Raw `content-type` header ('' when absent, 'text/plain' on fetch errors). */
  contentType: string;
  /** Parsed body when the response was JSON and parsed cleanly, else `null`. */
  json: unknown;
  /** First 2000 characters of the body, or the error message on fetch errors. */
  textSnippet: string;
};

/**
 * Fetch one discussion page from inside the browser context so cookies and
 * any site-side request state stay attached to the request.
 *
 * The request is issued by a script evaluated in the page: it calls `fetch`
 * with `credentials: 'include'` and a referrer pointing at the symbol page.
 * The script never throws; network failures are reported as `status: 0`.
 *
 * @param page Active browser page.
 * @param symbol Normalized stock symbol.
 * @param pageNumber Internal page counter, starting from 1.
 * @param pageSize Item count per internal request.
 * @returns Structured response for command-side classification.
 */
export async function fetchCommentsPage(
  page: IPage,
  symbol: string,
  pageNumber: number,
  pageSize: number,
): Promise<FetchedCommentsPage> {
  const url = new URL('https://xueqiu.com/query/v1/symbol/search/status');
  url.searchParams.set('symbol', symbol);
  url.searchParams.set('count', String(pageSize));
  url.searchParams.set('page', String(pageNumber));
  url.searchParams.set('sort', 'time');

  // JSON.stringify turns both values into quoted, escaped JS string literals,
  // so they can be embedded in the script source without breaking out of it.
  const requestUrlLiteral = JSON.stringify(url.toString());
  const referrerLiteral = JSON.stringify(`https://xueqiu.com/S/${symbol}`);

  return page.evaluate(`
    (async () => {
      try {
        const response = await fetch(${requestUrlLiteral}, {
          credentials: 'include',
          headers: {
            'accept': 'application/json, text/plain, */*',
            'x-requested-with': 'XMLHttpRequest',
          },
          referrer: ${referrerLiteral},
          referrerPolicy: 'strict-origin-when-cross-origin',
        });
        const contentType = response.headers.get('content-type') || '';
        const text = await response.text();
        let json = null;
        if (contentType.includes('application/json')) {
          try {
            json = JSON.parse(text);
          } catch {
            json = null;
          }
        }
        return {
          status: response.status,
          contentType,
          json,
          textSnippet: text.slice(0, 2000),
        };
      } catch (error) {
        return {
          status: 0,
          contentType: 'text/plain',
          json: null,
          textSnippet: error instanceof Error ? error.message : String(error),
        };
      }
    })()
  `) as Promise<FetchedCommentsPage>;
}
/**
 * Collect enough stock discussion rows to satisfy the requested limit.
 *
 * This helper owns the internal pagination policy so the public command
 * contract can stay small and expose only `--limit`.
 *
 * Pages are requested one at a time, starting at 1, until one of these holds:
 * - `limit` unique rows were collected;
 * - a page held fewer raw items than `pageSize` (treated as the last page);
 * - a later page is classified as `empty`;
 * - a later page is classified as a failure, yields no usable rows, or adds
 *   no new IDs (a warning is emitted and the rows so far are returned);
 * - `maxRequests` pages were fetched (a warning is emitted).
 *
 * @param options Pagination inputs and a page-fetch callback.
 * @returns Deduplicated normalized rows, possibly partial with a warning.
 * @throws CommandExecutionError When the first page yields no usable rows.
 *   Other first-page failures are delegated to `throwFirstPageFailure`.
 */
export async function collectCommentRows(options: CollectCommentRowsOptions): Promise<XueqiuCommentRow[]> {
  const warn = options.warn ?? log.warn;
  const rows: XueqiuCommentRow[] = [];
  const seenIds = new Set<XueqiuCommentRow['id']>();

  for (let requestNumber = 1; requestNumber <= options.maxRequests; requestNumber += 1) {
    const response = await options.fetchPage(requestNumber, options.pageSize);
    const classified = classifyXueqiuCommentsResponse(response);

    // 'unknown' is the classifier's "nothing wrong detected" result, i.e. the
    // only kind for which the page is processed further.
    if (requestNumber === 1 && classified.kind !== 'unknown') {
      throwFirstPageFailure(classified.kind, options.symbol);
    } else if (classified.kind === 'empty') {
      break;
    } else if (classified.kind !== 'unknown') {
      warn(buildPaginationStopMessage(
        requestNumber,
        rows.length,
        options.limit,
        describeFailureKind(classified.kind),
      ));
      break;
    }

    const rawItems = getCommentItems(response.json);
    const pageRows = rawItems
      .map(item => normalizeCommentItem(item))
      .filter(row => row.id);
    if (pageRows.length === 0) {
      if (requestNumber === 1) {
        throw new CommandExecutionError(
          `Unexpected response while loading xueqiu comments for ${options.symbol}`,
          'Run the command again with --verbose to inspect the raw site response.',
        );
      }
      // Later pages only get here with kind 'unknown' (every other kind left
      // the loop above), so the warning is unconditional.
      warn(buildPaginationStopMessage(
        requestNumber,
        rows.length,
        options.limit,
        describeFailureKind(classified.kind),
      ));
      break;
    }

    let advanced = false;
    for (const row of pageRows) {
      if (seenIds.has(row.id)) continue;
      seenIds.add(row.id);
      rows.push(row);
      advanced = true;
    }

    if (rows.length >= options.limit) {
      return rows.slice(0, options.limit);
    }
    if (rawItems.length < options.pageSize) {
      // Short page: the site has nothing more to offer.
      break;
    }
    if (!advanced) {
      // A full page of already-seen IDs means the site keeps serving the same
      // window; stop instead of burning the remaining requests.
      warn(buildPaginationStopMessage(
        requestNumber,
        rows.length,
        options.limit,
        'pagination did not advance',
      ));
      break;
    }
    if (requestNumber === options.maxRequests) {
      warn(buildPaginationStopMessage(requestNumber, rows.length, options.limit, 'reached safety cap'));
    }
  }

  return rows.slice(0, options.limit);
}
/*
 * Register `opencli xueqiu comments <symbol> [--limit N]`.
 *
 * The handler validates its arguments, opens https://xueqiu.com in the
 * browser page and then collects discussion rows through
 * `collectCommentRows`, using pages of at most 20 items and at most 5
 * requests. `--limit` must be an integer between 1 and 100.
 *
 * NOTE(review): `navigateBefore: false` together with the explicit
 * `page.goto` presumably means the command handles navigation itself —
 * confirm against the `cli` framework.
 */
cli({
  site: 'xueqiu',
  name: 'comments',
  description: '获取单只股票的讨论动态',
  domain: 'xueqiu.com',
  strategy: Strategy.COOKIE,
  browser: true,
  navigateBefore: false,
  args: [
    {
      name: 'symbol',
      positional: true,
      required: true,
      help: 'Stock symbol, e.g. SH600519, AAPL, or 00700',
    },
    {
      name: 'limit',
      type: 'int',
      default: 20,
      help: 'Number of discussion posts to return',
    },
  ],
  columns: ['author', 'text', 'likes', 'replies', 'retweets', 'created_at', 'url'],
  func: async (page, args) => {
    // Argument validation happens before any browser work.
    const symbol = normalizeSymbolInput(args.symbol);
    const limit = Number(args.limit);
    const isPositiveInteger = Number.isInteger(limit) && limit > 0;
    if (!isPositiveInteger) {
      throw new ArgumentError('xueqiu comments requires --limit to be a positive integer');
    }
    if (limit > 100) {
      throw new ArgumentError('xueqiu comments supports --limit up to 100');
    }

    await page.goto('https://xueqiu.com');

    const collected = await collectCommentRows({
      symbol,
      limit,
      // Never request more per page than the caller asked for in total.
      pageSize: Math.min(limit, 20),
      maxRequests: 5,
      fetchPage: (pageNumber, currentPageSize) =>
        fetchCommentsPage(page, symbol, pageNumber, currentPageSize),
      warn: log.warn,
    });

    // Drop internal-only fields so the output matches `columns`.
    return collected.map(internalRow => toCommentOutputRow(internalRow));
  },
});
/**
 * Convert raw CLI input into a normalized stock symbol.
 *
 * The input is stringified (`null`/`undefined` become ''), trimmed and
 * upper-cased before validation against `XUEQIU_SYMBOL_PATTERN`.
 *
 * @param raw User-provided CLI argument.
 * @returns Upper-cased symbol string.
 * @throws ArgumentError When the input is empty, looks like an http(s) URL,
 *   or does not match `XUEQIU_SYMBOL_PATTERN`.
 */
export function normalizeSymbolInput(raw: unknown): string {
  const candidate = String(raw ?? '').trim().toUpperCase();

  if (candidate.length === 0) {
    throw new ArgumentError('xueqiu comments requires a symbol');
  }

  // The candidate is already upper-cased, hence the upper-case scheme.
  const looksLikeUrl = /^HTTPS?:\/\//.test(candidate);
  if (looksLikeUrl) {
    throw new ArgumentError('xueqiu comments only accepts a symbol, not a URL');
  }

  if (XUEQIU_SYMBOL_PATTERN.test(candidate)) {
    return candidate;
  }
  throw new ArgumentError(`xueqiu comments received an invalid symbol: ${candidate}`);
}