diff --git a/clis/zhihu/answer-comments.js b/clis/zhihu/answer-comments.js
index b6028f99f..05864a5aa 100644
--- a/clis/zhihu/answer-comments.js
+++ b/clis/zhihu/answer-comments.js
@@ -1,28 +1,9 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
-
-function decodeEntity(codePoint) {
- return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
- ? String.fromCodePoint(codePoint)
- : null;
-}
+import { stripHtml as stripHtmlText } from './text.js';
function stripHtml(html) {
- if (!html) return '';
- return html
- .replace(/
/gi, '\n')
- .replace(/<\/(?:p|div|h[1-6]|li|blockquote)>/gi, '\n\n')
- .replace(/<[^>]+>/g, '')
- .replace(/ /g, ' ')
- .replace(/</g, '<')
- .replace(/>/g, '>')
- .replace(/&/g, '&')
- .replace(/"/g, '"')
- .replace(/'/g, "'")
- .replace(/(\d+);/g, (entity, value) => decodeEntity(Number(value)) ?? entity)
- .replace(/([0-9a-f]+);/gi, (entity, value) => decodeEntity(Number.parseInt(value, 16)) ?? entity)
- .replace(/\n{3,}/g, '\n\n')
- .trim();
+ // Delegate to the shared helper in ./text.js. preserveBlocks asks it to turn
+ // line-break tags and closing block tags (p, div, h1-h6, li, blockquote)
+ // into newlines before the remaining tags are stripped.
+ return stripHtmlText(html, { preserveBlocks: true });
}
const ANSWER_ID_RE = /^\d+$/;
diff --git a/clis/zhihu/answer-detail.js b/clis/zhihu/answer-detail.js
index 02dd67881..4d968290c 100644
--- a/clis/zhihu/answer-detail.js
+++ b/clis/zhihu/answer-detail.js
@@ -1,38 +1,9 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
+import { stripHtml as stripHtmlText } from './text.js';
-// Light-weight HTML → text, preserving paragraph / heading / list-item
-// line breaks. Zhihu answer `content` is HTML, so we map block-level
-// closing tags + `
` to newlines before stripping the rest.
function stripHtml(html) {
- if (!html) return '';
- return html
- .replace(/
/gi, '\n')
- // Block-level closing tags become paragraph breaks (double
- // newline) so the stripped text stays readable. The trailing
- // `\n{3,}` collapse pass below normalizes accidental triples.
- .replace(/<\/(?:p|div|h[1-6]|li|blockquote)>/gi, '\n\n')
- .replace(/<[^>]+>/g, '')
- .replace(/ /g, ' ')
- .replace(/</g, '<')
- .replace(/>/g, '>')
- .replace(/&/g, '&')
- .replace(/"/g, '"')
- .replace(/'/g, "'")
- .replace(/(\d+);/g, (_, value) => {
- const codePoint = Number(value);
- return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
- ? String.fromCodePoint(codePoint)
- : _;
- })
- .replace(/([0-9a-f]+);/gi, (_, value) => {
- const codePoint = Number.parseInt(value, 16);
- return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
- ? String.fromCodePoint(codePoint)
- : _;
- })
- .replace(/\n{3,}/g, '\n\n')
- .trim();
+ // Light-weight HTML -> text via the shared helper in ./text.js.
+ // preserveBlocks maps line-break tags and closing block tags (p, div,
+ // h1-h6, li, blockquote) to newlines before the rest of the markup is
+ // stripped, so paragraph / heading / list-item breaks survive in the text.
+ return stripHtmlText(html, { preserveBlocks: true });
}
const ANSWER_ID_RE = /^\d+$/;
diff --git a/clis/zhihu/collection.js b/clis/zhihu/collection.js
index be9dee722..bed24b5b4 100644
--- a/clis/zhihu/collection.js
+++ b/clis/zhihu/collection.js
@@ -1,19 +1,7 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
import { log } from '@jackwener/opencli/logger';
-
-function stripHtml(html) {
- return html
- .replace(/<[^>]+>/g, '')
- .replace(/ /g, ' ')
- .replace(/</g, '<')
- .replace(/>/g, '>')
- .replace(/&/g, '&')
- .replace(/"/g, '"')
- .replace(//g, '')
- .replace(/<\/em>/g, '')
- .trim();
-}
+import { stripHtml } from './text.js';
function validatePositiveInt(value, name) {
const n = Number(value);
diff --git a/clis/zhihu/collection.test.js b/clis/zhihu/collection.test.js
index c0b100cdd..75cccb990 100644
--- a/clis/zhihu/collection.test.js
+++ b/clis/zhihu/collection.test.js
@@ -37,7 +37,7 @@ describe('zhihu collection', () => {
question: { id: 789012, title: 'Test Question' },
author: { name: 'test_author' },
voteup_count: 42,
- content: 'Test answer content
',
+ content: '"Test" & answer content
',
url: 'https://www.zhihu.com/question/789012/answer/123456',
},
},
@@ -57,6 +57,7 @@ describe('zhihu collection', () => {
title: 'Test Question',
author: 'test_author',
votes: 42,
+ excerpt: '"Test" & answer content',
url: 'https://www.zhihu.com/question/789012/answer/123456',
});
diff --git a/clis/zhihu/question.js b/clis/zhihu/question.js
index 122a3efa4..fe78c99ee 100644
--- a/clis/zhihu/question.js
+++ b/clis/zhihu/question.js
@@ -1,14 +1,6 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { AuthRequiredError, CliError } from '@jackwener/opencli/errors';
-function stripHtml(html) {
- return html
- .replace(/<[^>]+>/g, '')
- .replace(/ /g, ' ')
- .replace(/</g, '<')
- .replace(/>/g, '>')
- .replace(/&/g, '&')
- .trim();
-}
+import { stripHtml } from './text.js';
function answerIdFromUrl(url) {
if (typeof url !== 'string') return '';
diff --git a/clis/zhihu/question.test.js b/clis/zhihu/question.test.js
index 57c8f7401..f42feb1cb 100644
--- a/clis/zhihu/question.test.js
+++ b/clis/zhihu/question.test.js
@@ -20,7 +20,7 @@ describe('zhihu question', () => {
id: '2036567240334653053',
author: { name: 'alice' },
voteup_count: 12,
- content: 'Hello Zhihu',
+ content: '"Hello" & Zhihu
',
},
],
};
@@ -33,7 +33,7 @@ describe('zhihu question', () => {
author: 'alice',
votes: 12,
url: 'https://www.zhihu.com/question/2021881398772981878/answer/2036567240334653053',
- content: 'Hello Zhihu',
+ content: '"Hello" & Zhihu',
},
]);
expect(goto).toHaveBeenCalledWith('https://www.zhihu.com/question/2021881398772981878');
diff --git a/clis/zhihu/search.js b/clis/zhihu/search.js
index 56ac187d4..a58a5e4d0 100644
--- a/clis/zhihu/search.js
+++ b/clis/zhihu/search.js
@@ -1,17 +1,6 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
-
-function stripHtml(html) {
- return (html || '')
- .replace(/<[^>]+>/g, '')
- .replace(/ /g, ' ')
- .replace(/</g, '<')
- .replace(/>/g, '>')
- .replace(/&/g, '&')
- .replace(//g, '')
- .replace(/<\/em>/g, '')
- .trim();
-}
+import { stripHtml } from './text.js';
function itemKey(item) {
const obj = item.object || {};
diff --git a/clis/zhihu/search.test.js b/clis/zhihu/search.test.js
index 6e8c85705..0ee594cfb 100644
--- a/clis/zhihu/search.test.js
+++ b/clis/zhihu/search.test.js
@@ -36,7 +36,7 @@ describe('zhihu search', () => {
type: 'answer',
author: { name: 'alice' },
voteup_count: 12,
- question: { id: 'q1', name: 'Codex question' },
+ question: { id: 'q1', name: 'Codex "question"' },
},
},
{
@@ -57,7 +57,7 @@ describe('zhihu search', () => {
await expect(cmd.func(page, { query: 'codex', limit: 2 })).resolves.toEqual([
{
rank: 1,
- title: 'Codex question',
+ title: 'Codex "question"',
type: 'answer',
author: 'alice',
votes: 12,
diff --git a/clis/zhihu/text.js b/clis/zhihu/text.js
new file mode 100644
index 000000000..7b8f19d97
--- /dev/null
+++ b/clis/zhihu/text.js
@@ -0,0 +1,29 @@
+/**
+ * Convert the numeric value of a character reference into its character.
+ *
+ * @param {number} codePoint - Parsed value of a `&#NNN;` or `&#xHH;` reference.
+ * @returns {string | null} The character for `codePoint`, or `null` when the
+ *   value is not an integer in the range 0..0x10FFFF.
+ */
+function decodeEntity(codePoint) {
+    return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
+        ? String.fromCodePoint(codePoint)
+        : null;
+}
+
+// Named references decoded by stripHtml. Any other named reference is left
+// in the output exactly as written.
+const NAMED_ENTITIES = { nbsp: ' ', lt: '<', gt: '>', amp: '&', quot: '"' };
+
+// One pattern for every supported reference: a known name, a decimal value
+// (`&#39;`) or a hexadecimal value (`&#x27;`). Every alternative is anchored
+// on `&` ... `;` so plain text such as "65;" or "cafe;" is never touched.
+const ENTITY_RE = /&(?:(nbsp|lt|gt|amp|quot)|#(\d+)|#[xX]([0-9a-fA-F]+));/g;
+
+/**
+ * Reduce an HTML fragment to plain text.
+ *
+ * Tags are removed first and entities are decoded afterwards, so escaped
+ * markup such as `&lt;CLI&gt;` survives as the literal text `<CLI>`.
+ *
+ * @param {*} html - HTML fragment. Falsy input yields `''`; any other
+ *   non-string value is coerced with `String()`.
+ * @param {object} [options]
+ * @param {boolean} [options.preserveBlocks=false] - When true, `<br>` becomes
+ *   a newline and closing `p`/`div`/`h1`-`h6`/`li`/`blockquote` tags become a
+ *   blank line. When false those tags are simply dropped.
+ * @returns {string} Trimmed text with runs of three or more newlines
+ *   collapsed to two.
+ */
+export function stripHtml(html, { preserveBlocks = false } = {}) {
+    if (!html) return '';
+    let text = String(html);
+    if (preserveBlocks) {
+        text = text
+            .replace(/<br\s*\/?>/gi, '\n')
+            .replace(/<\/(?:p|div|h[1-6]|li|blockquote)>/gi, '\n\n');
+    }
+    return text
+        .replace(/<[^>]+>/g, '')
+        // Single pass: replaced text is never rescanned, so `&amp;quot;`
+        // decodes to the literal `&quot;` instead of being decoded twice.
+        .replace(ENTITY_RE, (entity, name, decimal, hex) => {
+            if (name !== undefined) return NAMED_ENTITIES[name];
+            const codePoint = decimal !== undefined ? Number(decimal) : Number.parseInt(hex, 16);
+            // Out-of-range references are kept verbatim rather than dropped.
+            return decodeEntity(codePoint) ?? entity;
+        })
+        .replace(/\n{3,}/g, '\n\n')
+        .trim();
+}
+
+export const __test__ = { decodeEntity };
diff --git a/clis/zhihu/text.test.js b/clis/zhihu/text.test.js
new file mode 100644
index 000000000..5f238e836
--- /dev/null
+++ b/clis/zhihu/text.test.js
@@ -0,0 +1,24 @@
+import { describe, expect, it } from 'vitest';
+import { stripHtml } from './text.js';
+
+// Fixtures are written with explicit markup and entity references so each
+// case actually exercises tag stripping or entity decoding.
+describe('zhihu text helpers', () => {
+    it('strips tags and decodes named entities in flat mode', () => {
+        // Tags are stripped before entities are decoded, so the escaped
+        // `<CLI>` survives as literal text.
+        expect(stripHtml('<em>Codex</em> &amp; &lt;CLI&gt;')).toBe('Codex & <CLI>');
+    });
+
+    it('decodes decimal and hexadecimal numeric entities', () => {
+        expect(stripHtml('&quot;&#20013;&#x6587;&quot; &amp; &#39;test&#39;')).toBe('"中文" & \'test\'');
+    });
+
+    it('keeps invalid numeric entities unchanged', () => {
+        // 1114112 === 0x110000, one past the last valid code point.
+        expect(stripHtml('bad &#1114112; entity')).toBe('bad &#1114112; entity');
+    });
+
+    it('keeps list excerpts flat by default', () => {
+        expect(stripHtml('<p>first</p><p>second</p>')).toBe('firstsecond');
+    });
+
+    it('preserves block breaks when requested', () => {
+        expect(stripHtml('<p>first</p><p>second</p>', { preserveBlocks: true })).toBe('first\n\nsecond');
+    });
+});