Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- CHEK Content Service: ensure emoji-safe utf8mb4 for text columns
--
-- Converts the four content tables below (tag, wiki entry, post, comment) to
-- the utf8mb4 character set with the utf8mb4_unicode_ci collation, so that
-- their text columns can store 4-byte UTF-8 characters such as emoji.
--
-- NOTE(review): assumes a MySQL/MariaDB target, where CONVERT TO CHARACTER SET
-- changes the table default and re-encodes existing character columns; it may
-- also widen VARCHAR/TEXT column types to fit the new encoding -- confirm
-- against the actual schema before running.
-- NOTE(review): each statement presumably rebuilds its table; verify lock and
-- runtime impact on large tables and schedule accordingly.

ALTER TABLE chek_content_tag CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
ALTER TABLE chek_content_wiki_entry CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
ALTER TABLE chek_content_post CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
ALTER TABLE chek_content_comment CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

42 changes: 30 additions & 12 deletions backend-CHEK-crawler/src/platform/xhs.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,40 @@ function extractNoteIdFromUrl(url) {

async function collectSearchResults(page, keyword, maxLinks, log) {
const url = buildSearchUrl(keyword);
const respPromise = page.waitForResponse(
(r) => r.url().includes('/api/sns/web/v1/search/notes') && r.request().method() === 'POST',
{ timeout: 20_000 }
);

await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60_000 });
await page.waitForTimeout(1200);
const respPromise = page
.waitForResponse(
(r) => r.url().includes('/api/sns/web/v1/search/notes') && r.request().method() === 'POST',
{ timeout: 20_000 }
)
.catch((e) => {
log({
level: 'warn',
msg: 'xhs_search_api_wait_failed',
keyword,
url,
error: String(e?.message || e || ''),
});
return null;
});

let j = null;
try {
const resp = await respPromise;
j = await resp.json();
} catch {
j = null;
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60_000 });
await page.waitForTimeout(1200);
} catch (e) {
log({
level: 'warn',
msg: 'xhs_search_nav_failed',
keyword,
url,
error: String(e?.message || e || ''),
});
return [];
}

let j = null;
const resp = await respPromise;
if (resp) j = await resp.json().catch(() => null);

const items = Array.isArray(j?.data?.items) ? j.data.items : [];
const normalized = items
.map((it) => {
Expand Down