diff --git a/packages/cli/src/commands/advisor/recommend.ts b/packages/cli/src/commands/advisor/recommend.ts index 46ee0a8..121c15a 100644 --- a/packages/cli/src/commands/advisor/recommend.ts +++ b/packages/cli/src/commands/advisor/recommend.ts @@ -29,41 +29,41 @@ function formatContextWindow(tokens: number): string { } const MODALITY_LABELS: Record = { - Text: "文本", - Image: "图片", - Video: "视频", - Audio: "音频", + Text: "Text", + Image: "Image", + Video: "Video", + Audio: "Audio", }; const CAPABILITY_LABELS: Record = { - TG: "文本生成", - VU: "视觉理解", - IG: "图像生成", - VG: "视频生成", - TTS: "语音合成", - ASR: "语音识别", - Reasoning: "推理", + TG: "Text Gen", + VU: "Vision", + IG: "Image Gen", + VG: "Video Gen", + TTS: "Text-to-Speech", + ASR: "Speech-to-Text", + Reasoning: "Reasoning", }; const BUDGET_LABELS: Record = { - low: "低成本优先", - medium: "适中", - high: "高投入", + low: "Cost-Effective", + medium: "Balanced", + high: "High Investment", }; const QUALITY_LABELS: Record = { - flagship: "旗舰优先", - balanced: "均衡", - "cost-optimized": "性价比优先", + flagship: "Flagship", + balanced: "Balanced", + "cost-optimized": "Value", }; const PREFERENCE_MODE_LABELS: Record = { - scoped: "限定范围", - comparison: "对比评估", - alternative: "替代推荐", + scoped: "Scoped", + comparison: "Comparison", + alternative: "Alternative", }; function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { const colorize = noColor ? new Chalk({ level: 0 }) : chalk; const lines: string[] = []; - lines.push(colorize.cyan.bold("需求理解")); + lines.push(colorize.cyan.bold("Intent Analysis")); if (intent.taskSummary) { lines.push(""); @@ -72,7 +72,7 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { if (intent.scenarioHints.length) { lines.push(""); - lines.push(`${colorize.dim("场景特征")} ${intent.scenarioHints.join(" · ")}`); + lines.push(`${colorize.dim("Scenario")} ${intent.scenarioHints.join(" · ")}`); } const inputLabels = intent.inputModality.map((mod) => MODALITY_LABELS[mod] ?? mod); @@ -80,40 +80,40 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { if (inputLabels.length || outputLabels.length) { lines.push(""); const parts: string[] = []; - if (inputLabels.length) parts.push(`${colorize.dim("输入")} ${inputLabels.join(", ")}`); - if (outputLabels.length) parts.push(`${colorize.dim("输出")} ${outputLabels.join(", ")}`); + if (inputLabels.length) parts.push(`${colorize.dim("Input")} ${inputLabels.join(", ")}`); + if (outputLabels.length) parts.push(`${colorize.dim("Output")} ${outputLabels.join(", ")}`); lines.push(parts.join(" ")); } const capLabels = intent.requiredCapabilities.map((cap) => CAPABILITY_LABELS[cap] ?? cap); if (capLabels.length) { - lines.push(`${colorize.dim("所需能力")} ${capLabels.join(", ")}`); + lines.push(`${colorize.dim("Capabilities")} ${capLabels.join(", ")}`); } const budgetLabel = BUDGET_LABELS[intent.budget] ?? intent.budget; const qualityLabel = QUALITY_LABELS[intent.qualityPreference] ?? intent.qualityPreference; lines.push(""); lines.push( - `${colorize.dim("预算倾向")} ${budgetLabel} ${colorize.dim("质量偏好")} ${qualityLabel}`, + `${colorize.dim("Budget")} ${budgetLabel} ${colorize.dim("Quality")} ${qualityLabel}`, ); const preference = intent.modelPreference; if (preference && preference.mode !== "unconstrained") { lines.push(""); const modeLabel = PREFERENCE_MODE_LABELS[preference.mode] ?? preference.mode; - const prefParts = [colorize.dim("推荐模式") + ` ${colorize.yellow(modeLabel)}`]; + const prefParts = [colorize.dim("Mode") + ` ${colorize.yellow(modeLabel)}`]; if (preference.targets?.length) { - prefParts.push(colorize.dim("目标") + ` ${preference.targets.join(", ")}`); + prefParts.push(colorize.dim("Targets") + ` ${preference.targets.join(", ")}`); } if (preference.excludes?.length) { - prefParts.push(colorize.dim("排除") + ` ${preference.excludes.join(", ")}`); + prefParts.push(colorize.dim("Excludes") + ` ${preference.excludes.join(", ")}`); } lines.push(prefParts.join(" ")); } if (intent.segments?.length) { lines.push(""); - lines.push(colorize.dim("任务拆解")); + lines.push(colorize.dim("Pipeline")); for (const [idx, segment] of intent.segments.entries()) { const outMods = segment.outputModality.map((mod) => MODALITY_LABELS[mod] ?? mod).join(", "); lines.push( @@ -131,19 +131,19 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { }); } -const RECOMMEND_LABELS = ["最佳推荐", "次优选择", "备选参考"]; +const RECOMMEND_LABELS = ["Best Pick", "Runner-Up", "Alternative"]; function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstance): string { const labelColors = [colorize.green.bold, colorize.blue.bold, colorize.magenta.bold]; const colorFn = labelColors[index] ?? colorize.white.bold; - const label = RECOMMEND_LABELS[index] ?? `推荐 #${index + 1}`; + const label = RECOMMEND_LABELS[index] ?? `#${index + 1}`; const lines: string[] = []; - lines.push(colorFn(`⬢ 推荐 #${index + 1} — ${label}`)); + lines.push(colorFn(`⬢ #${index + 1} — ${label}`)); lines.push(""); lines.push(`${colorize.bold(rec.name)} ${colorize.dim(`(${rec.model})`)}`); lines.push(""); - lines.push(`${colorize.cyan("推荐理由")} ${rec.reason}`); + lines.push(`${colorize.cyan("Why")} ${rec.reason}`); if (rec.highlights.length) { lines.push(""); @@ -153,8 +153,8 @@ function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstanc } const meta: string[] = []; - if (rec.contextWindow) meta.push(`上下文 ${formatContextWindow(rec.contextWindow)}`); - if (rec.maxOutputTokens) meta.push(`最大输出 ${formatContextWindow(rec.maxOutputTokens)}`); + if (rec.contextWindow) meta.push(`Context ${formatContextWindow(rec.contextWindow)}`); + if (rec.maxOutputTokens) meta.push(`Max Output ${formatContextWindow(rec.maxOutputTokens)}`); if (meta.length) { lines.push(""); lines.push(colorize.dim(meta.join(" · "))); @@ -163,7 +163,7 @@ function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstanc const docLink = buildDocLink(rec.docUrl); if (docLink) { lines.push(""); - lines.push(colorize.dim(`文档 ${docLink}`)); + lines.push(colorize.dim(`Docs ${docLink}`)); } return boxen(lines.join("\n"), { @@ -183,7 +183,7 @@ function formatSingleResult(results: RecommendedModel[], noColor: boolean): stri function formatPipelineResult(summary: string, steps: PipelineStep[], noColor: boolean): string { const colorize = noColor ? new Chalk({ level: 0 }) : chalk; const lines: string[] = []; - lines.push(` ${colorize.yellow.bold("⚡ 组合方案")} ${summary}`); + lines.push(` ${colorize.yellow.bold("⚡ Pipeline")} ${summary}`); for (const [stepIdx, { step, recommendations, warnings }] of steps.entries()) { lines.push(""); @@ -247,14 +247,14 @@ export default defineCommand({ if (!userInput.trim()) { if (isInteractive({ nonInteractive: config.nonInteractive })) { - const hint = await promptText({ message: "描述你的需求:" }); + const hint = await promptText({ message: "Describe your requirement:" }); if (!hint) { - process.stderr.write("已取消。\n"); + process.stderr.write("Cancelled.\n"); process.exit(1); } userInput = hint; } else { - failIfMissing("message", 'bl advisor recommend "你的需求"'); + failIfMissing("message", 'bl advisor recommend "your requirement"'); } } @@ -262,16 +262,16 @@ export default defineCommand({ const format = detectOutputFormat(config.output); const modelsOptions: GetModelsOptions = { - onPrepareStart: () => process.stderr.write("初始化中...\n"), + onPrepareStart: () => process.stderr.write("Initializing model data...\n"), }; - process.stderr.write("正在分析需求...\n"); + process.stderr.write("Analyzing your request...\n"); const [allModels, intent] = await Promise.all([ getModels(config, modelsOptions), analyzeIntent(config, userInput), ]); if (intent.confidence === 0) { - process.stderr.write("需求分析超时,使用默认参数继续...\n"); + process.stderr.write("Intent analysis timed out, using defaults...\n"); } else { process.stderr.write("\n"); } @@ -297,7 +297,7 @@ export default defineCommand({ } // Stage 3: LLM Ranking - const spinner = createSpinner("正在推荐最佳模型..."); + const spinner = createSpinner("Recommending best models..."); spinner.start(); const result = await rankModels(config, candidates, intent, userInput, top); @@ -305,12 +305,31 @@ export default defineCommand({ spinner.stop(); if (isEmptyResult(result)) { - emitBare("暂无满足该需求的模型。"); + emitBare("No suitable models found for this request."); return; } if (format !== "text") { - emitResult(result, format); + emitResult( + { + intent: { + taskSummary: intent.taskSummary, + scenarioHints: intent.scenarioHints, + complexity: intent.complexity, + inputModality: intent.inputModality, + outputModality: intent.outputModality, + requiredCapabilities: intent.requiredCapabilities, + budget: intent.budget, + qualityPreference: intent.qualityPreference, + modelPreference: + intent.modelPreference?.mode !== "unconstrained" ? intent.modelPreference : undefined, + segments: intent.segments, + }, + result, + candidates: candidates.length, + }, + format, + ); return; } diff --git a/packages/cli/src/commands/quota/check.ts b/packages/cli/src/commands/quota/check.ts index 5c52fa8..7a17509 100644 --- a/packages/cli/src/commands/quota/check.ts +++ b/packages/cli/src/commands/quota/check.ts @@ -64,9 +64,9 @@ function formatRatio(usage: number, limit: number): string { function getStatus(usage: number, limit: number): string { if (limit <= 0) return "-"; const pct = (usage / limit) * 100; - if (pct >= 100) return "已限流"; - if (pct >= 80) return "接近限流"; - return "正常"; + if (pct >= 100) return "Throttled"; + if (pct >= 80) return "Near Limit"; + return "Normal"; } function getNestedRecord( @@ -193,7 +193,6 @@ function printTable(rows: CheckRow[], noColor: boolean): void { const yellow = noColor ? (t: string) => t : (t: string) => `\x1b[33m${t}\x1b[0m`; const red = noColor ? (t: string) => t : (t: string) => `\x1b[31m${t}\x1b[0m`; - const headersCn = ["模型", "RPM 用量/限额", "TPM 用量/限额", "状态"]; const headersEn = ["Model", "RPM Usage/Limit", "TPM Usage/Limit", "Status"]; const tableRows = rows.map((r) => { @@ -215,36 +214,30 @@ function printTable(rows: CheckRow[], noColor: boolean): void { return; } - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...tableRows.map((r) => displayWidth(r.cells[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...tableRows.map((r) => displayWidth(r.cells[col]))), ); - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const headerLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((w) => dim("─".repeat(w))).join("──"); - process.stdout.write(cnLine + "\n"); - process.stdout.write(enLine + "\n"); + process.stdout.write(headerLine + "\n"); process.stdout.write(separator + "\n"); const statusCol = 3; for (const r of tableRows) { const cells = r.cells.map((cell, col) => { if (col === statusCol) { - if (cell === "已限流") return red(padEnd(cell, widths[col])); - if (cell === "接近限流") return yellow(padEnd(cell, widths[col])); - if (cell === "正常") return green(padEnd(cell, widths[col])); + if (cell === "Throttled") return red(padEnd(cell, widths[col])); + if (cell === "Near Limit") return yellow(padEnd(cell, widths[col])); + if (cell === "Normal") return green(padEnd(cell, widths[col])); } return padEnd(cell, widths[col]); }); process.stdout.write(cells.join(" ") + "\n"); } - process.stdout.write(dim(`\n共 ${rows.length} 个模型 (Total: ${rows.length})`) + "\n"); + process.stdout.write(dim(`\nTotal: ${rows.length} models`) + "\n"); } export default defineCommand({ diff --git a/packages/cli/src/commands/quota/history.ts b/packages/cli/src/commands/quota/history.ts index 99dd510..a5f2081 100644 --- a/packages/cli/src/commands/quota/history.ts +++ b/packages/cli/src/commands/quota/history.ts @@ -65,7 +65,6 @@ function printTable(records: LimitApplicationItem[], noColor: boolean, total: nu const bold = noColor ? (t: string) => t : (t: string) => `\x1b[1m${t}\x1b[0m`; const dim = noColor ? (t: string) => t : (t: string) => `\x1b[2m${t}\x1b[0m`; - const headersCn = ["模型", "Token 账号限流", "申请时间"]; const headersEn = ["Model", "Token Limit", "Applied At"]; const rows = records.map((r) => [ @@ -74,27 +73,21 @@ function printTable(records: LimitApplicationItem[], noColor: boolean, total: nu formatDateTime(r.gmtCreate), ]); - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...rows.map((row) => displayWidth(row[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...rows.map((row) => displayWidth(row[col]))), ); - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const headerLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((w) => dim("─".repeat(w))).join("──"); - process.stdout.write(cnLine + "\n"); - process.stdout.write(enLine + "\n"); + process.stdout.write(headerLine + "\n"); process.stdout.write(separator + "\n"); for (const row of rows) { process.stdout.write(row.map((cell, col) => padEnd(cell, widths[col])).join(" ") + "\n"); } - process.stdout.write(dim(`\n共 ${total} 条记录 (Total: ${total})`) + "\n"); + process.stdout.write(dim(`\nTotal: ${total} records`) + "\n"); } export default defineCommand({ @@ -161,11 +154,6 @@ export default defineCommand({ throw err; } - if (format === "json") { - emitResult(result, format); - return; - } - const resp = extractResponseData(result as Record); let records = (resp.records as LimitApplicationItem[]) ?? []; const total = (resp.items as number) ?? records.length; @@ -174,6 +162,16 @@ export default defineCommand({ records = records.filter((r) => r.deployedModel === modelFilter); } + if (format === "json") { + const items = records.map((r) => ({ + model: r.deployedModel, + tokenLimit: r.usageLimit, + appliedAt: formatDateTime(r.gmtCreate), + })); + emitResult({ records: items, total: modelFilter ? records.length : total }, format); + return; + } + if (records.length === 0) { process.stdout.write("No quota change history found.\n"); return; diff --git a/packages/cli/src/commands/quota/list.ts b/packages/cli/src/commands/quota/list.ts index 3002757..9b2054d 100644 --- a/packages/cli/src/commands/quota/list.ts +++ b/packages/cli/src/commands/quota/list.ts @@ -108,7 +108,6 @@ function printTable(models: ModelWithQpm[], noColor: boolean): void { const bold = noColor ? (t: string) => t : (t: string) => `\x1b[1m${t}\x1b[0m`; const dim = noColor ? (t: string) => t : (t: string) => `\x1b[2m${t}\x1b[0m`; - const headersCn = ["模型", "RPM", "TPM", "可设上限 TPM"]; const headersEn = ["Model", "Req/min", "Token/min", "Max TPM"]; const rows = models.map((m) => { @@ -135,27 +134,21 @@ function printTable(models: ModelWithQpm[], noColor: boolean): void { return; } - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...rows.map((row) => displayWidth(row[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...rows.map((row) => displayWidth(row[col]))), ); - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const headerLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((w) => dim("─".repeat(w))).join("──"); - process.stdout.write(cnLine + "\n"); - process.stdout.write(enLine + "\n"); + process.stdout.write(headerLine + "\n"); process.stdout.write(separator + "\n"); for (const row of rows) { process.stdout.write(row.map((cell, col) => padEnd(cell, widths[col])).join(" ") + "\n"); } - process.stdout.write(dim(`\n共 ${models.length} 个模型 (Total: ${models.length})`) + "\n"); + process.stdout.write(dim(`\nTotal: ${models.length} models`) + "\n"); } export default defineCommand({ @@ -221,7 +214,25 @@ export default defineCommand({ } if (format === "json") { - emitResult(models, format); + const items = models.map((m) => { + const qpm = m.qpmInfo; + const modelDefault = qpm?.["model-default"]; + const userSpec = qpm?.["user-spec"]; + + const defaultRPM = calculateRPM(modelDefault); + const defaultTPM = calculateTPM(modelDefault); + const currentRPM = calculateRPM(userSpec, modelDefault?.count_limit_period) || defaultRPM; + const currentTPM = calculateTPM(userSpec, modelDefault?.usage_limit_period) || defaultTPM; + const maxTPM = defaultTPM * 2; + + return { + model: m.model, + rpm: currentRPM > 0 ? currentRPM : null, + tpm: currentTPM > 0 ? currentTPM : null, + maxTPM: maxTPM > 0 ? maxTPM : null, + }; + }); + emitResult(items, format); return; } diff --git a/packages/cli/src/commands/usage/free.ts b/packages/cli/src/commands/usage/free.ts index 3dd11a0..986e234 100644 --- a/packages/cli/src/commands/usage/free.ts +++ b/packages/cli/src/commands/usage/free.ts @@ -78,7 +78,6 @@ function printTable( typeMap: Map, noColor: boolean, ): void { - const headersCn = ["模型", "类型", "剩余/总量", "使用率", "过期时间", "用完即停"]; const headersEn = ["Model", "Type", "Remaining/Total", "Usage", "Expires", "Auto-Stop"]; const rows = quotas.map((quota) => { @@ -102,12 +101,8 @@ function printTable( ]; }); - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...rows.map((row) => displayWidth(row[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...rows.map((row) => displayWidth(row[col]))), ); const dim = noColor ? (text: string) => text : (text: string) => `\x1b[2m${text}\x1b[0m`; @@ -115,12 +110,10 @@ function printTable( const green = noColor ? (text: string) => text : (text: string) => `\x1b[32m${text}\x1b[0m`; const yellow = noColor ? (text: string) => text : (text: string) => `\x1b[33m${text}\x1b[0m`; - const autoStopCol = headersCn.length - 1; - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const autoStopCol = headersEn.length - 1; + const enLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((width) => dim("─".repeat(width))).join("──"); - process.stdout.write(cnLine + "\n"); process.stdout.write(enLine + "\n"); process.stdout.write(separator + "\n"); @@ -296,11 +289,6 @@ export default defineCommand({ }), ]); - if (format === "json") { - emitResult(quotaResult, format); - return; - } - const allQuotas = extractQuotas(quotaResult); let quotas = modelFlag ? allQuotas @@ -321,14 +309,44 @@ export default defineCommand({ quotas.sort((a, b) => (a.quotaValidityPeriod ?? 0) - (b.quotaValidityPeriod ?? 0)); } + const stopStatuses = extractFreeTierOnlyStatuses(stopResult); + const stopMap = new Map(stopStatuses.map((status) => [status.model, status.freeTierOnly])); + + if (format === "json") { + const items = quotas.map((quota) => { + const hasQuota = quota.quotaInitTotal != null && quota.quotaTotal != null; + const used = hasQuota ? quota.quotaInitTotal - quota.quotaTotal : 0; + const stopStatus = stopMap.get(quota.model); + const autoStop = + quota.quotaStatus === "UNKNOWN" + ? "unsupported" + : stopStatus === true + ? true + : stopStatus === false + ? false + : null; + return { + model: quota.model, + type: typeMap.get(quota.model) || null, + remaining: hasQuota ? quota.quotaTotal : null, + total: hasQuota ? quota.quotaInitTotal : null, + usagePercent: + hasQuota && quota.quotaInitTotal > 0 + ? Math.round((used / quota.quotaInitTotal) * 1000) / 10 + : null, + expires: quota.quotaValidityPeriod ? formatDate(quota.quotaValidityPeriod) : null, + autoStop, + }; + }); + emitResult(items, format); + return; + } + if (quotas.length === 0) { process.stdout.write("No free-tier quota found.\n"); return; } - const stopStatuses = extractFreeTierOnlyStatuses(stopResult); - const stopMap = new Map(stopStatuses.map((status) => [status.model, status.freeTierOnly])); - printTable(quotas, stopMap, typeMap, config.noColor); }, }); diff --git a/packages/cli/src/commands/usage/stats.ts b/packages/cli/src/commands/usage/stats.ts index fb5074b..7efbcd5 100644 --- a/packages/cli/src/commands/usage/stats.ts +++ b/packages/cli/src/commands/usage/stats.ts @@ -155,33 +155,32 @@ function resolveUsageMap(item: ModelStatisticItem): Record { } interface UsageLabel { - cn: string; en: string; unit?: string; } const USAGE_KEY_LABELS: Record = { - total_token: { cn: "总 Token", en: "Total Tokens", unit: "tokens" }, - input_token: { cn: "输入 Token", en: "Input Tokens", unit: "tokens" }, - output_token: { cn: "输出 Token", en: "Output Tokens", unit: "tokens" }, - input_token_cache: { cn: "缓存 Token", en: "Cached Tokens", unit: "tokens" }, - input_token_cache_read: { cn: "缓存读取", en: "Cache Read", unit: "tokens" }, - input_token_cache_creation: { cn: "缓存创建", en: "Cache Creation", unit: "tokens" }, - thinking_input_token: { cn: "思考输入", en: "Thinking Input", unit: "tokens" }, - thinking_output_token: { cn: "思考输出", en: "Thinking Output", unit: "tokens" }, - text_input_token: { cn: "文本输入", en: "Text Input", unit: "tokens" }, - purein_text_output_token: { cn: "文本输出", en: "Text Output", unit: "tokens" }, - embedding_token: { cn: "向量", en: "Embedding", unit: "tokens" }, - image_number: { cn: "图片数", en: "Images", unit: "张" }, - video_duration: { cn: "视频时长", en: "Video Duration", unit: "秒" }, - content_duration: { cn: "音频时长", en: "Audio Duration", unit: "秒" }, - tts_text_number: { cn: "语音合成", en: "TTS Chars", unit: "字符" }, - total_token_avg: { cn: "平均 Token/次", en: "Avg Tokens/Req" }, + total_token: { en: "Total Tokens", unit: "tokens" }, + input_token: { en: "Input Tokens", unit: "tokens" }, + output_token: { en: "Output Tokens", unit: "tokens" }, + input_token_cache: { en: "Cached Tokens", unit: "tokens" }, + input_token_cache_read: { en: "Cache Read", unit: "tokens" }, + input_token_cache_creation: { en: "Cache Creation", unit: "tokens" }, + thinking_input_token: { en: "Thinking Input", unit: "tokens" }, + thinking_output_token: { en: "Thinking Output", unit: "tokens" }, + text_input_token: { en: "Text Input", unit: "tokens" }, + purein_text_output_token: { en: "Text Output", unit: "tokens" }, + embedding_token: { en: "Embedding", unit: "tokens" }, + image_number: { en: "Images", unit: "images" }, + video_duration: { en: "Video Duration", unit: "seconds" }, + content_duration: { en: "Audio Duration", unit: "seconds" }, + tts_text_number: { en: "TTS Chars", unit: "chars" }, + total_token_avg: { en: "Avg Tokens/Req" }, }; function formatLabel(label: UsageLabel): string { const unitSuffix = label.unit ? ` [${label.unit}]` : ""; - return `${label.cn} (${label.en})${unitSuffix}`; + return `${label.en}${unitSuffix}`; } function printOverview( @@ -195,12 +194,12 @@ function printOverview( const dim = noColor ? (text: string) => text : (text: string) => `\x1b[2m${text}\x1b[0m`; process.stdout.write( - `${dim("时间范围 Period:")} ${formatDate(startTime)} ~ ${formatDate(endTime)} ${dim(`(${days} 天)`)}\n\n`, + `${dim("Period:")} ${formatDate(startTime)} ~ ${formatDate(endTime)} ${dim(`(${days} days)`)}\n\n`, ); const rows: [string, string][] = [ - ["调用模型数 (Models Called)", formatNumber(stat.modelCount ?? 0)], - ["调用成功次数 (Successful Calls)", formatNumber(stat.callSuccessCount ?? 0)], + ["Models Called", formatNumber(stat.modelCount ?? 0)], + ["Successful Calls", formatNumber(stat.callSuccessCount ?? 0)], ]; for (const usage of stat.usages ?? []) { @@ -226,7 +225,7 @@ function printModelTable( const dim = noColor ? (text: string) => text : (text: string) => `\x1b[2m${text}\x1b[0m`; process.stdout.write( - `${dim("时间范围 Period:")} ${formatDate(startTime)} ~ ${formatDate(endTime)} ${dim(`(${days} 天)`)}\n\n`, + `${dim("Period:")} ${formatDate(startTime)} ~ ${formatDate(endTime)} ${dim(`(${days} days)`)}\n\n`, ); if (items.length === 0) { @@ -257,15 +256,6 @@ function printModelTable( return (idxA === -1 ? 999 : idxA) - (idxB === -1 ? 999 : idxB); }); - const headersCn = [ - "模型", - "调用次数", - ...orderedKeys.map((key) => { - const label = USAGE_KEY_LABELS[key]; - if (!label) return key; - return label.unit ? `${label.cn} [${label.unit}]` : label.cn; - }), - ]; const headersEn = [ "Model", "Calls", @@ -280,20 +270,14 @@ function printModelTable( }), ]); - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...rows.map((row) => displayWidth(row[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...rows.map((row) => displayWidth(row[col]))), ); - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const headerLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((width) => dim("─".repeat(width))).join("──"); - process.stdout.write(cnLine + "\n"); - process.stdout.write(enLine + "\n"); + process.stdout.write(headerLine + "\n"); process.stdout.write(separator + "\n"); for (const row of rows) { @@ -301,7 +285,7 @@ function printModelTable( process.stdout.write(cells.join(" ") + "\n"); } - process.stdout.write(dim(`\n共 ${items.length} 个模型 (Total: ${items.length})`) + "\n"); + process.stdout.write(dim(`\nTotal: ${items.length} models`) + "\n"); } export default defineCommand({ @@ -391,16 +375,31 @@ export default defineCommand({ ); const allItems: ModelStatisticItem[] = []; - const jsonResults: unknown[] = []; for (const result of results) { if (!result) continue; - jsonResults.push(result); const listData = extractListData(result); allItems.push(...listData.list); } if (format === "json") { - emitResult(jsonResults.length === 1 ? jsonResults[0] : jsonResults, format); + const items = allItems.map((item) => { + const usage = resolveUsageMap(item); + const clean: Record = { + model: item.model, + successfulCalls: item.callSuccessCount ?? 0, + }; + for (const [key, val] of Object.entries(usage)) { + clean[key] = val; + } + return clean; + }); + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + items, + }, + format, + ); return; } @@ -425,12 +424,37 @@ export default defineCommand({ process.exit(1); } + const stat = extractOverviewData(result); + if (format === "json") { - emitResult(result, format); + if (!stat) { + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + modelsCalled: 0, + successfulCalls: 0, + }, + format, + ); + return; + } + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + modelsCalled: stat.modelCount ?? 0, + successfulCalls: stat.callSuccessCount ?? 0, + usages: (stat.usages ?? []).map((u) => ({ + key: u.key, + value: u.value, + unit: u.unit, + label: USAGE_KEY_LABELS[u.key]?.en ?? u.key, + })), + }, + format, + ); return; } - const stat = extractOverviewData(result); if (!stat) { process.stdout.write("No usage data found.\n"); return; diff --git a/packages/cli/src/commands/workspace/list.ts b/packages/cli/src/commands/workspace/list.ts index b9f6edb..3f9a1e9 100644 --- a/packages/cli/src/commands/workspace/list.ts +++ b/packages/cli/src/commands/workspace/list.ts @@ -46,8 +46,7 @@ function printTable(workspaces: WorkspaceInfo[], noColor: boolean): void { const dim = noColor ? (text: string) => text : (text: string) => `\x1b[2m${text}\x1b[0m`; const green = noColor ? (text: string) => text : (text: string) => `\x1b[32m${text}\x1b[0m`; - const headersCn = ["空间名称", "Workspace ID", "默认空间"]; - const headersEn = ["Name", "", "Default"]; + const headersEn = ["Name", "Workspace ID", "Default"]; const rows = workspaces.map((ws) => [ ws.agentName, @@ -55,20 +54,14 @@ function printTable(workspaces: WorkspaceInfo[], noColor: boolean): void { ws.defaultAgent ? "Yes" : "-", ]); - const widths = headersCn.map((label, col) => - Math.max( - displayWidth(label), - displayWidth(headersEn[col]), - ...rows.map((row) => displayWidth(row[col])), - ), + const widths = headersEn.map((label, col) => + Math.max(displayWidth(label), ...rows.map((row) => displayWidth(row[col]))), ); - const cnLine = headersCn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); - const enLine = headersEn.map((label, col) => dim(padEnd(label, widths[col]))).join(" "); + const headerLine = headersEn.map((label, col) => bold(padEnd(label, widths[col]))).join(" "); const separator = widths.map((width) => dim("─".repeat(width))).join("──"); - process.stdout.write(cnLine + "\n"); - process.stdout.write(enLine + "\n"); + process.stdout.write(headerLine + "\n"); process.stdout.write(separator + "\n"); for (const row of rows) { @@ -79,9 +72,7 @@ function printTable(workspaces: WorkspaceInfo[], noColor: boolean): void { process.stdout.write(cells.join(" ") + "\n"); } - process.stdout.write( - dim(`\n共 ${workspaces.length} 个空间 (Total: ${workspaces.length})`) + "\n", - ); + process.stdout.write(dim(`\nTotal: ${workspaces.length} workspaces`) + "\n"); } export default defineCommand({ @@ -117,21 +108,30 @@ export default defineCommand({ region, }); - if (format === "json") { - emitResult(result, format); - return; - } - const resp = extractResponseData(result as Record); const dataArr = resp.data as Record[] | undefined; if (!Array.isArray(dataArr) || dataArr.length === 0) { - process.stdout.write("No workspace found.\n"); + if (format === "json") { + emitResult([], format); + } else { + process.stdout.write("No workspace found.\n"); + } return; } let workspaces = dataArr as unknown as WorkspaceInfo[]; if (limit > 0) workspaces = workspaces.slice(0, limit); + if (format === "json") { + const items = workspaces.map((ws) => ({ + workspaceId: ws.workspaceId, + name: ws.agentName, + default: ws.defaultAgent, + })); + emitResult(items, format); + return; + } + printTable(workspaces, config.noColor); }, }); diff --git a/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts b/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts index cac6328..7f6724e 100644 --- a/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts +++ b/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts @@ -2,21 +2,21 @@ import { describe, expect, test } from "vite-plus/test"; import { isDashScopeE2EReady, parseStdoutJson, runCli } from "./helpers.ts"; describe("e2e: advisor recommend", () => { - test("advisor 分组展示子命令帮助且成功退出", async () => { + test("advisor shows subcommand groups and exits successfully", async () => { const { stdout, stderr, exitCode } = await runCli(["advisor"]); expect(exitCode, stderr).toBe(0); expect(`${stdout}\n${stderr}`).toMatch(/advisor|recommend/i); }); - test("advisor recommend --help 正常退出", async () => { + test("advisor recommend --help exits successfully", async () => { const { stderr, exitCode } = await runCli(["advisor", "recommend", "--help"]); expect(exitCode, stderr).toBe(0); expect(stderr).toMatch(/recommend|--message|dry-run/i); }); }); -describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", () => { - test("advisor recommend 缺少 --message 时打印帮助并退出 (0)", async () => { +describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend (DashScope)", () => { + test("advisor recommend without --message prints help and exits", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", @@ -26,13 +26,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(`${stdout}\n${stderr}`).toMatch(/--message|Usage:/i); }); - test("advisor recommend --dry-run 输出意图分析和候选列表", async () => { + test("advisor recommend --dry-run outputs intent analysis and candidates", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "我想做一个能理解图片的客服机器人", + "I want to build a customer service bot that understands images", "--non-interactive", "--output", "json", @@ -44,7 +44,7 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", candidateCount?: number; candidates?: Array<{ model?: string; score?: number }>; }>(stdout); - expect(data.userInput).toBe("我想做一个能理解图片的客服机器人"); + expect(data.userInput).toBe("I want to build a customer service bot that understands images"); expect(data.intent?.requiredCapabilities).toContain("VU"); expect(data.intent?.inputModality).toContain("Image"); expect(data.candidateCount).toBeGreaterThan(0); @@ -52,40 +52,44 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(data.candidates?.[0]?.score).toBeGreaterThan(0); }, 60_000); - test("advisor recommend 完整推荐流程返回结果", async () => { + test("advisor recommend full flow returns results", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--message", - "低成本高并发的在线客服", + "low-cost high-concurrency online customer service", "--non-interactive", "--output", "json", ]); expect(exitCode, stderr).toBe(0); const data = parseStdoutJson<{ - type?: string; - recommendations?: Array<{ - model?: string; - name?: string; - reason?: string; - }>; + intent?: { taskSummary?: string }; + result?: { + type?: string; + recommendations?: Array<{ + model?: string; + name?: string; + reason?: string; + }>; + }; + candidates?: number; }>(stdout); - expect(data.type).toBe("single"); - expect(data.recommendations?.length).toBeGreaterThan(0); - expect(data.recommendations?.[0]?.model).toBeDefined(); - expect(data.recommendations?.[0]?.reason).toBeDefined(); + expect(data.result?.type).toBe("single"); + expect(data.result?.recommendations?.length).toBeGreaterThan(0); + expect(data.result?.recommendations?.[0]?.model).toBeDefined(); + expect(data.result?.recommendations?.[0]?.reason).toBeDefined(); }, 120_000); - // ---- 模型偏好:正例 ---- + // ---- Model preference: positive cases ---- - test("scoped 偏好 — 限定系列时 intent 含 modelPreference.mode=scoped", async () => { + test("scoped preference — intent contains modelPreference.mode=scoped when family is specified", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "deepseek系列中哪个模型最适合用来进行快速推理", + "Which model in the deepseek family is best for fast reasoning?", "--non-interactive", "--output", "json", @@ -103,13 +107,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", ).toBe(true); }, 60_000); - test("comparison 偏好 — 对比模型时 intent 含 modelPreference.mode=comparison", async () => { + test("comparison preference — intent contains modelPreference.mode=comparison when comparing models", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "qwen-max和deepseek-v3哪个更适合做代码生成", + "Which is better for code generation, qwen-max or deepseek-v3?", "--non-interactive", "--output", "json", @@ -122,13 +126,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(data.intent?.modelPreference?.targets?.length).toBeGreaterThanOrEqual(2); }, 60_000); - test("excludes 偏好 — 排除模型时 intent 识别出 modelPreference", async () => { + test("excludes preference — intent detects modelPreference when excluding models", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "不要qwen,推荐一个适合文本生成的模型", + "Not qwen, recommend a model suitable for text generation", "--non-interactive", "--output", "json", @@ -147,15 +151,15 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(hasExcludes).toBe(true); }, 60_000); - // ---- 模型偏好:反例 ---- + // ---- Model preference: negative cases ---- - test("无偏好 — 普通需求查询时 intent 不含 modelPreference 或 mode=unconstrained", async () => { + test("no preference — intent has no modelPreference or mode=unconstrained for generic queries", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "我要做一个能理解图片的客服机器人", + "I want to build a customer service bot that understands images", "--non-interactive", "--output", "json", diff --git a/packages/cli/tests/e2e/quota.e2e.test.ts b/packages/cli/tests/e2e/quota.e2e.test.ts index 19e537d..997f961 100644 --- a/packages/cli/tests/e2e/quota.e2e.test.ts +++ b/packages/cli/tests/e2e/quota.e2e.test.ts @@ -96,7 +96,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(data.data?.input?.supports).toBeUndefined(); }); - test("quota list 文本输出包含双行表头", async () => { + test("quota list 文本输出包含单行英⽂表头", async () => { const { stdout, stderr, exitCode } = await runCli([ "quota", "list", @@ -105,11 +105,9 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { "--no-color", ]); expect(exitCode, stderr).toBe(0); - expect(stdout).toContain("模型"); expect(stdout).toContain("Model"); - expect(stdout).toContain("RPM"); - expect(stdout).toContain("TPM"); - expect(stdout).toContain("可设上限 TPM"); + expect(stdout).toContain("Req/min"); + expect(stdout).toContain("Token/min"); expect(stdout).toContain("Max TPM"); }); @@ -125,7 +123,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { ]); expect(exitCode, stderr).toBe(0); expect(stdout).toContain("qwen3.6-plus"); - expect(stdout).toMatch(/共 1 个模型/); + expect(stdout).toMatch(/Total: 1 models/); }); test("quota list --model 不存在的模型报错", async () => { @@ -141,14 +139,19 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(stderr).toContain("no matching models found"); }); - test("quota list JSON 输出包含 qpmInfo", async () => { + test("quota list JSON 输出包含 model/rpm/tpm/maxTPM", async () => { const { stdout, stderr, exitCode } = await runCli(["quota", "list", "--output", "json"]); expect(exitCode, stderr).toBe(0); - const data = parseStdoutJson>(stdout); + const data = + parseStdoutJson< + Array<{ model?: string; rpm?: number | null; tpm?: number | null; maxTPM?: number | null }> + >(stdout); expect(Array.isArray(data)).toBe(true); expect(data.length).toBeGreaterThan(0); expect(data[0].model).toBeTypeOf("string"); - expect(data[0].qpmInfo).toBeDefined(); + expect(data[0].rpm).toBeTypeOf("number"); + expect(data[0].tpm).toBeTypeOf("number"); + expect(data[0].maxTPM).toBeTypeOf("number"); }); test("quota request --dry-run 输出请求参数", async () => { @@ -235,7 +238,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(data.apis).toContain("zeldaEasy.bailian-telemetry.monitor.getMonitorData"); }); - test("quota check 文本输出包含双行表头", async () => { + test("quota check 文本输出包含单行英⽂表头", async () => { const { stdout, stderr, exitCode } = await runCli([ "quota", "check", @@ -244,12 +247,10 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { "--no-color", ]); expect(exitCode, stderr).toBe(0); - expect(stdout).toContain("模型"); expect(stdout).toContain("Model"); - expect(stdout).toContain("RPM 用量/限额"); expect(stdout).toContain("RPM Usage/Limit"); - expect(stdout).toContain("TPM 用量/限额"); - expect(stdout).toContain("状态"); + expect(stdout).toContain("TPM Usage/Limit"); + expect(stdout).toContain("Status"); }); test("quota check --model 指定单模型", async () => { @@ -264,7 +265,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { ]); expect(exitCode, stderr).toBe(0); expect(stdout).toContain("qwen3.6-plus"); - expect(stdout).toMatch(/共 1 个模型/); + expect(stdout).toMatch(/Total: 1 models/); }); test("quota check --model 逗号分隔多模型", async () => { @@ -280,7 +281,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(exitCode, stderr).toBe(0); expect(stdout).toContain("qwen3.6-plus"); expect(stdout).toContain("qwen-plus"); - expect(stdout).toMatch(/共 2 个模型/); + expect(stdout).toMatch(/Total: 2 models/); }); test("quota check JSON 输出包含用量和限额字段", async () => { @@ -311,7 +312,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(data[0].tpmLimit).toBeTypeOf("number"); }); - test("quota check 状态列显示正常/接近限流/已限流之一", async () => { + test("quota check 状态列显示 Normal/Near Limit/Throttled 之一", async () => { const { stdout, stderr, exitCode } = await runCli([ "quota", "check", @@ -323,7 +324,7 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { ]); expect(exitCode, stderr).toBe(0); const hasStatus = - stdout.includes("正常") || stdout.includes("接近限流") || stdout.includes("已限流"); + stdout.includes("Normal") || stdout.includes("Near Limit") || stdout.includes("Throttled"); expect(hasStatus).toBe(true); }); diff --git a/packages/cli/tests/e2e/usage-free.e2e.test.ts b/packages/cli/tests/e2e/usage-free.e2e.test.ts index 064f027..ff4087a 100644 --- a/packages/cli/tests/e2e/usage-free.e2e.test.ts +++ b/packages/cli/tests/e2e/usage-free.e2e.test.ts @@ -133,12 +133,21 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage free(Console)", () => { "json", ]); expect(exitCode, stderr).toBe(0); - const data = parseStdoutJson<{ - code?: string; - successResponse?: boolean; - }>(stdout); - expect(data.code).toBe("200"); - expect(data.successResponse).toBe(true); + const data = parseStdoutJson< + Array<{ + model?: string; + type?: string | null; + remaining?: number | null; + total?: number | null; + usagePercent?: number | null; + expires?: string | null; + autoStop?: boolean | string | null; + }> + >(stdout); + expect(Array.isArray(data)).toBe(true); + expect(data.length).toBeGreaterThan(0); + expect(data[0].model).toBe("qwen3-max"); + expect(data[0].type).toBeTypeOf("string"); }); test("usage free --model 单模型文本输出包含表头", async () => { @@ -276,7 +285,9 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage free(Console)", () => { "json", ]); expect(exitCode, stderr).toBe(0); - const data = parseStdoutJson<{ code?: string }>(stdout); - expect(data.code).toBe("200"); + const data = parseStdoutJson>(stdout); + expect(Array.isArray(data)).toBe(true); + expect(data.length).toBeGreaterThan(0); + expect(data[0].model).toBe("qwen3-max"); }); }); diff --git a/packages/cli/tests/e2e/usage-stats.e2e.test.ts b/packages/cli/tests/e2e/usage-stats.e2e.test.ts index 29c9e0d..9aab48b 100644 --- a/packages/cli/tests/e2e/usage-stats.e2e.test.ts +++ b/packages/cli/tests/e2e/usage-stats.e2e.test.ts @@ -169,11 +169,17 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage stats(Console)", () => { ]); expect(exitCode, stderr).toBe(0); const data = parseStdoutJson<{ - code?: string; - successResponse?: boolean; + period?: { start?: string; end?: string; days?: number }; + modelsCalled?: number; + successfulCalls?: number; + usages?: Array<{ key?: string; value?: number }>; }>(stdout); - expect(data.code).toBe("200"); - expect(data.successResponse).toBe(true); + expect(data.period).toBeDefined(); + expect(data.period?.start).toBeTypeOf("string"); + expect(data.period?.end).toBeTypeOf("string"); + expect(data.period?.days).toBeTypeOf("number"); + expect(data.modelsCalled).toBeTypeOf("number"); + expect(data.successfulCalls).toBeTypeOf("number"); }); test("usage stats 概览文本输出包含中英文表头", async () => { diff --git a/packages/core/src/advisor/constants/prompts.ts b/packages/core/src/advisor/constants/prompts.ts index 9d2f04d..2930f92 100644 --- a/packages/core/src/advisor/constants/prompts.ts +++ b/packages/core/src/advisor/constants/prompts.ts @@ -1,196 +1,181 @@ -export const INTENT_MODEL = "qwen-turbo"; +export const INTENT_MODEL = "qwen-flash"; export const RANKING_MODEL = "qwen3.6-flash"; -export const RANKING_MODEL_FAST = "qwen-turbo"; - -export const INTENT_SYSTEM_PROMPT = `你是一个意图分析器。根据用户的需求描述,先理解用户场景,再提取结构化信息。 - -## 分析步骤 -1. 用一句话总结用户的核心需求(taskSummary),要体现具体场景而非泛泛描述 -2. 推断场景特征(scenarioHints),例如:["需要低延迟","面向C端用户","高并发","对话式交互","离线批处理","需要精准度"] -3. 基于场景特征推断 budget 和 qualityPreference - - 只在用户明确表达或场景强烈暗示时偏离默认值 - - 用户明确说"低成本"、"便宜"、"省钱" → budget:"low" - - 用户明确说"最好的"、"高精度"、"不计成本" → qualityPreference:"flagship" - - 场景本身有强约束时才推断:如"日均百万请求的客服" → budget:"low"(高并发=成本敏感) - - 其他情况保持 budget:"medium", qualityPreference:"balanced" -4. 提取模态、能力、特性等结构化字段 - -## 示例 - -用户: "做一个低成本高并发的在线客服" -→ budget:"low", qualityPreference:"cost-optimized"(用户明确说了低成本) - -用户: "法律合同审查,要求高精准度" -→ budget:"medium", qualityPreference:"flagship"(用户明确要求高精准度,但没提预算) - -用户: "我要做一个能理解图片的客服机器人" -→ budget:"medium", qualityPreference:"balanced"(用户没提成本和质量要求,不过度推断) - -用户: "帮我选一个写代码的模型" -→ budget:"medium", qualityPreference:"balanced"(通用需求,无明确倾向) - -用户: "预算有限,做个简单的文本摘要功能" -→ budget:"low", qualityPreference:"cost-optimized"(用户说了预算有限) - -用户: "企业级知识库问答,准确率是第一优先级" -→ budget:"high", qualityPreference:"flagship"(企业级+准确率第一=愿投入高成本) - -用户: "个人学习项目,试试AI生成图片" -→ budget:"low", qualityPreference:"cost-optimized"(个人学习=成本敏感) - -用户: "做一个Agent自动根据用户意图生成动画片" -→ budget:"medium", qualityPreference:"balanced"(复杂pipeline,但没明确成本/质量约束) - -## 模型偏好识别 -分析用户是否提到了特定的模型、模型系列或厂商,据此判断推荐模式: -- 用户未提到任何模型/系列/厂商 → mode:"unconstrained",不填 targets -- 用户限定了范围(如"deepseek系列哪个好"、"通义千问的模型推荐"、"开源的推理模型") → mode:"scoped",targets:["deepseek"] 或 ["通义千问"] -- 用户要对比特定模型(如"wan2.6和wan2.7哪个好"、"qwen-max和deepseek-v3对比"、"qwen-max适合做法律分析吗") → mode:"comparison",targets:["wan2.6","wan2.7"] - - 单模型评估也算 comparison,targets 只填一个 -- 用户以某模型为参照找替代(如"有没有类似qwen-max但更便宜的") → mode:"alternative",targets:["qwen-max"] -- 用户明确排除某些模型/系列(如"除了qwen还有什么好的") → excludes:["qwen"],mode 根据其他条件判断 -- targets 填写用户原文中的模型/系列名称,保持原文写法 - -## 输出字段 -- taskSummary: 一句话场景理解(必须具体,禁止"用户想用AI做某事"这种废话) -- scenarioHints: 推断的场景特征数组 -- complexity: "single"(单一模型可完成)或 "pipeline"(需要多个模型协同) -- segments: 仅 pipeline 时填写,每步包含 step/inputModality/outputModality/requiredCapabilities。 - - step 必须是一句话描述该步骤在用户任务中解决的具体问题,例如"解析天气预报数据,生成适合视频制作的场景描述文本",禁止用编号或泛化的模态标签 - - segments 必须形成模态链路:每步的 inputModality 应包含上一步的 outputModality,确保上下游数据可以衔接 -- inputModality: 用户输入涉及的模态 ["Text","Image","Video","Audio"] -- outputModality: 期望输出的模态 -- requiredCapabilities: 需要的能力。可选代码(必须严格使用,不要自创): - TG=文本生成, Reasoning=推理, VU=视觉理解, IG=图像生成, VG=视频生成, - TTS=语音合成, ASR=语音识别, Realtime-ASR=实时语音识别, - Realtime-Text-to-Speech=实时语音合成, Realtime-Audio-Translate=实时音频翻译, - Realtime-Omni=实时全模态, Multimodal-Omni=全模态, ME=多模态嵌入, - TR=翻译, 3D-generation=3D生成 -- requiredFeatures: 需要的特性 (function-calling, web-search, structured-outputs, prefix-completion) -- budget: "low"/"medium"/"high"(基于场景推断,不要默认 medium) +export const RANKING_MODEL_FAST = "qwen-flash"; + +export const INTENT_SYSTEM_PROMPT = `You are an intent analyzer. Given the user's requirement, understand the scenario first, then extract structured information. + +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. All text fields (taskSummary, scenarioHints) must be in English. + +## Analysis Steps +1. Summarize the user's core need in one sentence (taskSummary) — be specific about the scenario, not generic +2. Infer scenario hints (scenarioHints), e.g.: ["low-latency", "consumer-facing", "high-concurrency", "conversational", "offline-batch", "high-precision"] +3. Infer budget and qualityPreference from scenario hints + - Only deviate from defaults when the user explicitly states or the scenario strongly implies + - User says "low cost", "cheap", "save money" → budget:"low" + - User says "best", "high precision", "cost no object" → qualityPreference:"flagship" + - Infer from scenario constraints only when strong: e.g. "1M requests/day customer service" → budget:"low" (high concurrency = cost-sensitive) + - Otherwise keep budget:"medium", qualityPreference:"balanced" +4. Extract modalities, capabilities, features etc. + +## Model preference detection +Analyze whether the user mentioned specific models, model families, or vendors: +- No models/families/vendors mentioned → mode:"unconstrained", no targets +- User scoped the range (e.g. "recommend from the deepseek family", "open-source reasoning models") → mode:"scoped", targets:["deepseek"] +- User wants to compare specific models (e.g. "compare wan2.6 and wan2.7", "is qwen-max good for legal analysis") → mode:"comparison", targets:["wan2.6","wan2.7"] + - Single model evaluation is also comparison with one target +- User wants alternatives to a reference model (e.g. "something like qwen-max but cheaper") → mode:"alternative", targets:["qwen-max"] +- User explicitly excludes certain models/families (e.g. "good models besides qwen") → excludes:["qwen"], mode determined by other signals +- targets should capture the model/family names as the user wrote them + +## Output fields +- taskSummary: one-sentence scenario understanding (must be specific, never generic like "user wants AI") +- scenarioHints: array of inferred scenario features +- complexity: "single" or "pipeline" +- segments: only for pipeline, each with step/inputModality/outputModality/requiredCapabilities + - step must describe the specific problem this step solves in the user's task, no numbered or generic modal labels + - segments must form a modality chain: each step's inputModality should cover the previous step's outputModality +- inputModality: user input modalities ["Text","Image","Video","Audio"] +- outputModality: expected output modalities +- requiredCapabilities: capability codes (use strictly from the list, don't invent): + TG=Text Generation, Reasoning=Reasoning, VU=Vision Understanding, IG=Image Generation, VG=Video Generation, + TTS=Text-to-Speech, ASR=Speech-to-Text, Realtime-ASR=Realtime Speech-to-Text, + Realtime-Text-to-Speech=Realtime Text-to-Speech, Realtime-Audio-Translate=Realtime Audio Translation, + Realtime-Omni=Realtime Omni-modal, Multimodal-Omni=Multimodal Omni, ME=Multimodal Embedding, + TR=Translation, 3D-generation=3D Generation +- requiredFeatures: required features (function-calling, web-search, structured-outputs, prefix-completion) +- budget: "low"/"medium"/"high" - contextNeed: "standard"/"large"/"extra-large" -- qualityPreference: "flagship"/"balanced"/"cost-optimized"(基于场景推断,不要默认 balanced) -- modelPreference: { mode, targets?, excludes? }(见上方"模型偏好识别") +- qualityPreference: "flagship"/"balanced"/"cost-optimized" +- modelPreference: { mode, targets?, excludes? } -只输出 JSON,不要有其他文字。`; +Output only JSON, no other text.`; -export const SINGLE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。从以下候选模型中选出最佳推荐。 +export const SINGLE_SYSTEM_PROMPT = `You are a model recommendation advisor for Alibaba Cloud Model Studio. From the candidate models below, select the best recommendations. -## 背景 -系统已根据用户意图预筛选了候选模型,你只需从中精选并排序。 -意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights, step, summary — must be written in English. -## 推荐策略 +## Background +The system has pre-filtered candidate models based on intent analysis. Your job is to rank and pick from these candidates. +The intent includes budget and qualityPreference fields representing the user's actual needs. -推荐 3 个不同档次的模型,但排序必须反映用户的真实需求: +## Recommendation Strategy -- 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位 -- 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明与 #1 相比的 tradeoff -- 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异 +Recommend 3 models at different tiers, but ordering must reflect the user's true needs: -关键原则: -- budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型,而非旗舰模型 -- budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型 -- budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型,不预设档次偏好 +- #1 (Best Pick): Based on budget and qualityPreference, pick the best-fitting tier and put its top model first +- #2 (Runner-Up): A worthy consideration from another tier, explaining tradeoffs vs #1 +- #3 (Alternative): A third-perspective choice, explaining scenario differences -每个推荐都必须说明该模型为什么适合(或作为备选为什么值得考虑),理由必须关联用户的具体需求。 +Key principles: +- budget:"low" / qualityPreference:"cost-optimized" → #1 should be the best value model, not a flagship +- budget:"high" / qualityPreference:"flagship" → #1 should be the most capable flagship model +- budget:"medium" / qualityPreference:"balanced" → #1 should be the best all-around match -## 规则 -- 只能推荐候选列表中的模型,严禁推荐列表外的模型 -- 严禁使用泛泛的推荐理由(如"性能强大"、"综合能力好"、"效果不错"),每条 reason 必须说明该模型解决用户任务中的什么具体问题 -- 三个推荐的理由不允许雷同,每个必须从不同维度论证 -- 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面 -- 有家族信息时:避免推荐同一家族的多个模型,优先推荐稳定版本 -- 有版本标签时:优先推荐 stable/latest 版本,除非用户明确需要特定版本 -- 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权 -- 如果没有合适的模型,返回空数组 -- 如果你认为该需求实际需要多模型协同完成(pipeline),可以输出 type:"pipeline" 格式 -- 输出严格 JSON,不要输出其他内容 +Each recommendation must explain why the model fits (or as an alternative, why it's worth considering), with reasoning tied to the user's specific needs. -## 输出格式 +## Rules +- Only recommend models from the candidate list — never recommend outside it +- No generic reasons ("powerful", "good performance", "effective"). Each reason must describe how the model solves a specific aspect of the user's task +- All three recommendations must have distinct reasoning angles, not duplicate reasons +- When pricing is available: factor in budget, put the most budget-friendly option first +- When family info is available: avoid recommending multiple models from the same family, prefer stable versions +- When version tags are available: prefer stable/latest versions unless the user explicitly needs a specific version +- Models without enriched fields: rank by capability and description — don't penalize for missing info +- If no model fits, return an empty array +- If you believe the task actually requires multi-model collaboration (pipeline), you may output type:"pipeline" format +- Output strict JSON, no other text -单一任务: -{"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]} +## Output Format -复合任务(仅当你确信需要多模型协同时): -{"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"步骤描述","recommendations":[{"model":"模型ID","reason":"选择理由","highlights":["亮点"]}]}]}`; +Single task: +{"type":"single","recommendations":[{"model":"model ID","reason":"recommendation reason","highlights":["key highlights"]}]} -export const PIPELINE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。用户需求已被拆解为多步骤流水线,请为每步选出最佳模型。 +Pipeline (only when confident multi-model is needed): +{"type":"pipeline","summary":"one-line solution description","steps":[{"step":"step description","recommendations":[{"model":"model ID","reason":"reason for choosing","highlights":["highlights"]}]}]}`; -## 背景 -系统已根据各步骤需求预筛选了候选模型。 -意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。 +export const PIPELINE_SYSTEM_PROMPT = `You are a model recommendation advisor for Alibaba Cloud Model Studio. The user's need has been decomposed into multi-step pipeline. Select the best model for each step. -## 推荐策略 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights, step, summary — must be written in English. -每步推荐 3 个不同档次的模型,但排序必须反映用户的真实需求: +## Background +The system has pre-filtered candidate models for each step's requirements. +The intent includes budget and qualityPreference fields representing the user's actual needs. -- 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位 -- 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明 tradeoff -- 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异 +## Recommendation Strategy -关键原则: -- budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型 -- budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型 -- budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型 +Recommend 3 models at different tiers per step, ordering by user needs: -## 规则 -- 只能推荐候选列表中的模型 -- 每步推荐多个模型,按优先级排序,每个推荐给出简短理由和关键亮点 -- step 字段必须用一句话描述该步骤在用户任务中解决的具体问题,禁止用编号或泛化的模态标签(如"输出: Text") -- 严禁使用泛泛的推荐理由,每条 reason 必须说明该模型在这一步解决用户任务中的什么具体问题 -- 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面 -- 有家族信息时:避免在相邻步骤使用同一家族的不同规格模型,除非确实需要 -- 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权 -- 相邻步骤的模型必须模态兼容:上一步模型的输出模态必须被下一步模型的输入模态支持 -- 如果你认为该需求其实单模型可以完成,可以输出 type:"single" 格式 -- 输出严格 JSON +- #1 (Best Pick): Based on budget and qualityPreference, pick the best-fitting tier and put its top model first +- #2 (Runner-Up): A worthy consideration from another tier, explaining tradeoffs +- #3 (Alternative): A third-perspective choice -## 输出格式 +Key principles: +- budget:"low" / qualityPreference:"cost-optimized" → #1 should be the best value model +- budget:"high" / qualityPreference:"flagship" → #1 should be the most capable flagship model +- budget:"medium" / qualityPreference:"balanced" → #1 should be the best all-around match -{"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"该步骤在用户任务中解决的具体问题","recommendations":[{"model":"模型ID","reason":"该模型如何解决这一步的具体问题","highlights":["亮点"]}]}]} +## Rules +- Only recommend models from the candidate list +- Each step recommends multiple models sorted by priority, each with brief reason and key highlights +- The "step" field must describe the specific problem this step solves in the user's task — no numbered or generic modal labels (e.g. "Output: Text") +- No generic reasons. Each reason must describe how the model solves a specific aspect of the user's task at this step +- When pricing is available: factor in budget, put the most budget-friendly option first +- When family info is available: avoid using different tiers of the same family in adjacent steps unless truly needed +- Models without enriched fields: rank by capability and description — don't penalize for missing info +- Adjacent steps must be modality-compatible: the previous step's output modalities must be supported as input modalities by the next step +- If you believe the task can be done with a single model, output type:"single" format +- Output strict JSON -或者(如果你认为单模型即可): -{"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]}`; +## Output Format -export const COMPARISON_SYSTEM_PROMPT = `你是阿里云百炼平台的模型对比顾问。用户想对比特定模型,请根据使用场景进行对比分析。 +{"type":"pipeline","summary":"one-line solution description","steps":[{"step":"specific problem this step solves in the user's task","recommendations":[{"model":"model ID","reason":"how this model solves the specific problem at this step","highlights":["highlights"]}]}]} -## 背景 -用户指定了要对比的模型,系统已将这些模型和相关候选预筛选到列表中。 -意图分析中的 modelPreference.targets 是用户要对比的模型。 +Or (if single model suffices): +{"type":"single","recommendations":[{"model":"model ID","reason":"recommendation reason","highlights": +["key highlights"]}]}`; -## 对比策略 -- 用户指定的模型必须全部出现在推荐结果中,按适合程度排序 -- 每个模型的 reason 必须是对比性的,说明该模型相对于其他对比模型的优势和劣势 -- 如果候选中有比用户指定的更合适的模型,可以额外推荐,但用户指定的必须优先包含 -- 单模型评估场景(targets 只有一个):评估该模型是否适合用户需求,同时推荐更优的替代 +export const COMPARISON_SYSTEM_PROMPT = `You are a model comparison advisor for Alibaba Cloud Model Studio. The user wants to compare specific models — analyze them against the use case. -## 规则 -- 只能推荐候选列表中的模型 -- reason 必须包含对比视角:该模型相比其他模型在哪些方面更好/更差 -- highlights 突出各模型的差异化特点 -- 输出严格 JSON,不要输出其他内容 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights — must be written in English. -## 输出格式 -{"type":"single","recommendations":[{"model":"模型ID","reason":"对比分析理由","highlights":["差异化亮点"]}]}`; +## Background +The user specified models to compare. The system has pre-filtered these models and related candidates into the list. +The intent's modelPreference.targets are the models to compare. -export const ALTERNATIVE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型替代顾问。用户以某个模型为参照,寻找替代方案。 +## Comparison Strategy +- All user-specified models must appear in the results, sorted by suitability +- Each model's reason must be comparative: describe strengths and weaknesses relative to other models being compared +- If candidates contain better fits than what the user specified, they can be additionally recommended, but user-specified models take priority +- Single-model evaluation (one target): evaluate if the model fits, and recommend better alternatives -## 背景 -用户以某个模型为参照点,想找到在特定维度上更优的替代方案(如更便宜、更快、更强)。 -意图分析中的 modelPreference.targets 是参照模型。 +## Rules +- Only recommend models from the candidate list +- reason must include comparative perspective: how this model is better/worse compared to others +- highlights should emphasize differentiating characteristics +- Output strict JSON -## 替代策略 -- 推荐 #1:如果参照模型在候选中,先评估它是否满足用户需求,给出其基本定位 -- 推荐 #2~#3:推荐替代方案,reason 必须说明相比参照模型在用户关注维度上的 tradeoff -- 关注用户提到的替代维度(如"更便宜"→重点对比定价,"更强"→重点对比能力) - -## 规则 -- 只能推荐候选列表中的模型 -- 参照模型必须包含在结果中(如果在候选列表中) -- 替代推荐的 reason 必须说明与参照模型的具体差异 -- 避免推荐和参照模型同系列的其他版本(除非确实有显著差异) -- 输出严格 JSON,不要输出其他内容 +## Output Format +{"type":"single","recommendations":[{"model":"model ID","reason":"comparative analysis","highlights":["differentiators"]}]}`; -## 输出格式 -{"type":"single","recommendations":[{"model":"模型ID","reason":"替代分析理由","highlights":["差异化亮点"]}]}`; +export const ALTERNATIVE_SYSTEM_PROMPT = `You are a model alternative advisor for Alibaba Cloud Model Studio. The user has a reference model and wants to find alternatives. + +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights — must be written in English. + +## Background +The user has a reference model and wants to find alternatives that are better in specific dimensions (cheaper, faster, more capable). +The intent's modelPreference.targets is the reference model. + +## Alternative Strategy +- #1: If the reference model is in candidates, first evaluate if it meets the user's needs — give its positioning +- #2~#3: Recommend alternatives. reason must explain the tradeoff vs the reference model in the user's dimensions of interest +- Focus on the user's stated alternative dimension (e.g. "cheaper" → focus on pricing comparison, "better" → focus on capability comparison) + +## Rules +- Only recommend models from the candidate list +- The reference model must be included in results if it's in the candidate list +- Alternative recommendations must explain concrete differences from the reference model +- Avoid recommending other versions from the same family unless there's a significant difference +- Output strict JSON + +## Output Format +{"type":"single","recommendations":[{"model":"model ID","reason":"alternative analysis","highlights":["differentiators"]}]}`; diff --git a/packages/core/src/advisor/embedding.ts b/packages/core/src/advisor/embedding.ts index fb9311e..d8fc620 100644 --- a/packages/core/src/advisor/embedding.ts +++ b/packages/core/src/advisor/embedding.ts @@ -76,20 +76,20 @@ async function embedBatch(config: Config, texts: string[]): Promise } const CAPABILITY_LABELS: Record = { - TG: "文本生成", - Reasoning: "推理", - VU: "视觉理解", - IG: "图像生成", - VG: "视频生成", - TTS: "语音合成", - ASR: "语音识别", + TG: "Text Generation", + Reasoning: "Reasoning", + VU: "Vision Understanding", + IG: "Image Generation", + VG: "Video Generation", + TTS: "Text-to-Speech", + ASR: "Speech-to-Text", }; const MODALITY_LABELS: Record = { - Text: "文本", - Image: "图片/图像", - Video: "视频", - Audio: "音频/语音", + Text: "Text", + Image: "Image", + Video: "Video", + Audio: "Audio", }; interface GroupData { @@ -135,12 +135,12 @@ function buildModelText(model: ModelProfile, descriptions: Map): model.name, model.model, description, - caps ? `能力: ${caps}` : "", - inputMods ? `输入: ${inputMods}` : "", - outputMods ? `输出: ${outputMods}` : "", - model.features?.length ? `特性: ${model.features.join(", ")}` : "", + caps ? `Capabilities: ${caps}` : "", + inputMods ? `Input: ${inputMods}` : "", + outputMods ? `Output: ${outputMods}` : "", + model.features?.length ? `Features: ${model.features.join(", ")}` : "", model.familyName || "", - model.category ? `定位: ${model.category}` : "", + model.category ? `Category: ${model.category}` : "", ].filter(Boolean); return parts.join(" | "); diff --git a/packages/core/src/advisor/recommend.ts b/packages/core/src/advisor/recommend.ts index 863334c..53244d8 100644 --- a/packages/core/src/advisor/recommend.ts +++ b/packages/core/src/advisor/recommend.ts @@ -46,26 +46,27 @@ function buildCandidatesContext(candidates: ScoredCandidate[]): string { .map(({ model: profile }) => { const parts = [ `ID: ${profile.model}`, - `名称: ${profile.name}`, - `描述: ${profile.shortDescription || profile.description}`, - `能力: ${profile.capabilities.join(", ")}`, - `特性: ${profile.features.join(", ")}`, + `Name: ${profile.name}`, + `Description: ${profile.shortDescription || profile.description}`, + `Capabilities: ${profile.capabilities.join(", ")}`, + `Features: ${profile.features.join(", ")}`, ]; - if (profile.contextWindow) parts.push(`上下文窗口: ${profile.contextWindow}`); - if (profile.maxOutputTokens) parts.push(`最大输出: ${profile.maxOutputTokens}`); - if (profile.category) parts.push(`类别: ${profile.category}`); + if (profile.contextWindow) parts.push(`Context Window: ${profile.contextWindow}`); + if (profile.maxOutputTokens) parts.push(`Max Output: ${profile.maxOutputTokens}`); + if (profile.category) parts.push(`Category: ${profile.category}`); const modality = profile.inferenceMetadata; if (modality?.request_modality?.length) - parts.push(`输入模态: ${modality.request_modality.join(", ")}`); + parts.push(`Input Modality: ${modality.request_modality.join(", ")}`); if (modality?.response_modality?.length) - parts.push(`输出模态: ${modality.response_modality.join(", ")}`); + parts.push(`Output Modality: ${modality.response_modality.join(", ")}`); const prices = formatPrices(profile); - if (prices) parts.push(`定价: ${prices}`); + if (prices) parts.push(`Pricing: ${prices}`); const qpm = formatQpm(profile); if (qpm) parts.push(`QPM: ${qpm}`); - if (profile.versionTag) parts.push(`版本: ${profile.versionTag}`); - if (profile.openSource !== undefined) parts.push(`开源: ${profile.openSource ? "是" : "否"}`); - if (profile.family) parts.push(`家族: ${profile.family}`); + if (profile.versionTag) parts.push(`Version: ${profile.versionTag}`); + if (profile.openSource !== undefined) + parts.push(`Open Source: ${profile.openSource ? "Yes" : "No"}`); + if (profile.family) parts.push(`Family: ${profile.family}`); return parts.join(" | "); }) .join("\n"); @@ -86,29 +87,29 @@ function buildIntentContext(intent: IntentProfile): string { modelPreference, } = intent; const parts: string[] = []; - if (taskSummary) parts.push(`场景理解: ${taskSummary}`); - if (scenarioHints.length) parts.push(`场景特征: ${scenarioHints.join(", ")}`); - if (inputModality.length) parts.push(`输入模态: ${inputModality.join(", ")}`); - if (outputModality.length) parts.push(`输出模态: ${outputModality.join(", ")}`); - if (requiredCapabilities.length) parts.push(`所需能力: ${requiredCapabilities.join(", ")}`); - if (requiredFeatures.length) parts.push(`所需特性: ${requiredFeatures.join(", ")}`); - parts.push(`预算倾向: ${budget}`); - parts.push(`质量偏好: ${qualityPreference}`); - if (contextNeed !== ContextNeeds.Standard) parts.push(`上下文需求: ${contextNeed}`); + if (taskSummary) parts.push(`Task: ${taskSummary}`); + if (scenarioHints.length) parts.push(`Scenario: ${scenarioHints.join(", ")}`); + if (inputModality.length) parts.push(`Input: ${inputModality.join(", ")}`); + if (outputModality.length) parts.push(`Output: ${outputModality.join(", ")}`); + if (requiredCapabilities.length) parts.push(`Capabilities: ${requiredCapabilities.join(", ")}`); + if (requiredFeatures.length) parts.push(`Features: ${requiredFeatures.join(", ")}`); + parts.push(`Budget: ${budget}`); + parts.push(`Quality: ${qualityPreference}`); + if (contextNeed !== ContextNeeds.Standard) parts.push(`Context: ${contextNeed}`); if (modelPreference && modelPreference.mode !== "unconstrained") { - parts.push(`模型偏好: ${modelPreference.mode}`); + parts.push(`Mode: ${modelPreference.mode}`); if (modelPreference.targets?.length) - parts.push(`目标模型: ${modelPreference.targets.join(", ")}`); + parts.push(`Targets: ${modelPreference.targets.join(", ")}`); if (modelPreference.excludes?.length) - parts.push(`排除模型: ${modelPreference.excludes.join(", ")}`); + parts.push(`Excludes: ${modelPreference.excludes.join(", ")}`); } if (segments?.length) { - parts.push(`拆解步骤:`); + parts.push(`Pipeline Steps:`); for (const seg of segments) { - const inMod = seg.inputModality.join(",") || "无"; - const outMod = seg.outputModality.join(",") || "无"; - const caps = seg.requiredCapabilities.join(",") || "无"; - parts.push(` - ${seg.step} (输入: ${inMod} → 输出: ${outMod}, 能力: ${caps})`); + const inMod = seg.inputModality.join(",") || "none"; + const outMod = seg.outputModality.join(",") || "none"; + const caps = seg.requiredCapabilities.join(",") || "none"; + parts.push(` - ${seg.step} (Input: ${inMod} → Output: ${outMod}, Capabilities: ${caps})`); } } return parts.join("\n"); @@ -175,7 +176,7 @@ function validatePipelineCompatibility( const compatible = accepts.some((mod) => prevOutputs.has(mod)); if (!compatible && accepts.length > 0) { warnings.push( - `${rec.name} 的输入模态 [${accepts.join(", ")}] 可能不兼容上一步的输出模态 [${[...prevOutputs].join(", ")}]`, + `${rec.name}'s input modalities [${accepts.join(", ")}] may not be compatible with the previous step's output modalities [${[...prevOutputs].join(", ")}]`, ); } } @@ -204,7 +205,7 @@ export async function rankModels( systemPrompt = ALTERNATIVE_SYSTEM_PROMPT; } else if (preferenceMode === "scoped") { const scopeNote = intent.modelPreference?.targets?.length - ? `\n\n## 范围限定\n用户明确要求在以下范围内推荐:${intent.modelPreference.targets.join("、")}。请优先从匹配该范围的模型中选择。` + ? `\n\n## Scope Restriction\nThe user explicitly requested recommendations from: ${intent.modelPreference.targets.join(", ")}. Prioritize models within this scope.` : ""; systemPrompt = (intent.complexity === Complexities.Pipeline @@ -219,8 +220,8 @@ export async function rankModels( const userMessage = intent.complexity === Complexities.Pipeline - ? `意图分析结果:\n${intentContext}\n\n候选模型列表:\n${candidatesContext}\n\n用户原始需求:${userInput}\n\n请为流水线各步骤各推荐最多 ${top} 个模型。` - : `意图分析结果:\n${intentContext}\n\n候选模型列表:\n${candidatesContext}\n\n用户原始需求:${userInput}\n\n请推荐最多 ${top} 个模型。`; + ? `Intent Analysis:\n${intentContext}\n\nCandidate Models:\n${candidatesContext}\n\nUser Request: ${userInput}\n\nRecommend up to ${top} models for each pipeline step. Respond in English only.` + : `Intent Analysis:\n${intentContext}\n\nCandidate Models:\n${candidatesContext}\n\nUser Request: ${userInput}\n\nRecommend up to ${top} models. Respond in English only.`; const body: Record = { model: useThinkingModel ? RANKING_MODEL : RANKING_MODEL_FAST,