Skip to content

Commit e22dff3

Browse files
committed
feat: add model preference
1 parent 7914a6d commit e22dff3

8 files changed

Lines changed: 364 additions & 5 deletions

File tree

packages/cli/src/commands/advisor/recommend.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ const QUALITY_LABELS: Record<string, string> = {
5353
balanced: "均衡",
5454
"cost-optimized": "性价比优先",
5555
};
56+
const PREFERENCE_MODE_LABELS: Record<string, string> = {
57+
scoped: "限定范围",
58+
comparison: "对比评估",
59+
alternative: "替代推荐",
60+
};
5661

5762
function formatIntentSummary(intent: IntentProfile, noColor: boolean): string {
5863
const colorize = noColor ? new Chalk({ level: 0 }) : chalk;
@@ -92,6 +97,20 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string {
9297
`${colorize.dim("预算倾向")} ${budgetLabel} ${colorize.dim("质量偏好")} ${qualityLabel}`,
9398
);
9499

100+
const preference = intent.modelPreference;
101+
if (preference && preference.mode !== "unconstrained") {
102+
lines.push("");
103+
const modeLabel = PREFERENCE_MODE_LABELS[preference.mode] ?? preference.mode;
104+
const prefParts = [colorize.dim("推荐模式") + ` ${colorize.yellow(modeLabel)}`];
105+
if (preference.targets?.length) {
106+
prefParts.push(colorize.dim("目标") + ` ${preference.targets.join(", ")}`);
107+
}
108+
if (preference.excludes?.length) {
109+
prefParts.push(colorize.dim("排除") + ` ${preference.excludes.join(", ")}`);
110+
}
111+
lines.push(prefParts.join(" "));
112+
}
113+
95114
if (intent.segments?.length) {
96115
lines.push("");
97116
lines.push(colorize.dim("任务拆解"));

packages/cli/tests/e2e/advisor-recommend.e2e.test.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,95 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)",
7676
expect(data.recommendations?.[0]?.model).toBeDefined();
7777
expect(data.recommendations?.[0]?.reason).toBeDefined();
7878
}, 120_000);
79+
80+
// ---- 模型偏好:正例 ----
81+
82+
test("scoped 偏好 — 限定系列时 intent 含 modelPreference.mode=scoped", async () => {
83+
const { stdout, stderr, exitCode } = await runCli([
84+
"advisor",
85+
"recommend",
86+
"--dry-run",
87+
"--message",
88+
"deepseek系列中哪个模型最适合用来进行快速推理",
89+
"--non-interactive",
90+
"--output",
91+
"json",
92+
]);
93+
expect(exitCode, stderr).toBe(0);
94+
const data = parseStdoutJson<{
95+
intent?: { modelPreference?: { mode?: string; targets?: string[] } };
96+
}>(stdout);
97+
expect(data.intent?.modelPreference?.mode).toBe("scoped");
98+
expect(data.intent?.modelPreference?.targets?.length).toBeGreaterThan(0);
99+
expect(
100+
data.intent?.modelPreference?.targets?.some((target) =>
101+
target.toLowerCase().includes("deepseek"),
102+
),
103+
).toBe(true);
104+
}, 60_000);
105+
106+
test("comparison 偏好 — 对比模型时 intent 含 modelPreference.mode=comparison", async () => {
107+
const { stdout, stderr, exitCode } = await runCli([
108+
"advisor",
109+
"recommend",
110+
"--dry-run",
111+
"--message",
112+
"qwen-max和deepseek-v3哪个更适合做代码生成",
113+
"--non-interactive",
114+
"--output",
115+
"json",
116+
]);
117+
expect(exitCode, stderr).toBe(0);
118+
const data = parseStdoutJson<{
119+
intent?: { modelPreference?: { mode?: string; targets?: string[] } };
120+
}>(stdout);
121+
expect(data.intent?.modelPreference?.mode).toBe("comparison");
122+
expect(data.intent?.modelPreference?.targets?.length).toBeGreaterThanOrEqual(2);
123+
}, 60_000);
124+
125+
test("excludes 偏好 — 排除模型时 intent 识别出 modelPreference", async () => {
126+
const { stdout, stderr, exitCode } = await runCli([
127+
"advisor",
128+
"recommend",
129+
"--dry-run",
130+
"--message",
131+
"不要qwen,推荐一个适合文本生成的模型",
132+
"--non-interactive",
133+
"--output",
134+
"json",
135+
]);
136+
expect(exitCode, stderr).toBe(0);
137+
const data = parseStdoutJson<{
138+
intent?: {
139+
modelPreference?: { mode?: string; excludes?: string[]; targets?: string[] };
140+
};
141+
}>(stdout);
142+
const pref = data.intent?.modelPreference;
143+
expect(pref).toBeDefined();
144+
const hasExcludes =
145+
(pref?.excludes?.length ?? 0) > 0 ||
146+
(pref?.mode !== "unconstrained" && pref?.mode !== undefined);
147+
expect(hasExcludes).toBe(true);
148+
}, 60_000);
149+
150+
// ---- 模型偏好:反例 ----
151+
152+
test("无偏好 — 普通需求查询时 intent 不含 modelPreference 或 mode=unconstrained", async () => {
153+
const { stdout, stderr, exitCode } = await runCli([
154+
"advisor",
155+
"recommend",
156+
"--dry-run",
157+
"--message",
158+
"我要做一个能理解图片的客服机器人",
159+
"--non-interactive",
160+
"--output",
161+
"json",
162+
]);
163+
expect(exitCode, stderr).toBe(0);
164+
const data = parseStdoutJson<{
165+
intent?: { modelPreference?: { mode?: string } };
166+
}>(stdout);
167+
const mode = data.intent?.modelPreference?.mode;
168+
expect(mode === undefined || mode === "unconstrained").toBe(true);
169+
}, 60_000);
79170
});

packages/core/src/advisor/constants/prompts.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ export const INTENT_SYSTEM_PROMPT = `你是一个意图分析器。根据用户
4141
用户: "做一个Agent自动根据用户意图生成动画片"
4242
→ budget:"medium", qualityPreference:"balanced"(复杂pipeline,但没明确成本/质量约束)
4343
44+
## 模型偏好识别
45+
分析用户是否提到了特定的模型、模型系列或厂商,据此判断推荐模式:
46+
- 用户未提到任何模型/系列/厂商 → mode:"unconstrained",不填 targets
47+
- 用户限定了范围(如"deepseek系列哪个好"、"通义千问的模型推荐"、"开源的推理模型") → mode:"scoped",targets:["deepseek"] 或 ["通义千问"]
48+
- 用户要对比特定模型(如"wan2.6和wan2.7哪个好"、"qwen-max和deepseek-v3对比"、"qwen-max适合做法律分析吗") → mode:"comparison",targets:["wan2.6","wan2.7"]
49+
- 单模型评估也算 comparison,targets 只填一个
50+
- 用户以某模型为参照找替代(如"有没有类似qwen-max但更便宜的") → mode:"alternative",targets:["qwen-max"]
51+
- 用户明确排除某些模型/系列(如"除了qwen还有什么好的") → excludes:["qwen"],mode 根据其他条件判断
52+
- targets 填写用户原文中的模型/系列名称,保持原文写法
53+
4454
## 输出字段
4555
- taskSummary: 一句话场景理解(必须具体,禁止"用户想用AI做某事"这种废话)
4656
- scenarioHints: 推断的场景特征数组
@@ -60,6 +70,7 @@ export const INTENT_SYSTEM_PROMPT = `你是一个意图分析器。根据用户
6070
- budget: "low"/"medium"/"high"(基于场景推断,不要默认 medium)
6171
- contextNeed: "standard"/"large"/"extra-large"
6272
- qualityPreference: "flagship"/"balanced"/"cost-optimized"(基于场景推断,不要默认 balanced)
73+
- modelPreference: { mode, targets?, excludes? }(见上方"模型偏好识别")
6374
6475
只输出 JSON,不要有其他文字。`;
6576

@@ -141,3 +152,45 @@ export const PIPELINE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推
141152
142153
或者(如果你认为单模型即可):
143154
{"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]}`;
155+
156+
export const COMPARISON_SYSTEM_PROMPT = `你是阿里云百炼平台的模型对比顾问。用户想对比特定模型,请根据使用场景进行对比分析。
157+
158+
## 背景
159+
用户指定了要对比的模型,系统已将这些模型和相关候选预筛选到列表中。
160+
意图分析中的 modelPreference.targets 是用户要对比的模型。
161+
162+
## 对比策略
163+
- 用户指定的模型必须全部出现在推荐结果中,按适合程度排序
164+
- 每个模型的 reason 必须是对比性的,说明该模型相对于其他对比模型的优势和劣势
165+
- 如果候选中有比用户指定的更合适的模型,可以额外推荐,但用户指定的必须优先包含
166+
- 单模型评估场景(targets 只有一个):评估该模型是否适合用户需求,同时推荐更优的替代
167+
168+
## 规则
169+
- 只能推荐候选列表中的模型
170+
- reason 必须包含对比视角:该模型相比其他模型在哪些方面更好/更差
171+
- highlights 突出各模型的差异化特点
172+
- 输出严格 JSON,不要输出其他内容
173+
174+
## 输出格式
175+
{"type":"single","recommendations":[{"model":"模型ID","reason":"对比分析理由","highlights":["差异化亮点"]}]}`;
176+
177+
export const ALTERNATIVE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型替代顾问。用户以某个模型为参照,寻找替代方案。
178+
179+
## 背景
180+
用户以某个模型为参照点,想找到在特定维度上更优的替代方案(如更便宜、更快、更强)。
181+
意图分析中的 modelPreference.targets 是参照模型。
182+
183+
## 替代策略
184+
- 推荐 #1:如果参照模型在候选中,先评估它是否满足用户需求,给出其基本定位
185+
- 推荐 #2~#3:推荐替代方案,reason 必须说明相比参照模型在用户关注维度上的 tradeoff
186+
- 关注用户提到的替代维度(如"更便宜"→重点对比定价,"更强"→重点对比能力)
187+
188+
## 规则
189+
- 只能推荐候选列表中的模型
190+
- 参照模型必须包含在结果中(如果在候选列表中)
191+
- 替代推荐的 reason 必须说明与参照模型的具体差异
192+
- 避免推荐和参照模型同系列的其他版本(除非确实有显著差异)
193+
- 输出严格 JSON,不要输出其他内容
194+
195+
## 输出格式
196+
{"type":"single","recommendations":[{"model":"模型ID","reason":"替代分析理由","highlights":["差异化亮点"]}]}`;

packages/core/src/advisor/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ export type {
1717
IntentSegment,
1818
Modality,
1919
ModelCategory,
20+
ModelPreference,
2021
ModelPrice,
2122
ModelProfile,
2223
PipelineResult,
2324
PipelineStep,
25+
PreferenceMode,
2426
QpmLimit,
2527
QualityPreference,
2628
RecommendedModel,

packages/core/src/advisor/intent.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,19 @@ export async function analyzeIntent(config: Config, input: string): Promise<Inte
3333
if (!jsonMatch) return DEFAULT_INTENT;
3434

3535
const parsed = JSON.parse(jsonMatch[0]);
36+
const VALID_MODES = ["scoped", "comparison", "alternative"] as const;
37+
const rawPref = parsed.modelPreference as Record<string, unknown> | undefined;
38+
const modelPreference =
39+
rawPref && typeof rawPref === "object"
40+
? {
41+
mode: VALID_MODES.includes(rawPref.mode as (typeof VALID_MODES)[number])
42+
? (rawPref.mode as (typeof VALID_MODES)[number])
43+
: ("unconstrained" as const),
44+
targets: Array.isArray(rawPref.targets) ? (rawPref.targets as string[]) : undefined,
45+
excludes: Array.isArray(rawPref.excludes) ? (rawPref.excludes as string[]) : undefined,
46+
}
47+
: undefined;
48+
3649
return {
3750
complexity:
3851
parsed.complexity === Complexities.Pipeline ? Complexities.Pipeline : Complexities.Single,
@@ -58,6 +71,7 @@ export async function analyzeIntent(config: Config, input: string): Promise<Inte
5871
contextNeed: parsed.contextNeed ?? DEFAULT_INTENT.contextNeed,
5972
qualityPreference: parsed.qualityPreference ?? DEFAULT_INTENT.qualityPreference,
6073
confidence: 1,
74+
modelPreference,
6175
};
6276
} catch {
6377
return DEFAULT_INTENT;

0 commit comments

Comments
 (0)