From 68498c61ff443607929b97fd22219ae0a2677482 Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Thu, 25 Jun 2026 19:15:06 +0800 Subject: [PATCH 01/31] fix(model_management): preserve connectivity success when capacity suggestion path raises The connectivity check endpoint /model/temporary_healthcheck runs _capacity_suggestion_for_model_request inline after a successful verify_model_config_connectivity. Per W11 spec ("Suggestion failure never changes connectivity success or failure"), an unexpected error inside the suggestion path must not turn a successful connectivity result into HTTP 500. The prior code caught ValueError (covering the typed InvalidInput case and Pydantic v2 ValidationError, which is a ValueError subclass), but non-ValueError exceptions -- e.g. AttributeError/TypeError from a malformed catalog profile entry, or future V2 provider-discovery HTTP errors -- would propagate to the outer except Exception in check_temporary_model_health and surface to operators as a misleading "Failed to verify model connectivity" 500. Restore the catch-all degrade-to-None branch and log at WARNING (not DEBUG) so the real root cause is visible in default production log streams without DEBUG enabled. Connectivity stays 200 with capacity_suggestion: null; the per-row catalog issue surfaces in logs where operators can act on it. 
Co-Authored-By: Claude Opus 4.7 --- backend/apps/model_managment_app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py index a92937e12..634726d66 100644 --- a/backend/apps/model_managment_app.py +++ b/backend/apps/model_managment_app.py @@ -114,6 +114,9 @@ def _capacity_suggestion_for_model_request(request: ModelRequest): except ValueError as exc: logger.debug("Capacity suggestion unavailable for connectivity request: %s", exc) return None + except Exception as exc: + logger.warning("Capacity suggestion failed during connectivity request: %s", exc) + return None @router.post("/create") From f555fda77d1d4a6dda4d215fae01f673cbe7e7ac Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Thu, 25 Jun 2026 19:15:31 +0800 Subject: [PATCH 02/31] refactor(w11): collapse Add/Edit capacity-suggestion controls The Add dialog had two ways to trigger a catalog suggestion: clicking the bottom connectivity-validation button (which the backend extends with capacity_suggestion in /temporary_healthcheck's response) and a secondary "Check" button beside the toggle that called the standalone /suggest-capacity endpoint. In V1 catalog-only mode the two paths overlap on every realistic add flow -- the user must run connectivity anyway because the Add button is gated on it -- so the standalone button is UX noise without functional value. Collapse Add to a single toggle whose state gates both the embedded suggestion result and the explanatory hint. The Edit dialog keeps its explicit Check button per spec ("show 'Suggestion available' after validation or explicit check") because existing rows may need to refresh a suggestion without re-running connectivity, but the long-form hint sentence is redundant: title + toggle + a button labelled "Check" already names the feature and the action. 
Removing the hint matches the spec's i18n key list, which never listed model.dialog.capacity.suggestion.hint to begin with. Add dialog changes: - Drop checkingCapacitySuggestion state, canSuggestCapacity guard, and handleSuggestCapacity handler. - Drop the secondary Button and its wrapping shrink-0 flex container; the Switch becomes a direct child of the outer justify-between row. - Drop the suggestionLoading prop from ModelCapacityFields entirely. It only controlled the spinner on the "Use suggestion" button inside the suggestion-result panel, which only renders after a suggestion is set -- at which point verifyingConnectivity is already false, so binding it added no observable effect. - Replace the shared "hint" copy with a new key "hintAdd" whose wording reflects the actual trigger ("Suggested from the approved catalog after connectivity passes."), and gate it on capacitySuggestionEnabled so the toggle's off-state no longer contradicts itself with copy that promises automatic behavior. Edit dialog changes: - Remove the hint element
and its wrapping container; the title becomes a direct flex child alongside the Switch+Check controls. i18n: - Drop the obsolete "model.dialog.capacity.suggestion.hint" key from en and zh; add "hintAdd" used only by Add dialog. No backend wire change. Edit dialog still calls /suggest-capacity through its existing Check button for the bare-row repair flow. Co-Authored-By: Claude Opus 4.7 --- .../components/model/ModelAddDialog.tsx | 70 +++---------------- .../components/model/ModelEditDialog.tsx | 9 +-- frontend/public/locales/en/common.json | 2 +- frontend/public/locales/zh/common.json | 2 +- 4 files changed, 14 insertions(+), 69 deletions(-) diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx index a0eeb1bb1..5f3c03535 100644 --- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx @@ -294,8 +294,6 @@ export const ModelAddDialog = ({ const [form, setForm] = useState(DEFAULT_FORM_STATE); const [loading, setLoading] = useState(false); const [verifyingConnectivity, setVerifyingConnectivity] = useState(false); - const [checkingCapacitySuggestion, setCheckingCapacitySuggestion] = - useState(false); const [capacitySuggestionEnabled, setCapacitySuggestionEnabled] = useState(true); const [capacitySuggestion, setCapacitySuggestion] = @@ -504,12 +502,6 @@ export const ModelAddDialog = ({ } }; - const canSuggestCapacity = () => - supportsCapacityFields && - !form.isBatchImport && - form.name.trim() !== "" && - (form.url.trim() !== "" || form.provider.trim() !== ""); - const applyCapacitySuggestion = (suggestion: CapacitySuggestion | null) => { const next = capacityFormFromSuggestion(suggestion); if (!next || Object.keys(next).length === 0) return; @@ -527,37 +519,6 @@ export const ModelAddDialog = ({ setAcceptedCapacitySuggestion(suggestion); }; - const handleSuggestCapacity = async () => { - if 
(!canSuggestCapacity()) { - message.warning(t("model.dialog.capacity.suggestion.missingInput")); - return; - } - setCheckingCapacitySuggestion(true); - try { - const suggestion = await modelService.suggestCapacity({ - modelName: form.name.trim(), - baseUrl: form.url.trim(), - // Only send providerHint when the user actually picked it (batch mode - // exposes the dropdown). In single-add mode the form keeps a hidden - // default ("modelengine") that the user never sees, so forwarding it - // would falsely pin catalog lookup to that provider. - ...(form.isBatchImport ? { providerHint: form.provider } : {}), - apiKey: form.apiKey.trim() || undefined, - modelType: resolveConnectivityModelType(form.type), - }); - setCapacitySuggestion(suggestion); - if (!suggestion.suggestions) { - setAcceptedCapacitySuggestion(null); - } - } catch (error) { - setCapacitySuggestion(null); - setAcceptedCapacitySuggestion(null); - message.error(t("model.dialog.capacity.suggestion.failed")); - } finally { - setCheckingCapacitySuggestion(false); - } - }; - // Verify if the vector dimension is valid const isValidVectorDimension = (value: string): boolean => { const dimension = Number.parseInt(value, 10); @@ -1869,27 +1830,17 @@ export const ModelAddDialog = ({
{t("model.dialog.capacity.suggestion.title")}
-
- {t("model.dialog.capacity.suggestion.hint")} -
-
-
- - + {capacitySuggestionEnabled && ( +
+ {t("model.dialog.capacity.suggestion.hintAdd")} +
+ )}
+ )} applyCapacitySuggestion(capacitySuggestion) } diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx index e086c6d44..b4c2ac936 100644 --- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx @@ -601,13 +601,8 @@ export const ModelEditDialog = ({ {supportsCapacityFields && (
-
-
- {t("model.dialog.capacity.suggestion.title")} -
-
- {t("model.dialog.capacity.suggestion.hint")} -
+
+ {t("model.dialog.capacity.suggestion.title")}
Date: Fri, 26 Jun 2026 09:39:34 +0800 Subject: [PATCH 03/31] feat(w11): backend SLO instrumentation + cross-tenant capacity-coverage test Phase 1.5 backend foundation per W11 spec L706-710 (SLO metrics), L86-89/L944-948 (visibility env flag), and L312-322 (cross-tenant test). No frontend change in this commit; V1.5 surfaces consume these signals in follow-up frontend commits. Metrics (4 instruments, each guarded behind try/except so a missing OpenTelemetry runtime does not break the dispatch path): 1. model_capacity_suggestion_requests_total{match_kind, model_type, provider} -- counter wrapping suggest_capacity. Drives the "70% of new manual-add LLM rows produce match_kind != none" SLO. 2. model_capacity_suggestion_latency_ms{match_kind, provider} -- histogram around the same call. Used to verify V2 provider-discovery p95 stays under the model-add latency budget. 3. model_capacity_suggestion_accept_total{match_kind, provider} -- counter emitted by the app layer when the operator save payload carries accepted_suggestion_match_kind. Denominator for the "95% accepted -> profile dispatch" SLO ratio. 4. model_capacity_suggestion_dispatch_profile_hit_total{provider} -- counter emitted in _resolve_input_budget when the resolved snapshot carries a non-null capability_profile_version. Numerator for the same SLO. Accept signal pipe (audit-only): - consts/model.py: ModelRequest gains accepted_suggestion_match_kind and accepted_capability_profile_version. Both Optional[str], never persisted to model_record_t. - model_management_service.py: pop_capacity_accept_signal strips both fields from save payloads and returns the popped values so the app layer can label the counter. - model_managment_app.py: /create and /update endpoints call pop_capacity_accept_signal before invoking the service, then forward the popped match_kind to _record_capacity_suggestion_accept after the save returns.
The dict the service sees no longer contains these fields, preserving the "audit only -- not persisted" contract. - The V1.5 frontend (next commit) will ship these fields on the wire; until then the counter reads zero, which is the correct baseline. suggest_capacity refactor: - Inner body extracted to _suggest_capacity_inner so the public function can time end-to-end and emit requests_total + latency_ms exactly once per completed call. ValueError paths still raise -- client-shape errors must not pollute SLO ratios so the recorder fires only on terminal CapacitySuggestionResult returns. Visibility env flag (CAPACITY_VISIBILITY_ENABLED): - Already declared in consts/const.py (default true) and consumed by get_capacity_coverage. Confirmed wired end-to-end; no code change needed here. The flag stays the developer-level rollback lever per W11 spec; tenant_config_t overlay remains a follow-up. Cross-tenant isolation test (spec L312-322): - test_get_capacity_coverage_cross_tenant_isolation routes mocked get_model_records by tenant_id and asserts each tenant only sees its own bare rows in both bare_models[] and total_llm_vlm. Closes the spec's required "tenant B row must not appear in tenant A's response" coverage. Test coverage added: - Cross-tenant isolation for /capacity-coverage. - pop_capacity_accept_signal extraction + dict mutation contract. - accept_total OTel-optional no-op + label-cardinality (lower-cased provider) wiring. - suggest_capacity records requests_total + latency_ms on catalog match, on "none" with provider fallback to "unknown", does NOT record on ValueError, and runs cleanly when instruments are None. - _resolve_input_budget records dispatch_profile_hit_total only when capability_profile_version is non-null; recorder no-op when counter is None. Total: 8 files, +527 lines. All targeted unit suites pass (test_model_capacity_suggestion_service 16/16, test_model_management_service 70/70, test_create_agent_info 174/174). 
Co-Authored-By: Claude Opus 4.7 --- backend/agents/create_agent_info.py | 39 +++++ backend/apps/model_managment_app.py | 12 ++ backend/consts/model.py | 6 + .../model_capacity_suggestion_service.py | 98 +++++++++++++ backend/services/model_management_service.py | 59 ++++++++ test/backend/agents/test_create_agent_info.py | 71 ++++++++++ .../test_model_capacity_suggestion_service.py | 108 ++++++++++++++ .../services/test_model_management_service.py | 134 ++++++++++++++++++ 8 files changed, 527 insertions(+) diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index c81306fc9..cf790654d 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -85,6 +85,43 @@ _CAPACITY_WARNING_LOCK = threading.Lock() +# W11 spec line 710: emitted every time _resolve_input_budget resolves a row +# whose dispatch-time capability_profile_version is non-null (i.e. the W1 +# exact catalog lookup succeeded). Combined with +# model_capacity_suggestion_accept_total at save time gives the SLO ratio +# "95% of accepted catalog suggestions produce the expected runtime profile". +# Guarded so a missing OpenTelemetry runtime never breaks agent startup. +try: + from opentelemetry import metrics as _otel_metrics + + _capacity_dispatch_meter = _otel_metrics.get_meter(__name__) + _capacity_dispatch_profile_hit_total = _capacity_dispatch_meter.create_counter( + name="model_capacity_suggestion_dispatch_profile_hit_total", + description=( + "Count of agent dispatches where the resolved W1 capacity " + "snapshot reports a non-null capability_profile_version " + "(i.e. the runtime profile match succeeded). Labelled by " + "provider." 
+ ), + unit="dispatches", + ) +except Exception: # pragma: no cover - OTel is optional at runtime + _capacity_dispatch_profile_hit_total = None + + +def _record_dispatch_profile_hit(provider: Optional[str]) -> None: + """Emit dispatch_profile_hit_total for one successful runtime profile match.""" + if _capacity_dispatch_profile_hit_total is None: + return + try: + _capacity_dispatch_profile_hit_total.add( + 1, + {"provider": (provider or "unknown").lower()}, + ) + except Exception: # pragma: no cover - never break agent run for telemetry + pass + + def _operator_overrides_from_model_info(model_info: Optional[dict]) -> dict: """Extract the W1 operator-override fields from a model_record_t row.""" if not isinstance(model_info, dict): @@ -223,6 +260,8 @@ def _resolve_input_budget( snapshot.capability_profile_version, snapshot.fingerprint, ) + if snapshot.capability_profile_version: + _record_dispatch_profile_hit(provider) return ( snapshot.provider_input_limit_tokens, _capacity_snapshot_for_monitoring(snapshot), diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py index 634726d66..44d6bfba0 100644 --- a/backend/apps/model_managment_app.py +++ b/backend/apps/model_managment_app.py @@ -55,6 +55,8 @@ list_llm_models_for_tenant, list_models_for_admin, get_capacity_coverage, + pop_capacity_accept_signal, + _record_capacity_suggestion_accept, ) from utils.auth_utils import get_current_user_id @@ -136,9 +138,14 @@ async def create_model(request: ModelRequest, authorization: Optional[str] = Hea try: user_id, tenant_id = get_current_user_id(authorization) model_data = request.model_dump() + accept_signal = pop_capacity_accept_signal(model_data) logger.debug( f"Start to create model, user_id: {user_id}, tenant_id: {tenant_id}") await create_model_for_tenant(user_id, tenant_id, model_data) + if accept_signal is not None: + _record_capacity_suggestion_accept( + accept_signal["match_kind"], request.model_factory + ) return 
JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Model created successfully" }) @@ -301,7 +308,12 @@ async def update_single_model( """ try: user_id, tenant_id = get_current_user_id(authorization) + accept_signal = pop_capacity_accept_signal(request) await update_single_model_for_tenant(user_id, tenant_id, display_name, request) + if accept_signal is not None: + _record_capacity_suggestion_accept( + accept_signal["match_kind"], request.get("model_factory") + ) return JSONResponse(status_code=HTTPStatus.OK, content={ "message": "Model updated successfully" }) diff --git a/backend/consts/model.py b/backend/consts/model.py index 39f577a98..4ed9e8981 100644 --- a/backend/consts/model.py +++ b/backend/consts/model.py @@ -146,6 +146,12 @@ class ModelRequest(BaseModel): tokenizer_family: Optional[str] = None capacity_source: Optional[str] = None capability_profile_version: Optional[str] = None + # W11 accept-signal fields (audit/metrics only — never persisted). Sent by + # the frontend when the operator clicks "Use suggestion" and saves; the + # app layer pops them before the dict reaches the service/DB layer and + # forwards them to model_capacity_suggestion_accept_total. + accepted_suggestion_match_kind: Optional[str] = None + accepted_capability_profile_version: Optional[str] = None class CapacitySuggestionFields(BaseModel): diff --git a/backend/services/model_capacity_suggestion_service.py b/backend/services/model_capacity_suggestion_service.py index 723f0fd8e..8fa9b2063 100644 --- a/backend/services/model_capacity_suggestion_service.py +++ b/backend/services/model_capacity_suggestion_service.py @@ -1,10 +1,79 @@ +import logging import re +import time from dataclasses import dataclass from enum import Enum from typing import Any, Mapping, Optional from consts.const import CAPACITY_SUGGESTION_ENABLED +logger = logging.getLogger(__name__) + +# OpenTelemetry instruments for W11 catalog match observability. +# Spec lines 706-708. 
Guarded the same way as the SDK monitor module: if +# OpenTelemetry is not installed (some deployments run without it), the +# instruments are None and the recording becomes a no-op. +try: + from opentelemetry import metrics as _otel_metrics + + _suggestion_meter = _otel_metrics.get_meter(__name__) + _capacity_suggestion_requests_total = _suggestion_meter.create_counter( + name="model_capacity_suggestion_requests_total", + description=( + "Count of capacity-suggestion service invocations, labelled by " + "match_kind, model_type, and inferred provider. Drives the SLO " + "'at least 70% of new manual-add LLM rows produce match_kind " + "!= none' (W11 spec)." + ), + unit="requests", + ) + _capacity_suggestion_latency_ms = _suggestion_meter.create_histogram( + name="model_capacity_suggestion_latency_ms", + description=( + "End-to-end latency of suggest_capacity, labelled by match_kind " + "and provider. Used to verify provider-discovery p95 stays under " + "the model-add latency budget (W11 spec)." + ), + unit="ms", + ) +except Exception: # pragma: no cover - OTel is optional at runtime + _capacity_suggestion_requests_total = None + _capacity_suggestion_latency_ms = None + + +def _record_suggestion_request( + match_kind: str, + provider: Optional[str], + model_type: Optional[str], + duration_ms: float, +) -> None: + """Emit the requests_total counter and latency_ms histogram for one call. + + Recording never raises -- a broken telemetry stack must not break the + suggestion path. 
+ """ + safe_provider = (provider or "unknown").lower() + if _capacity_suggestion_requests_total is not None: + try: + _capacity_suggestion_requests_total.add( + 1, + { + "match_kind": match_kind, + "model_type": (model_type or "unknown").lower(), + "provider": safe_provider, + }, + ) + except Exception: # pragma: no cover + pass + if _capacity_suggestion_latency_ms is not None: + try: + _capacity_suggestion_latency_ms.record( + duration_ms, + {"match_kind": match_kind, "provider": safe_provider}, + ) + except Exception: # pragma: no cover + pass + ProfileKey = tuple[str, str] CapabilityProfileLike = Any @@ -233,6 +302,35 @@ def suggest_capacity( api_key: Optional[str] = None, catalog: Optional[Mapping[ProfileKey, CapabilityProfileLike]] = None, enabled: bool = CAPACITY_SUGGESTION_ENABLED, +) -> CapacitySuggestionResult: + start_perf = time.perf_counter() + result = _suggest_capacity_inner( + model_name=model_name, + base_url=base_url, + provider_hint=provider_hint, + model_type=model_type, + api_key=api_key, + catalog=catalog, + enabled=enabled, + ) + duration_ms = (time.perf_counter() - start_perf) * 1000.0 + _record_suggestion_request( + match_kind=result.match_kind.value, + provider=result.suggested_provider, + model_type=model_type, + duration_ms=duration_ms, + ) + return result + + +def _suggest_capacity_inner( + model_name: str, + base_url: Optional[str], + provider_hint: Optional[str], + model_type: Optional[str], + api_key: Optional[str], + catalog: Optional[Mapping[ProfileKey, CapabilityProfileLike]], + enabled: bool, ) -> CapacitySuggestionResult: del api_key diff --git a/backend/services/model_management_service.py b/backend/services/model_management_service.py index a8f28e133..3a1470801 100644 --- a/backend/services/model_management_service.py +++ b/backend/services/model_management_service.py @@ -72,8 +72,22 @@ ), unit="errors", ) + # W11 spec line 709: emitted when the operator clicks "Use suggestion" and + # saves. 
Combined with model_capacity_suggestion_dispatch_profile_hit_total + # at /agent/run, gives the "95% of accepted catalog suggestions produce + # the expected runtime capability_profile_version" SLO ratio. + _capacity_suggestion_accept_total = _capacity_suggestion_meter.create_counter( + name="model_capacity_suggestion_accept_total", + description=( + "Count of model save events that carried an accepted W11 " + "capacity suggestion, labelled by match_kind and provider. " + "Audit signal only -- not persisted to model_record_t." + ), + unit="accepts", + ) except Exception: # pragma: no cover - OTel is optional at runtime _capacity_suggestion_coverage_errors_total = None + _capacity_suggestion_accept_total = None # Per-process dedup for the warning log emitted when the catalog-matcher @@ -101,6 +115,51 @@ def _record_capacity_coverage_error(model_id: Optional[Any], exc: Exception) -> pass +# Wire-only fields the frontend ships when the operator clicks "Use suggestion" +# and saves. They are audit/metrics input; runtime never reads them. The app +# layer pops them off the request payload via `pop_capacity_accept_signal` so +# the service/DB layer never sees them. +_ACCEPT_SIGNAL_KEYS = ( + "accepted_suggestion_match_kind", + "accepted_capability_profile_version", +) + + +def pop_capacity_accept_signal(payload: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """Strip audit-only accept-signal fields from a save payload and return them. + + Returns the popped values as {'match_kind': ..., 'capability_profile_version': ...} + when match_kind is present, else None. Callers forward the dict to + `_record_capacity_suggestion_accept` once the model_factory is known. 
+ """ + if not isinstance(payload, dict): + return None + popped = {key: payload.pop(key, None) for key in _ACCEPT_SIGNAL_KEYS} + match_kind = popped.get("accepted_suggestion_match_kind") + if not match_kind: + return None + return { + "match_kind": match_kind, + "capability_profile_version": popped.get("accepted_capability_profile_version"), + } + + +def _record_capacity_suggestion_accept(match_kind: str, provider: Optional[str]) -> None: + """Emit the accept_total counter for one operator-accepted suggestion save.""" + if _capacity_suggestion_accept_total is None: + return + try: + _capacity_suggestion_accept_total.add( + 1, + { + "match_kind": match_kind, + "provider": (provider or "unknown").lower(), + }, + ) + except Exception: # pragma: no cover - never break save for telemetry + pass + + def _has_display_name_conflict(existing_models: List[Dict[str, Any]], model_type: Optional[str]) -> bool: """Allow the three multimodal slots to share display names across slots.""" if not existing_models: diff --git a/test/backend/agents/test_create_agent_info.py b/test/backend/agents/test_create_agent_info.py index b3eb54b1b..e2ca3d21b 100644 --- a/test/backend/agents/test_create_agent_info.py +++ b/test/backend/agents/test_create_agent_info.py @@ -5683,3 +5683,74 @@ def test_merge_with_empty_override_params(self): tool_record = {"params": [{"name": "param1", "default": "default1"}]} result = _merge_tool_params(tool_record, {}) assert result == {"param1": "default1"} + + +# --------------------------------------------------------------------------- +# W11 V1.5 - dispatch_profile_hit_total metric wiring +# --------------------------------------------------------------------------- + + +class TestDispatchProfileHitMetric: + """Spec L710: every successful capacity resolve where the resolved snapshot + carries a non-null capability_profile_version increments the dispatch + profile-hit counter. 
Combined with accept_total at save time, this gives + the 95% SLO 'accepted catalog suggestions produce expected runtime + profile' (W11 spec L1162-1163). + """ + + def test_profile_hit_recorded_when_snapshot_carries_profile_version(self): + counter = MagicMock() + snapshot = MockModelCapacitySnapshot( + model_name="gpt-4o", + capability_profile_version="openai/gpt-4o@1", + ) + with patch.object( + create_agent_info_module, + "_capacity_dispatch_profile_hit_total", + counter, + ), patch.object( + create_agent_info_module, + "resolve_capacity", + return_value=snapshot, + ): + create_agent_info_module._resolve_input_budget( + {"model_factory": "openai", "model_name": "gpt-4o"} + ) + + counter.add.assert_called_once_with(1, {"provider": "openai"}) + + def test_profile_hit_not_recorded_without_profile_version(self): + """An operator-configured row (no catalog match) resolves successfully + but `capability_profile_version` stays None. Counter must not fire -- + otherwise the SLO ratio is inflated by non-catalog dispatches. + """ + counter = MagicMock() + snapshot = MockModelCapacitySnapshot( + model_name="custom-local", + capability_profile_version=None, + ) + with patch.object( + create_agent_info_module, + "_capacity_dispatch_profile_hit_total", + counter, + ), patch.object( + create_agent_info_module, + "resolve_capacity", + return_value=snapshot, + ): + create_agent_info_module._resolve_input_budget( + {"model_factory": "custom", "model_name": "custom-local"} + ) + + counter.add.assert_not_called() + + def test_recorder_no_op_when_counter_disabled(self): + """OTel-optional guard: the helper must not raise when the counter + is None so agent dispatch works in deployments without OpenTelemetry. 
+ """ + with patch.object( + create_agent_info_module, + "_capacity_dispatch_profile_hit_total", + None, + ): + create_agent_info_module._record_dispatch_profile_hit("openai") diff --git a/test/backend/services/test_model_capacity_suggestion_service.py b/test/backend/services/test_model_capacity_suggestion_service.py index fc6ffdc67..6845ca5b7 100644 --- a/test/backend/services/test_model_capacity_suggestion_service.py +++ b/test/backend/services/test_model_capacity_suggestion_service.py @@ -7,12 +7,15 @@ if backend_dir not in sys.path: sys.path.append(backend_dir) +from unittest import mock + from services.model_capacity_suggestion_service import ( CapacitySuggestionMatchKind, pick_provider, pick_provider_from_base_url, suggest_capacity, ) +import services.model_capacity_suggestion_service as suggestion_module class Profile: @@ -179,3 +182,108 @@ def test_pick_provider_from_base_url_dashscope_wins_over_aliyuncs(): # Both substrings present; order in HOST_PROVIDER_PATTERNS makes # dashscope win, which is the correct (more-specific) routing. assert pick_provider_from_base_url("https://dashscope.aliyuncs.com/v1") == "dashscope" + + +# --------------------------------------------------------------------------- +# W11 V1.5 - request/latency metrics wiring +# --------------------------------------------------------------------------- + + +def test_suggest_capacity_records_requests_and_latency_on_catalog_match(): + """Spec L706-708: every suggest_capacity invocation records one entry in + requests_total (labelled by match_kind, model_type, provider) and one + sample in latency_ms (labelled by match_kind, provider). A successful + catalog match must fire the recorder exactly once with the right labels. 
+ """ + counter = mock.MagicMock() + histogram = mock.MagicMock() + + with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \ + mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram): + result = suggest_capacity( + model_name="gpt-4o", + base_url="https://api.openai.com/v1", + model_type="llm", + catalog=CATALOG, + ) + + assert result.match_kind == CapacitySuggestionMatchKind.CATALOG_EXACT + counter.add.assert_called_once() + add_args = counter.add.call_args + assert add_args.args[0] == 1 + assert add_args.args[1] == { + "match_kind": "catalog_exact", + "model_type": "llm", + "provider": "openai", + } + histogram.record.assert_called_once() + record_args = histogram.record.call_args + assert record_args.args[0] >= 0 # non-negative duration in ms + assert record_args.args[1] == { + "match_kind": "catalog_exact", + "provider": "openai", + } + + +def test_suggest_capacity_records_none_match_with_unknown_provider_label(): + """When no provider can be inferred the result.suggested_provider is None + and the metric labels fall back to provider='unknown'. Cardinality stays + bounded -- we never emit raw user input as a label. 
+ """ + counter = mock.MagicMock() + histogram = mock.MagicMock() + + with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \ + mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram): + result = suggest_capacity( + model_name="unknown-local-model", + base_url="http://localhost:8000/v1", + model_type="llm", + catalog=CATALOG, + ) + + assert result.match_kind == CapacitySuggestionMatchKind.NONE + assert counter.add.call_args.args[1] == { + "match_kind": "none", + "model_type": "llm", + "provider": "unknown", + } + assert histogram.record.call_args.args[1] == { + "match_kind": "none", + "provider": "unknown", + } + + +def test_suggest_capacity_validation_error_does_not_record(): + """A ValueError (model_name required / too long) is a client-shape error + raised before the matcher runs. It must not increment requests_total -- + that counter is for completed evaluations only, and SLO ratios would + otherwise be skewed by client input mistakes. + """ + counter = mock.MagicMock() + histogram = mock.MagicMock() + + with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \ + mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram), \ + pytest.raises(ValueError): + suggest_capacity(model_name="", catalog=CATALOG) + + counter.add.assert_not_called() + histogram.record.assert_not_called() + + +def test_suggest_capacity_no_op_when_instruments_disabled(): + """Same OTel-optional guard as the other recorders: if the instruments + are None (OTel not installed in this deployment), suggest_capacity still + returns the correct result without raising. 
+ """ + with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", None), \ + mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", None): + result = suggest_capacity( + model_name="gpt-4o", + base_url="https://api.openai.com/v1", + model_type="llm", + catalog=CATALOG, + ) + + assert result.match_kind == CapacitySuggestionMatchKind.CATALOG_EXACT diff --git a/test/backend/services/test_model_management_service.py b/test/backend/services/test_model_management_service.py index 9ea88306a..4a15e987d 100644 --- a/test/backend/services/test_model_management_service.py +++ b/test/backend/services/test_model_management_service.py @@ -2023,3 +2023,137 @@ def test_record_capacity_coverage_error_no_op_when_counter_disabled(): with mock.patch.object(svc, "_capacity_suggestion_coverage_errors_total", None): # Should not raise. svc._record_capacity_coverage_error(7, RuntimeError("boom")) + + +# --------------------------------------------------------------------------- +# W11 V1.5 - cross-tenant isolation and accept-signal metrics +# --------------------------------------------------------------------------- + + +def test_get_capacity_coverage_cross_tenant_isolation(): + """Spec L312-322: a bare row in tenant B must not appear in tenant A's + response. The service layer relies on `get_model_records(None, tenant_id)` + for the scoping; this test verifies the contract by routing records by + tenant_id at the mock boundary and asserting both tenants see only their + own bare rows. 
+ """ + svc = import_svc() + + tenant_a_rows = [ + { + "model_id": 11, + "model_repo": "", + "model_name": "tenant-a-bare", + "model_factory": "OpenAI-API-Compatible", + "model_type": "llm", + "context_window_tokens": None, + "max_output_tokens": None, + "max_tokens": 8192, + "base_url": "https://api.tenant-a.example.com/v1", + }, + ] + tenant_b_rows = [ + { + "model_id": 22, + "model_repo": "", + "model_name": "tenant-b-bare", + "model_factory": "OpenAI-API-Compatible", + "model_type": "llm", + "context_window_tokens": None, + "max_output_tokens": None, + "max_tokens": 16384, + "base_url": "https://api.tenant-b.example.com/v1", + }, + ] + + def get_records_by_tenant(_filters, tenant_id): + if tenant_id == "tenant-a": + return list(tenant_a_rows) + if tenant_id == "tenant-b": + return list(tenant_b_rows) + return [] + + with mock.patch.object(svc, "get_model_records", side_effect=get_records_by_tenant), \ + mock.patch.object(svc, "_capacity_suggestion_available", return_value=False): + result_a = svc.get_capacity_coverage("tenant-a") + result_b = svc.get_capacity_coverage("tenant-b") + + assert [m["model_id"] for m in result_a["bare_models"]] == [11] + assert [m["model_id"] for m in result_b["bare_models"]] == [22] + # Neither tenant must see the other's model_id anywhere in its payload. + assert all(m["model_id"] != 22 for m in result_a["bare_models"]) + assert all(m["model_id"] != 11 for m in result_b["bare_models"]) + assert result_a["total_llm_vlm"] == 1 + assert result_b["total_llm_vlm"] == 1 + + +def test_pop_capacity_accept_signal_extracts_and_strips(): + """The frontend ships accepted_suggestion_match_kind / + accepted_capability_profile_version on save. Spec L500-502 marks them + audit-only; the app layer must strip them before the dict reaches the + DB write, and return the popped values so the recorder can label the + counter. 
+ """ + svc = import_svc() + + payload = { + "model_name": "gpt-4o", + "model_factory": "openai", + "context_window_tokens": 128000, + "max_output_tokens": 16384, + "capacity_source": "operator", + "accepted_suggestion_match_kind": "catalog_exact", + "accepted_capability_profile_version": "openai/gpt-4o@1", + } + + signal = svc.pop_capacity_accept_signal(payload) + + assert signal == { + "match_kind": "catalog_exact", + "capability_profile_version": "openai/gpt-4o@1", + } + # Audit fields must not leak through to DB write. + assert "accepted_suggestion_match_kind" not in payload + assert "accepted_capability_profile_version" not in payload + # Real model fields are untouched. + assert payload["model_name"] == "gpt-4o" + assert payload["context_window_tokens"] == 128000 + + +def test_pop_capacity_accept_signal_returns_none_without_match_kind(): + svc = import_svc() + + # Plain save: no accept fields at all. + assert svc.pop_capacity_accept_signal({"model_name": "x"}) is None + + # match_kind missing but version present -> still treated as "no accept" + # since match_kind is the metric-label key and version alone is meaningless. + only_version = {"accepted_capability_profile_version": "x/y@1"} + assert svc.pop_capacity_accept_signal(only_version) is None + # The orphan version field is still stripped so it cannot reach the DB. + assert "accepted_capability_profile_version" not in only_version + + +def test_record_capacity_suggestion_accept_no_op_when_counter_disabled(): + """Same OTel-optional guard as the coverage-errors recorder.""" + svc = import_svc() + + with mock.patch.object(svc, "_capacity_suggestion_accept_total", None): + # Should not raise. + svc._record_capacity_suggestion_accept("catalog_exact", "openai") + + +def test_record_capacity_suggestion_accept_labels_counter(): + """When the counter is wired, the recorder forwards match_kind and a + lower-cased provider label so dashboards can compute per-provider + accept rates without inconsistent casing. 
+ """ + svc = import_svc() + counter = mock.MagicMock() + + with mock.patch.object(svc, "_capacity_suggestion_accept_total", counter): + svc._record_capacity_suggestion_accept("catalog_fuzzy", "DashScope") + + counter.add.assert_called_once_with( + 1, {"match_kind": "catalog_fuzzy", "provider": "dashscope"} + ) From e442a551506e6cdd8ee772b102a1b738a4b8e5ee Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Fri, 26 Jun 2026 10:04:20 +0800 Subject: [PATCH 04/31] feat(w11): V1.5 bare-capacity tag + preset selector + permission helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark bare-capacity LLM/VLM rows in the Manage Models list with the existing yellow "缺容量" / "Missing capacity" tag. Keep the aggregation banner on the Models page as the entry-point signal, but rewrite its copy to hand off to the per-row tag instead of duplicating per-row UI. Auto-fire /suggest-capacity from inside ModelEditDialog whenever it opens on a bare-capacity row, regardless of how the dialog was opened. Expose preset selectors on the capacity panel and ship the model-management permission helper for V1.5 surfaces #2/#3. Per spec line numbers cross-referenced inline: #1 -- per-row tag as visual indicator (spec L143-167): - Both badge sites in ModelDeleteDialog (provider-browser row L1507+ and added-model row L1652+) retain the existing yellow text tag (bg-yellow-100 border-yellow-200 text-yellow-700). We considered a warning-triangle icon and a separate click-target on the badge, then rolled both back: "缺容量"/"Missing capacity" reads as a status at the same glance an icon would, while the existing row onClick already opens the edit dialog -- so a button on the badge added complexity that ModelEditDialog now subsumes internally. 
- ModelEditDialog derives `isBareCapacityModel` from the loaded model (context_window_tokens or max_output_tokens null) and a single useEffect auto-fires handleSuggestCapacity once on open when the model is bare, the suggestion switch is on, and the form fields needed for the call are present. Any entry path -- row click, future gear-icon shortcut, deep link -- gets the same affordance, so the operator never has to also click "Check" on a bare row. - The deprecated model.dialog.capacityCoverage.{tag, warning, warningWithSuggestion} keys are dropped from en + zh in favour of a single spec-namespaced model.list.capacityWarning.tag key. No per-suggestion variants because the tag is purely a state label; the suggestion handoff happens inside the edit dialog where the green/info Alert carries that nuance instead. #5 -- aggregation banner kept as entry-point signal, copy retuned: - The summary Alert on the Models page (modelConfig.tsx) stays -- per-row tags live inside ModelDeleteDialog which is one click away. Without the banner, users on the Models page have no signal that any row needs attention. - Description copy rewritten so the banner points at the new per-row flow: "Click Manage, then click the warning icon on each affected row to repair." Removes the redundant "edit a marked model" wording. - Warning copy adds an "output token cap is not enforced" clause so the consequence (not just the symptom) is visible at a glance. #4 -- permission helper (spec L167-178): - frontend/lib/auth.ts gains canManageModels(role, isSpeedMode). Allowed roles: SU, ADMIN, DEV, SPEED. USER is excluded so regular agent authors see read-only notices rather than dead repair links. ASSET_OWNER is excluded -- model records are tenant scope, not asset-admin scope. Speed mode bypasses for the single-user dev experience, mirroring how other surfaces (chatHeader, etc.) treat it. 
- The banner and tag in this commit both live on /models which is already route-gated to non-USER roles, so no in-place gate is needed yet. The helper exists so the V1.5 agent-edit-selector commit (#2) and the dashboard widget commit (#3) consume the same primitive instead of reinventing role parsing. #8 -- preset selectors for context_window / output_reserve / max_output (spec L757-790): - ModelCapacityFields.tsx gains two preset arrays mirroring spec L767-790 verbatim (9 context-window values 4K..1M, 7 output values 256..16K). The context-window list is identical to MAX_TOKEN_OPTIONS in ModelMaxTokensInput; kept as a local constant rather than cross-importing so the two surfaces stay independently editable. - renderNumberInput gains an optional `presetOptions` parameter. When the field has no catalog suggestion yet (per spec L762-765 "when no suggestion exists ... render as preset-capable selector"), the input renders as AutoComplete with the preset list; otherwise it stays a plain numeric Input so an explicit catalog value doesn't get visually buried behind dropdown chrome. - Wired for contextWindowTokens, maxOutputTokens, and defaultOutputReserveTokens. maxOutputTokens reuses the 256..16K list so operators see the same dropdown choices they already see for the reserve field; values above 16K (e.g. GPT-4.1's 32K cap, GLM-5.1's 131K cap) still work via free-text typing through AutoComplete. maxInputTokens keeps plain numeric input -- it is an explicit operator-side limit with no common preset values. - validateCapacityForm continues to enforce positive integers downstream. i18n delta summary: - DROPPED: model.dialog.capacityCoverage.tag, model.dialog.capacityCoverage.warning, model.dialog.capacityCoverage.warningWithSuggestion - ADDED: model.list.capacityWarning.tag (single state label, no tooltip variants) - REVISED (kept): modelConfig.capacityCoverage.warning + description with new entry-point copy; .manage button label unchanged. Net: 6 files, +192/-56.
Typecheck clean (only pre-existing .next/types/validator.ts noise from the unrelated left-nav rename). No backend wire change. Co-Authored-By: Claude Opus 4.7 --- .../components/model/ModelCapacityFields.tsx | 104 +++++++++++++++--- .../components/model/ModelDeleteDialog.tsx | 34 ++---- .../components/model/ModelEditDialog.tsx | 60 +++++++++- frontend/lib/auth.ts | 30 +++++ frontend/public/locales/en/common.json | 10 +- frontend/public/locales/zh/common.json | 10 +- 6 files changed, 192 insertions(+), 56 deletions(-) diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx index efe4c8e4a..edfbea597 100644 --- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx +++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx @@ -1,8 +1,41 @@ -import { Alert, Button, Input, Tag, Tooltip } from "antd"; +import { Alert, AutoComplete, Button, Input, Tag, Tooltip } from "antd"; import { useTranslation } from "react-i18next"; import type { CapacitySuggestion } from "@/types/modelConfig"; +// W11 spec L767-790. Common token-count presets surfaced as a fallback +// preset selector when no catalog suggestion populates the field. The +// values mirror MAX_TOKEN_OPTIONS in ModelMaxTokensInput so the two +// surfaces (legacy max_tokens batch input and capacity panel) offer +// the same dropdown choices. Operators can still type a custom value; +// AutoComplete accepts free numeric input. 
+const CONTEXT_WINDOW_PRESET_OPTIONS = [ + { value: "4096", label: "4K / 4,096" }, + { value: "8192", label: "8K / 8,192" }, + { value: "16384", label: "16K / 16,384" }, + { value: "32768", label: "32K / 32,768" }, + { value: "65536", label: "64K / 65,536" }, + { value: "131072", label: "128K / 131,072" }, + { value: "204800", label: "200K / 204,800" }, + { value: "262144", label: "256K / 262,144" }, + { value: "1048576", label: "1M / 1,048,576" }, +]; + +// Shared by both default_output_reserve_tokens and max_output_tokens. The +// reserve list maps to spec L782-790 verbatim; reusing it for max_output +// gives operators the same dropdown choices they already see for the +// reserve field. Values above 16K (e.g. GPT-4.1's 32K cap, GLM-5.1's +// 131K cap) still work via free-text typing through AutoComplete. +const OUTPUT_RESERVE_PRESET_OPTIONS = [ + { value: "256", label: "256" }, + { value: "512", label: "512" }, + { value: "1024", label: "1K / 1,024" }, + { value: "2048", label: "2K / 2,048" }, + { value: "4096", label: "4K / 4,096" }, + { value: "8192", label: "8K / 8,192" }, + { value: "16384", label: "16K / 16,384" }, +]; + export type CapacitySource = | "operator" | "profile" @@ -276,18 +309,45 @@ export const ModelCapacityFields = ({ } : {}; + // Per W11 spec L762-765, the context-window and output-reserve fields + // expose a preset selector when no catalog suggestion is available. The + // suggestion-set check is per-field: if the suggestion populated this + // exact field, plain numeric input avoids burying the suggested value + // behind dropdown chrome. Otherwise show the preset list to help + // operators avoid typos like "1280000" instead of "128000". + const suggestionFields = suggestion?.suggestions ?? 
null; + const fieldHasSuggestion = ( + field: keyof ModelCapacityFormState + ): boolean => { + if (!suggestionFields) return false; + const suggested = (suggestionFields as Record)[field]; + return suggested != null && suggested !== ""; + }; + const renderNumberInput = ( field: keyof ModelCapacityFormState, labelKey: string, - tooltipKey: string - ) => ( -
- + tooltipKey: string, + presetOptions?: { value: string; label: string }[] + ) => { + const showPreset = presetOptions && !fieldHasSuggestion(field); + const inputControl = showPreset ? ( + onChange(field, String(next ?? ""))} + filterOption={(input, option) => + String(option?.label ?? "") + .toLowerCase() + .includes(input.toLowerCase()) || + String(option?.value ?? "").includes(input) + } + > + + + ) : ( onChange(field, event.target.value)} /> -
- ); + ); + return ( +
+ + {inputControl} +
+ ); + }; const content = (
@@ -421,7 +492,8 @@ export const ModelCapacityFields = ({ {renderNumberInput( "contextWindowTokens", "model.dialog.capacity.contextWindowTokens", - "model.dialog.capacity.contextWindowTokens.tooltip" + "model.dialog.capacity.contextWindowTokens.tooltip", + CONTEXT_WINDOW_PRESET_OPTIONS )} {renderNumberInput( "maxInputTokens", @@ -431,7 +503,8 @@ export const ModelCapacityFields = ({ {renderNumberInput( "maxOutputTokens", "model.dialog.capacity.maxOutputTokens", - "model.dialog.capacity.maxOutputTokens.tooltip" + "model.dialog.capacity.maxOutputTokens.tooltip", + OUTPUT_RESERVE_PRESET_OPTIONS )} {/* defaultOutputReserveTokens is rendered in both add and edit modes so newly added rows do not silently fall back to the SDK default at @@ -440,7 +513,8 @@ export const ModelCapacityFields = ({ {renderNumberInput( "defaultOutputReserveTokens", "model.dialog.capacity.defaultOutputReserveTokens", - "model.dialog.capacity.defaultOutputReserveTokens.tooltip" + "model.dialog.capacity.defaultOutputReserveTokens.tooltip", + OUTPUT_RESERVE_PRESET_OPTIONS )}
diff --git a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx index 48d54086c..3722fbe49 100644 --- a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx @@ -1504,20 +1504,10 @@ export const ModelDeleteDialog = ({ {String(providerModel.model_tag)} )} - {isBareCapacity && ( - - - {t("model.dialog.capacityCoverage.tag")} - - + {isBareCapacity && existingModel && ( + + {t("model.list.capacityWarning.tag")} + )}
@@ -1650,19 +1640,9 @@ export const ModelDeleteDialog = ({ {model.displayName || model.name} ({model.name})
{isBareCapacity && ( - - - {t("model.dialog.capacityCoverage.tag")} - - + + {t("model.list.capacityWarning.tag")} + )}
)} {/* Edit model dialog */} + {/* key forces full unmount/remount when model changes, preventing + stale capacitySuggestion state from flashing on the first render + before the [model] effect clears it. Without key, the component + returns null (line 559) but never unmounts, so useState keeps + the previous model's suggestion alive for one render cycle. */} setEditModel(null)} diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx index 3cdba0521..c887259e1 100644 --- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx @@ -91,28 +91,22 @@ export const ModelEditDialog = ({ message: "", }); - // Monotonic request token for /suggest-capacity. Incremented on every - // new call, dialog close, and model change; the async handler compares - // its captured token against the current ref before committing - // setState, so a stale qwen-for-row-A response cannot win over a fresh - // glm-for-row-B response when the user cancels A and immediately edits - // B (the original bug -- previous request was racing the new one and - // sometimes overwriting it after navigation). + // Monotonic request token for /suggest-capacity. Used by manual Check + // clicks: when the operator clicks twice quickly with different inputs, + // the slower response must not overwrite the faster newer one. The + // navigation race (open A, cancel, open B) is handled by the + // key-based remount on the parent (ModelDeleteDialog), so we no longer + // need a separate "reset on close" effect here. const suggestionRequestRef = useRef(0); - // Reset capacity-related state every time the dialog closes. 
Without - // this, the next open render briefly shows the previous model's - // suggestion before the [model] effect overwrites it, and a slow - // in-flight response from the previous model can also overwrite the - // fresh model's correct result. The ref bump tells any pending - // handleSuggestCapacity to drop its response. - useEffect(() => { - if (isOpen) return; - suggestionRequestRef.current += 1; - setCapacitySuggestion(null); - setAcceptedCapacitySuggestion(null); - setCheckingCapacitySuggestion(false); - }, [isOpen]); + // Auto-suggest fires at most once per dialog instance. With the parent's + // key remount, "per instance" == "per model", which is the desired + // semantic. The fired-once guard is needed because the auto-suggest + // effect depends on `form.name` and `form.url`, which change as the + // [model] effect populates the form on first mount AND every time the + // operator types in those inputs -- only the populate transition + // should trigger an API call. + const autoSuggestFiredRef = useRef(false); useEffect(() => { if (model) { @@ -248,19 +242,36 @@ export const ModelEditDialog = ({ // click, badge click, future gear-icon shortcut) gets the same // affordance. No-op if the model already has capacity, the suggestion // switch is off, or required form fields are missing at open time. + // + // form.name and form.url are in the dependency list because the + // [model] effect above populates them asynchronously after this + // component mounts. With the parent's key remount, the first render + // here has form.name == "" / form.url == "", so canSuggestCapacity() + // is false and we cannot fire yet. The [model] effect's setForm + // then re-renders with populated values, this effect re-runs, and + // canSuggestCapacity() finally returns true. The autoSuggestFiredRef + // guards against re-firing later when the operator types into name + // or url -- only the populate transition should kick off auto-suggest. 
const isBareCapacityModel = Boolean( model && supportsCapacityFields && (!model.contextWindowTokens || !model.maxOutputTokens) ); useEffect(() => { + if (autoSuggestFiredRef.current) return; if (!isOpen || !isBareCapacityModel) return; if (!capacitySuggestionEnabled) return; if (!canSuggestCapacity()) return; + autoSuggestFiredRef.current = true; handleSuggestCapacity(); - // Fire once per open; do not re-fire on re-render. // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isOpen, isBareCapacityModel]); + }, [ + isOpen, + isBareCapacityModel, + capacitySuggestionEnabled, + form.name, + form.url, + ]); const isFormValid = () => { if ( From 00c8c62dd6a2d65c5e7fa92d32ba4e2ac1e31ccf Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Fri, 26 Jun 2026 15:52:38 +0800 Subject: [PATCH 06/31] feat(w11): V1.5 bare-capacity surfaces + dual legacy hint + accept-signal SLO wiring Closes Week N+2/N+3 punch list for W11 V1.5. UI surfaces (#2 + #3): - Agent-edit model selector: bare-capacity subtitle on dropdown items and a non-blocking form Alert above Save when a bare model is picked. Admin/dev/su/speed see "fix in Model Management", others see "ask administrator". Permission gate via canManageModels(). - ModelCapacityCoverageWidget renders at top of resource-manage Models tab; hides on bare_count=0 or non-admin. Shared useCapacityCoverage hook backs both the widget and the agent-edit selector. Legacy max_tokens hint (#7): - Dual-target buttons (Fill into Context Window / Fill into Max Output) with heuristic ordering: values >= 16384 lead with Context Window, values < 16384 lead with Max Output. Each button hides once its target field is filled; the alert hides once both are filled. Old single-button "Apply as max_output_tokens" was reversed semantically: legacy max_tokens columns from the pre-W1 era were more often the provider context window, but at small values they really were the output cap -- the operator picks. 
Constructor audit (#16): - test_model_consts pins ModelRequest and ModelCapacitySuggestionResponse field sets so a silent rename trips a test. - test_prepare_model_dict_persists_operator_capacity now pins all 7 capacity fields + canonical model_factory/model_name in the ModelRequest constructor kwargs. SLO data flow fix: - Frontend was never sending the W11 accept signal, so model_capacity_suggestion_accept_total stayed at zero and the "95% accepted suggestions hit profile" SLO could not be computed. buildCapacityRequestBody now threads acceptedSuggestionMatchKind + acceptedCapabilityProfileVersion; ModelAddDialog and ModelEditDialog include them in save payloads when the operator clicked "Use suggestion". - Two new app-layer integration tests pin: (1) accept signal present -> recorder fires with correct labels and audit fields are stripped from the service-layer payload; (2) plain save -> recorder does not fire (so accept_total stays aligned with dispatch_profile_hit_total as the SLO denominator). i18n: full spec keyset present in both en/zh (model.list.capacityWarning.*, agent.modelSelector.bareCapacity.*, dashboard.capacityCoverage.*, model.dialog.capacity.suggestion.*, model.dialog.capacity.preset.*, model.dialog.capacity.legacyMaxTokens.*). 
Co-Authored-By: Claude Opus 4.7 --- .../agentInfo/AgentGenerateDetail.tsx | 102 +++++++++++++++--- .../components/model/ModelAddDialog.tsx | 18 ++++ .../components/model/ModelCapacityFields.tsx | 81 ++++++++++---- .../components/model/ModelEditDialog.tsx | 49 +++++++-- .../resources/ModelCapacityCoverageWidget.tsx | 66 ++++++++++++ .../components/resources/ModelList.tsx | 2 + frontend/hooks/model/useCapacityCoverage.ts | 52 +++++++++ frontend/public/locales/en/common.json | 24 ++++- frontend/public/locales/zh/common.json | 24 ++++- frontend/services/modelService.ts | 22 ++++ test/backend/app/test_model_managment_app.py | 60 +++++++++++ .../services/test_model_provider_service.py | 13 ++- test/backend/test_model_consts.py | 50 +++++++++ 13 files changed, 511 insertions(+), 52 deletions(-) create mode 100644 frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx create mode 100644 frontend/hooks/model/useCapacityCoverage.ts diff --git a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx index e07204cab..cecc02d2b 100644 --- a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx +++ b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx @@ -33,6 +33,8 @@ import { useAgentGeneration } from "@/hooks/agent/useAgentGeneration"; import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; import { useDeployment } from "@/components/providers/deploymentProvider"; import { useModelList } from "@/hooks/model/useModelList"; +import { useCapacityCoverage } from "@/hooks/model/useCapacityCoverage"; +import { canManageModels } from "@/lib/auth"; import { useConfig } from "@/hooks/useConfig"; import { useGroupList, useGroupDetails } from "@/hooks/group/useGroupList"; import { usePromptTemplateList } from "@/hooks/agent/usePromptTemplateList"; @@ -71,6 +73,8 @@ export default 
function AgentGenerateDetail({}) { const { defaultLlmModelConfig } = useConfig(); const { availableLlmModels, models, isLoading: loadingModels } = useModelList(); + const { bareModelIds: bareCapacityModelIds } = useCapacityCoverage(); + const userCanManageModels = canManageModels(user?.role, isSpeedMode); const { templates: promptTemplates, isLoading: loadingPromptTemplates, @@ -537,11 +541,52 @@ export default function AgentGenerateDetail({}) { }; // Select options for available models - const modelSelectOptions = availableLlmModels.map((model) => ({ - value: model.displayName || model.name, - label: model.displayName || model.name, - disabled: model.connect_status !== "available", - })); + // Bare-capacity rows (`context_window_tokens IS NULL OR max_output_tokens IS + // NULL`) stay selectable per W11 spec; the warning is the inline subtitle + // and the non-blocking form notice below. + const modelSelectOptions = availableLlmModels.map((model) => { + const isBare = bareCapacityModelIds.has(model.id); + const displayLabel = model.displayName || model.name; + return { + value: displayLabel, + label: isBare ? 
( + + {displayLabel} + + {t("agent.modelSelector.bareCapacity.subtitle")} + + + ) : ( + displayLabel + ), + disabled: model.connect_status !== "available", + }; + }); + + const isSelectedMainModelBare = Boolean( + selectedMainAgentModel && bareCapacityModelIds.has(selectedMainAgentModel.id) + ); + + const selectedBusinessLogicModel = useMemo(() => { + const businessName = + form.getFieldValue("businessLogicModelName") || + editedAgent.business_logic_model_name || + ""; + if (!businessName) return undefined; + return availableLlmModels.find( + (m) => m.displayName === businessName || m.name === businessName + ); + }, [ + availableLlmModels, + editedAgent.business_logic_model_name, + form, + forceRefreshKey, + ]); + + const isSelectedBusinessLogicModelBare = Boolean( + selectedBusinessLogicModel && + bareCapacityModelIds.has(selectedBusinessLogicModel.id) + ); const promptTemplateSelectOptions = useMemo(() => { const options = promptTemplates.map((template) => ({ @@ -688,6 +733,23 @@ export default function AgentGenerateDetail({}) { + {(isSelectedMainModelBare || isSelectedBusinessLogicModelBare) && ( + + )} @@ -879,15 +941,27 @@ export default function AgentGenerateDetail({}) { }); }} > - {availableLlmModels.map((model) => ( - - {model.displayName} - - ))} + {availableLlmModels.map((model) => { + const isBare = bareCapacityModelIds.has(model.id); + return ( + + {isBare ? 
( + + {model.displayName} + + {t("agent.modelSelector.bareCapacity.subtitle")} + + + ) : ( + model.displayName + )} + + ); + })} diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx index 5f3c03535..9473bf6f6 100644 --- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx @@ -1145,6 +1145,22 @@ export const ModelAddDialog = ({ maxTokensValue = 0; } + // W11 accept-signal: pop the audit fields from acceptedCapacitySuggestion + // so the app layer can label model_capacity_suggestion_accept_total. + // Emitting once per save keeps the counter aligned with the + // dispatch_profile_hit_total denominator (spec L709-710). + const acceptSignalKwargs = acceptedCapacitySuggestion + ? { + acceptedSuggestionMatchKind: acceptedCapacitySuggestion.matchKind, + ...(acceptedCapacitySuggestion.capabilityProfileVersion + ? { + acceptedCapabilityProfileVersion: + acceptedCapacitySuggestion.capabilityProfileVersion, + } + : {}), + } + : {}; + // Add to the backend service - use manage interface if tenantId is provided if (tenantId) { const modelParams: any = { @@ -1157,6 +1173,7 @@ export const ModelAddDialog = ({ displayName: form.displayName || form.name, modelFactory: form.provider, ...(supportsCapacityFields ? buildCapacityPayload(form) : {}), + ...acceptSignalKwargs, }; // Add STT specific fields @@ -1199,6 +1216,7 @@ export const ModelAddDialog = ({ displayName: form.displayName || form.name, modelFactory: form.provider, ...(supportsCapacityFields ? 
buildCapacityPayload(form) : {}), + ...acceptSignalKwargs, }; // Add STT specific fields diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx index edfbea597..712ab62fd 100644 --- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx +++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx @@ -1,4 +1,4 @@ -import { Alert, AutoComplete, Button, Input, Tag, Tooltip } from "antd"; +import { Alert, AutoComplete, Button, Input, Space, Tag, Tooltip } from "antd"; import { useTranslation } from "react-i18next"; import type { CapacitySuggestion } from "@/types/modelConfig"; @@ -281,13 +281,30 @@ export const ModelCapacityFields = ({ }: ModelCapacityFieldsProps) => { const { t } = useTranslation(); - // Show the actionable legacy-value prompt only while the input is still - // empty -- once the user applies (or types their own value), the prompt - // disappears so we don't keep nagging. + // Legacy max_tokens can mean either thing -- before W1 split capacity, + // operators sometimes typed the provider context window there + // (128000, 32768, ...) and sometimes the per-call output cap (4096, + // 8192, ...). We can't tell from the value alone, so we surface both + // target fields and let the operator pick. The button order is the + // only heuristic: values >= LEGACY_CONTEXT_WINDOW_THRESHOLD are + // more likely to be context windows (output caps that large are + // uncommon, though a few models do allow 32K+ output), so the "Apply as Context Window" button leads; + // below the threshold the "Apply as Max Output" button leads. + // + // Each button is independently gated by its target field being empty + // -- once the operator commits a value to that column we stop nagging + // about it. When both fields are filled the whole alert hides.
+ const LEGACY_CONTEXT_WINDOW_THRESHOLD = 16_384; + const legacyValuePositive = + legacyMaxTokensCandidate !== undefined && legacyMaxTokensCandidate > 0; + const canApplyAsContextWindow = + legacyValuePositive && value.contextWindowTokens.trim() === ""; + const canApplyAsMaxOutput = + legacyValuePositive && value.maxOutputTokens.trim() === ""; const showLegacyMaxTokensPrompt = - legacyMaxTokensCandidate !== undefined && - legacyMaxTokensCandidate > 0 && - value.maxOutputTokens.trim() === ""; + canApplyAsContextWindow || canApplyAsMaxOutput; + const contextWindowIsRecommended = + (legacyMaxTokensCandidate ?? 0) >= LEGACY_CONTEXT_WINDOW_THRESHOLD; const source = capacitySource || ""; const sourceColor = SOURCE_COLORS[source] || "default"; @@ -392,25 +409,43 @@ export const ModelCapacityFields = ({ - onChange( - "maxOutputTokens", - String(legacyMaxTokensCandidate) - ) - } - > - {t("model.dialog.capacity.legacyMaxTokens.apply", { - defaultValue: "Apply", + + {(contextWindowIsRecommended + ? ["context", "output"] + : ["output", "context"] + ).map((target, idx) => { + if (target === "context" && !canApplyAsContextWindow) { + return null; + } + if (target === "output" && !canApplyAsMaxOutput) { + return null; + } + const labelKey = + target === "context" + ? "model.dialog.capacity.legacyMaxTokens.applyAsContext" + : "model.dialog.capacity.legacyMaxTokens.applyAsOutput"; + const fieldName = + target === "context" + ? "contextWindowTokens" + : "maxOutputTokens"; + return ( + + ); })} - + } /> ) : showDeprecatedMaxTokensWarning ? ( diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx index c887259e1..e2bd5b1e2 100644 --- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx @@ -459,6 +459,18 @@ export const ModelEditDialog = ({ : undefined : undefined, ...(supportsCapacityFields ? 
buildCapacityPayload(form) : {}), + ...(acceptedCapacitySuggestion + ? { + acceptedSuggestionMatchKind: + acceptedCapacitySuggestion.matchKind, + ...(acceptedCapacitySuggestion.capabilityProfileVersion + ? { + acceptedCapabilityProfileVersion: + acceptedCapacitySuggestion.capabilityProfileVersion, + } + : {}), + } + : {}), }); } else { await modelService.updateSingleModel({ @@ -504,6 +516,18 @@ export const ModelEditDialog = ({ } : {}), ...(supportsCapacityFields ? buildCapacityPayload(form) : {}), + ...(acceptedCapacitySuggestion + ? { + acceptedSuggestionMatchKind: + acceptedCapacitySuggestion.matchKind, + ...(acceptedCapacitySuggestion.capabilityProfileVersion + ? { + acceptedCapabilityProfileVersion: + acceptedCapacitySuggestion.capabilityProfileVersion, + } + : {}), + } + : {}), }); } @@ -701,17 +725,22 @@ export const ModelEditDialog = ({ applyCapacitySuggestion(capacitySuggestion) } // Legacy max_tokens is now surfaced via the actionable - // legacyMaxTokensCandidate prompt (no more silent promote in - // capacityFormFromModel). Keep the plain deprecation banner - // fallback for the rare case where the record has neither - // column populated, so users still see the migration nudge. + // legacyMaxTokensCandidate prompt with two-target buttons + // (Context Window vs Max Output). The prompt is offered while + // EITHER target field is still empty -- ModelCapacityFields + // hides individual buttons once that column is filled, and the + // whole alert disappears once both are filled. The plain + // deprecation banner only kicks in if both targets are filled + // but the legacy column still has a value. showDeprecatedMaxTokensWarning={ Boolean(model.maxTokens) && - !model.maxOutputTokens && - !form.maxOutputTokens + Boolean(model.contextWindowTokens || form.contextWindowTokens) && + Boolean(model.maxOutputTokens || form.maxOutputTokens) } legacyMaxTokensCandidate={ - model.maxOutputTokens ? 
undefined : model.maxTokens + model.contextWindowTokens && model.maxOutputTokens + ? undefined + : model.maxTokens } />
@@ -1089,11 +1118,11 @@ export const ProviderConfigEditDialog = ({ // context_window/max_output optional; DEFAULT_* substitute at save. showDeprecatedMaxTokensWarning={ Boolean(initialMaxTokens) && - !initialCapacity?.maxOutputTokens && - !capacityForm.maxOutputTokens + Boolean(initialCapacity?.contextWindowTokens || capacityForm.contextWindowTokens) && + Boolean(initialCapacity?.maxOutputTokens || capacityForm.maxOutputTokens) } legacyMaxTokensCandidate={ - initialCapacity?.maxOutputTokens + initialCapacity?.contextWindowTokens && initialCapacity?.maxOutputTokens ? undefined : initialCapacity?.maxTokens } diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx new file mode 100644 index 000000000..783c48667 --- /dev/null +++ b/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx @@ -0,0 +1,66 @@ +"use client"; + +import React from "react"; +import { useTranslation } from "react-i18next"; +import { Card, Button, Skeleton, Flex } from "antd"; +import { AlertTriangle } from "lucide-react"; + +import { useCapacityCoverage } from "@/hooks/model/useCapacityCoverage"; +import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider"; +import { useDeployment } from "@/components/providers/deploymentProvider"; +import { canManageModels } from "@/lib/auth"; + +interface Props { + onViewAll?: () => void; +} + +export default function ModelCapacityCoverageWidget({ onViewAll }: Props) { + const { t } = useTranslation("common"); + const { user } = useAuthorizationContext(); + const { isSpeedMode } = useDeployment(); + const visibleToOperator = canManageModels(user?.role, isSpeedMode); + + const { coverage, isLoading } = useCapacityCoverage({ + enabled: visibleToOperator, + }); + + if (!visibleToOperator) return null; + if (isLoading) { + return ( + + + + ); + } + if (!coverage || 
coverage.bareCount === 0) return null; + + return ( + + + + + + + {t("dashboard.capacityCoverage.title")} + + + {t("dashboard.capacityCoverage.subtitle", { + bareCount: coverage.bareCount, + total: coverage.totalLlmVlm, + })} + + + + {onViewAll && ( + + )} + + + ); +} diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx index 6715852f7..11ff72c97 100644 --- a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx +++ b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx @@ -15,6 +15,7 @@ import type { ModelMonitoringItem } from "@/types/monitoring"; import { MODEL_TYPES } from "@/const/modelConfig"; import { ModelAddDialog } from "../../../models/components/model/ModelAddDialog"; import { ModelEditDialog } from "../../../models/components/model/ModelEditDialog"; +import ModelCapacityCoverageWidget from "./ModelCapacityCoverageWidget"; import { CheckCircle, CircleSlash, XCircle, CircleEllipsis, CircleHelp } from "lucide-react"; interface UnifiedModelRow extends ModelOption { @@ -361,6 +362,7 @@ export default function ModelList({ tenantId }: { tenantId: string | null }) { return (
+
=> + modelService.getCapacityCoverage(), + staleTime: options?.staleTime ?? 60_000, + enabled: options?.enabled ?? true, + }); + + const coverage = query.data ?? EMPTY_COVERAGE; + + const bareModelIds = useMemo( + () => new Set(coverage.bareModels.map((m) => m.modelId)), + [coverage] + ); + + const suggestionAvailableModelIds = useMemo( + () => + new Set( + coverage.bareModels + .filter((m) => m.suggestionAvailable) + .map((m) => m.modelId) + ), + [coverage] + ); + + return { + ...query, + coverage, + bareModelIds, + suggestionAvailableModelIds, + invalidate: () => + queryClient.invalidateQueries({ queryKey: ["modelCapacityCoverage"] }), + }; +} diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json index 2f8eed214..375a80a9e 100644 --- a/frontend/public/locales/en/common.json +++ b/frontend/public/locales/en/common.json @@ -852,8 +852,8 @@ "model.dialog.capacity.error.reserveExceedsOutput": "Output reserve cannot exceed max output tokens.", "model.dialog.capacity.error.requiredMissing": "Context window and max input tokens are required.", "model.dialog.capacity.deprecatedMaxTokens": "max_tokens is deprecated; use max_output_tokens.", - "model.dialog.capacity.legacyMaxTokensDetected": "Detected legacy max_tokens = {{value}}. 
Apply it as max_output_tokens?", - "model.dialog.capacity.legacyMaxTokens.apply": "Apply", + "model.dialog.capacity.legacyMaxTokens.applyAsContext": "Fill into Context Window", + "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "Fill into Max Output", "model.dialog.capacity.source.operator": "Operator", "model.dialog.capacity.source.profile": "Profile", "model.dialog.capacity.source.provider_candidate": "Provider Candidate", @@ -876,6 +876,26 @@ "model.dialog.capacity.suggestion.confidence.medium": "Medium confidence", "model.dialog.capacity.suggestion.confidence.low": "Low confidence", "model.list.capacityWarning.tag": "Missing capacity", + "model.list.capacityWarning.badgeTooltip": "Output token cap is not enforced for this model. Click to fill capacity values now.", + "model.list.capacityWarning.tooltipAction": "Open capacity panel", + "agent.modelSelector.bareCapacity.subtitle": "Output cap not enforced — configure capacity in Model Management.", + "agent.modelSelector.bareCapacity.formNotice": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement and budget consistency checks are off until capacity is set in Model Management.", + "agent.modelSelector.bareCapacity.formNoticeNoPermission": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement is off. Ask a model administrator to configure capacity for this model.", + "dashboard.capacityCoverage.title": "Model capacity coverage", + "dashboard.capacityCoverage.subtitle": "{{bareCount}} of {{total}} LLM/VLM models are missing capacity values. 
Output-token enforcement is off for those models.", + "dashboard.capacityCoverage.viewAll": "View all", + "model.dialog.capacity.suggestion.matchExact": "Catalog exact match", + "model.dialog.capacity.suggestion.matchFuzzy": "Catalog fuzzy match", + "model.dialog.capacity.suggestion.matchProviderDiscovery": "Provider-discovered capacity", + "model.dialog.capacity.suggestion.useSuggestion": "Use suggestion", + "model.dialog.capacity.suggestion.canonicalName": "Canonical model name: {{name}}", + "model.dialog.capacity.suggestion.candidateWarning": "Multiple candidates matched; the saved canonical name may need review.", + "model.dialog.capacity.suggestion.profileMissWarning": "Saving without the canonical model name means runtime may not claim profile capacity until W1 exact lookup succeeds.", + "model.dialog.capacity.suggestion.toggle": "Suggest capacity", + "model.dialog.capacity.preset.custom": "Custom value", + "model.dialog.capacity.preset.contextWindow": "Context window preset", + "model.dialog.capacity.preset.outputReserve": "Output reserve preset", + "model.dialog.capacity.legacyMaxTokensHint": "Legacy max_tokens is {{maxTokens}}. This value may be either the model's context window (typical for 16K and above) or the per-call max output cap (typical for 4K and below). Pick the field that matches the model's actual capability and save.", "model.dialog.capacity.batchDefault.title": "Batch default capacity", "model.dialog.capacity.batchDefault.hint": "Values entered here apply as the default capacity for every LLM/VLM model in this batch import. Click the gear icon on a row to override a specific model.", "model.dialog.batch.requireRowCapacity": "Some enabled rows are missing context window or max output tokens. 
Open the gear icon to fill them in before confirming.", diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json index bcf6bcc54..230a731c3 100644 --- a/frontend/public/locales/zh/common.json +++ b/frontend/public/locales/zh/common.json @@ -823,8 +823,8 @@ "model.dialog.capacity.error.reserveExceedsOutput": "输出预留Token数不能超过最大输出Token数。", "model.dialog.capacity.error.requiredMissing": "上下文窗口和最大输入Token数为必填项。", "model.dialog.capacity.deprecatedMaxTokens": "max_tokens 已废弃,请使用 max_output_tokens。", - "model.dialog.capacity.legacyMaxTokensDetected": "检测到旧的「最大Tokens数」为 {{value}},是否填入最大输出Token数?", - "model.dialog.capacity.legacyMaxTokens.apply": "应用", + "model.dialog.capacity.legacyMaxTokens.applyAsContext": "填入「上下文窗口」", + "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "填入「最大输出」", "model.dialog.capacity.source.operator": "人工配置", "model.dialog.capacity.source.profile": "能力档案", "model.dialog.capacity.source.provider_candidate": "供应商候选", @@ -847,6 +847,26 @@ "model.dialog.capacity.suggestion.confidence.medium": "中置信度", "model.dialog.capacity.suggestion.confidence.low": "低置信度", "model.list.capacityWarning.tag": "缺容量", + "model.list.capacityWarning.badgeTooltip": "该模型未启用输出 Token 限额。点击此处补全容量配置。", + "model.list.capacityWarning.tooltipAction": "打开容量面板", + "agent.modelSelector.bareCapacity.subtitle": "输出限额未启用 — 请在模型管理中补全容量。", + "agent.modelSelector.bareCapacity.formNotice": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行,但输出 Token 限额和预算一致性校验关闭,直到在「模型管理」中补全容量。", + "agent.modelSelector.bareCapacity.formNoticeNoPermission": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行,但输出 Token 限额关闭。请联系模型管理员补全该模型的容量配置。", + "dashboard.capacityCoverage.title": "模型容量覆盖", + "dashboard.capacityCoverage.subtitle": "{{total}} 个 LLM/VLM 模型中有 {{bareCount}} 个缺少容量字段。这些模型的输出 Token 限额未启用。", + "dashboard.capacityCoverage.viewAll": "查看全部", + "model.dialog.capacity.suggestion.matchExact": "目录精确匹配", + "model.dialog.capacity.suggestion.matchFuzzy": "目录模糊匹配", + 
"model.dialog.capacity.suggestion.matchProviderDiscovery": "供应商发现", + "model.dialog.capacity.suggestion.useSuggestion": "使用建议值", + "model.dialog.capacity.suggestion.canonicalName": "规范化模型名:{{name}}", + "model.dialog.capacity.suggestion.candidateWarning": "存在多个候选匹配,保存前请确认规范化名称。", + "model.dialog.capacity.suggestion.profileMissWarning": "未保存规范化模型名时,运行期可能无法命中能力档案,直到 W1 精确查找命中。", + "model.dialog.capacity.suggestion.toggle": "建议容量", + "model.dialog.capacity.preset.custom": "自定义值", + "model.dialog.capacity.preset.contextWindow": "上下文窗口预设", + "model.dialog.capacity.preset.outputReserve": "输出预留预设", + "model.dialog.capacity.legacyMaxTokensHint": "历史 max_tokens 为 {{maxTokens}}。该值可能是模型的上下文窗口(16K 及以上常见),也可能是单次最大输出(4K 及以下常见)。请根据模型实际能力选择填入哪个字段。", "model.dialog.capacity.batchDefault.title": "批量默认容量", "model.dialog.capacity.batchDefault.hint": "此处填写的数值将作为本次批量导入所有 LLM/VLM 模型的默认容量。如需为某个模型单独设置,请点击对应行的⚙图标覆盖。", "model.dialog.batch.requireRowCapacity": "存在已打开开关的模型缺少上下文窗口或最大输出Token数,请点击对应行的⚙图标补全后再确认。", diff --git a/frontend/services/modelService.ts b/frontend/services/modelService.ts index d054a9274..66246cb81 100644 --- a/frontend/services/modelService.ts +++ b/frontend/services/modelService.ts @@ -43,6 +43,8 @@ const buildCapacityRequestBody = (model: { defaultOutputReserveTokens?: number; tokenizerFamily?: string; capacitySource?: string; + acceptedSuggestionMatchKind?: string; + acceptedCapabilityProfileVersion?: string; }) => ({ ...(model.contextWindowTokens !== undefined ? { context_window_tokens: model.contextWindowTokens } @@ -62,6 +64,18 @@ const buildCapacityRequestBody = (model: { ...(model.capacitySource !== undefined ? { capacity_source: model.capacitySource } : {}), + // W11 accept-signal: audit-only fields the app layer pops before the + // service write so model_capacity_suggestion_accept_total can count + // accepted catalog matches. + ...(model.acceptedSuggestionMatchKind !== undefined + ? 
{ accepted_suggestion_match_kind: model.acceptedSuggestionMatchKind } + : {}), + ...(model.acceptedCapabilityProfileVersion !== undefined + ? { + accepted_capability_profile_version: + model.acceptedCapabilityProfileVersion, + } + : {}), }); const mapCapacitySuggestionFromApi = ( @@ -198,6 +212,8 @@ export const modelService = { defaultOutputReserveTokens?: number; tokenizerFamily?: string; capacitySource?: string; + acceptedSuggestionMatchKind?: string; + acceptedCapabilityProfileVersion?: string; }): Promise => { try { const requestBody: any = { @@ -420,6 +436,8 @@ export const modelService = { defaultOutputReserveTokens?: number; tokenizerFamily?: string; capacitySource?: string; + acceptedSuggestionMatchKind?: string; + acceptedCapabilityProfileVersion?: string; }): Promise => { try { const response = await fetch( @@ -923,6 +941,8 @@ export const modelService = { defaultOutputReserveTokens?: number; tokenizerFamily?: string; capacitySource?: string; + acceptedSuggestionMatchKind?: string; + acceptedCapabilityProfileVersion?: string; }): Promise => { try { const requestBody: any = { @@ -1005,6 +1025,8 @@ export const modelService = { defaultOutputReserveTokens?: number; tokenizerFamily?: string; capacitySource?: string; + acceptedSuggestionMatchKind?: string; + acceptedCapabilityProfileVersion?: string; }): Promise => { try { const response = await fetch( diff --git a/test/backend/app/test_model_managment_app.py b/test/backend/app/test_model_managment_app.py index cbdc04c15..38300e988 100644 --- a/test/backend/app/test_model_managment_app.py +++ b/test/backend/app/test_model_managment_app.py @@ -290,6 +290,66 @@ async def _create(*args, **kwargs): mock_create.assert_called_once() +@pytest.mark.asyncio +async def test_create_model_records_accept_signal_when_present(client, auth_header, user_credentials, sample_model_data, mocker): + """End-to-end SLO data-flow check: when the frontend ships the W11 accept + signal on a successful save, the app layer must (1) 
strip the audit-only + fields before the DB write, and (2) call the metric recorder so + model_capacity_suggestion_accept_total increments. Spec L709-710. + """ + mocker.patch('backend.apps.model_managment_app.get_current_user_id', return_value=user_credentials) + + async def _create(*args, **kwargs): + return None + + mock_create = mocker.patch('backend.apps.model_managment_app.create_model_for_tenant', side_effect=_create) + mock_record = mocker.patch('backend.apps.model_managment_app._record_capacity_suggestion_accept') + + payload = { + **sample_model_data, + "context_window_tokens": 128000, + "max_output_tokens": 16384, + "capacity_source": "operator", + "accepted_suggestion_match_kind": "catalog_exact", + "accepted_capability_profile_version": "openai/gpt-4o@1", + } + response = client.post("/model/create", json=payload, headers=auth_header) + + assert response.status_code == HTTPStatus.OK + + # Audit fields must NOT reach the service layer. + create_args = mock_create.await_args + sent = create_args.args[2] + assert "accepted_suggestion_match_kind" not in sent + assert "accepted_capability_profile_version" not in sent + # Real capacity fields ARE forwarded. + assert sent["context_window_tokens"] == 128000 + assert sent["max_output_tokens"] == 16384 + + # Metric recorder called with the labels the SLO dashboard expects. + mock_record.assert_called_once_with("catalog_exact", payload["provider"]) + + +@pytest.mark.asyncio +async def test_create_model_skips_accept_recorder_without_match_kind(client, auth_header, user_credentials, sample_model_data, mocker): + """Ordinary saves (no Use-suggestion click) must NOT fire the recorder. + Otherwise accept_total inflates and the SLO ratio against + dispatch_profile_hit_total becomes meaningless. 
+ """ + mocker.patch('backend.apps.model_managment_app.get_current_user_id', return_value=user_credentials) + + async def _create(*args, **kwargs): + return None + + mocker.patch('backend.apps.model_managment_app.create_model_for_tenant', side_effect=_create) + mock_record = mocker.patch('backend.apps.model_managment_app._record_capacity_suggestion_accept') + + response = client.post("/model/create", json=sample_model_data, headers=auth_header) + + assert response.status_code == HTTPStatus.OK + mock_record.assert_not_called() + + @pytest.mark.asyncio async def test_create_model_conflict(client, auth_header, user_credentials, sample_model_data, mocker): """Test model creation with name conflict.""" diff --git a/test/backend/services/test_model_provider_service.py b/test/backend/services/test_model_provider_service.py index b88cb38a3..9828246c5 100644 --- a/test/backend/services/test_model_provider_service.py +++ b/test/backend/services/test_model_provider_service.py @@ -574,11 +574,12 @@ async def test_prepare_model_dict_persists_operator_capacity(): "model_type": "llm", "max_tokens": 31920, "context_window_tokens": 200000, - "max_input_tokens": None, + "max_input_tokens": 180000, "max_output_tokens": 31920, "default_output_reserve_tokens": 4096, "tokenizer_family": "qwen", "capacity_source": "operator", + "capability_profile_version": "dashscope/glm-5.2@1", } await prepare_model_dict( @@ -589,15 +590,25 @@ async def test_prepare_model_dict_persists_operator_capacity(): ) _, kwargs = mock_model_request.call_args + # W11 spec L721-727: pin every capacity field the constructor must + # thread for the accepted-suggestion save path. Missing any of these + # silently drops the field on the DB row and reproduces CM-031. 
assert kwargs["context_window_tokens"] == 200000 + assert kwargs["max_input_tokens"] == 180000 assert kwargs["max_output_tokens"] == 31920 assert kwargs["default_output_reserve_tokens"] == 4096 assert kwargs["tokenizer_family"] == "qwen" + assert kwargs["capability_profile_version"] == "dashscope/glm-5.2@1" # capacity_source is forced to "operator" by the prepare_model_dict # contract: only operator-marked values reach the row, and the # marker itself is normalized to the canonical value rather than # echoing whatever the caller sent. assert kwargs["capacity_source"] == "operator" + # Canonical provider/model values land via constructor kwargs too, + # so model_factory + model_name are pinned to catch regressions + # in split_repo_name plumbing. + assert kwargs["model_factory"] == "dashscope" + assert kwargs["model_name"] == "glm-5.2" @pytest.mark.asyncio diff --git a/test/backend/test_model_consts.py b/test/backend/test_model_consts.py index 78e77ce77..bf3874dce 100644 --- a/test/backend/test_model_consts.py +++ b/test/backend/test_model_consts.py @@ -28,3 +28,53 @@ def test_model_request_and_validation(): assert req.filename == "f" +def test_model_request_threads_w11_capacity_and_accept_fields(): + """W11 spec L721-727 + L500-502: ModelRequest must carry every capacity + column the save handler can persist AND the audit-only accept-signal + fields shipped by the frontend after a "Use suggestion" save. Pinning the + field set here prevents a silent rename from dropping a column on the + DB row or breaking the accept counter. 
+ """ + fields = set(model_consts.ModelRequest.model_fields.keys()) + required = { + # W1/W2 capacity columns (persisted) + "context_window_tokens", + "max_input_tokens", + "max_output_tokens", + "default_output_reserve_tokens", + "tokenizer_family", + "capacity_source", + "capability_profile_version", + # Canonical provider/model values + "model_factory", + "model_name", + # Accept-signal audit fields (wire-only, stripped by app layer) + "accepted_suggestion_match_kind", + "accepted_capability_profile_version", + } + missing = required - fields + assert not missing, f"ModelRequest missing W11 fields: {missing}" + + +def test_capacity_suggestion_response_has_required_fields(): + """Pin ModelCapacitySuggestionResponse schema so a downstream rename + (e.g. suggested_provider -> canonical_provider) trips a test instead + of silently dropping the field from the API contract. + """ + fields = set(model_consts.ModelCapacitySuggestionResponse.model_fields.keys()) + required = { + "suggestions", + "match_kind", + "match_confidence", + "match_explanation", + "suggested_provider", + "canonical_model_name", + "capability_profile_version", + "capacity_source_on_accept", + } + missing = required - fields + assert not missing, ( + f"ModelCapacitySuggestionResponse missing W11 fields: {missing}" + ) + + From 775b0c849b92d70d04215ed6190271cd7fa3bf62 Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Fri, 26 Jun 2026 16:36:41 +0800 Subject: [PATCH 07/31] =?UTF-8?q?fix(w11):=20compact=20bare-capacity=20UI?= =?UTF-8?q?=20=E2=80=94=20icon+tooltip=20in=20model=20selector,=20vertical?= =?UTF-8?q?=20layout=20for=20legacy=20hint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Agent model selector: replace inline yellow subtitle with TriangleAlert icon + hover tooltip to reduce visual clutter in dropdown options - ModelCapacityFields: switch legacy max_tokens Alert from action prop (horizontal) to description prop (vertical) so 
hint text stacks above apply buttons within the same alert box - Add i18n key agent.modelSelector.bareCapacity.tooltip (zh/en) --- .../agentInfo/AgentGenerateDetail.tsx | 19 ++++++++++--------- .../components/model/ModelCapacityFields.tsx | 4 ++-- frontend/public/locales/en/common.json | 1 + frontend/public/locales/zh/common.json | 1 + 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx index cecc02d2b..b016f530c 100644 --- a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx +++ b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx @@ -14,9 +14,10 @@ import { Card, App, Alert, + Tooltip, } from "antd"; import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; -import { Zap, Maximize2, Settings2, Sparkles } from "lucide-react"; +import { Zap, Maximize2, Settings2, Sparkles, TriangleAlert } from "lucide-react"; import { Textarea } from "@/components/ui/textarea"; import { @@ -550,11 +551,11 @@ export default function AgentGenerateDetail({}) { return { value: displayLabel, label: isBare ? ( - + {displayLabel} - - {t("agent.modelSelector.bareCapacity.subtitle")} - + + + ) : ( displayLabel @@ -950,11 +951,11 @@ export default function AgentGenerateDetail({}) { disabled={model.connect_status !== "available"} > {isBare ? 
( - + {model.displayName} - - {t("agent.modelSelector.bareCapacity.subtitle")} - + + + ) : ( model.displayName diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx index 712ab62fd..b5b2bf15c 100644 --- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx +++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx @@ -412,8 +412,8 @@ export const ModelCapacityFields = ({ message={t("model.dialog.capacity.legacyMaxTokensHint", { maxTokens: legacyMaxTokensCandidate, })} - action={ - + description={ + {(contextWindowIsRecommended ? ["context", "output"] : ["output", "context"] diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json index 375a80a9e..f521a92f2 100644 --- a/frontend/public/locales/en/common.json +++ b/frontend/public/locales/en/common.json @@ -879,6 +879,7 @@ "model.list.capacityWarning.badgeTooltip": "Output token cap is not enforced for this model. Click to fill capacity values now.", "model.list.capacityWarning.tooltipAction": "Open capacity panel", "agent.modelSelector.bareCapacity.subtitle": "Output cap not enforced — configure capacity in Model Management.", + "agent.modelSelector.bareCapacity.tooltip": "Model capacity limits not configured — please configure in Model Management", "agent.modelSelector.bareCapacity.formNotice": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement and budget consistency checks are off until capacity is set in Model Management.", "agent.modelSelector.bareCapacity.formNoticeNoPermission": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement is off. 
Ask a model administrator to configure capacity for this model.", "dashboard.capacityCoverage.title": "Model capacity coverage", diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json index 230a731c3..a1613e658 100644 --- a/frontend/public/locales/zh/common.json +++ b/frontend/public/locales/zh/common.json @@ -850,6 +850,7 @@ "model.list.capacityWarning.badgeTooltip": "该模型未启用输出 Token 限额。点击此处补全容量配置。", "model.list.capacityWarning.tooltipAction": "打开容量面板", "agent.modelSelector.bareCapacity.subtitle": "输出限额未启用 — 请在模型管理中补全容量。", + "agent.modelSelector.bareCapacity.tooltip": "模型容量配置缺少限额,请到模型管理中补全容量", "agent.modelSelector.bareCapacity.formNotice": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行,但输出 Token 限额和预算一致性校验关闭,直到在「模型管理」中补全容量。", "agent.modelSelector.bareCapacity.formNoticeNoPermission": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行,但输出 Token 限额关闭。请联系模型管理员补全该模型的容量配置。", "dashboard.capacityCoverage.title": "模型容量覆盖", From d6165cb4cac6c21ceaac3ea42673534263d364c3 Mon Sep 17 00:00:00 2001 From: wuyuanfr <18270469842@163.com> Date: Fri, 26 Jun 2026 17:14:19 +0800 Subject: [PATCH 08/31] =?UTF-8?q?fix(w11):=20close=20remaining=20spec=20ga?= =?UTF-8?q?ps=20=E2=80=94=20bare-capacity=20badge=20in=20model=20list=20ta?= =?UTF-8?q?ble=20+=20fuzzy=20canonicalization=20warning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gap 1 — Model Management list page badge: - ModelList.tsx: add useCapacityCoverage hook + TriangleAlert badge in the Name column for bare-capacity LLM/VLM rows - Badge shows yellow warning icon inline with model name - Hover tooltip explains enforcement is off; click opens ModelEditDialog (which auto-fires capacity suggestion for bare models) Gap 2 — Fuzzy canonicalization warning: - ModelCapacityFields.tsx: add acceptedSuggestion prop; render profileMissWarning text when catalog_fuzzy suggestion is shown but the user hasn't accepted the canonical model name - ModelAddDialog.tsx + 
ModelEditDialog.tsx: pass acceptedCapacitySuggestion through to ModelCapacityFields --- .../components/model/ModelAddDialog.tsx | 1 + .../components/model/ModelCapacityFields.tsx | 12 +++++++++ .../components/model/ModelEditDialog.tsx | 1 + .../components/resources/ModelList.tsx | 26 ++++++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx index 9473bf6f6..094ed4391 100644 --- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx @@ -1877,6 +1877,7 @@ export const ModelAddDialog = ({ onUseSuggestion={() => applyCapacitySuggestion(capacitySuggestion) } + acceptedSuggestion={acceptedCapacitySuggestion} />
)} diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx index b5b2bf15c..a2b8f8dca 100644 --- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx +++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx @@ -89,6 +89,8 @@ interface ModelCapacityFieldsProps { * `applyDefaults` option -- callers should pass matching booleans. */ applyDefaultsOnEmpty?: boolean; + /** Currently accepted suggestion, used to detect fuzzy canonicalization mismatch */ + acceptedSuggestion?: CapacitySuggestion | null; } const SOURCE_COLORS: Record = { @@ -278,6 +280,7 @@ export const ModelCapacityFields = ({ suggestionLoading = false, legacyMaxTokensCandidate, applyDefaultsOnEmpty = true, + acceptedSuggestion, }: ModelCapacityFieldsProps) => { const { t } = useTranslation(); @@ -495,6 +498,15 @@ export const ModelCapacityFields = ({ {suggestion.suggestedProvider && ( {suggestion.suggestedProvider} )} + {suggestion.matchKind === "catalog_fuzzy" && + (!acceptedSuggestion || + (acceptedSuggestion && + acceptedSuggestion.canonicalModelName !== + suggestion.canonicalModelName)) && ( +
+ {t("model.dialog.capacity.suggestion.profileMissWarning")} +
+ )} {onUseSuggestion && (