From 68498c61ff443607929b97fd22219ae0a2677482 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Thu, 25 Jun 2026 19:15:06 +0800
Subject: [PATCH 01/31] fix(model_management): preserve connectivity success
 when capacity suggestion path raises

The connectivity check endpoint /model/temporary_healthcheck runs
_capacity_suggestion_for_model_request inline after a successful
verify_model_config_connectivity. Per W11 spec ("Suggestion failure
never changes connectivity success or failure"), an unexpected error
inside the suggestion path must not turn a successful connectivity
result into HTTP 500.

The prior code caught ValueError (covering the typed InvalidInput case
and Pydantic v2 ValidationError, which is a ValueError subclass), but
non-ValueError exceptions -- e.g. AttributeError/TypeError from a
malformed catalog profile entry, or future V2 provider-discovery HTTP
errors -- would propagate to the outer except Exception in
check_temporary_model_health and surface to operators as a misleading
"Failed to verify model connectivity" 500.

Restore the catch-all degrade-to-None branch and log at WARNING (not
DEBUG) so the real root cause is visible in default production log
streams without DEBUG enabled. Connectivity stays 200 with
capacity_suggestion: null; the per-row catalog issue surfaces in logs
where operators can act on it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 backend/apps/model_managment_app.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py
index a92937e12..634726d66 100644
--- a/backend/apps/model_managment_app.py
+++ b/backend/apps/model_managment_app.py
@@ -114,6 +114,9 @@ def _capacity_suggestion_for_model_request(request: ModelRequest):
     except ValueError as exc:
         logger.debug("Capacity suggestion unavailable for connectivity request: %s", exc)
         return None
+    except Exception as exc:
+        logger.warning("Capacity suggestion failed during connectivity request: %s", exc)
+        return None
 
 
 @router.post("/create")

From f555fda77d1d4a6dda4d215fae01f673cbe7e7ac Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Thu, 25 Jun 2026 19:15:31 +0800
Subject: [PATCH 02/31] refactor(w11): collapse Add/Edit capacity-suggestion
 controls

The Add dialog had two ways to trigger a catalog suggestion: clicking
the bottom connectivity-validation button (which the backend extends
with capacity_suggestion in /temporary_healthcheck's response) and a
secondary "Check" button beside the toggle that called the standalone
/suggest-capacity endpoint. In V1 catalog-only mode the two paths
overlap on every realistic add flow -- the user must run connectivity
anyway because the Add button is gated on it -- so the standalone
button is UX noise without functional value. Collapse Add to a single
toggle whose state gates both the embedded suggestion result and the
explanatory hint.

The Edit dialog keeps its explicit Check button per spec ("show
'Suggestion available' after validation or explicit check") because
existing rows may need to refresh a suggestion without re-running
connectivity, but the long-form hint sentence is redundant: title +
toggle + a button labelled "Check" already name the feature and the
action. Removing the hint matches the spec's i18n key list, which
never listed model.dialog.capacity.suggestion.hint to begin with.

Add dialog changes:
- Drop checkingCapacitySuggestion state, canSuggestCapacity guard,
  and handleSuggestCapacity handler.
- Drop the secondary Button and its wrapping shrink-0 flex container;
  the Switch becomes a direct child of the outer justify-between row.
- Drop the suggestionLoading prop from ModelCapacityFields entirely.
  It only controlled the spinner on the "Use suggestion" button inside
  the suggestion-result panel, which only renders after a suggestion
  is set -- at which point verifyingConnectivity is already false, so
  binding it added no observable effect.
- Replace the shared "hint" copy with a new key "hintAdd" whose
  wording reflects the actual trigger ("Suggested from the approved
  catalog after connectivity passes."), and gate it on
  capacitySuggestionEnabled so the toggle's off-state no longer
  contradicts itself with copy that promises automatic behavior.

Edit dialog changes:
- Remove the hint <div> and its wrapping container; the title becomes
  a direct flex child alongside the Switch+Check controls.

i18n:
- Drop the obsolete "model.dialog.capacity.suggestion.hint" key from
  en and zh; add "hintAdd" used only by Add dialog.

No backend wire change. Edit dialog still calls /suggest-capacity
through its existing Check button for the bare-row repair flow.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../components/model/ModelAddDialog.tsx       | 70 +++----------------
 .../components/model/ModelEditDialog.tsx      |  9 +--
 frontend/public/locales/en/common.json        |  2 +-
 frontend/public/locales/zh/common.json        |  2 +-
 4 files changed, 14 insertions(+), 69 deletions(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
index a0eeb1bb1..5f3c03535 100644
--- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
@@ -294,8 +294,6 @@ export const ModelAddDialog = ({
   const [form, setForm] = useState(DEFAULT_FORM_STATE);
   const [loading, setLoading] = useState(false);
   const [verifyingConnectivity, setVerifyingConnectivity] = useState(false);
-  const [checkingCapacitySuggestion, setCheckingCapacitySuggestion] =
-    useState(false);
   const [capacitySuggestionEnabled, setCapacitySuggestionEnabled] =
     useState(true);
   const [capacitySuggestion, setCapacitySuggestion] =
@@ -504,12 +502,6 @@ export const ModelAddDialog = ({
     }
   };
 
-  const canSuggestCapacity = () =>
-    supportsCapacityFields &&
-    !form.isBatchImport &&
-    form.name.trim() !== "" &&
-    (form.url.trim() !== "" || form.provider.trim() !== "");
-
   const applyCapacitySuggestion = (suggestion: CapacitySuggestion | null) => {
     const next = capacityFormFromSuggestion(suggestion);
     if (!next || Object.keys(next).length === 0) return;
@@ -527,37 +519,6 @@ export const ModelAddDialog = ({
     setAcceptedCapacitySuggestion(suggestion);
   };
 
-  const handleSuggestCapacity = async () => {
-    if (!canSuggestCapacity()) {
-      message.warning(t("model.dialog.capacity.suggestion.missingInput"));
-      return;
-    }
-    setCheckingCapacitySuggestion(true);
-    try {
-      const suggestion = await modelService.suggestCapacity({
-        modelName: form.name.trim(),
-        baseUrl: form.url.trim(),
-        // Only send providerHint when the user actually picked it (batch mode
-        // exposes the dropdown). In single-add mode the form keeps a hidden
-        // default ("modelengine") that the user never sees, so forwarding it
-        // would falsely pin catalog lookup to that provider.
-        ...(form.isBatchImport ? { providerHint: form.provider } : {}),
-        apiKey: form.apiKey.trim() || undefined,
-        modelType: resolveConnectivityModelType(form.type),
-      });
-      setCapacitySuggestion(suggestion);
-      if (!suggestion.suggestions) {
-        setAcceptedCapacitySuggestion(null);
-      }
-    } catch (error) {
-      setCapacitySuggestion(null);
-      setAcceptedCapacitySuggestion(null);
-      message.error(t("model.dialog.capacity.suggestion.failed"));
-    } finally {
-      setCheckingCapacitySuggestion(false);
-    }
-  };
-
   // Verify if the vector dimension is valid
   const isValidVectorDimension = (value: string): boolean => {
     const dimension = Number.parseInt(value, 10);
@@ -1869,27 +1830,17 @@ export const ModelAddDialog = ({
                   <div className="text-sm font-medium text-gray-700">
                     {t("model.dialog.capacity.suggestion.title")}
                   </div>
-                  <div className="text-xs text-gray-500">
-                    {t("model.dialog.capacity.suggestion.hint")}
-                  </div>
-                </div>
-                <div className="flex shrink-0 items-center gap-2">
-                  <Switch
-                    size="small"
-                    checked={capacitySuggestionEnabled}
-                    onChange={setCapacitySuggestionEnabled}
-                  />
-                  <Button
-                    size="small"
-                    onClick={handleSuggestCapacity}
-                    loading={checkingCapacitySuggestion}
-                    disabled={
-                      !capacitySuggestionEnabled || !canSuggestCapacity()
-                    }
-                  >
-                    {t("model.dialog.capacity.suggestion.check")}
-                  </Button>
+                  {capacitySuggestionEnabled && (
+                    <div className="text-xs text-gray-500">
+                      {t("model.dialog.capacity.suggestion.hintAdd")}
+                    </div>
+                  )}
                 </div>
+                <Switch
+                  size="small"
+                  checked={capacitySuggestionEnabled}
+                  onChange={setCapacitySuggestionEnabled}
+                />
               </div>
             )}
             <ModelCapacityFields
@@ -1905,7 +1856,6 @@ export const ModelAddDialog = ({
                   ? capacitySuggestion
                   : null
               }
-              suggestionLoading={checkingCapacitySuggestion}
               onUseSuggestion={() =>
                 applyCapacitySuggestion(capacitySuggestion)
               }
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index e086c6d44..b4c2ac936 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -601,13 +601,8 @@ export const ModelEditDialog = ({
         {supportsCapacityFields && (
           <div className="space-y-2">
             <div className="flex items-center justify-between gap-3 rounded-md border border-gray-200 bg-gray-50 p-3">
-              <div>
-                <div className="text-sm font-medium text-gray-700">
-                  {t("model.dialog.capacity.suggestion.title")}
-                </div>
-                <div className="text-xs text-gray-500">
-                  {t("model.dialog.capacity.suggestion.hint")}
-                </div>
+              <div className="text-sm font-medium text-gray-700">
+                {t("model.dialog.capacity.suggestion.title")}
               </div>
               <div className="flex shrink-0 items-center gap-2">
                 <Switch
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index e5c3e006e..53f55283c 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -860,7 +860,7 @@
   "model.dialog.capacity.source.legacy": "Legacy",
   "model.dialog.capacity.source.unknown": "Unknown",
   "model.dialog.capacity.suggestion.title": "Capacity suggestion",
-  "model.dialog.capacity.suggestion.hint": "Check the approved catalog and apply the result only when you choose to use it.",
+  "model.dialog.capacity.suggestion.hintAdd": "Suggested from the approved catalog after connectivity passes.",
   "model.dialog.capacity.suggestion.check": "Check",
   "model.dialog.capacity.suggestion.use": "Use suggestion",
   "model.dialog.capacity.suggestion.found": "Capacity suggestion found",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 1e7757af4..4d028e52a 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -831,7 +831,7 @@
   "model.dialog.capacity.source.legacy": "旧字段",
   "model.dialog.capacity.source.unknown": "未知",
   "model.dialog.capacity.suggestion.title": "容量建议",
-  "model.dialog.capacity.suggestion.hint": "从已审核目录检查容量；只有点击使用后才会写入表单。",
+  "model.dialog.capacity.suggestion.hintAdd": "连通性测试通过后，自动从已审核目录给出容量建议。",
   "model.dialog.capacity.suggestion.check": "检查",
   "model.dialog.capacity.suggestion.use": "使用建议",
   "model.dialog.capacity.suggestion.found": "已找到容量建议",

From f0e82d32b11efdf8e464afd467ecf2e41617502d Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 09:39:34 +0800
Subject: [PATCH 03/31] feat(w11): backend SLO instrumentation + cross-tenant
 capacity-coverage test

Phase 1.5 backend foundation per W11 spec L706-710 (SLO metrics),
L86-89/L944-948 (visibility env flag), and L312-322 (cross-tenant test).
No frontend change in this commit; V1.5 surfaces consume these signals in
follow-up frontend commits.

Metrics (4 instruments, each guarded behind try/except so a missing
OpenTelemetry runtime does not break the dispatch path):

1. model_capacity_suggestion_requests_total{match_kind, model_type,
   provider} -- counter wrapping suggest_capacity. Drives the
   "70% of new manual-add LLM rows produce match_kind != none" SLO.
2. model_capacity_suggestion_latency_ms{match_kind, provider} --
   histogram around the same call. Used to verify V2 provider-discovery
   p95 stays under the model-add latency budget.
3. model_capacity_suggestion_accept_total{match_kind, provider} --
   counter emitted by the app layer when the operator save payload
   carries accepted_suggestion_match_kind. Denominator for the
   "95% accepted -> profile dispatch" SLO ratio.
4. model_capacity_suggestion_dispatch_profile_hit_total{provider} --
   counter emitted in _resolve_input_budget when the resolved snapshot
   carries a non-null capability_profile_version. Numerator for the
   same SLO.

Accept signal pipe (audit-only):
- consts/model.py: ModelRequest gains accepted_suggestion_match_kind
  and accepted_capability_profile_version. Both Optional[str], never
  persisted to model_record_t.
- model_management_service.py: pop_capacity_accept_signal strips both
  fields from save payloads and returns the popped values so the app
  layer can label the counter.
- model_managment_app.py: /create and /update endpoints call
  pop_capacity_accept_signal before invoking the service, then forward
  the popped match_kind to _record_capacity_suggestion_accept after the
  save returns. The dict the service sees no longer contains these
  fields, preserving the "audit only -- not persisted" contract.
- The V1.5 frontend (next commit) will ship these fields on the wire;
  until then the counter reads zero, which is the correct baseline.

suggest_capacity refactor:
- Inner body extracted to _suggest_capacity_inner so the public
  function can time end-to-end and emit requests_total + latency_ms
  exactly once per completed call. ValueError paths still raise and
  are not recorded: client-shape errors must not pollute SLO ratios,
  so the recorder fires only when a CapacitySuggestionResult returns.

Visibility env flag (CAPACITY_VISIBILITY_ENABLED):
- Already declared in consts/const.py (default true) and consumed by
  get_capacity_coverage. Confirmed wired end-to-end; no code change
  needed here. The flag stays the developer-level rollback lever per
  W11 spec; tenant_config_t overlay remains a follow-up.

Cross-tenant isolation test (spec L312-322):
- test_get_capacity_coverage_cross_tenant_isolation routes mocked
  get_model_records by tenant_id and asserts each tenant only sees
  its own bare rows in both bare_models[] and total_llm_vlm. Closes
  the spec's required "tenant B row must not appear in tenant A's
  response" coverage.

Test coverage added:
- Cross-tenant isolation for /capacity-coverage.
- pop_capacity_accept_signal extraction + dict mutation contract.
- accept_total OTel-optional no-op + label-cardinality (lower-cased
  provider) wiring.
- suggest_capacity records requests_total + latency_ms on catalog
  match, on "none" with provider fallback to "unknown", does NOT
  record on ValueError, and runs cleanly when instruments are None.
- _resolve_input_budget records dispatch_profile_hit_total only when
  capability_profile_version is non-null; recorder no-op when counter
  is None.

Total: 8 files, +527 lines. All targeted unit suites pass
(test_model_capacity_suggestion_service 16/16,
test_model_management_service 70/70,
test_create_agent_info 174/174).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 backend/agents/create_agent_info.py           |  39 +++++
 backend/apps/model_managment_app.py           |  12 ++
 backend/consts/model.py                       |   6 +
 .../model_capacity_suggestion_service.py      |  98 +++++++++++++
 backend/services/model_management_service.py  |  59 ++++++++
 test/backend/agents/test_create_agent_info.py |  71 ++++++++++
 .../test_model_capacity_suggestion_service.py | 108 ++++++++++++++
 .../services/test_model_management_service.py | 134 ++++++++++++++++++
 8 files changed, 527 insertions(+)

diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
index c81306fc9..cf790654d 100644
--- a/backend/agents/create_agent_info.py
+++ b/backend/agents/create_agent_info.py
@@ -85,6 +85,43 @@
 _CAPACITY_WARNING_LOCK = threading.Lock()
 
 
+# W11 spec line 710: emitted every time _resolve_input_budget resolves a row
+# whose dispatch-time capability_profile_version is non-null (i.e. the W1
+# exact catalog lookup succeeded). Combined with
+# model_capacity_suggestion_accept_total at save time gives the SLO ratio
+# "95% of accepted catalog suggestions produce the expected runtime profile".
+# Guarded so a missing OpenTelemetry runtime never breaks agent startup.
+try:
+    from opentelemetry import metrics as _otel_metrics
+
+    _capacity_dispatch_meter = _otel_metrics.get_meter(__name__)
+    _capacity_dispatch_profile_hit_total = _capacity_dispatch_meter.create_counter(
+        name="model_capacity_suggestion_dispatch_profile_hit_total",
+        description=(
+            "Count of agent dispatches where the resolved W1 capacity "
+            "snapshot reports a non-null capability_profile_version "
+            "(i.e. the runtime profile match succeeded). Labelled by "
+            "provider."
+        ),
+        unit="dispatches",
+    )
+except Exception:  # pragma: no cover - OTel is optional at runtime
+    _capacity_dispatch_profile_hit_total = None
+
+
+def _record_dispatch_profile_hit(provider: Optional[str]) -> None:
+    """Emit dispatch_profile_hit_total for one successful runtime profile match."""
+    if _capacity_dispatch_profile_hit_total is None:
+        return
+    try:
+        _capacity_dispatch_profile_hit_total.add(
+            1,
+            {"provider": (provider or "unknown").lower()},
+        )
+    except Exception:  # pragma: no cover - never break agent run for telemetry
+        pass
+
+
 def _operator_overrides_from_model_info(model_info: Optional[dict]) -> dict:
     """Extract the W1 operator-override fields from a model_record_t row."""
     if not isinstance(model_info, dict):
@@ -223,6 +260,8 @@ def _resolve_input_budget(
             snapshot.capability_profile_version,
             snapshot.fingerprint,
         )
+        if snapshot.capability_profile_version:
+            _record_dispatch_profile_hit(provider)
         return (
             snapshot.provider_input_limit_tokens,
             _capacity_snapshot_for_monitoring(snapshot),
diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py
index 634726d66..44d6bfba0 100644
--- a/backend/apps/model_managment_app.py
+++ b/backend/apps/model_managment_app.py
@@ -55,6 +55,8 @@
     list_llm_models_for_tenant,
     list_models_for_admin,
     get_capacity_coverage,
+    pop_capacity_accept_signal,
+    _record_capacity_suggestion_accept,
 )
 from utils.auth_utils import get_current_user_id
 
@@ -136,9 +138,14 @@ async def create_model(request: ModelRequest, authorization: Optional[str] = Hea
     try:
         user_id, tenant_id = get_current_user_id(authorization)
         model_data = request.model_dump()
+        accept_signal = pop_capacity_accept_signal(model_data)
         logger.debug(
             f"Start to create model, user_id: {user_id}, tenant_id: {tenant_id}")
         await create_model_for_tenant(user_id, tenant_id, model_data)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model created successfully"
         })
@@ -301,7 +308,12 @@ async def update_single_model(
     """
     try:
         user_id, tenant_id = get_current_user_id(authorization)
+        accept_signal = pop_capacity_accept_signal(request)
         await update_single_model_for_tenant(user_id, tenant_id, display_name, request)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.get("model_factory")
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model updated successfully"
         })
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 39f577a98..4ed9e8981 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -146,6 +146,12 @@ class ModelRequest(BaseModel):
     tokenizer_family: Optional[str] = None
     capacity_source: Optional[str] = None
     capability_profile_version: Optional[str] = None
+    # W11 accept-signal fields (audit/metrics only — never persisted). Sent by
+    # the frontend when the operator clicks "Use suggestion" and saves; the
+    # app layer pops them before the dict reaches the service/DB layer and
+    # forwards them to model_capacity_suggestion_accept_total.
+    accepted_suggestion_match_kind: Optional[str] = None
+    accepted_capability_profile_version: Optional[str] = None
 
 
 class CapacitySuggestionFields(BaseModel):
diff --git a/backend/services/model_capacity_suggestion_service.py b/backend/services/model_capacity_suggestion_service.py
index 723f0fd8e..8fa9b2063 100644
--- a/backend/services/model_capacity_suggestion_service.py
+++ b/backend/services/model_capacity_suggestion_service.py
@@ -1,10 +1,79 @@
+import logging
 import re
+import time
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Mapping, Optional
 
 from consts.const import CAPACITY_SUGGESTION_ENABLED
 
+logger = logging.getLogger(__name__)
+
+# OpenTelemetry instruments for W11 catalog match observability.
+# Spec lines 706-708. Guarded the same way as the SDK monitor module: if
+# OpenTelemetry is not installed (some deployments run without it), the
+# instruments are None and the recording becomes a no-op.
+try:
+    from opentelemetry import metrics as _otel_metrics
+
+    _suggestion_meter = _otel_metrics.get_meter(__name__)
+    _capacity_suggestion_requests_total = _suggestion_meter.create_counter(
+        name="model_capacity_suggestion_requests_total",
+        description=(
+            "Count of capacity-suggestion service invocations, labelled by "
+            "match_kind, model_type, and inferred provider. Drives the SLO "
+            "'at least 70% of new manual-add LLM rows produce match_kind "
+            "!= none' (W11 spec)."
+        ),
+        unit="requests",
+    )
+    _capacity_suggestion_latency_ms = _suggestion_meter.create_histogram(
+        name="model_capacity_suggestion_latency_ms",
+        description=(
+            "End-to-end latency of suggest_capacity, labelled by match_kind "
+            "and provider. Used to verify provider-discovery p95 stays under "
+            "the model-add latency budget (W11 spec)."
+        ),
+        unit="ms",
+    )
+except Exception:  # pragma: no cover - OTel is optional at runtime
+    _capacity_suggestion_requests_total = None
+    _capacity_suggestion_latency_ms = None
+
+
+def _record_suggestion_request(
+    match_kind: str,
+    provider: Optional[str],
+    model_type: Optional[str],
+    duration_ms: float,
+) -> None:
+    """Emit the requests_total counter and latency_ms histogram for one call.
+
+    Recording never raises -- a broken telemetry stack must not break the
+    suggestion path.
+    """
+    safe_provider = (provider or "unknown").lower()
+    if _capacity_suggestion_requests_total is not None:
+        try:
+            _capacity_suggestion_requests_total.add(
+                1,
+                {
+                    "match_kind": match_kind,
+                    "model_type": (model_type or "unknown").lower(),
+                    "provider": safe_provider,
+                },
+            )
+        except Exception:  # pragma: no cover
+            pass
+    if _capacity_suggestion_latency_ms is not None:
+        try:
+            _capacity_suggestion_latency_ms.record(
+                duration_ms,
+                {"match_kind": match_kind, "provider": safe_provider},
+            )
+        except Exception:  # pragma: no cover
+            pass
+
 
 ProfileKey = tuple[str, str]
 CapabilityProfileLike = Any
@@ -233,6 +302,35 @@ def suggest_capacity(
     api_key: Optional[str] = None,
     catalog: Optional[Mapping[ProfileKey, CapabilityProfileLike]] = None,
     enabled: bool = CAPACITY_SUGGESTION_ENABLED,
+) -> CapacitySuggestionResult:
+    start_perf = time.perf_counter()
+    result = _suggest_capacity_inner(
+        model_name=model_name,
+        base_url=base_url,
+        provider_hint=provider_hint,
+        model_type=model_type,
+        api_key=api_key,
+        catalog=catalog,
+        enabled=enabled,
+    )
+    duration_ms = (time.perf_counter() - start_perf) * 1000.0
+    _record_suggestion_request(
+        match_kind=result.match_kind.value,
+        provider=result.suggested_provider,
+        model_type=model_type,
+        duration_ms=duration_ms,
+    )
+    return result
+
+
+def _suggest_capacity_inner(
+    model_name: str,
+    base_url: Optional[str],
+    provider_hint: Optional[str],
+    model_type: Optional[str],
+    api_key: Optional[str],
+    catalog: Optional[Mapping[ProfileKey, CapabilityProfileLike]],
+    enabled: bool,
 ) -> CapacitySuggestionResult:
     del api_key
 
diff --git a/backend/services/model_management_service.py b/backend/services/model_management_service.py
index a8f28e133..3a1470801 100644
--- a/backend/services/model_management_service.py
+++ b/backend/services/model_management_service.py
@@ -72,8 +72,22 @@
         ),
         unit="errors",
     )
+    # W11 spec line 709: emitted when the operator clicks "Use suggestion" and
+    # saves. Combined with model_capacity_suggestion_dispatch_profile_hit_total
+    # at /agent/run, gives the "95% of accepted catalog suggestions produce
+    # the expected runtime capability_profile_version" SLO ratio.
+    _capacity_suggestion_accept_total = _capacity_suggestion_meter.create_counter(
+        name="model_capacity_suggestion_accept_total",
+        description=(
+            "Count of model save events that carried an accepted W11 "
+            "capacity suggestion, labelled by match_kind and provider. "
+            "Audit signal only -- not persisted to model_record_t."
+        ),
+        unit="accepts",
+    )
 except Exception:  # pragma: no cover - OTel is optional at runtime
     _capacity_suggestion_coverage_errors_total = None
+    _capacity_suggestion_accept_total = None
 
 
 # Per-process dedup for the warning log emitted when the catalog-matcher
@@ -101,6 +115,51 @@ def _record_capacity_coverage_error(model_id: Optional[Any], exc: Exception) ->
         pass
 
 
+# Wire-only fields the frontend ships when the operator clicks "Use suggestion"
+# and saves. They are audit/metrics input; runtime never reads them. The app
+# layer pops them off the request payload via `pop_capacity_accept_signal` so
+# the service/DB layer never sees them.
+_ACCEPT_SIGNAL_KEYS = (
+    "accepted_suggestion_match_kind",
+    "accepted_capability_profile_version",
+)
+
+
+def pop_capacity_accept_signal(payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Strip audit-only accept-signal fields from a save payload and return them.
+
+    Returns the popped values as {'match_kind': ..., 'capability_profile_version': ...}
+    when match_kind is present, else None. Callers forward the dict to
+    `_record_capacity_suggestion_accept` once the model_factory is known.
+    """
+    if not isinstance(payload, dict):
+        return None
+    popped = {key: payload.pop(key, None) for key in _ACCEPT_SIGNAL_KEYS}
+    match_kind = popped.get("accepted_suggestion_match_kind")
+    if not match_kind:
+        return None
+    return {
+        "match_kind": match_kind,
+        "capability_profile_version": popped.get("accepted_capability_profile_version"),
+    }
+
+
+def _record_capacity_suggestion_accept(match_kind: str, provider: Optional[str]) -> None:
+    """Emit the accept_total counter for one operator-accepted suggestion save."""
+    if _capacity_suggestion_accept_total is None:
+        return
+    try:
+        _capacity_suggestion_accept_total.add(
+            1,
+            {
+                "match_kind": match_kind,
+                "provider": (provider or "unknown").lower(),
+            },
+        )
+    except Exception:  # pragma: no cover - never break save for telemetry
+        pass
+
+
 def _has_display_name_conflict(existing_models: List[Dict[str, Any]], model_type: Optional[str]) -> bool:
     """Allow the three multimodal slots to share display names across slots."""
     if not existing_models:
diff --git a/test/backend/agents/test_create_agent_info.py b/test/backend/agents/test_create_agent_info.py
index b3eb54b1b..e2ca3d21b 100644
--- a/test/backend/agents/test_create_agent_info.py
+++ b/test/backend/agents/test_create_agent_info.py
@@ -5683,3 +5683,74 @@ def test_merge_with_empty_override_params(self):
         tool_record = {"params": [{"name": "param1", "default": "default1"}]}
         result = _merge_tool_params(tool_record, {})
         assert result == {"param1": "default1"}
+
+
+# ---------------------------------------------------------------------------
+# W11 V1.5 - dispatch_profile_hit_total metric wiring
+# ---------------------------------------------------------------------------
+
+
+class TestDispatchProfileHitMetric:
+    """Spec L710: every successful capacity resolve where the resolved snapshot
+    carries a non-null capability_profile_version increments the dispatch
+    profile-hit counter. Combined with accept_total at save time, this gives
+    the 95% SLO 'accepted catalog suggestions produce expected runtime
+    profile' (W11 spec L1162-1163).
+    """
+
+    def test_profile_hit_recorded_when_snapshot_carries_profile_version(self):
+        counter = MagicMock()
+        snapshot = MockModelCapacitySnapshot(
+            model_name="gpt-4o",
+            capability_profile_version="openai/gpt-4o@1",
+        )
+        with patch.object(
+            create_agent_info_module,
+            "_capacity_dispatch_profile_hit_total",
+            counter,
+        ), patch.object(
+            create_agent_info_module,
+            "resolve_capacity",
+            return_value=snapshot,
+        ):
+            create_agent_info_module._resolve_input_budget(
+                {"model_factory": "openai", "model_name": "gpt-4o"}
+            )
+
+        counter.add.assert_called_once_with(1, {"provider": "openai"})
+
+    def test_profile_hit_not_recorded_without_profile_version(self):
+        """An operator-configured row (no catalog match) resolves successfully
+        but `capability_profile_version` stays None. Counter must not fire --
+        otherwise the SLO ratio is inflated by non-catalog dispatches.
+        """
+        counter = MagicMock()
+        snapshot = MockModelCapacitySnapshot(
+            model_name="custom-local",
+            capability_profile_version=None,
+        )
+        with patch.object(
+            create_agent_info_module,
+            "_capacity_dispatch_profile_hit_total",
+            counter,
+        ), patch.object(
+            create_agent_info_module,
+            "resolve_capacity",
+            return_value=snapshot,
+        ):
+            create_agent_info_module._resolve_input_budget(
+                {"model_factory": "custom", "model_name": "custom-local"}
+            )
+
+        counter.add.assert_not_called()
+
+    def test_recorder_no_op_when_counter_disabled(self):
+        """OTel-optional guard: the helper must not raise when the counter
+        is None so agent dispatch works in deployments without OpenTelemetry.
+        """
+        with patch.object(
+            create_agent_info_module,
+            "_capacity_dispatch_profile_hit_total",
+            None,
+        ):
+            create_agent_info_module._record_dispatch_profile_hit("openai")
diff --git a/test/backend/services/test_model_capacity_suggestion_service.py b/test/backend/services/test_model_capacity_suggestion_service.py
index fc6ffdc67..6845ca5b7 100644
--- a/test/backend/services/test_model_capacity_suggestion_service.py
+++ b/test/backend/services/test_model_capacity_suggestion_service.py
@@ -7,12 +7,15 @@
 if backend_dir not in sys.path:
     sys.path.append(backend_dir)
 
+from unittest import mock
+
 from services.model_capacity_suggestion_service import (
     CapacitySuggestionMatchKind,
     pick_provider,
     pick_provider_from_base_url,
     suggest_capacity,
 )
+import services.model_capacity_suggestion_service as suggestion_module
 
 
 class Profile:
@@ -179,3 +182,108 @@ def test_pick_provider_from_base_url_dashscope_wins_over_aliyuncs():
     # Both substrings present; order in HOST_PROVIDER_PATTERNS makes
     # dashscope win, which is the correct (more-specific) routing.
     assert pick_provider_from_base_url("https://dashscope.aliyuncs.com/v1") == "dashscope"
+
+
+# ---------------------------------------------------------------------------
+# W11 V1.5 - request/latency metrics wiring
+# ---------------------------------------------------------------------------
+
+
+def test_suggest_capacity_records_requests_and_latency_on_catalog_match():
+    """Spec L706-708: every suggest_capacity invocation records one entry in
+    requests_total (labelled by match_kind, model_type, provider) and one
+    sample in latency_ms (labelled by match_kind, provider). A successful
+    catalog match must fire the recorder exactly once with the right labels.
+    """
+    counter = mock.MagicMock()
+    histogram = mock.MagicMock()
+
+    with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \
+            mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram):
+        result = suggest_capacity(
+            model_name="gpt-4o",
+            base_url="https://api.openai.com/v1",
+            model_type="llm",
+            catalog=CATALOG,
+        )
+
+    assert result.match_kind == CapacitySuggestionMatchKind.CATALOG_EXACT
+    counter.add.assert_called_once()
+    add_args = counter.add.call_args
+    assert add_args.args[0] == 1
+    assert add_args.args[1] == {
+        "match_kind": "catalog_exact",
+        "model_type": "llm",
+        "provider": "openai",
+    }
+    histogram.record.assert_called_once()
+    record_args = histogram.record.call_args
+    assert record_args.args[0] >= 0  # non-negative duration in ms
+    assert record_args.args[1] == {
+        "match_kind": "catalog_exact",
+        "provider": "openai",
+    }
+
+
+def test_suggest_capacity_records_none_match_with_unknown_provider_label():
+    """When no provider can be inferred the result.suggested_provider is None
+    and the metric labels fall back to provider='unknown'. Cardinality stays
+    bounded -- we never emit raw user input as a label.
+    """
+    counter = mock.MagicMock()
+    histogram = mock.MagicMock()
+
+    with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \
+            mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram):
+        result = suggest_capacity(
+            model_name="unknown-local-model",
+            base_url="http://localhost:8000/v1",
+            model_type="llm",
+            catalog=CATALOG,
+        )
+
+    assert result.match_kind == CapacitySuggestionMatchKind.NONE
+    assert counter.add.call_args.args[1] == {
+        "match_kind": "none",
+        "model_type": "llm",
+        "provider": "unknown",
+    }
+    assert histogram.record.call_args.args[1] == {
+        "match_kind": "none",
+        "provider": "unknown",
+    }
+
+
+def test_suggest_capacity_validation_error_does_not_record():
+    """A ValueError (model_name required / too long) is a client-shape error
+    raised before the matcher runs. It must not increment requests_total --
+    that counter is for completed evaluations only, and SLO ratios would
+    otherwise be skewed by client input mistakes.
+    """
+    counter = mock.MagicMock()
+    histogram = mock.MagicMock()
+
+    with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", counter), \
+            mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", histogram), \
+            pytest.raises(ValueError):
+        suggest_capacity(model_name="", catalog=CATALOG)
+
+    counter.add.assert_not_called()
+    histogram.record.assert_not_called()
+
+
+def test_suggest_capacity_no_op_when_instruments_disabled():
+    """Same OTel-optional guard as the other recorders: if the instruments
+    are None (OTel not installed in this deployment), suggest_capacity still
+    returns the correct result without raising.
+    """
+    with mock.patch.object(suggestion_module, "_capacity_suggestion_requests_total", None), \
+            mock.patch.object(suggestion_module, "_capacity_suggestion_latency_ms", None):
+        result = suggest_capacity(
+            model_name="gpt-4o",
+            base_url="https://api.openai.com/v1",
+            model_type="llm",
+            catalog=CATALOG,
+        )
+
+    assert result.match_kind == CapacitySuggestionMatchKind.CATALOG_EXACT
diff --git a/test/backend/services/test_model_management_service.py b/test/backend/services/test_model_management_service.py
index 9ea88306a..4a15e987d 100644
--- a/test/backend/services/test_model_management_service.py
+++ b/test/backend/services/test_model_management_service.py
@@ -2023,3 +2023,137 @@ def test_record_capacity_coverage_error_no_op_when_counter_disabled():
     with mock.patch.object(svc, "_capacity_suggestion_coverage_errors_total", None):
         # Should not raise.
         svc._record_capacity_coverage_error(7, RuntimeError("boom"))
+
+
+# ---------------------------------------------------------------------------
+# W11 V1.5 - cross-tenant isolation and accept-signal metrics
+# ---------------------------------------------------------------------------
+
+
+def test_get_capacity_coverage_cross_tenant_isolation():
+    """Spec L312-322: a bare row in tenant B must not appear in tenant A's
+    response. The service layer relies on `get_model_records(None, tenant_id)`
+    for the scoping; this test verifies the contract by routing records by
+    tenant_id at the mock boundary and asserting both tenants see only their
+    own bare rows.
+    """
+    svc = import_svc()
+
+    tenant_a_rows = [
+        {
+            "model_id": 11,
+            "model_repo": "",
+            "model_name": "tenant-a-bare",
+            "model_factory": "OpenAI-API-Compatible",
+            "model_type": "llm",
+            "context_window_tokens": None,
+            "max_output_tokens": None,
+            "max_tokens": 8192,
+            "base_url": "https://api.tenant-a.example.com/v1",
+        },
+    ]
+    tenant_b_rows = [
+        {
+            "model_id": 22,
+            "model_repo": "",
+            "model_name": "tenant-b-bare",
+            "model_factory": "OpenAI-API-Compatible",
+            "model_type": "llm",
+            "context_window_tokens": None,
+            "max_output_tokens": None,
+            "max_tokens": 16384,
+            "base_url": "https://api.tenant-b.example.com/v1",
+        },
+    ]
+
+    def get_records_by_tenant(_filters, tenant_id):
+        if tenant_id == "tenant-a":
+            return list(tenant_a_rows)
+        if tenant_id == "tenant-b":
+            return list(tenant_b_rows)
+        return []
+
+    with mock.patch.object(svc, "get_model_records", side_effect=get_records_by_tenant), \
+            mock.patch.object(svc, "_capacity_suggestion_available", return_value=False):
+        result_a = svc.get_capacity_coverage("tenant-a")
+        result_b = svc.get_capacity_coverage("tenant-b")
+
+    assert [m["model_id"] for m in result_a["bare_models"]] == [11]
+    assert [m["model_id"] for m in result_b["bare_models"]] == [22]
+    # Neither tenant must see the other's model_id anywhere in its payload.
+    assert all(m["model_id"] != 22 for m in result_a["bare_models"])
+    assert all(m["model_id"] != 11 for m in result_b["bare_models"])
+    assert result_a["total_llm_vlm"] == 1
+    assert result_b["total_llm_vlm"] == 1
+
+
+def test_pop_capacity_accept_signal_extracts_and_strips():
+    """The frontend ships accepted_suggestion_match_kind /
+    accepted_capability_profile_version on save. Spec L500-502 marks them
+    audit-only; the app layer must strip them before the dict reaches the
+    DB write, and return the popped values so the recorder can label the
+    counter.
+    """
+    svc = import_svc()
+
+    payload = {
+        "model_name": "gpt-4o",
+        "model_factory": "openai",
+        "context_window_tokens": 128000,
+        "max_output_tokens": 16384,
+        "capacity_source": "operator",
+        "accepted_suggestion_match_kind": "catalog_exact",
+        "accepted_capability_profile_version": "openai/gpt-4o@1",
+    }
+
+    signal = svc.pop_capacity_accept_signal(payload)
+
+    assert signal == {
+        "match_kind": "catalog_exact",
+        "capability_profile_version": "openai/gpt-4o@1",
+    }
+    # Audit fields must not leak through to DB write.
+    assert "accepted_suggestion_match_kind" not in payload
+    assert "accepted_capability_profile_version" not in payload
+    # Real model fields are untouched.
+    assert payload["model_name"] == "gpt-4o"
+    assert payload["context_window_tokens"] == 128000
+
+
+def test_pop_capacity_accept_signal_returns_none_without_match_kind():
+    svc = import_svc()
+
+    # Plain save: no accept fields at all.
+    assert svc.pop_capacity_accept_signal({"model_name": "x"}) is None
+
+    # match_kind missing but version present -> still treated as "no accept"
+    # since match_kind is the metric-label key and version alone is meaningless.
+    only_version = {"accepted_capability_profile_version": "x/y@1"}
+    assert svc.pop_capacity_accept_signal(only_version) is None
+    # The orphan version field is still stripped so it cannot reach the DB.
+    assert "accepted_capability_profile_version" not in only_version
+
+
+def test_record_capacity_suggestion_accept_no_op_when_counter_disabled():
+    """Same OTel-optional guard as the coverage-errors recorder."""
+    svc = import_svc()
+
+    with mock.patch.object(svc, "_capacity_suggestion_accept_total", None):
+        # Should not raise.
+        svc._record_capacity_suggestion_accept("catalog_exact", "openai")
+
+
+def test_record_capacity_suggestion_accept_labels_counter():
+    """When the counter is wired, the recorder forwards match_kind and a
+    lower-cased provider label so dashboards can compute per-provider
+    accept rates without inconsistent casing.
+    """
+    svc = import_svc()
+    counter = mock.MagicMock()
+
+    with mock.patch.object(svc, "_capacity_suggestion_accept_total", counter):
+        svc._record_capacity_suggestion_accept("catalog_fuzzy", "DashScope")
+
+    counter.add.assert_called_once_with(
+        1, {"match_kind": "catalog_fuzzy", "provider": "dashscope"}
+    )

From e442a551506e6cdd8ee772b102a1b738a4b8e5ee Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 10:04:20 +0800
Subject: [PATCH 04/31] feat(w11): V1.5 bare-capacity tag + preset selector +
 permission helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mark bare-capacity LLM/VLM rows in the Manage Models list with the
existing yellow "缺容量" / "Missing capacity" tag. Keep the
aggregation banner on the Models page as the entry-point signal, but
rewrite its copy to hand off to the per-row tag instead of duplicating
per-row UI. Auto-fire /suggest-capacity from inside ModelEditDialog
whenever it opens on a bare-capacity row, regardless of how the dialog
was opened. Expose preset selectors on the capacity panel and ship the
model-management permission helper for V1.5 surfaces #2/#3.

Per spec line numbers cross-referenced inline:

#1 -- per-row tag as visual indicator (spec L143-167):
- Both badge sites in ModelDeleteDialog (provider-browser row L1507+
  and added-model row L1652+) retain the existing yellow text tag
  (bg-yellow-100 border-yellow-200 text-yellow-700). We considered a
  warning-triangle icon and a separate click-target on the badge,
  then rolled both back: "缺容量"/"Missing capacity" reads as a
  status at the same glance an icon would, while the existing row
  onClick already opens the edit dialog -- so a button on the badge
  added complexity that ModelEditDialog now subsumes internally.
- ModelEditDialog derives `isBareCapacityModel` from the loaded model
  (context_window_tokens or max_output_tokens null) and a single
  useEffect auto-fires handleSuggestCapacity once on open when the
  model is bare, the suggestion switch is on, and the form fields
  needed for the call are present. Any entry path -- row click,
  future gear-icon shortcut, deep link -- gets the same affordance,
  so the operator never has to also click "Check" on a bare row.
- The deprecated model.dialog.capacityCoverage.{tag, warning,
  warningWithSuggestion} keys are dropped from en + zh in favour of
  a single spec-namespaced model.list.capacityWarning.tag key. No
  per-suggestion variants because the tag is purely a state label;
  the suggestion handoff happens inside the edit dialog where the
  green/info Alert carries that nuance instead.

#5 -- aggregation banner kept as entry-point signal, copy retuned:
- The summary Alert on the Models page (modelConfig.tsx) stays --
  per-row tags live inside ModelDeleteDialog which is one click
  away. Without the banner, users on the Models page have no signal
  that any row needs attention.
- Description copy rewritten so the banner points at the new per-row
  flow: "Click Manage, then click the warning icon on each affected
  row to repair." Removes the redundant "edit a marked model"
  wording.
- Warning copy adds an "output token cap is not enforced" clause so
  the consequence (not just the symptom) is visible at a glance.

#4 -- permission helper (spec L167-178):
- frontend/lib/auth.ts gains canManageModels(role, isSpeedMode).
  Allowed roles: SU, ADMIN, DEV, SPEED. USER is excluded so regular
  agent authors see read-only notices rather than dead repair links.
  ASSET_OWNER is excluded -- model records are tenant scope, not
  asset-admin scope. Speed mode bypasses for the single-user dev
  experience, mirroring how other surfaces (chatHeader, etc.) treat it.
- The banner and tag in this commit both live on /models, which is
  already route-gated so that only non-USER roles can reach it; no
  in-place gate is needed yet. The helper exists so the V1.5
  agent-edit-selector commit (#2) and the dashboard widget commit (#3)
  consume the same primitive instead of reinventing role parsing.

#8 -- preset selectors for context_window / output_reserve /
max_output (spec L757-790):
- ModelCapacityFields.tsx gains two preset arrays mirroring spec
  L767-790 verbatim (9 context-window values 4K..1M, 7 output
  values 256..16K). The context-window list is identical to
  MAX_TOKEN_OPTIONS in ModelMaxTokensInput; kept as a local
  constant rather than cross-importing so the two surfaces stay
  independently editable.
- renderNumberInput gains an optional `presetOptions` parameter.
  When the field has no catalog suggestion yet (per spec L762-765
  "when no suggestion exists ... render as preset-capable selector"),
  the input renders as AutoComplete with the preset list; otherwise
  it stays a plain numeric Input so an explicit catalog value
  doesn't get visually buried behind dropdown chrome.
- Wired for contextWindowTokens, maxOutputTokens, and
  defaultOutputReserveTokens. maxOutputTokens reuses the 256..16K
  list so operators see the same dropdown choices they already see
  for the reserve field; values above 16K (e.g. GPT-4.1's 32K cap,
  GLM-5.1's 131K cap) still work via free-text typing through
  AutoComplete. maxInputTokens keeps plain numeric input -- it is
  an explicit operator-side limit, not common-preset land.
- validateCapacityForm continues to enforce positive integers
  downstream.

i18n delta summary:
- DROPPED: model.dialog.capacityCoverage.tag,
  model.dialog.capacityCoverage.warning,
  model.dialog.capacityCoverage.warningWithSuggestion
- ADDED: model.list.capacityWarning.tag (single state label, no
  tooltip variants)
- REVISED (kept): modelConfig.capacityCoverage.warning + description
  with new entry-point copy; .manage button label unchanged.

Net: 6 files, +192/-56. Typecheck clean (only pre-existing
.next/types/validator.ts noise from the unrelated left-nav rename).
No backend wire change.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../components/model/ModelCapacityFields.tsx  | 104 +++++++++++++++---
 .../components/model/ModelDeleteDialog.tsx    |  34 ++----
 .../components/model/ModelEditDialog.tsx      |  60 +++++++++-
 frontend/lib/auth.ts                          |  30 +++++
 frontend/public/locales/en/common.json        |  10 +-
 frontend/public/locales/zh/common.json        |  10 +-
 6 files changed, 192 insertions(+), 56 deletions(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
index efe4c8e4a..edfbea597 100644
--- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
@@ -1,8 +1,41 @@
-import { Alert, Button, Input, Tag, Tooltip } from "antd";
+import { Alert, AutoComplete, Button, Input, Tag, Tooltip } from "antd";
 import { useTranslation } from "react-i18next";
 
 import type { CapacitySuggestion } from "@/types/modelConfig";
 
+// W11 spec L767-790. Common token-count presets surfaced as a fallback
+// preset selector when no catalog suggestion populates the field. The
+// values mirror MAX_TOKEN_OPTIONS in ModelMaxTokensInput so the two
+// surfaces (legacy max_tokens batch input and capacity panel) offer
+// the same dropdown choices. Operators can still type a custom value;
+// AutoComplete accepts free numeric input.
+const CONTEXT_WINDOW_PRESET_OPTIONS = [
+  { value: "4096", label: "4K / 4,096" },
+  { value: "8192", label: "8K / 8,192" },
+  { value: "16384", label: "16K / 16,384" },
+  { value: "32768", label: "32K / 32,768" },
+  { value: "65536", label: "64K / 65,536" },
+  { value: "131072", label: "128K / 131,072" },
+  { value: "204800", label: "200K / 204,800" },
+  { value: "262144", label: "256K / 262,144" },
+  { value: "1048576", label: "1M / 1,048,576" },
+];
+
+// Shared by both default_output_reserve_tokens and max_output_tokens. The
+// reserve list maps to spec L782-790 verbatim; reusing it for max_output
+// gives operators the same dropdown choices they already see for the
+// reserve field. Values above 16K (e.g. GPT-4.1's 32K cap, GLM-5.1's
+// 131K cap) still work via free-text typing through AutoComplete.
+const OUTPUT_RESERVE_PRESET_OPTIONS = [
+  { value: "256", label: "256" },
+  { value: "512", label: "512" },
+  { value: "1024", label: "1K / 1,024" },
+  { value: "2048", label: "2K / 2,048" },
+  { value: "4096", label: "4K / 4,096" },
+  { value: "8192", label: "8K / 8,192" },
+  { value: "16384", label: "16K / 16,384" },
+];
+
 export type CapacitySource =
   | "operator"
   | "profile"
@@ -276,18 +309,45 @@ export const ModelCapacityFields = ({
       }
     : {};
 
+  // Per W11 spec L762-765, the context-window and output-reserve fields
+  // expose a preset selector when no catalog suggestion is available. The
+  // suggestion-set check is per-field: if the suggestion populated this
+  // exact field, plain numeric input avoids burying the suggested value
+  // behind dropdown chrome. Otherwise show the preset list to help
+  // operators avoid typos like "1280000" instead of "128000".
+  const suggestionFields = suggestion?.suggestions ?? null;
+  const fieldHasSuggestion = (
+    field: keyof ModelCapacityFormState
+  ): boolean => {
+    if (!suggestionFields) return false;
+    const suggested = (suggestionFields as Record<string, unknown>)[field];
+    return suggested != null && suggested !== "";
+  };
+
   const renderNumberInput = (
     field: keyof ModelCapacityFormState,
     labelKey: string,
-    tooltipKey: string
-  ) => (
-    <div>
-      <label className="block mb-1 text-sm font-medium text-gray-700">
-        <Tooltip title={t(tooltipKey)}>
-          <span>{t(labelKey)}</span>
-        </Tooltip>
-        {requiredSet.has(field) && <span className="text-red-500 ml-1">*</span>}
-      </label>
+    tooltipKey: string,
+    presetOptions?: { value: string; label: string }[]
+  ) => {
+    const showPreset = presetOptions && !fieldHasSuggestion(field);
+    const inputControl = showPreset ? (
+      <AutoComplete
+        className="w-full"
+        value={value[field]}
+        options={presetOptions}
+        placeholder={defaultPlaceholders[field]}
+        onChange={(next) => onChange(field, String(next ?? ""))}
+        filterOption={(input, option) =>
+          String(option?.label ?? "")
+            .toLowerCase()
+            .includes(input.toLowerCase()) ||
+          String(option?.value ?? "").includes(input)
+        }
+      >
+        <Input inputMode="numeric" pattern="[0-9]*" />
+      </AutoComplete>
+    ) : (
       <Input
         type="number"
         min="1"
@@ -295,8 +355,19 @@ export const ModelCapacityFields = ({
         placeholder={defaultPlaceholders[field]}
         onChange={(event) => onChange(field, event.target.value)}
       />
-    </div>
-  );
+    );
+    return (
+      <div>
+        <label className="block mb-1 text-sm font-medium text-gray-700">
+          <Tooltip title={t(tooltipKey)}>
+            <span>{t(labelKey)}</span>
+          </Tooltip>
+          {requiredSet.has(field) && <span className="text-red-500 ml-1">*</span>}
+        </label>
+        {inputControl}
+      </div>
+    );
+  };
 
   const content = (
     <div className="space-y-3">
@@ -421,7 +492,8 @@ export const ModelCapacityFields = ({
         {renderNumberInput(
           "contextWindowTokens",
           "model.dialog.capacity.contextWindowTokens",
-          "model.dialog.capacity.contextWindowTokens.tooltip"
+          "model.dialog.capacity.contextWindowTokens.tooltip",
+          CONTEXT_WINDOW_PRESET_OPTIONS
         )}
         {renderNumberInput(
           "maxInputTokens",
@@ -431,7 +503,8 @@ export const ModelCapacityFields = ({
         {renderNumberInput(
           "maxOutputTokens",
           "model.dialog.capacity.maxOutputTokens",
-          "model.dialog.capacity.maxOutputTokens.tooltip"
+          "model.dialog.capacity.maxOutputTokens.tooltip",
+          OUTPUT_RESERVE_PRESET_OPTIONS
         )}
         {/* defaultOutputReserveTokens is rendered in both add and edit modes
             so newly added rows do not silently fall back to the SDK default at
@@ -440,7 +513,8 @@ export const ModelCapacityFields = ({
         {renderNumberInput(
           "defaultOutputReserveTokens",
           "model.dialog.capacity.defaultOutputReserveTokens",
-          "model.dialog.capacity.defaultOutputReserveTokens.tooltip"
+          "model.dialog.capacity.defaultOutputReserveTokens.tooltip",
+          OUTPUT_RESERVE_PRESET_OPTIONS
         )}
       </div>
 
diff --git a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
index 48d54086c..3722fbe49 100644
--- a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
@@ -1504,20 +1504,10 @@ export const ModelDeleteDialog = ({
                           {String(providerModel.model_tag)}
                         </span>
                       )}
-                      {isBareCapacity && (
-                        <Tooltip
-                          title={
-                            hasSuggestion
-                              ? t(
-                                  "model.dialog.capacityCoverage.warningWithSuggestion"
-                                )
-                              : t("model.dialog.capacityCoverage.warning")
-                          }
-                        >
-                          <span className="ml-2 px-1.5 py-0.5 text-xs rounded bg-yellow-100 text-yellow-700 border border-yellow-200">
-                            {t("model.dialog.capacityCoverage.tag")}
-                          </span>
-                        </Tooltip>
+                      {isBareCapacity && existingModel && (
+                        <span className="ml-2 px-1.5 py-0.5 text-xs rounded bg-yellow-100 text-yellow-700 border border-yellow-200">
+                          {t("model.list.capacityWarning.tag")}
+                        </span>
                       )}
                     </div>
                     <div className="flex items-center space-x-2">
@@ -1650,19 +1640,9 @@ export const ModelDeleteDialog = ({
                           {model.displayName || model.name} ({model.name})
                         </div>
                         {isBareCapacity && (
-                          <Tooltip
-                            title={
-                              hasSuggestion
-                                ? t(
-                                    "model.dialog.capacityCoverage.warningWithSuggestion"
-                                  )
-                                : t("model.dialog.capacityCoverage.warning")
-                            }
-                          >
-                            <span className="mt-1 inline-flex w-fit px-1.5 py-0.5 text-xs rounded bg-yellow-100 text-yellow-700 border border-yellow-200">
-                              {t("model.dialog.capacityCoverage.tag")}
-                            </span>
-                          </Tooltip>
+                          <span className="mt-1 inline-flex w-fit px-1.5 py-0.5 text-xs rounded bg-yellow-100 text-yellow-700 border border-yellow-200">
+                            {t("model.list.capacityWarning.tag")}
+                          </span>
                         )}
                       </div>
                       <button
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index b4c2ac936..3cdba0521 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -1,4 +1,4 @@
-﻿import { useState, useEffect } from "react";
+﻿import { useState, useEffect, useRef } from "react";
 import { useTranslation } from "react-i18next";
 
 import { Alert, Modal, Select, Input, Button, Switch, App } from "antd";
@@ -91,6 +91,29 @@ export const ModelEditDialog = ({
     message: "",
   });
 
+  // Monotonic request token for /suggest-capacity. Incremented on every
+  // new call, dialog close, and model change; the async handler compares
+  // its captured token against the current ref before committing
+  // setState, so a stale qwen-for-row-A response cannot win over a fresh
+  // glm-for-row-B response when the user cancels A and immediately edits
+  // B (the original bug -- previous request was racing the new one and
+  // sometimes overwriting it after navigation).
+  const suggestionRequestRef = useRef(0);
+
+  // Reset capacity-related state every time the dialog closes. Without
+  // this, the next open render briefly shows the previous model's
+  // suggestion before the [model] effect overwrites it, and a slow
+  // in-flight response from the previous model can also overwrite the
+  // fresh model's correct result. The ref bump tells any pending
+  // handleSuggestCapacity to drop its response.
+  useEffect(() => {
+    if (isOpen) return;
+    suggestionRequestRef.current += 1;
+    setCapacitySuggestion(null);
+    setAcceptedCapacitySuggestion(null);
+    setCheckingCapacitySuggestion(false);
+  }, [isOpen]);
+
   useEffect(() => {
     if (model) {
       setForm({
@@ -184,6 +207,13 @@ export const ModelEditDialog = ({
       message.warning(t("model.dialog.capacity.suggestion.missingInput"));
       return;
     }
+    // Capture a token for this call. The [isOpen] reset effect and any
+    // subsequent handleSuggestCapacity invocation will bump the ref;
+    // when we receive our response we check the ref hasn't moved on. If
+    // it has -- the user cancelled and reopened a different model, or
+    // they clicked "Check" again with different inputs -- silently drop
+    // the response so it cannot overwrite the newer state.
+    const myToken = (suggestionRequestRef.current += 1);
     setCheckingCapacitySuggestion(true);
     try {
       const suggestion = await modelService.suggestCapacity({
@@ -193,19 +223,45 @@ export const ModelEditDialog = ({
         apiKey: form.apiKey.trim() || undefined,
         modelType: connectivityModelType,
       });
+      if (myToken !== suggestionRequestRef.current) return;
       setCapacitySuggestion(suggestion);
       if (!suggestion.suggestions) {
         setAcceptedCapacitySuggestion(null);
       }
     } catch (error) {
+      if (myToken !== suggestionRequestRef.current) return;
       setCapacitySuggestion(null);
       setAcceptedCapacitySuggestion(null);
       message.error(t("model.dialog.capacity.suggestion.failed"));
     } finally {
-      setCheckingCapacitySuggestion(false);
+      if (myToken === suggestionRequestRef.current) {
+        setCheckingCapacitySuggestion(false);
+      }
     }
   };
 
+  // W11 V1.5: when the dialog opens on a bare-capacity LLM/VLM row
+  // (per-row badge condition: context_window_tokens or max_output_tokens
+  // is null), auto-fire /suggest-capacity once so the operator does not
+  // have to also click "Check". The trigger is derived from `model`
+  // itself rather than a caller-supplied flag, so any entry path (row
+  // click, badge click, future gear-icon shortcut) gets the same
+  // affordance. No-op if the model already has capacity, the suggestion
+  // switch is off, or required form fields are missing at open time.
+  const isBareCapacityModel = Boolean(
+    model &&
+      supportsCapacityFields &&
+      (!model.contextWindowTokens || !model.maxOutputTokens)
+  );
+  useEffect(() => {
+    if (!isOpen || !isBareCapacityModel) return;
+    if (!capacitySuggestionEnabled) return;
+    if (!canSuggestCapacity()) return;
+    handleSuggestCapacity();
+    // Fire once per open; do not re-fire on re-render.
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [isOpen, isBareCapacityModel]);
+
   const isFormValid = () => {
     if (
       supportsCapacityFields &&
diff --git a/frontend/lib/auth.ts b/frontend/lib/auth.ts
index 330028bc3..0bd7f62fa 100644
--- a/frontend/lib/auth.ts
+++ b/frontend/lib/auth.ts
@@ -96,3 +96,33 @@ export function getEffectiveRoutePath(pathname: string): string {
   }
   return "/" + (segments.join("/") || "");
 }
+
+/**
+ * Roles whose users can manage the tenant's model catalog (add/edit/delete
+ * model records, fix bare-capacity rows). Used by W11 V1.5 bare-capacity
+ * surfaces (per-row badge in Manage Models, agent-edit selector warning,
+ * dashboard widget) to gate the "open repair" affordance per spec L167-178.
+ *
+ * USER is intentionally excluded: regular agent authors see a non-actionable
+ * notice telling them to ask an administrator, not a link they cannot follow.
+ * ASSET_OWNER manages assets (knowledge/agents) but not model records.
+ */
+const MODEL_MANAGEMENT_ROLES: ReadonlySet<string> = new Set([
+  USER_ROLES.SU,
+  USER_ROLES.ADMIN,
+  USER_ROLES.DEV,
+  USER_ROLES.SPEED,
+]);
+
+/**
+ * Return true when the given role can act on the W11 repair affordances.
+ * Speed-mode deployments bypass role gating (single-user dev experience).
+ */
+export function canManageModels(
+  role: string | undefined | null,
+  isSpeedMode = false
+): boolean {
+  if (isSpeedMode) return true;
+  if (!role) return false;
+  return MODEL_MANAGEMENT_ROLES.has(role);
+}
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index 53f55283c..2f8eed214 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -875,9 +875,7 @@
   "model.dialog.capacity.suggestion.confidence.high": "High confidence",
   "model.dialog.capacity.suggestion.confidence.medium": "Medium confidence",
   "model.dialog.capacity.suggestion.confidence.low": "Low confidence",
-  "model.dialog.capacityCoverage.tag": "Missing capacity",
-  "model.dialog.capacityCoverage.warning": "This model is missing context window or max output tokens. Open edit settings to fill capacity.",
-  "model.dialog.capacityCoverage.warningWithSuggestion": "This model is missing capacity. A catalog suggestion may be available in the edit dialog.",
+  "model.list.capacityWarning.tag": "Missing capacity",
   "model.dialog.capacity.batchDefault.title": "Batch default capacity",
   "model.dialog.capacity.batchDefault.hint": "Values entered here apply as the default capacity for every LLM/VLM model in this batch import. Click the gear icon on a row to override a specific model.",
   "model.dialog.batch.requireRowCapacity": "Some enabled rows are missing context window or max output tokens. Open the gear icon to fill them in before confirming.",
@@ -1027,11 +1025,11 @@
   "modelConfig.message.addSuccess": "Model added successfully",
   "modelConfig.button.syncModelEngine": "Sync ModelEngine Models",
   "modelConfig.button.addCustomModel": "Add Model",
+  "modelConfig.capacityCoverage.warning": "{{bareCount}} of {{total}} LLM/VLM models are missing capacity — output token cap is not enforced.",
+  "modelConfig.capacityCoverage.description": "{{suggestionCount}} have an approved catalog suggestion ready to apply. Click Manage, then click the warning icon on each affected row to repair.",
+  "modelConfig.capacityCoverage.manage": "Manage",
   "modelConfig.button.editCustomModel": "Edit or Delete Model",
   "modelConfig.button.checkConnectivity": "Check Model Connectivity",
-  "modelConfig.capacityCoverage.warning": "{{bareCount}} of {{total}} LLM/VLM models are missing capacity fields.",
-  "modelConfig.capacityCoverage.description": "{{suggestionCount}} model(s) may have catalog suggestions. Open Manage Models, then edit a marked model to repair it.",
-  "modelConfig.capacityCoverage.manage": "Manage",
   "modelConfig.button.sync": "Sync",
   "modelConfig.button.add": "Add",
   "modelConfig.button.edit": "Edit",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 4d028e52a..bcf6bcc54 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -846,9 +846,7 @@
   "model.dialog.capacity.suggestion.confidence.high": "高置信度",
   "model.dialog.capacity.suggestion.confidence.medium": "中置信度",
   "model.dialog.capacity.suggestion.confidence.low": "低置信度",
-  "model.dialog.capacityCoverage.tag": "缺容量",
-  "model.dialog.capacityCoverage.warning": "此模型缺少上下文窗口或最大输出Token数。请打开编辑配置补全容量。",
-  "model.dialog.capacityCoverage.warningWithSuggestion": "此模型缺少容量。编辑弹窗中可能有目录建议可用。",
+  "model.list.capacityWarning.tag": "缺容量",
   "model.dialog.capacity.batchDefault.title": "批量默认容量",
   "model.dialog.capacity.batchDefault.hint": "此处填写的数值将作为本次批量导入所有 LLM/VLM 模型的默认容量。如需为某个模型单独设置，请点击对应行的⚙图标覆盖。",
   "model.dialog.batch.requireRowCapacity": "存在已打开开关的模型缺少上下文窗口或最大输出Token数，请点击对应行的⚙图标补全后再确认。",
@@ -998,11 +996,11 @@
   "modelConfig.message.addSuccess": "添加模型成功",
   "modelConfig.button.syncModelEngine": "同步ModelEngine模型",
   "modelConfig.button.addCustomModel": "添加模型",
+  "modelConfig.capacityCoverage.warning": "{{total}} 个 LLM/VLM 模型中有 {{bareCount}} 个未配置容量，输出 token 限额未启用。",
+  "modelConfig.capacityCoverage.description": "其中 {{suggestionCount}} 个有已审核目录建议可一键应用。点击\"管理\"打开列表，逐行点击警告图标即可修复。",
+  "modelConfig.capacityCoverage.manage": "管理",
   "modelConfig.button.editCustomModel": "修改或删除模型",
   "modelConfig.button.checkConnectivity": "检查模型连通性",
-  "modelConfig.capacityCoverage.warning": "{{total}} 个 LLM/VLM 模型中有 {{bareCount}} 个缺少容量字段。",
-  "modelConfig.capacityCoverage.description": "其中 {{suggestionCount}} 个可能有目录建议。打开修改或删除模型，编辑带标记的模型即可修复。",
-  "modelConfig.capacityCoverage.manage": "管理",
   "modelConfig.button.sync": "同步",
   "modelConfig.button.add": "添加",
   "modelConfig.button.edit": "修改",

From e0ef307f06dafa0c1505e7a1d440f23e200e02c8 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 14:37:31 +0800
Subject: [PATCH 05/31] fix(w11): unify ModelEditDialog state-per-model via key
 remount + auto-suggest population guard

Two paired bugs in the V1.5 auto-suggest path, both surfacing as
"open glm-5 shows qwen3.7-max suggestion" after the operator cancels
qwen and immediately clicks glm-5 in the Manage Models list:

1. Stale render. ModelEditDialog returns null when `model` is falsy
   (line ~559) but React does not unmount on null return -- it just
   commits null and keeps the component instance alive, useState
   intact. With React 18's automatic batching, the cancel and the
   subsequent row click coalesce into one commit; the [isOpen] reset
   effect I added in e442a5515 saw isOpen=true on its single run and
   skipped the cleanup, so capacitySuggestion stayed as qwenResult
   for the first render with model=glm5. The user briefly saw the
   wrong suggestion before the [model] effect cleared it.

2. Stale API call. Even after the first render flickered to qwen and
   then to null, the auto-suggest effect fired with closure-captured
   form values that were still qwen's (form was a single useState
   instance, and the [model] effect's setForm had not yet been flushed
   when the auto-suggest effect ran in the same commit cycle).
   modelService.suggestCapacity({ modelName: "qwen3.7-max", ... })
   was sent to the backend, and /suggest-capacity dutifully returned
   qwen3.7-max@1. The request token from the earlier amend did not
   help here because the API call was not racing -- it was sending
   the wrong input.

Fixes in this commit:

a) ModelDeleteDialog passes `key={editModel?.displayName || "__none__"}`
   to ModelEditDialog. Each new editModel forces a full unmount +
   remount, which resets every useState/useRef to its initial value.
   That eliminates the stale-render path (1).

b) ModelEditDialog auto-suggest effect depends on `form.name` and
   `form.url` in addition to `[isOpen, isBareCapacityModel,
   capacitySuggestionEnabled]`. On a fresh mount, form starts empty
   (useState defaults); canSuggestCapacity() is false on the first
   pass so we do not fire. After the [model] effect's setForm
   re-renders, form.name and form.url change, the effect re-runs,
   canSuggestCapacity() now returns true with the correct values,
   and we send the API request scoped to the new model. That fixes
   the stale-input path (2).

c) `autoSuggestFiredRef = useRef(false)` guards against re-firing
   when the operator subsequently types into the name or url fields.
   We still want exactly one auto-suggest per dialog instance, and
   thanks to (a) one instance == one model.

Dead code removed:
- The [isOpen] reset effect from e442a5515. Key-based remount
  supersedes it: the component is unmounted on close, so there is
  no state to reset.
- Its companion comments about "reset on close" semantics.

Retained:
- suggestionRequestRef token logic in handleSuggestCapacity. Covers
  a separate concern (rapid manual Check clicks on the same model
  with different inputs, where the older response must not overwrite
  the newer one). Key remount does not address this because there
  is no model swap.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../components/model/ModelDeleteDialog.tsx    |  6 ++
 .../components/model/ModelEditDialog.tsx      | 55 +++++++++++--------
 2 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
index 3722fbe49..e42992b1b 100644
--- a/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelDeleteDialog.tsx
@@ -1718,7 +1718,13 @@ export const ModelDeleteDialog = ({
         </div>
       )}
       {/* Edit model dialog */}
+      {/* key forces full unmount/remount when model changes, preventing
+          stale capacitySuggestion state from flashing on the first render
+          before the [model] effect clears it. Without key, the component
+          returns null (line 559) but never unmounts, so useState keeps
+          the previous model's suggestion alive for one render cycle. */}
       <ModelEditDialog
+        key={editModel?.displayName || "__none__"}
         isOpen={!!editModel}
         model={editModel}
         onClose={() => setEditModel(null)}
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index 3cdba0521..c887259e1 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -91,28 +91,22 @@ export const ModelEditDialog = ({
     message: "",
   });
 
-  // Monotonic request token for /suggest-capacity. Incremented on every
-  // new call, dialog close, and model change; the async handler compares
-  // its captured token against the current ref before committing
-  // setState, so a stale qwen-for-row-A response cannot win over a fresh
-  // glm-for-row-B response when the user cancels A and immediately edits
-  // B (the original bug -- previous request was racing the new one and
-  // sometimes overwriting it after navigation).
+  // Monotonic request token for /suggest-capacity. Used by manual Check
+  // clicks: when the operator clicks twice quickly with different inputs,
+  // the slower response must not overwrite the faster newer one. The
+  // navigation race (open A, cancel, open B) is handled by the
+  // key-based remount on the parent (ModelDeleteDialog), so we no longer
+  // need a separate "reset on close" effect here.
   const suggestionRequestRef = useRef(0);
 
-  // Reset capacity-related state every time the dialog closes. Without
-  // this, the next open render briefly shows the previous model's
-  // suggestion before the [model] effect overwrites it, and a slow
-  // in-flight response from the previous model can also overwrite the
-  // fresh model's correct result. The ref bump tells any pending
-  // handleSuggestCapacity to drop its response.
-  useEffect(() => {
-    if (isOpen) return;
-    suggestionRequestRef.current += 1;
-    setCapacitySuggestion(null);
-    setAcceptedCapacitySuggestion(null);
-    setCheckingCapacitySuggestion(false);
-  }, [isOpen]);
+  // Auto-suggest fires at most once per dialog instance. With the parent's
+  // key remount, "per instance" == "per model", which is the desired
+  // semantic. The fired-once guard is needed because the auto-suggest
+  // effect depends on `form.name` and `form.url`, which change as the
+  // [model] effect populates the form on first mount AND every time the
+  // operator types in those inputs -- only the populate transition
+  // should trigger an API call.
+  const autoSuggestFiredRef = useRef(false);
 
   useEffect(() => {
     if (model) {
@@ -248,19 +242,36 @@ export const ModelEditDialog = ({
   // click, badge click, future gear-icon shortcut) gets the same
   // affordance. No-op if the model already has capacity, the suggestion
   // switch is off, or required form fields are missing at open time.
+  //
+  // form.name and form.url are in the dependency list because the
+  // [model] effect above populates them asynchronously after this
+  // component mounts. With the parent's key remount, the first render
+  // here has form.name == "" / form.url == "", so canSuggestCapacity()
+  // is false and we cannot fire yet. The [model] effect's setForm
+  // then re-renders with populated values, this effect re-runs, and
+  // canSuggestCapacity() finally returns true. The autoSuggestFiredRef
+  // guards against re-firing later when the operator types into name
+  // or url -- only the populate transition should kick off auto-suggest.
   const isBareCapacityModel = Boolean(
     model &&
       supportsCapacityFields &&
       (!model.contextWindowTokens || !model.maxOutputTokens)
   );
   useEffect(() => {
+    if (autoSuggestFiredRef.current) return;
     if (!isOpen || !isBareCapacityModel) return;
     if (!capacitySuggestionEnabled) return;
     if (!canSuggestCapacity()) return;
+    autoSuggestFiredRef.current = true;
     handleSuggestCapacity();
-    // Fire once per open; do not re-fire on re-render.
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [isOpen, isBareCapacityModel]);
+  }, [
+    isOpen,
+    isBareCapacityModel,
+    capacitySuggestionEnabled,
+    form.name,
+    form.url,
+  ]);
 
   const isFormValid = () => {
     if (

From 00c8c62dd6a2d65c5e7fa92d32ba4e2ac1e31ccf Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 15:52:38 +0800
Subject: [PATCH 06/31] feat(w11): V1.5 bare-capacity surfaces + dual legacy
 hint + accept-signal SLO wiring

Closes Week N+2/N+3 punch list for W11 V1.5.

UI surfaces (#2 + #3):
- Agent-edit model selector: bare-capacity subtitle on dropdown items
  and a non-blocking form Alert above Save when a bare model is picked.
  Admin/dev/su/speed see "fix in Model Management", others see
  "ask administrator". Permission gate via canManageModels().
- ModelCapacityCoverageWidget renders at top of resource-manage Models
  tab; hides on bare_count=0 or non-admin. Shared useCapacityCoverage
  hook backs both the widget and the agent-edit selector.

Legacy max_tokens hint (#7):
- Dual-target buttons (Fill into Context Window / Fill into Max Output)
  with heuristic ordering: values >= 16384 lead with Context Window,
  values < 16384 lead with Max Output. Each button hides once its
  target field is filled; the alert hides once both are filled. The old
  single-button "Apply as max_output_tokens" prompt usually had the
  semantics backwards: legacy max_tokens columns from the pre-W1 era
  more often held the provider context window, although at small values
  they really were the output cap -- so the operator now picks.

Constructor audit (#16):
- test_model_consts pins ModelRequest and ModelCapacitySuggestionResponse
  field sets so a silent rename trips a test.
- test_prepare_model_dict_persists_operator_capacity now pins all 7
  capacity fields + canonical model_factory/model_name in the
  ModelRequest constructor kwargs.

SLO data flow fix:
- Frontend was never sending the W11 accept signal, so
  model_capacity_suggestion_accept_total stayed at zero and the
  "95% accepted suggestions hit profile" SLO could not be computed.
  buildCapacityRequestBody now threads acceptedSuggestionMatchKind +
  acceptedCapabilityProfileVersion; ModelAddDialog and ModelEditDialog
  include them in save payloads when the operator clicked "Use suggestion".
- Two new app-layer integration tests pin: (1) accept signal present
  -> recorder fires with correct labels and audit fields are stripped
  from the service-layer payload; (2) plain save -> recorder does not
  fire (so accept_total stays aligned with dispatch_profile_hit_total
  as the SLO denominator).

i18n: full spec keyset present in both en/zh
(model.list.capacityWarning.*, agent.modelSelector.bareCapacity.*,
dashboard.capacityCoverage.*, model.dialog.capacity.suggestion.*,
model.dialog.capacity.preset.*, model.dialog.capacity.legacyMaxTokens.*).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../agentInfo/AgentGenerateDetail.tsx         | 102 +++++++++++++++---
 .../components/model/ModelAddDialog.tsx       |  18 ++++
 .../components/model/ModelCapacityFields.tsx  |  81 ++++++++++----
 .../components/model/ModelEditDialog.tsx      |  49 +++++++--
 .../resources/ModelCapacityCoverageWidget.tsx |  66 ++++++++++++
 .../components/resources/ModelList.tsx        |   2 +
 frontend/hooks/model/useCapacityCoverage.ts   |  52 +++++++++
 frontend/public/locales/en/common.json        |  24 ++++-
 frontend/public/locales/zh/common.json        |  24 ++++-
 frontend/services/modelService.ts             |  22 ++++
 test/backend/app/test_model_managment_app.py  |  60 +++++++++++
 .../services/test_model_provider_service.py   |  13 ++-
 test/backend/test_model_consts.py             |  50 +++++++++
 13 files changed, 511 insertions(+), 52 deletions(-)
 create mode 100644 frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx
 create mode 100644 frontend/hooks/model/useCapacityCoverage.ts

diff --git a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
index e07204cab..cecc02d2b 100644
--- a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
+++ b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
@@ -33,6 +33,8 @@ import { useAgentGeneration } from "@/hooks/agent/useAgentGeneration";
 import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider";
 import { useDeployment } from "@/components/providers/deploymentProvider";
 import { useModelList } from "@/hooks/model/useModelList";
+import { useCapacityCoverage } from "@/hooks/model/useCapacityCoverage";
+import { canManageModels } from "@/lib/auth";
 import { useConfig } from "@/hooks/useConfig";
 import { useGroupList, useGroupDetails } from "@/hooks/group/useGroupList";
 import { usePromptTemplateList } from "@/hooks/agent/usePromptTemplateList";
@@ -71,6 +73,8 @@ export default function AgentGenerateDetail({}) {
 
   const { defaultLlmModelConfig } = useConfig();
   const { availableLlmModels, models, isLoading: loadingModels } = useModelList();
+  const { bareModelIds: bareCapacityModelIds } = useCapacityCoverage();
+  const userCanManageModels = canManageModels(user?.role, isSpeedMode);
   const {
     templates: promptTemplates,
     isLoading: loadingPromptTemplates,
@@ -537,11 +541,52 @@ export default function AgentGenerateDetail({}) {
   };
 
   // Select options for available models
-  const modelSelectOptions = availableLlmModels.map((model) => ({
-    value: model.displayName || model.name,
-    label: model.displayName || model.name,
-    disabled: model.connect_status !== "available",
-  }));
+  // Bare-capacity rows (`context_window_tokens IS NULL OR max_output_tokens IS
+  // NULL`) stay selectable per W11 spec; the warning is the inline subtitle
+  // and the non-blocking form notice below.
+  const modelSelectOptions = availableLlmModels.map((model) => {
+    const isBare = bareCapacityModelIds.has(model.id);
+    const displayLabel = model.displayName || model.name;
+    return {
+      value: displayLabel,
+      label: isBare ? (
+        <Flex vertical gap={0}>
+          <span>{displayLabel}</span>
+          <span className="text-[11px] text-yellow-700">
+            {t("agent.modelSelector.bareCapacity.subtitle")}
+          </span>
+        </Flex>
+      ) : (
+        displayLabel
+      ),
+      disabled: model.connect_status !== "available",
+    };
+  });
+
+  const isSelectedMainModelBare = Boolean(
+    selectedMainAgentModel && bareCapacityModelIds.has(selectedMainAgentModel.id)
+  );
+
+  const selectedBusinessLogicModel = useMemo(() => {
+    const businessName =
+      form.getFieldValue("businessLogicModelName") ||
+      editedAgent.business_logic_model_name ||
+      "";
+    if (!businessName) return undefined;
+    return availableLlmModels.find(
+      (m) => m.displayName === businessName || m.name === businessName
+    );
+  }, [
+    availableLlmModels,
+    editedAgent.business_logic_model_name,
+    form,
+    forceRefreshKey,
+  ]);
+
+  const isSelectedBusinessLogicModelBare = Boolean(
+    selectedBusinessLogicModel &&
+      bareCapacityModelIds.has(selectedBusinessLogicModel.id)
+  );
 
   const promptTemplateSelectOptions = useMemo(() => {
     const options = promptTemplates.map((template) => ({
@@ -688,6 +733,23 @@ export default function AgentGenerateDetail({}) {
                       </span>
                     </Button>
                   </Flex>
+                  {(isSelectedMainModelBare || isSelectedBusinessLogicModelBare) && (
+                    <Alert
+                      type="warning"
+                      showIcon
+                      message={t(
+                        userCanManageModels
+                          ? "agent.modelSelector.bareCapacity.formNotice"
+                          : "agent.modelSelector.bareCapacity.formNoticeNoPermission",
+                        {
+                          modelName:
+                            (isSelectedMainModelBare && selectedMainAgentModel?.displayName) ||
+                            (isSelectedBusinessLogicModelBare && selectedBusinessLogicModel?.displayName) ||
+                            "",
+                        }
+                      )}
+                    />
+                  )}
                 </Flex>
               </Form>
             </Card>
@@ -879,15 +941,27 @@ export default function AgentGenerateDetail({}) {
                                 });
                               }}
                             >
-                              {availableLlmModels.map((model) => (
-                                <Select.Option
-                                  key={model.id}
-                                  value={model.displayName}
-                                  disabled={model.connect_status !== "available"}
-                                >
-                                  {model.displayName}
-                                </Select.Option>
-                              ))}
+                              {availableLlmModels.map((model) => {
+                                const isBare = bareCapacityModelIds.has(model.id);
+                                return (
+                                  <Select.Option
+                                    key={model.id}
+                                    value={model.displayName}
+                                    disabled={model.connect_status !== "available"}
+                                  >
+                                    {isBare ? (
+                                      <Flex vertical gap={0}>
+                                        <span>{model.displayName}</span>
+                                        <span className="text-[11px] text-yellow-700">
+                                          {t("agent.modelSelector.bareCapacity.subtitle")}
+                                        </span>
+                                      </Flex>
+                                    ) : (
+                                      model.displayName
+                                    )}
+                                  </Select.Option>
+                                );
+                              })}
                             </Select>
                           </Form.Item>
                         </Col>
diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
index 5f3c03535..9473bf6f6 100644
--- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
@@ -1145,6 +1145,22 @@ export const ModelAddDialog = ({
         maxTokensValue = 0;
       }
 
+      // W11 accept-signal: pop the audit fields from acceptedCapacitySuggestion
+      // so the app layer can label model_capacity_suggestion_accept_total.
+      // Emitting once per save keeps the counter aligned with the
+      // dispatch_profile_hit_total denominator (spec L709-710).
+      const acceptSignalKwargs = acceptedCapacitySuggestion
+        ? {
+            acceptedSuggestionMatchKind: acceptedCapacitySuggestion.matchKind,
+            ...(acceptedCapacitySuggestion.capabilityProfileVersion
+              ? {
+                  acceptedCapabilityProfileVersion:
+                    acceptedCapacitySuggestion.capabilityProfileVersion,
+                }
+              : {}),
+          }
+        : {};
+
       // Add to the backend service - use manage interface if tenantId is provided
       if (tenantId) {
         const modelParams: any = {
@@ -1157,6 +1173,7 @@ export const ModelAddDialog = ({
           displayName: form.displayName || form.name,
           modelFactory: form.provider,
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
+          ...acceptSignalKwargs,
         };
 
         // Add STT specific fields
@@ -1199,6 +1216,7 @@ export const ModelAddDialog = ({
           displayName: form.displayName || form.name,
           modelFactory: form.provider,
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
+          ...acceptSignalKwargs,
         };
 
         // Add STT specific fields
diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
index edfbea597..712ab62fd 100644
--- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
@@ -1,4 +1,4 @@
-import { Alert, AutoComplete, Button, Input, Tag, Tooltip } from "antd";
+import { Alert, AutoComplete, Button, Input, Space, Tag, Tooltip } from "antd";
 import { useTranslation } from "react-i18next";
 
 import type { CapacitySuggestion } from "@/types/modelConfig";
@@ -281,13 +281,30 @@ export const ModelCapacityFields = ({
 }: ModelCapacityFieldsProps) => {
   const { t } = useTranslation();
 
-  // Show the actionable legacy-value prompt only while the input is still
-  // empty -- once the user applies (or types their own value), the prompt
-  // disappears so we don't keep nagging.
+  // Legacy max_tokens can mean either thing -- before W1 split capacity,
+  // operators sometimes typed the provider context window there
+  // (128000, 32768, ...) and sometimes the per-call output cap (4096,
+  // 8192, ...). We can't tell from the value alone, so we surface both
+  // target fields and let the operator pick. The button order is the
+  // only heuristic: values >= LEGACY_CONTEXT_WINDOW_THRESHOLD are
+  // far more likely to be context windows (no real model caps output
+  // at 32K+ in practice), so the "Apply as Context Window" button leads;
+  // below the threshold the "Apply as Max Output" button leads.
+  //
+  // Each button is independently gated by its target field being empty
+  // -- once the operator commits a value to that column we stop nagging
+  // about it. When both fields are filled the whole alert hides.
+  const LEGACY_CONTEXT_WINDOW_THRESHOLD = 16_384;
+  const legacyValuePositive =
+    legacyMaxTokensCandidate !== undefined && legacyMaxTokensCandidate > 0;
+  const canApplyAsContextWindow =
+    legacyValuePositive && value.contextWindowTokens.trim() === "";
+  const canApplyAsMaxOutput =
+    legacyValuePositive && value.maxOutputTokens.trim() === "";
   const showLegacyMaxTokensPrompt =
-    legacyMaxTokensCandidate !== undefined &&
-    legacyMaxTokensCandidate > 0 &&
-    value.maxOutputTokens.trim() === "";
+    canApplyAsContextWindow || canApplyAsMaxOutput;
+  const contextWindowIsRecommended =
+    (legacyMaxTokensCandidate ?? 0) >= LEGACY_CONTEXT_WINDOW_THRESHOLD;
 
   const source = capacitySource || "";
   const sourceColor = SOURCE_COLORS[source] || "default";
@@ -392,25 +409,43 @@ export const ModelCapacityFields = ({
         <Alert
           type="warning"
           showIcon
-          message={t("model.dialog.capacity.legacyMaxTokensDetected", {
-            value: legacyMaxTokensCandidate,
-            defaultValue: `Detected legacy max_tokens = ${legacyMaxTokensCandidate}. Apply it as max_output_tokens?`,
+          message={t("model.dialog.capacity.legacyMaxTokensHint", {
+            maxTokens: legacyMaxTokensCandidate,
           })}
           action={
-            <Button
-              size="small"
-              type="primary"
-              onClick={() =>
-                onChange(
-                  "maxOutputTokens",
-                  String(legacyMaxTokensCandidate)
-                )
-              }
-            >
-              {t("model.dialog.capacity.legacyMaxTokens.apply", {
-                defaultValue: "Apply",
+            <Space size={6} wrap>
+              {(contextWindowIsRecommended
+                ? ["context", "output"]
+                : ["output", "context"]
+              ).map((target, idx) => {
+                if (target === "context" && !canApplyAsContextWindow) {
+                  return null;
+                }
+                if (target === "output" && !canApplyAsMaxOutput) {
+                  return null;
+                }
+                const labelKey =
+                  target === "context"
+                    ? "model.dialog.capacity.legacyMaxTokens.applyAsContext"
+                    : "model.dialog.capacity.legacyMaxTokens.applyAsOutput";
+                const fieldName =
+                  target === "context"
+                    ? "contextWindowTokens"
+                    : "maxOutputTokens";
+                return (
+                  <Button
+                    key={target}
+                    size="small"
+                    type={idx === 0 ? "primary" : "default"}
+                    onClick={() =>
+                      onChange(fieldName, String(legacyMaxTokensCandidate))
+                    }
+                  >
+                    {t(labelKey)}
+                  </Button>
+                );
               })}
-            </Button>
+            </Space>
           }
         />
       ) : showDeprecatedMaxTokensWarning ? (
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index c887259e1..e2bd5b1e2 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -459,6 +459,18 @@ export const ModelEditDialog = ({
                 : undefined
               : undefined,
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
+          ...(acceptedCapacitySuggestion
+            ? {
+                acceptedSuggestionMatchKind:
+                  acceptedCapacitySuggestion.matchKind,
+                ...(acceptedCapacitySuggestion.capabilityProfileVersion
+                  ? {
+                      acceptedCapabilityProfileVersion:
+                        acceptedCapacitySuggestion.capabilityProfileVersion,
+                    }
+                  : {}),
+              }
+            : {}),
         });
       } else {
         await modelService.updateSingleModel({
@@ -504,6 +516,18 @@ export const ModelEditDialog = ({
               }
             : {}),
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
+          ...(acceptedCapacitySuggestion
+            ? {
+                acceptedSuggestionMatchKind:
+                  acceptedCapacitySuggestion.matchKind,
+                ...(acceptedCapacitySuggestion.capabilityProfileVersion
+                  ? {
+                      acceptedCapabilityProfileVersion:
+                        acceptedCapacitySuggestion.capabilityProfileVersion,
+                    }
+                  : {}),
+              }
+            : {}),
         });
       }
 
@@ -701,17 +725,22 @@ export const ModelEditDialog = ({
                 applyCapacitySuggestion(capacitySuggestion)
               }
               // Legacy max_tokens is now surfaced via the actionable
-              // legacyMaxTokensCandidate prompt (no more silent promote in
-              // capacityFormFromModel). Keep the plain deprecation banner
-              // fallback for the rare case where the record has neither
-              // column populated, so users still see the migration nudge.
+              // legacyMaxTokensCandidate prompt with two-target buttons
+              // (Context Window vs Max Output). The prompt is offered while
+              // EITHER target field is still empty -- ModelCapacityFields
+              // hides individual buttons once that column is filled, and the
+              // whole alert disappears once both are filled. The plain
+              // deprecation banner only kicks in if both targets are filled
+              // but the legacy column still has a value.
               showDeprecatedMaxTokensWarning={
                 Boolean(model.maxTokens) &&
-                !model.maxOutputTokens &&
-                !form.maxOutputTokens
+                Boolean(model.contextWindowTokens || form.contextWindowTokens) &&
+                Boolean(model.maxOutputTokens || form.maxOutputTokens)
               }
               legacyMaxTokensCandidate={
-                model.maxOutputTokens ? undefined : model.maxTokens
+                model.contextWindowTokens && model.maxOutputTokens
+                  ? undefined
+                  : model.maxTokens
               }
             />
           </div>
@@ -1089,11 +1118,11 @@ export const ProviderConfigEditDialog = ({
             // context_window/max_output optional; DEFAULT_* substitute at save.
             showDeprecatedMaxTokensWarning={
               Boolean(initialMaxTokens) &&
-              !initialCapacity?.maxOutputTokens &&
-              !capacityForm.maxOutputTokens
+              Boolean(initialCapacity?.contextWindowTokens || capacityForm.contextWindowTokens) &&
+              Boolean(initialCapacity?.maxOutputTokens || capacityForm.maxOutputTokens)
             }
             legacyMaxTokensCandidate={
-              initialCapacity?.maxOutputTokens
+              initialCapacity?.contextWindowTokens && initialCapacity?.maxOutputTokens
                 ? undefined
                 : initialCapacity?.maxTokens
             }
diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx
new file mode 100644
index 000000000..783c48667
--- /dev/null
+++ b/frontend/app/[locale]/resource-manage/components/resources/ModelCapacityCoverageWidget.tsx
@@ -0,0 +1,66 @@
+"use client";
+
+import React from "react";
+import { useTranslation } from "react-i18next";
+import { Card, Button, Skeleton, Flex } from "antd";
+import { AlertTriangle } from "lucide-react";
+
+import { useCapacityCoverage } from "@/hooks/model/useCapacityCoverage";
+import { useAuthorizationContext } from "@/components/providers/AuthorizationProvider";
+import { useDeployment } from "@/components/providers/deploymentProvider";
+import { canManageModels } from "@/lib/auth";
+
+interface Props {
+  onViewAll?: () => void;
+}
+
+export default function ModelCapacityCoverageWidget({ onViewAll }: Props) {
+  const { t } = useTranslation("common");
+  const { user } = useAuthorizationContext();
+  const { isSpeedMode } = useDeployment();
+  const visibleToOperator = canManageModels(user?.role, isSpeedMode);
+
+  const { coverage, isLoading } = useCapacityCoverage({
+    enabled: visibleToOperator,
+  });
+
+  if (!visibleToOperator) return null;
+  if (isLoading) {
+    return (
+      <Card size="small" className="mb-3">
+        <Skeleton active paragraph={{ rows: 1 }} title={false} />
+      </Card>
+    );
+  }
+  if (!coverage || coverage.bareCount === 0) return null;
+
+  return (
+    <Card
+      size="small"
+      className="mb-3 border-yellow-200"
+      styles={{ body: { padding: "12px 16px" } }}
+    >
+      <Flex align="center" justify="space-between" gap={12} wrap="wrap">
+        <Flex align="center" gap={10} className="min-w-0 flex-1">
+          <AlertTriangle className="h-5 w-5 text-yellow-600 shrink-0" />
+          <Flex vertical gap={2} className="min-w-0">
+            <span className="text-sm font-medium text-gray-800">
+              {t("dashboard.capacityCoverage.title")}
+            </span>
+            <span className="text-xs text-gray-600">
+              {t("dashboard.capacityCoverage.subtitle", {
+                bareCount: coverage.bareCount,
+                total: coverage.totalLlmVlm,
+              })}
+            </span>
+          </Flex>
+        </Flex>
+        {onViewAll && (
+          <Button size="small" type="link" onClick={onViewAll}>
+            {t("dashboard.capacityCoverage.viewAll")}
+          </Button>
+        )}
+      </Flex>
+    </Card>
+  );
+}
diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
index 6715852f7..11ff72c97 100644
--- a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
+++ b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
@@ -15,6 +15,7 @@ import type { ModelMonitoringItem } from "@/types/monitoring";
 import { MODEL_TYPES } from "@/const/modelConfig";
 import { ModelAddDialog } from "../../../models/components/model/ModelAddDialog";
 import { ModelEditDialog } from "../../../models/components/model/ModelEditDialog";
+import ModelCapacityCoverageWidget from "./ModelCapacityCoverageWidget";
 import { CheckCircle, CircleSlash, XCircle, CircleEllipsis, CircleHelp } from "lucide-react";
 
 interface UnifiedModelRow extends ModelOption {
@@ -361,6 +362,7 @@ export default function ModelList({ tenantId }: { tenantId: string | null }) {
 
   return (
     <div className="flex flex-col h-full overflow-hidden">
+      <ModelCapacityCoverageWidget />
       <div className="flex items-center justify-between mb-4 flex-shrink-0">
         <div className="flex items-center gap-3">
           <Segmented
diff --git a/frontend/hooks/model/useCapacityCoverage.ts b/frontend/hooks/model/useCapacityCoverage.ts
new file mode 100644
index 000000000..54aa247f5
--- /dev/null
+++ b/frontend/hooks/model/useCapacityCoverage.ts
@@ -0,0 +1,52 @@
+import { useMemo } from "react";
+import { useQuery, useQueryClient } from "@tanstack/react-query";
+
+import { modelService } from "@/services/modelService";
+import { CapacityCoverage } from "@/types/modelConfig";
+
+const EMPTY_COVERAGE: CapacityCoverage = {
+  totalLlmVlm: 0,
+  bareCount: 0,
+  bareModels: [],
+};
+
+export function useCapacityCoverage(options?: {
+  enabled?: boolean;
+  staleTime?: number;
+}) {
+  const queryClient = useQueryClient();
+
+  const query = useQuery({
+    queryKey: ["modelCapacityCoverage"],
+    queryFn: async (): Promise<CapacityCoverage> =>
+      modelService.getCapacityCoverage(),
+    staleTime: options?.staleTime ?? 60_000,
+    enabled: options?.enabled ?? true,
+  });
+
+  const coverage = query.data ?? EMPTY_COVERAGE;
+
+  const bareModelIds = useMemo(
+    () => new Set(coverage.bareModels.map((m) => m.modelId)),
+    [coverage]
+  );
+
+  const suggestionAvailableModelIds = useMemo(
+    () =>
+      new Set(
+        coverage.bareModels
+          .filter((m) => m.suggestionAvailable)
+          .map((m) => m.modelId)
+      ),
+    [coverage]
+  );
+
+  return {
+    ...query,
+    coverage,
+    bareModelIds,
+    suggestionAvailableModelIds,
+    invalidate: () =>
+      queryClient.invalidateQueries({ queryKey: ["modelCapacityCoverage"] }),
+  };
+}
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index 2f8eed214..375a80a9e 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -852,8 +852,8 @@
   "model.dialog.capacity.error.reserveExceedsOutput": "Output reserve cannot exceed max output tokens.",
   "model.dialog.capacity.error.requiredMissing": "Context window and max input tokens are required.",
   "model.dialog.capacity.deprecatedMaxTokens": "max_tokens is deprecated; use max_output_tokens.",
-  "model.dialog.capacity.legacyMaxTokensDetected": "Detected legacy max_tokens = {{value}}. Apply it as max_output_tokens?",
-  "model.dialog.capacity.legacyMaxTokens.apply": "Apply",
+  "model.dialog.capacity.legacyMaxTokens.applyAsContext": "Fill into Context Window",
+  "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "Fill into Max Output",
   "model.dialog.capacity.source.operator": "Operator",
   "model.dialog.capacity.source.profile": "Profile",
   "model.dialog.capacity.source.provider_candidate": "Provider Candidate",
@@ -876,6 +876,26 @@
   "model.dialog.capacity.suggestion.confidence.medium": "Medium confidence",
   "model.dialog.capacity.suggestion.confidence.low": "Low confidence",
   "model.list.capacityWarning.tag": "Missing capacity",
+  "model.list.capacityWarning.badgeTooltip": "Output token cap is not enforced for this model. Click to fill capacity values now.",
+  "model.list.capacityWarning.tooltipAction": "Open capacity panel",
+  "agent.modelSelector.bareCapacity.subtitle": "Output cap not enforced — configure capacity in Model Management.",
+  "agent.modelSelector.bareCapacity.formNotice": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement and budget consistency checks are off until capacity is set in Model Management.",
+  "agent.modelSelector.bareCapacity.formNoticeNoPermission": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement is off. Ask a model administrator to configure capacity for this model.",
+  "dashboard.capacityCoverage.title": "Model capacity coverage",
+  "dashboard.capacityCoverage.subtitle": "{{bareCount}} of {{total}} LLM/VLM models are missing capacity values. Output-token enforcement is off for those models.",
+  "dashboard.capacityCoverage.viewAll": "View all",
+  "model.dialog.capacity.suggestion.matchExact": "Catalog exact match",
+  "model.dialog.capacity.suggestion.matchFuzzy": "Catalog fuzzy match",
+  "model.dialog.capacity.suggestion.matchProviderDiscovery": "Provider-discovered capacity",
+  "model.dialog.capacity.suggestion.useSuggestion": "Use suggestion",
+  "model.dialog.capacity.suggestion.canonicalName": "Canonical model name: {{name}}",
+  "model.dialog.capacity.suggestion.candidateWarning": "Multiple candidates matched; the saved canonical name may need review.",
+  "model.dialog.capacity.suggestion.profileMissWarning": "Saving without the canonical model name means runtime may not claim profile capacity until W1 exact lookup succeeds.",
+  "model.dialog.capacity.suggestion.toggle": "Suggest capacity",
+  "model.dialog.capacity.preset.custom": "Custom value",
+  "model.dialog.capacity.preset.contextWindow": "Context window preset",
+  "model.dialog.capacity.preset.outputReserve": "Output reserve preset",
+  "model.dialog.capacity.legacyMaxTokensHint": "Legacy max_tokens is {{maxTokens}}. This value may be either the model's context window (typical for 16K and above) or the per-call max output cap (typical for 4K and below). Pick the field that matches the model's actual capability and save.",
   "model.dialog.capacity.batchDefault.title": "Batch default capacity",
   "model.dialog.capacity.batchDefault.hint": "Values entered here apply as the default capacity for every LLM/VLM model in this batch import. Click the gear icon on a row to override a specific model.",
   "model.dialog.batch.requireRowCapacity": "Some enabled rows are missing context window or max output tokens. Open the gear icon to fill them in before confirming.",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index bcf6bcc54..230a731c3 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -823,8 +823,8 @@
   "model.dialog.capacity.error.reserveExceedsOutput": "输出预留Token数不能超过最大输出Token数。",
   "model.dialog.capacity.error.requiredMissing": "上下文窗口和最大输入Token数为必填项。",
   "model.dialog.capacity.deprecatedMaxTokens": "max_tokens 已废弃，请使用 max_output_tokens。",
-  "model.dialog.capacity.legacyMaxTokensDetected": "检测到旧的「最大Tokens数」为 {{value}}，是否填入最大输出Token数？",
-  "model.dialog.capacity.legacyMaxTokens.apply": "应用",
+  "model.dialog.capacity.legacyMaxTokens.applyAsContext": "填入「上下文窗口」",
+  "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "填入「最大输出」",
   "model.dialog.capacity.source.operator": "人工配置",
   "model.dialog.capacity.source.profile": "能力档案",
   "model.dialog.capacity.source.provider_candidate": "供应商候选",
@@ -847,6 +847,26 @@
   "model.dialog.capacity.suggestion.confidence.medium": "中置信度",
   "model.dialog.capacity.suggestion.confidence.low": "低置信度",
   "model.list.capacityWarning.tag": "缺容量",
+  "model.list.capacityWarning.badgeTooltip": "该模型未启用输出 Token 限额。点击此处补全容量配置。",
+  "model.list.capacityWarning.tooltipAction": "打开容量面板",
+  "agent.modelSelector.bareCapacity.subtitle": "输出限额未启用 — 请在模型管理中补全容量。",
+  "agent.modelSelector.bareCapacity.formNotice": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行，但输出 Token 限额和预算一致性校验关闭，直到在「模型管理」中补全容量。",
+  "agent.modelSelector.bareCapacity.formNoticeNoPermission": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行，但输出 Token 限额关闭。请联系模型管理员补全该模型的容量配置。",
+  "dashboard.capacityCoverage.title": "模型容量覆盖",
+  "dashboard.capacityCoverage.subtitle": "{{total}} 个 LLM/VLM 模型中有 {{bareCount}} 个缺少容量字段。这些模型的输出 Token 限额未启用。",
+  "dashboard.capacityCoverage.viewAll": "查看全部",
+  "model.dialog.capacity.suggestion.matchExact": "目录精确匹配",
+  "model.dialog.capacity.suggestion.matchFuzzy": "目录模糊匹配",
+  "model.dialog.capacity.suggestion.matchProviderDiscovery": "供应商发现",
+  "model.dialog.capacity.suggestion.useSuggestion": "使用建议值",
+  "model.dialog.capacity.suggestion.canonicalName": "规范化模型名：{{name}}",
+  "model.dialog.capacity.suggestion.candidateWarning": "存在多个候选匹配，保存前请确认规范化名称。",
+  "model.dialog.capacity.suggestion.profileMissWarning": "未保存规范化模型名时，运行期可能无法命中能力档案，直到 W1 精确查找命中。",
+  "model.dialog.capacity.suggestion.toggle": "建议容量",
+  "model.dialog.capacity.preset.custom": "自定义值",
+  "model.dialog.capacity.preset.contextWindow": "上下文窗口预设",
+  "model.dialog.capacity.preset.outputReserve": "输出预留预设",
+  "model.dialog.capacity.legacyMaxTokensHint": "历史 max_tokens 为 {{maxTokens}}。该值可能是模型的上下文窗口（16K 及以上常见），也可能是单次最大输出（4K 及以下常见）。请根据模型实际能力选择填入哪个字段。",
   "model.dialog.capacity.batchDefault.title": "批量默认容量",
   "model.dialog.capacity.batchDefault.hint": "此处填写的数值将作为本次批量导入所有 LLM/VLM 模型的默认容量。如需为某个模型单独设置，请点击对应行的⚙图标覆盖。",
   "model.dialog.batch.requireRowCapacity": "存在已打开开关的模型缺少上下文窗口或最大输出Token数，请点击对应行的⚙图标补全后再确认。",
diff --git a/frontend/services/modelService.ts b/frontend/services/modelService.ts
index d054a9274..66246cb81 100644
--- a/frontend/services/modelService.ts
+++ b/frontend/services/modelService.ts
@@ -43,6 +43,8 @@ const buildCapacityRequestBody = (model: {
   defaultOutputReserveTokens?: number;
   tokenizerFamily?: string;
   capacitySource?: string;
+  acceptedSuggestionMatchKind?: string;
+  acceptedCapabilityProfileVersion?: string;
 }) => ({
   ...(model.contextWindowTokens !== undefined
     ? { context_window_tokens: model.contextWindowTokens }
@@ -62,6 +64,18 @@ const buildCapacityRequestBody = (model: {
   ...(model.capacitySource !== undefined
     ? { capacity_source: model.capacitySource }
     : {}),
+  // W11 accept-signal: audit-only fields the app layer pops before the
+  // service write so model_capacity_suggestion_accept_total can count
+  // accepted catalog matches.
+  ...(model.acceptedSuggestionMatchKind !== undefined
+    ? { accepted_suggestion_match_kind: model.acceptedSuggestionMatchKind }
+    : {}),
+  ...(model.acceptedCapabilityProfileVersion !== undefined
+    ? {
+        accepted_capability_profile_version:
+          model.acceptedCapabilityProfileVersion,
+      }
+    : {}),
 });
 
 const mapCapacitySuggestionFromApi = (
@@ -198,6 +212,8 @@ export const modelService = {
     defaultOutputReserveTokens?: number;
     tokenizerFamily?: string;
     capacitySource?: string;
+    acceptedSuggestionMatchKind?: string;
+    acceptedCapabilityProfileVersion?: string;
   }): Promise<void> => {
     try {
       const requestBody: any = {
@@ -420,6 +436,8 @@ export const modelService = {
     defaultOutputReserveTokens?: number;
     tokenizerFamily?: string;
     capacitySource?: string;
+    acceptedSuggestionMatchKind?: string;
+    acceptedCapabilityProfileVersion?: string;
   }): Promise<void> => {
     try {
       const response = await fetch(
@@ -923,6 +941,8 @@ export const modelService = {
     defaultOutputReserveTokens?: number;
     tokenizerFamily?: string;
     capacitySource?: string;
+    acceptedSuggestionMatchKind?: string;
+    acceptedCapabilityProfileVersion?: string;
   }): Promise<void> => {
     try {
       const requestBody: any = {
@@ -1005,6 +1025,8 @@ export const modelService = {
     defaultOutputReserveTokens?: number;
     tokenizerFamily?: string;
     capacitySource?: string;
+    acceptedSuggestionMatchKind?: string;
+    acceptedCapabilityProfileVersion?: string;
   }): Promise<void> => {
     try {
       const response = await fetch(
diff --git a/test/backend/app/test_model_managment_app.py b/test/backend/app/test_model_managment_app.py
index cbdc04c15..38300e988 100644
--- a/test/backend/app/test_model_managment_app.py
+++ b/test/backend/app/test_model_managment_app.py
@@ -290,6 +290,66 @@ async def _create(*args, **kwargs):
     mock_create.assert_called_once()
 
 
+@pytest.mark.asyncio
+async def test_create_model_records_accept_signal_when_present(client, auth_header, user_credentials, sample_model_data, mocker):
+    """End-to-end SLO data-flow check: when the frontend ships the W11 accept
+    signal on a successful save, the app layer must (1) strip the audit-only
+    fields before the DB write, and (2) call the metric recorder so
+    model_capacity_suggestion_accept_total increments. Spec L709-710.
+    """
+    mocker.patch('backend.apps.model_managment_app.get_current_user_id', return_value=user_credentials)
+
+    async def _create(*args, **kwargs):
+        return None
+
+    mock_create = mocker.patch('backend.apps.model_managment_app.create_model_for_tenant', side_effect=_create)
+    mock_record = mocker.patch('backend.apps.model_managment_app._record_capacity_suggestion_accept')
+
+    payload = {
+        **sample_model_data,
+        "context_window_tokens": 128000,
+        "max_output_tokens": 16384,
+        "capacity_source": "operator",
+        "accepted_suggestion_match_kind": "catalog_exact",
+        "accepted_capability_profile_version": "openai/gpt-4o@1",
+    }
+    response = client.post("/model/create", json=payload, headers=auth_header)
+
+    assert response.status_code == HTTPStatus.OK
+
+    # Audit fields must NOT reach the service layer.
+    create_args = mock_create.await_args
+    sent = create_args.args[2]
+    assert "accepted_suggestion_match_kind" not in sent
+    assert "accepted_capability_profile_version" not in sent
+    # Real capacity fields ARE forwarded.
+    assert sent["context_window_tokens"] == 128000
+    assert sent["max_output_tokens"] == 16384
+
+    # Metric recorder called with the labels the SLO dashboard expects.
+    mock_record.assert_called_once_with("catalog_exact", payload["provider"])
+
+
+@pytest.mark.asyncio
+async def test_create_model_skips_accept_recorder_without_match_kind(client, auth_header, user_credentials, sample_model_data, mocker):
+    """Ordinary saves (no Use-suggestion click) must NOT fire the recorder.
+    Otherwise accept_total inflates and the SLO ratio against
+    dispatch_profile_hit_total becomes meaningless.
+    """
+    mocker.patch('backend.apps.model_managment_app.get_current_user_id', return_value=user_credentials)
+
+    async def _create(*args, **kwargs):
+        return None
+
+    mocker.patch('backend.apps.model_managment_app.create_model_for_tenant', side_effect=_create)
+    mock_record = mocker.patch('backend.apps.model_managment_app._record_capacity_suggestion_accept')
+
+    response = client.post("/model/create", json=sample_model_data, headers=auth_header)
+
+    assert response.status_code == HTTPStatus.OK
+    mock_record.assert_not_called()
+
+
 @pytest.mark.asyncio
 async def test_create_model_conflict(client, auth_header, user_credentials, sample_model_data, mocker):
     """Test model creation with name conflict."""
diff --git a/test/backend/services/test_model_provider_service.py b/test/backend/services/test_model_provider_service.py
index b88cb38a3..9828246c5 100644
--- a/test/backend/services/test_model_provider_service.py
+++ b/test/backend/services/test_model_provider_service.py
@@ -574,11 +574,12 @@ async def test_prepare_model_dict_persists_operator_capacity():
             "model_type": "llm",
             "max_tokens": 31920,
             "context_window_tokens": 200000,
-            "max_input_tokens": None,
+            "max_input_tokens": 180000,
             "max_output_tokens": 31920,
             "default_output_reserve_tokens": 4096,
             "tokenizer_family": "qwen",
             "capacity_source": "operator",
+            "capability_profile_version": "dashscope/glm-5.2@1",
         }
 
         await prepare_model_dict(
@@ -589,15 +590,25 @@ async def test_prepare_model_dict_persists_operator_capacity():
         )
 
         _, kwargs = mock_model_request.call_args
+        # W11 spec L721-727: pin every capacity field the constructor must
+        # thread for the accepted-suggestion save path. Missing any of these
+        # silently drops the field on the DB row and reproduces CM-031.
         assert kwargs["context_window_tokens"] == 200000
+        assert kwargs["max_input_tokens"] == 180000
         assert kwargs["max_output_tokens"] == 31920
         assert kwargs["default_output_reserve_tokens"] == 4096
         assert kwargs["tokenizer_family"] == "qwen"
+        assert kwargs["capability_profile_version"] == "dashscope/glm-5.2@1"
         # capacity_source is forced to "operator" by the prepare_model_dict
         # contract: only operator-marked values reach the row, and the
         # marker itself is normalized to the canonical value rather than
         # echoing whatever the caller sent.
         assert kwargs["capacity_source"] == "operator"
+        # Canonical provider/model values land via constructor kwargs too,
+        # so model_factory + model_name are pinned to catch regressions
+        # in split_repo_name plumbing.
+        assert kwargs["model_factory"] == "dashscope"
+        assert kwargs["model_name"] == "glm-5.2"
 
 
 @pytest.mark.asyncio
diff --git a/test/backend/test_model_consts.py b/test/backend/test_model_consts.py
index 78e77ce77..bf3874dce 100644
--- a/test/backend/test_model_consts.py
+++ b/test/backend/test_model_consts.py
@@ -28,3 +28,53 @@ def test_model_request_and_validation():
     assert req.filename == "f"
 
 
+def test_model_request_threads_w11_capacity_and_accept_fields():
+    """W11 spec L721-727 + L500-502: ModelRequest must carry every capacity
+    column the save handler can persist AND the audit-only accept-signal
+    fields shipped by the frontend after a "Use suggestion" save. Pinning the
+    field set here prevents a silent rename from dropping a column on the
+    DB row or breaking the accept counter.
+    """
+    fields = set(model_consts.ModelRequest.model_fields.keys())
+    required = {
+        # W1/W2 capacity columns (persisted)
+        "context_window_tokens",
+        "max_input_tokens",
+        "max_output_tokens",
+        "default_output_reserve_tokens",
+        "tokenizer_family",
+        "capacity_source",
+        "capability_profile_version",
+        # Canonical provider/model values
+        "model_factory",
+        "model_name",
+        # Accept-signal audit fields (wire-only, stripped by app layer)
+        "accepted_suggestion_match_kind",
+        "accepted_capability_profile_version",
+    }
+    missing = required - fields
+    assert not missing, f"ModelRequest missing W11 fields: {missing}"
+
+
+def test_capacity_suggestion_response_has_required_fields():
+    """Pin ModelCapacitySuggestionResponse schema so a downstream rename
+    (e.g. suggested_provider -> canonical_provider) trips a test instead
+    of silently dropping the field from the API contract.
+    """
+    fields = set(model_consts.ModelCapacitySuggestionResponse.model_fields.keys())
+    required = {
+        "suggestions",
+        "match_kind",
+        "match_confidence",
+        "match_explanation",
+        "suggested_provider",
+        "canonical_model_name",
+        "capability_profile_version",
+        "capacity_source_on_accept",
+    }
+    missing = required - fields
+    assert not missing, (
+        f"ModelCapacitySuggestionResponse missing W11 fields: {missing}"
+    )
+
+

From 775b0c849b92d70d04215ed6190271cd7fa3bf62 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 16:36:41 +0800
Subject: [PATCH 07/31] =?UTF-8?q?fix(w11):=20compact=20bare-capacity=20UI?=
 =?UTF-8?q?=20=E2=80=94=20icon+tooltip=20in=20model=20selector,=20vertical?=
 =?UTF-8?q?=20layout=20for=20legacy=20hint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Agent model selector: replace inline yellow subtitle with TriangleAlert
  icon + hover tooltip to reduce visual clutter in dropdown options
- ModelCapacityFields: move the legacy max_tokens apply buttons from the
  Alert's action prop (rendered beside the message) to its description
  prop (rendered below the message) so the hint text stacks above the
  apply buttons within the same alert box
- Add i18n key agent.modelSelector.bareCapacity.tooltip (zh/en)
---
 .../agentInfo/AgentGenerateDetail.tsx         | 19 ++++++++++---------
 .../components/model/ModelCapacityFields.tsx  |  4 ++--
 frontend/public/locales/en/common.json        |  1 +
 frontend/public/locales/zh/common.json        |  1 +
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
index cecc02d2b..b016f530c 100644
--- a/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
+++ b/frontend/app/[locale]/agents/components/agentInfo/AgentGenerateDetail.tsx
@@ -14,9 +14,10 @@ import {
   Card,
   App,
   Alert,
+  Tooltip,
 } from "antd";
 import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs";
-import { Zap, Maximize2, Settings2, Sparkles } from "lucide-react";
+import { Zap, Maximize2, Settings2, Sparkles, TriangleAlert } from "lucide-react";
 import { Textarea } from "@/components/ui/textarea";
 
 import {
@@ -550,11 +551,11 @@ export default function AgentGenerateDetail({}) {
     return {
       value: displayLabel,
       label: isBare ? (
-        <Flex vertical gap={0}>
+        <Flex align="center" gap={6}>
           <span>{displayLabel}</span>
-          <span className="text-[11px] text-yellow-700">
-            {t("agent.modelSelector.bareCapacity.subtitle")}
-          </span>
+          <Tooltip title={t("agent.modelSelector.bareCapacity.tooltip")}>
+            <TriangleAlert size={14} className="text-yellow-500 shrink-0" />
+          </Tooltip>
         </Flex>
       ) : (
         displayLabel
@@ -950,11 +951,11 @@ export default function AgentGenerateDetail({}) {
                                     disabled={model.connect_status !== "available"}
                                   >
                                     {isBare ? (
-                                      <Flex vertical gap={0}>
+                                      <Flex align="center" gap={6}>
                                         <span>{model.displayName}</span>
-                                        <span className="text-[11px] text-yellow-700">
-                                          {t("agent.modelSelector.bareCapacity.subtitle")}
-                                        </span>
+                                        <Tooltip title={t("agent.modelSelector.bareCapacity.tooltip")}>
+                                          <TriangleAlert size={14} className="text-yellow-500 shrink-0" />
+                                        </Tooltip>
                                       </Flex>
                                     ) : (
                                       model.displayName
diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
index 712ab62fd..b5b2bf15c 100644
--- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
@@ -412,8 +412,8 @@ export const ModelCapacityFields = ({
           message={t("model.dialog.capacity.legacyMaxTokensHint", {
             maxTokens: legacyMaxTokensCandidate,
           })}
-          action={
-            <Space size={6} wrap>
+          description={
+            <Space size={6} wrap className="mt-2">
               {(contextWindowIsRecommended
                 ? ["context", "output"]
                 : ["output", "context"]
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index 375a80a9e..f521a92f2 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -879,6 +879,7 @@
   "model.list.capacityWarning.badgeTooltip": "Output token cap is not enforced for this model. Click to fill capacity values now.",
   "model.list.capacityWarning.tooltipAction": "Open capacity panel",
   "agent.modelSelector.bareCapacity.subtitle": "Output cap not enforced — configure capacity in Model Management.",
+  "agent.modelSelector.bareCapacity.tooltip": "Model capacity limits not configured — please configure in Model Management",
   "agent.modelSelector.bareCapacity.formNotice": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement and budget consistency checks are off until capacity is set in Model Management.",
   "agent.modelSelector.bareCapacity.formNoticeNoPermission": "The selected model \"{{modelName}}\" has no capacity configured. The agent will run, but output-token enforcement is off. Ask a model administrator to configure capacity for this model.",
   "dashboard.capacityCoverage.title": "Model capacity coverage",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 230a731c3..a1613e658 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -850,6 +850,7 @@
   "model.list.capacityWarning.badgeTooltip": "该模型未启用输出 Token 限额。点击此处补全容量配置。",
   "model.list.capacityWarning.tooltipAction": "打开容量面板",
   "agent.modelSelector.bareCapacity.subtitle": "输出限额未启用 — 请在模型管理中补全容量。",
+  "agent.modelSelector.bareCapacity.tooltip": "模型容量配置缺少限额，请到模型管理中补全容量",
   "agent.modelSelector.bareCapacity.formNotice": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行，但输出 Token 限额和预算一致性校验关闭，直到在「模型管理」中补全容量。",
   "agent.modelSelector.bareCapacity.formNoticeNoPermission": "所选模型 \"{{modelName}}\" 尚未配置容量。Agent 可正常运行，但输出 Token 限额关闭。请联系模型管理员补全该模型的容量配置。",
   "dashboard.capacityCoverage.title": "模型容量覆盖",

From d6165cb4cac6c21ceaac3ea42673534263d364c3 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 17:14:19 +0800
Subject: [PATCH 08/31] =?UTF-8?q?fix(w11):=20close=20remaining=20spec=20ga?=
 =?UTF-8?q?ps=20=E2=80=94=20bare-capacity=20badge=20in=20model=20list=20ta?=
 =?UTF-8?q?ble=20+=20fuzzy=20canonicalization=20warning?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gap 1 — Model Management list page badge:
- ModelList.tsx: add useCapacityCoverage hook + TriangleAlert badge in
  the Name column for bare-capacity LLM/VLM rows
- Badge shows yellow warning icon inline with model name
- Hover tooltip explains enforcement is off; click opens ModelEditDialog
  (which auto-fires capacity suggestion for bare models)

Gap 2 — Fuzzy canonicalization warning:
- ModelCapacityFields.tsx: add acceptedSuggestion prop; render
  profileMissWarning text when catalog_fuzzy suggestion is shown but
  the user hasn't accepted the canonical model name
- ModelAddDialog.tsx + ModelEditDialog.tsx: pass acceptedCapacitySuggestion
  through to ModelCapacityFields
---
 .../components/model/ModelAddDialog.tsx       |  1 +
 .../components/model/ModelCapacityFields.tsx  | 12 +++++++++
 .../components/model/ModelEditDialog.tsx      |  1 +
 .../components/resources/ModelList.tsx        | 26 ++++++++++++++++++-
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
index 9473bf6f6..094ed4391 100644
--- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
@@ -1877,6 +1877,7 @@ export const ModelAddDialog = ({
               onUseSuggestion={() =>
                 applyCapacitySuggestion(capacitySuggestion)
               }
+              acceptedSuggestion={acceptedCapacitySuggestion}
             />
           </div>
         )}
diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
index b5b2bf15c..a2b8f8dca 100644
--- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
@@ -89,6 +89,8 @@ interface ModelCapacityFieldsProps {
    * `applyDefaults` option -- callers should pass matching booleans.
    */
   applyDefaultsOnEmpty?: boolean;
+  /** Currently accepted suggestion, used to detect fuzzy canonicalization mismatch */
+  acceptedSuggestion?: CapacitySuggestion | null;
 }
 
 const SOURCE_COLORS: Record<string, string> = {
@@ -278,6 +280,7 @@ export const ModelCapacityFields = ({
   suggestionLoading = false,
   legacyMaxTokensCandidate,
   applyDefaultsOnEmpty = true,
+  acceptedSuggestion,
 }: ModelCapacityFieldsProps) => {
   const { t } = useTranslation();
 
@@ -495,6 +498,15 @@ export const ModelCapacityFields = ({
                   {suggestion.suggestedProvider && (
                     <Tag color="purple">{suggestion.suggestedProvider}</Tag>
                   )}
+                  {/* Warn until the fuzzy-matched canonical name has been accepted. */}
+                  {suggestion.matchKind === "catalog_fuzzy" &&
+                    (!acceptedSuggestion ||
+                      acceptedSuggestion.canonicalModelName !==
+                        suggestion.canonicalModelName) && (
+                      <div className="text-xs text-yellow-700 mt-1">
+                        {t("model.dialog.capacity.suggestion.profileMissWarning")}
+                      </div>
+                    )}
                   {onUseSuggestion && (
                     <Button
                       size="small"
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index e2bd5b1e2..3d31e21e8 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -724,6 +724,7 @@ export const ModelEditDialog = ({
               onUseSuggestion={() =>
                 applyCapacitySuggestion(capacitySuggestion)
               }
+              acceptedSuggestion={acceptedCapacitySuggestion}
               // Legacy max_tokens is now surfaced via the actionable
               // legacyMaxTokensCandidate prompt with two-target buttons
               // (Context Window vs Max Output). The prompt is offered while
diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
index 11ff72c97..fe6f3d984 100644
--- a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
+++ b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
@@ -3,11 +3,12 @@
 import React, { useState, useMemo } from "react";
 import { useTranslation } from "react-i18next";
 import { Table, Button, Popconfirm, message, Tag, Segmented, Tooltip } from "antd";
-import { Edit, Trash2, RefreshCw } from "lucide-react";
+import { Edit, Trash2, RefreshCw, TriangleAlert } from "lucide-react";
 import { ColumnsType } from "antd/es/table";
 import type { TablePaginationConfig } from "antd";
 import { FilterValue, SorterResult } from "antd/es/table/interface";
 import { useManageTenantModels } from "@/hooks/model/useManageTenantModels";
+import { useCapacityCoverage } from "@/hooks/model/useCapacityCoverage";
 import { useMonitoringData, type TimeRange } from "@/hooks/useMonitoringData";
 import { modelService } from "@/services/modelService";
 import { type ModelOption, type ModelType } from "@/types/modelConfig";
@@ -30,6 +31,8 @@ interface UnifiedModelRow extends ModelOption {
 export default function ModelList({ tenantId }: { tenantId: string | null }) {
   const { t } = useTranslation("common");
 
+  const { bareModelIds } = useCapacityCoverage();
+
   const [page, setPage] = useState(1);
   const [pageSize, setPageSize] = useState(10);
 
@@ -222,6 +225,27 @@ export default function ModelList({ tenantId }: { tenantId: string | null }) {
       key: "displayName",
       width: 180,
       ellipsis: true,
+      render: (displayName: string, record: UnifiedModelRow) => {
+        // Coerce to a real boolean: a falsy non-boolean id (e.g. 0) would otherwise be rendered by the JSX `&&` below.
+        const isBareCapacity = Boolean(record.id && bareModelIds.has(record.id) && (record.type === "llm" || record.type === "vlm"));
+        return (
+          <div className="flex items-center">
+            <span className="truncate">{displayName}</span>
+            {isBareCapacity && (
+              <Tooltip title={t("model.list.capacityWarning.badgeTooltip")}>
+                <TriangleAlert
+                  size={14}
+                  className="text-yellow-500 cursor-pointer ml-1.5 shrink-0"
+                  onClick={(e) => {
+                    e.stopPropagation();
+                    openEdit(record);
+                  }}
+                />
+              </Tooltip>
+            )}
+          </div>
+        );
+      },
     },
     {
       title: t("common.type"),

From f65f859e464ab29d3ddfa30e6be66a5673a5db61 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Fri, 26 Jun 2026 17:37:04 +0800
Subject: [PATCH 09/31] fix(w11): remove obsolete deprecatedMaxTokens warning
 from ModelEditDialog

buildCapacityPayload mirrors max_output_tokens into the legacy max_tokens
column on every save, so a populated max_tokens is expected behavior, not
a deprecation signal. The showDeprecatedMaxTokensWarning condition was
always true for any model that went through the W11 save path, producing
a misleading warning for every edit.

Remove: showDeprecatedMaxTokensWarning prop, rendering branch, and the
deprecatedMaxTokens i18n keys from both locales.
---
 .../models/components/model/ModelCapacityFields.tsx    |  8 --------
 .../models/components/model/ModelEditDialog.tsx        | 10 ----------
 frontend/public/locales/en/common.json                 |  1 -
 frontend/public/locales/zh/common.json                 |  1 -
 4 files changed, 20 deletions(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
index a2b8f8dca..f2d82b746 100644
--- a/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelCapacityFields.tsx
@@ -60,7 +60,6 @@ interface ModelCapacityFieldsProps {
   validationError?: string | null;
   capacitySource?: CapacitySource | null;
   capabilityProfileVersion?: string | null;
-  showDeprecatedMaxTokensWarning?: boolean;
   /**
    * 'add' shows a flat panel with the four user-facing fields
    * (context_window, max_input, max_output, tokenizer) and supports required
@@ -272,7 +271,6 @@ export const ModelCapacityFields = ({
   validationError,
   capacitySource,
   capabilityProfileVersion,
-  showDeprecatedMaxTokensWarning,
   formMode = "edit",
   requiredFields = [],
   suggestion,
@@ -451,12 +449,6 @@ export const ModelCapacityFields = ({
             </Space>
           }
         />
-      ) : showDeprecatedMaxTokensWarning ? (
-        <Alert
-          type="warning"
-          showIcon
-          message={t("model.dialog.capacity.deprecatedMaxTokens")}
-        />
       ) : null}
 
       {suggestion && (
diff --git a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
index 3d31e21e8..5079ba3a5 100644
--- a/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelEditDialog.tsx
@@ -733,11 +733,6 @@ export const ModelEditDialog = ({
               // whole alert disappears once both are filled. The plain
               // deprecation banner only kicks in if both targets are filled
               // but the legacy column still has a value.
-              showDeprecatedMaxTokensWarning={
-                Boolean(model.maxTokens) &&
-                Boolean(model.contextWindowTokens || form.contextWindowTokens) &&
-                Boolean(model.maxOutputTokens || form.maxOutputTokens)
-              }
               legacyMaxTokensCandidate={
                 model.contextWindowTokens && model.maxOutputTokens
                   ? undefined
@@ -1117,11 +1112,6 @@ export const ProviderConfigEditDialog = ({
             capacitySource={initialCapacity?.capacitySource}
             capabilityProfileVersion={initialCapacity?.capabilityProfileVersion}
             // context_window/max_output optional; DEFAULT_* substitute at save.
-            showDeprecatedMaxTokensWarning={
-              Boolean(initialMaxTokens) &&
-              Boolean(initialCapacity?.contextWindowTokens || capacityForm.contextWindowTokens) &&
-              Boolean(initialCapacity?.maxOutputTokens || capacityForm.maxOutputTokens)
-            }
             legacyMaxTokensCandidate={
               initialCapacity?.contextWindowTokens && initialCapacity?.maxOutputTokens
                 ? undefined
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index f521a92f2..a55a14cbe 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -851,7 +851,6 @@
   "model.dialog.capacity.error.inputExceedsWindow": "Max input tokens cannot exceed the context window (any excess is silently clipped, so please adjust the value directly).",
   "model.dialog.capacity.error.reserveExceedsOutput": "Output reserve cannot exceed max output tokens.",
   "model.dialog.capacity.error.requiredMissing": "Context window and max input tokens are required.",
-  "model.dialog.capacity.deprecatedMaxTokens": "max_tokens is deprecated; use max_output_tokens.",
   "model.dialog.capacity.legacyMaxTokens.applyAsContext": "Fill into Context Window",
   "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "Fill into Max Output",
   "model.dialog.capacity.source.operator": "Operator",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index a1613e658..770c7457e 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -822,7 +822,6 @@
   "model.dialog.capacity.error.inputExceedsWindow": "最大输入Token数不能超过上下文窗口（超出部分会被自动忽略，请直接调整数值）。",
   "model.dialog.capacity.error.reserveExceedsOutput": "输出预留Token数不能超过最大输出Token数。",
   "model.dialog.capacity.error.requiredMissing": "上下文窗口和最大输入Token数为必填项。",
-  "model.dialog.capacity.deprecatedMaxTokens": "max_tokens 已废弃，请使用 max_output_tokens。",
   "model.dialog.capacity.legacyMaxTokens.applyAsContext": "填入「上下文窗口」",
   "model.dialog.capacity.legacyMaxTokens.applyAsOutput": "填入「最大输出」",
   "model.dialog.capacity.source.operator": "人工配置",

From 04b4bc06e2cd9ecc294d04b01d8a2f8a3b2cbc21 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 09:49:37 +0800
Subject: [PATCH 10/31] fix(w11): backfill bare LLM/VLM rows with safe capacity
 defaults

The catalog backfill (v2.2.0_0617) only covers exact (model_factory,
model_name) matches. Rows added via the manual-add path (model_factory
= 'OpenAI-API-Compatible') or any model not in the approved catalog
remain bare, disabling W2 output-token enforcement.

This migration fills remaining bare LLM/VLM rows with save-time
defaults: context_window=32768, max_output=4096, reserve=4096.
Idempotent (only writes when NULL), scoped to LLM/VLM, and includes
max_tokens alias reconciliation.
---
 ...2_0627_backfill_bare_capacity_defaults.sql | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql

diff --git a/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql b/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql
new file mode 100644
index 000000000..fbdd6e62c
--- /dev/null
+++ b/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql
@@ -0,0 +1,81 @@
+-- Migration kind: RECOMMENDED_DATA_FIX
+-- Required for: upgraded deployments where LLM/VLM rows still have NULL
+--   capacity columns after the catalog backfill (v2.2.0_0617).
+-- Safe to skip when: fresh deployment, or all LLM/VLM rows already have
+--   context_window_tokens and max_output_tokens populated.
+-- Reason: the catalog backfill only covers exact (model_factory, model_name)
+--   matches. Rows added via the manual-add path (model_factory =
+--   'OpenAI-API-Compatible' per CM-031) or any model not in the approved
+--   catalog remain bare. This migration applies safe defaults so W2
+--   output-token enforcement and W1 dispatch consistency checks activate.
+--
+-- Defaults match the save-time defaults in buildCapacityPayload:
+--   context_window_tokens  = 32768
+--   max_output_tokens      = 4096
+--   default_output_reserve = 4096
+--
+-- Pre-run self-check:
+--
+--   SELECT model_id, model_name, model_factory, model_type,
+--          context_window_tokens, max_output_tokens
+--     FROM nexent.model_record_t
+--    WHERE delete_flag = 'N'
+--      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
+--      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+--
+-- If the result is empty, this migration is a no-op and safe to skip.
+
+-- ============================================================
+-- Backfill bare LLM/VLM rows with safe capacity defaults
+-- ============================================================
+-- Idempotent: only writes when the target column IS NULL.
+-- Scoped to LLM/VLM rows (embedding/rerank/stt/tts excluded).
+-- capacity_source = 'operator' because these are operator-level defaults,
+-- not catalog profile matches.
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+BEGIN
+    -- Fill only NULL capacity columns. SET expressions see the pre-update row,
+    -- so each default below is derived from the other column's stored value.
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+               GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+               LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
+           -- Clamp the reserve default to the row's effective max output so a
+           -- small stored window/output cannot yield reserve > max_output_tokens.
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+               LEAST(4096, COALESCE(max_output_tokens,
+                   LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)))),
+           capacity_source = COALESCE(capacity_source, 'operator')
+     WHERE delete_flag = 'N'
+       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    RAISE NOTICE 'Bare capacity defaults: % LLM/VLM row(s) backfilled', v_updated;
+END $$;
+
+-- ============================================================
+-- Reconcile the legacy max_tokens column with max_output_tokens
+-- ============================================================
+-- Same reconcile as v2.2.0_0617. It is not limited to the rows backfilled
+-- above: every non-deleted, non-embedding row whose max_tokens differs from
+-- a non-NULL max_output_tokens is realigned.
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+BEGIN
+    UPDATE nexent.model_record_t
+       SET max_tokens = max_output_tokens
+     WHERE delete_flag = 'N'
+       AND max_output_tokens IS NOT NULL
+       AND COALESCE(max_tokens, -1) <> max_output_tokens
+       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    RAISE NOTICE 'max_tokens alias reconcile: % row(s) updated', v_updated;
+END $$;

From 3d133399aedbd7698b794629649082c6b5f41e6c Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 09:59:38 +0800
Subject: [PATCH 11/31] fix(i18n): rename 'catalog suggestion' to 'capacity
 suggestion' in coverage widget text

---
 frontend/public/locales/en/common.json | 2 +-
 frontend/public/locales/zh/common.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index a55a14cbe..b5b46971f 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -1046,7 +1046,7 @@
   "modelConfig.button.syncModelEngine": "Sync ModelEngine Models",
   "modelConfig.button.addCustomModel": "Add Model",
   "modelConfig.capacityCoverage.warning": "{{bareCount}} of {{total}} LLM/VLM models are missing capacity — output token cap is not enforced.",
-  "modelConfig.capacityCoverage.description": "{{suggestionCount}} have an approved catalog suggestion ready to apply. Click Manage, then click the warning icon on each affected row to repair.",
+  "modelConfig.capacityCoverage.description": "{{suggestionCount}} have an approved capacity suggestion ready to apply. Click Manage, then click the warning icon on each affected row to repair.",
   "modelConfig.capacityCoverage.manage": "Manage",
   "modelConfig.button.editCustomModel": "Edit or Delete Model",
   "modelConfig.button.checkConnectivity": "Check Model Connectivity",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 770c7457e..8402c31d2 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -1017,7 +1017,7 @@
   "modelConfig.button.syncModelEngine": "同步ModelEngine模型",
   "modelConfig.button.addCustomModel": "添加模型",
   "modelConfig.capacityCoverage.warning": "{{total}} 个 LLM/VLM 模型中有 {{bareCount}} 个未配置容量，输出 token 限额未启用。",
-  "modelConfig.capacityCoverage.description": "其中 {{suggestionCount}} 个有已审核目录建议可一键应用。点击\"管理\"打开列表，逐行点击警告图标即可修复。",
+  "modelConfig.capacityCoverage.description": "其中 {{suggestionCount}} 个有已审核容量建议可一键应用。点击\"管理\"打开列表，逐行点击警告图标即可修复。",
   "modelConfig.capacityCoverage.manage": "管理",
   "modelConfig.button.editCustomModel": "修改或删除模型",
   "modelConfig.button.checkConnectivity": "检查模型连通性",

From f785f8200100bca06fb3dc7d54efb04b2f67528f Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 10:22:48 +0800
Subject: [PATCH 12/31] feat(w11): expand capability catalog to 66 entries with
 SiliconFlow models

Add 54 new catalog entries for models hosted on SiliconFlow:
- DeepSeek: V4-Pro, V4-Flash, V3.2, V3.1-Terminus, R1, V3, R1-0528-Qwen3-8B
  plus Pro/ tier variants (11 entries)
- Qwen: Qwen3.6, Qwen3.5 (7 sizes), Qwen3-VL (6 variants), Qwen3-Omni (3),
  Qwen3-Coder, Qwen3 dense (3), Qwen2.5 (5) (26 entries)
- GLM/Zhipu: GLM-4 (3), GLM-5.2, GLM-4.5V, GLM-4.5-Air, Pro/GLM-5.1 (7 entries)
- Other: Seed-OSS, Ling (2), MiniMax (2), Kimi-K2.7-Code, Nex-N2-Pro,
  Step-3.5-Flash, Hunyuan (2) (10 entries)

CATALOG_REVISION bumped to 2026-06-27.1.

Migration script v2.2.2_0627_backfill_expanded_catalog.sql backfills
matching bare rows for existing deployments.
---
 backend/consts/capability_profiles.py         | 553 ++++++++++++++-
 .../v2.2.2_0627_backfill_expanded_catalog.sql | 646 ++++++++++++++++++
 2 files changed, 1198 insertions(+), 1 deletion(-)
 create mode 100644 docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql

diff --git a/backend/consts/capability_profiles.py b/backend/consts/capability_profiles.py
index d6f30f4dd..7c7d406d5 100644
--- a/backend/consts/capability_profiles.py
+++ b/backend/consts/capability_profiles.py
@@ -22,7 +22,7 @@
 logger = logging.getLogger(__name__)
 
 
-CATALOG_REVISION = "2026-06-23.4"
+CATALOG_REVISION = "2026-06-27.1"
 
 
 CATALOG: Dict[ProfileKey, CapabilityProfile] = {
@@ -159,4 +159,555 @@
         default_output_reserve_tokens=8_192,
         tokenizer_family="deepseek",
     ),
+    # SiliconFlow hosted models. Capacity specs sourced from SiliconFlow pricing
+    # page (siliconflow.com/pricing), official model documentation on HuggingFace
+    # and GitHub, and provider API docs. Re-verify at PR merge time.
+    #
+    # DeepSeek models hosted on SiliconFlow. deepseek-ai/ prefixed names are
+    # the standard tier; Pro/ prefixed names are the premium tier with the
+    # same model weights but higher throughput. Specs from DeepSeek API docs
+    # (api-docs.deepseek.com) and SiliconFlow pricing.
+    ("deepseek", "deepseek-ai/DeepSeek-V4-Pro"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-V4-Pro",
+        capability_profile_version="deepseek/deepseek-v4-pro-sf@1",
+        window_shape="combined",
+        context_window_tokens=1_048_576,
+        max_output_tokens=384_000,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-V4-Flash"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-V4-Flash",
+        capability_profile_version="deepseek/deepseek-v4-flash-sf@1",
+        window_shape="combined",
+        context_window_tokens=1_048_576,
+        max_output_tokens=384_000,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-V3.2",
+        capability_profile_version="deepseek/deepseek-v3.2@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-V3.1-Terminus",
+        capability_profile_version="deepseek/deepseek-v3.1-terminus@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-R1"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-R1",
+        capability_profile_version="deepseek/deepseek-r1@1",
+        window_shape="combined",
+        context_window_tokens=163_840,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-V3"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-V3",
+        capability_profile_version="deepseek/deepseek-v3@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"): CapabilityProfile(
+        provider="deepseek",
+        model_name="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+        capability_profile_version="deepseek/deepseek-r1-0528-qwen3-8b@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
+        provider="deepseek",
+        model_name="Pro/deepseek-ai/DeepSeek-V3.2",
+        capability_profile_version="deepseek/deepseek-v3.2-pro@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
+        provider="deepseek",
+        model_name="Pro/deepseek-ai/DeepSeek-V3.1-Terminus",
+        capability_profile_version="deepseek/deepseek-v3.1-terminus-pro@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "Pro/deepseek-ai/DeepSeek-R1"): CapabilityProfile(
+        provider="deepseek",
+        model_name="Pro/deepseek-ai/DeepSeek-R1",
+        capability_profile_version="deepseek/deepseek-r1-pro@1",
+        window_shape="combined",
+        context_window_tokens=163_840,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="deepseek",
+    ),
+    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3"): CapabilityProfile(
+        provider="deepseek",
+        model_name="Pro/deepseek-ai/DeepSeek-V3",
+        capability_profile_version="deepseek/deepseek-v3-pro@1",
+        window_shape="combined",
+        context_window_tokens=164_000,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="deepseek",
+    ),
+    # Qwen family on SiliconFlow
+    ("silicon", "Qwen/Qwen3.6-35B-A3B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.6-35B-A3B",
+        capability_profile_version="silicon/qwen3.6-35b-a3b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-397B-A17B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-397B-A17B",
+        capability_profile_version="silicon/qwen3.5-397b-a17b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-122B-A10B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-122B-A10B",
+        capability_profile_version="silicon/qwen3.5-122b-a10b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-35B-A3B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-35B-A3B",
+        capability_profile_version="silicon/qwen3.5-35b-a3b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-27B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-27B",
+        capability_profile_version="silicon/qwen3.5-27b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-9B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-9B",
+        capability_profile_version="silicon/qwen3.5-9b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3.5-4B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3.5-4B",
+        capability_profile_version="silicon/qwen3.5-4b@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-32B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-32B-Instruct",
+        capability_profile_version="silicon/qwen3-vl-32b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-32B-Thinking"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-32B-Thinking",
+        capability_profile_version="silicon/qwen3-vl-32b-thinking@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=32_768,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-8B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-8B-Instruct",
+        capability_profile_version="silicon/qwen3-vl-8b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-8B-Thinking"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-8B-Thinking",
+        capability_profile_version="silicon/qwen3-vl-8b-thinking@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=32_768,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-30B-A3B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-30B-A3B-Instruct",
+        capability_profile_version="silicon/qwen3-vl-30b-a3b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-VL-30B-A3B-Thinking"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-VL-30B-A3B-Thinking",
+        capability_profile_version="silicon/qwen3-vl-30b-a3b-thinking@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=32_768,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-Omni-30B-A3B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-Omni-30B-A3B-Instruct",
+        capability_profile_version="silicon/qwen3-omni-30b-a3b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-Omni-30B-A3B-Thinking"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-Omni-30B-A3B-Thinking",
+        capability_profile_version="silicon/qwen3-omni-30b-a3b-thinking@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-Omni-30B-A3B-Captioner"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-Omni-30B-A3B-Captioner",
+        capability_profile_version="silicon/qwen3-omni-30b-a3b-captioner@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-Coder-30B-A3B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-Coder-30B-A3B-Instruct",
+        capability_profile_version="silicon/qwen3-coder-30b-a3b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=65_536,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-30B-A3B-Instruct-2507"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-30B-A3B-Instruct-2507",
+        capability_profile_version="silicon/qwen3-30b-a3b-instruct-2507@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-32B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-32B",
+        capability_profile_version="silicon/qwen3-32b@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-14B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-14B",
+        capability_profile_version="silicon/qwen3-14b@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen3-8B"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen3-8B",
+        capability_profile_version="silicon/qwen3-8b@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen2.5-72B-Instruct-128K"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen2.5-72B-Instruct-128K",
+        capability_profile_version="silicon/qwen2.5-72b-instruct-128k@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen2.5-72B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen2.5-72B-Instruct",
+        capability_profile_version="silicon/qwen2.5-72b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen2.5-32B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen2.5-32B-Instruct",
+        capability_profile_version="silicon/qwen2.5-32b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen2.5-14B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen2.5-14B-Instruct",
+        capability_profile_version="silicon/qwen2.5-14b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "Qwen/Qwen2.5-7B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="Qwen/Qwen2.5-7B-Instruct",
+        capability_profile_version="silicon/qwen2.5-7b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    # GLM/Zhipu family on SiliconFlow
+    ("silicon", "THUDM/GLM-4-32B-0414"): CapabilityProfile(
+        provider="silicon",
+        model_name="THUDM/GLM-4-32B-0414",
+        capability_profile_version="silicon/glm-4-32b-0414@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "THUDM/GLM-Z1-9B-0414"): CapabilityProfile(
+        provider="silicon",
+        model_name="THUDM/GLM-Z1-9B-0414",
+        capability_profile_version="silicon/glm-z1-9b-0414@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "THUDM/GLM-4-9B-0414"): CapabilityProfile(
+        provider="silicon",
+        model_name="THUDM/GLM-4-9B-0414",
+        capability_profile_version="silicon/glm-4-9b-0414@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "zai-org/GLM-5.2"): CapabilityProfile(
+        provider="silicon",
+        model_name="zai-org/GLM-5.2",
+        capability_profile_version="silicon/glm-5.2@1",
+        window_shape="combined",
+        context_window_tokens=1_048_576,
+        max_output_tokens=131_072,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "zai-org/GLM-4.5V"): CapabilityProfile(
+        provider="silicon",
+        model_name="zai-org/GLM-4.5V",
+        capability_profile_version="silicon/glm-4.5v@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "zai-org/GLM-4.5-Air"): CapabilityProfile(
+        provider="silicon",
+        model_name="zai-org/GLM-4.5-Air",
+        capability_profile_version="silicon/glm-4.5-air@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="chatglm",
+    ),
+    ("silicon", "Pro/zai-org/GLM-5.1"): CapabilityProfile(
+        provider="silicon",
+        model_name="Pro/zai-org/GLM-5.1",
+        capability_profile_version="silicon/glm-5.1-pro@1",
+        window_shape="combined",
+        context_window_tokens=202_752,
+        max_output_tokens=131_072,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="chatglm",
+    ),
+    # Other models on SiliconFlow
+    ("silicon", "ByteDance-Seed/Seed-OSS-36B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="ByteDance-Seed/Seed-OSS-36B-Instruct",
+        capability_profile_version="silicon/seed-oss-36b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=524_288,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="seed",
+    ),
+    ("silicon", "inclusionAI/Ling-flash-2.0"): CapabilityProfile(
+        provider="silicon",
+        model_name="inclusionAI/Ling-flash-2.0",
+        capability_profile_version="silicon/ling-flash-2.0@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="ling",
+    ),
+    ("silicon", "inclusionAI/Ling-mini-2.0"): CapabilityProfile(
+        provider="silicon",
+        model_name="inclusionAI/Ling-mini-2.0",
+        capability_profile_version="silicon/ling-mini-2.0@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="ling",
+    ),
+    ("silicon", "MiniMaxAI/MiniMax-M2.5"): CapabilityProfile(
+        provider="silicon",
+        model_name="MiniMaxAI/MiniMax-M2.5",
+        capability_profile_version="silicon/minimax-m2.5@1",
+        window_shape="combined",
+        context_window_tokens=204_800,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="minimax",
+    ),
+    ("silicon", "Pro/MiniMaxAI/MiniMax-M2.5"): CapabilityProfile(
+        provider="silicon",
+        model_name="Pro/MiniMaxAI/MiniMax-M2.5",
+        capability_profile_version="silicon/minimax-m2.5-pro@1",
+        window_shape="combined",
+        context_window_tokens=204_800,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="minimax",
+    ),
+    ("silicon", "moonshotai/Kimi-K2.7-Code"): CapabilityProfile(
+        provider="silicon",
+        model_name="moonshotai/Kimi-K2.7-Code",
+        capability_profile_version="silicon/kimi-k2.7-code@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=32_768,
+        default_output_reserve_tokens=8_192,
+        tokenizer_family="moonshot",
+    ),
+    ("silicon", "nex-agi/Nex-N2-Pro"): CapabilityProfile(
+        provider="silicon",
+        model_name="nex-agi/Nex-N2-Pro",
+        capability_profile_version="silicon/nex-n2-pro@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="qwen",
+    ),
+    ("silicon", "stepfun-ai/Step-3.5-Flash"): CapabilityProfile(
+        provider="silicon",
+        model_name="stepfun-ai/Step-3.5-Flash",
+        capability_profile_version="silicon/step-3.5-flash@1",
+        window_shape="combined",
+        context_window_tokens=262_144,
+        max_output_tokens=16_384,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="step",
+    ),
+    ("silicon", "tencent/Hunyuan-MT-7B"): CapabilityProfile(
+        provider="silicon",
+        model_name="tencent/Hunyuan-MT-7B",
+        capability_profile_version="silicon/hunyuan-mt-7b@1",
+        window_shape="combined",
+        context_window_tokens=32_768,
+        max_output_tokens=2_048,
+        default_output_reserve_tokens=1_024,
+        tokenizer_family="hunyuan",
+    ),
+    ("silicon", "tencent/Hunyuan-A13B-Instruct"): CapabilityProfile(
+        provider="silicon",
+        model_name="tencent/Hunyuan-A13B-Instruct",
+        capability_profile_version="silicon/hunyuan-a13b-instruct@1",
+        window_shape="combined",
+        context_window_tokens=131_072,
+        max_output_tokens=8_192,
+        default_output_reserve_tokens=4_096,
+        tokenizer_family="hunyuan",
+    ),
 }
diff --git a/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql b/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql
new file mode 100644
index 000000000..be3533d60
--- /dev/null
+++ b/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql
@@ -0,0 +1,646 @@
+-- Migration kind: RECOMMENDED_DATA_FIX
+-- Required for: upgraded deployments with existing model_record_t rows
+--   whose (model_factory, model_name) now match expanded catalog entries.
+-- Safe to skip when: fresh deployment, or no matching rows exist.
+-- Reason: the v2.2.0_0617 catalog backfill only covered 10 model entries.
+--   This migration extends coverage to 54 additional SiliconFlow-hosted models.
+--
+-- Idempotent: only writes when context_window_tokens IS NULL.
+-- Catalog source of truth: backend/consts/capability_profiles.py
+--   CATALOG_REVISION 2026-06-27.1
+
+-- Pre-run self-check:
+--
+--   SELECT model_id, model_name, model_factory,
+--          context_window_tokens, max_output_tokens
+--     FROM nexent.model_record_t
+--    WHERE delete_flag = 'N'
+--      AND context_window_tokens IS NULL
+--      AND (
+--        (LOWER(model_factory) = 'deepseek')
+--        OR (LOWER(model_factory) = 'silicon')
+--      );
+--
+-- If the result is empty, this migration is a no-op and safe to skip (a non-empty result is only an upper bound: the check does not filter on model_name).
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+    v_total   INTEGER := 0;
+BEGIN
+    -- deepseek models on SiliconFlow (11 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 1048576,
+           max_output_tokens = 384000,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V4-Pro'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 1048576,
+           max_output_tokens = 384000,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V4-Flash'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 163840,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-R1-0528-Qwen3-8B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 163840,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 164000,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- silicon models on SiliconFlow (43 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.6-35B-A3B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-397B-A17B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-122B-A10B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-35B-A3B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-27B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-9B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-4B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-32B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 32768,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-32B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-8B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 32768,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-8B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 32768,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Captioner'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 65536,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Coder-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-32B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-14B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-8B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-72B-Instruct-128K'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-72B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-32B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-14B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-7B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-4-32B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-Z1-9B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-4-9B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 1048576,
+           max_output_tokens = 131072,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-5.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-4.5V'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-4.5-Air'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 202752,
+           max_output_tokens = 131072,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Pro/zai-org/GLM-5.1'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 524288,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'ByteDance-Seed/Seed-OSS-36B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'inclusionAI/Ling-flash-2.0'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'inclusionAI/Ling-mini-2.0'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 204800,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 204800,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Pro/MiniMaxAI/MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 32768,
+           default_output_reserve_tokens = 8192
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'moonshotai/Kimi-K2.7-Code'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'nex-agi/Nex-N2-Pro'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 262144,
+           max_output_tokens = 16384,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'stepfun-ai/Step-3.5-Flash'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 32768,
+           max_output_tokens = 2048,
+           default_output_reserve_tokens = 1024
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'tencent/Hunyuan-MT-7B'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = 131072,
+           max_output_tokens = 8192,
+           default_output_reserve_tokens = 4096
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'tencent/Hunyuan-A13B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    RAISE NOTICE 'W11 expanded catalog backfill: % row(s) updated', v_total;
+END $$;
+
+-- Reconcile the legacy max_tokens column with max_output_tokens
+-- for rows touched by this migration.
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+BEGIN
+    UPDATE nexent.model_record_t
+       SET max_tokens = max_output_tokens
+     WHERE delete_flag = 'N'
+       AND max_output_tokens IS NOT NULL
+       AND COALESCE(max_tokens, -1) <> max_output_tokens
+       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    RAISE NOTICE 'max_tokens alias reconcile: % row(s) updated', v_updated;
+END $$;

From 8b0497cbe4fffa6922db70e13aa445e87e9c1e3e Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 10:32:56 +0800
Subject: [PATCH 13/31] feat(w11): auto-backfill capacity from catalog on
 startup

Replace manual SQL migration scripts with automatic catalog-driven
backfill that runs on nexent-config container startup. The
capability_profiles.CATALOG is now the single source of truth.

New: backend/services/catalog_backfill_service.py
- Phase 1: match model_record_t rows against catalog entries, fill
  NULL capacity columns with catalog values
- Phase 2: fill remaining bare LLM/VLM rows with safe defaults
  (32K context, 4K output), enforcing max_output < context_window
- Phase 3: reconcile legacy max_tokens with max_output_tokens

Startup hook added to config_app.py. Manual SQL scripts deleted:
- v2.2.2_0627_backfill_bare_capacity_defaults.sql
- v2.2.2_0627_backfill_expanded_catalog.sql

Verified: backfill runs on startup, idempotent (0 updates when all
rows already populated).
---
 backend/apps/config_app.py                    |  11 +
 backend/services/catalog_backfill_service.py  | 169 +++++
 ...2_0627_backfill_bare_capacity_defaults.sql |  81 ---
 .../v2.2.2_0627_backfill_expanded_catalog.sql | 646 ------------------
 4 files changed, 180 insertions(+), 727 deletions(-)
 create mode 100644 backend/services/catalog_backfill_service.py
 delete mode 100644 docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql
 delete mode 100644 docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql

diff --git a/backend/apps/config_app.py b/backend/apps/config_app.py
index 9ffadfe5e..26db9411c 100644
--- a/backend/apps/config_app.py
+++ b/backend/apps/config_app.py
@@ -37,6 +37,7 @@
 from apps.cas_app import router as cas_router
 from consts.const import IS_SPEED_MODE
 from services.prompt_template_service import sync_system_default_prompt_template
+from services.catalog_backfill_service import backfill_capacity_from_catalog
 
 # Create logger instance
 logger = logging.getLogger("base_app")
@@ -54,6 +55,16 @@ async def sync_default_prompt_template_on_startup():
     except Exception as exc:
         logger.error(f"Failed to sync system default prompt template: {str(exc)}")
 
+
+@app.on_event("startup")
+async def backfill_capacity_on_startup():
+    """Run the catalog capacity backfill once at startup and log its summary; errors are logged, not raised."""
+    try:
+        summary = backfill_capacity_from_catalog()  # plain synchronous call made from inside the async hook
+        logger.info("Catalog capacity backfill complete: %s", summary)
+    except Exception as exc:  # caught here, so a backfill error does not propagate out of the hook
+        logger.error(f"Failed to backfill capacity from catalog: {str(exc)}")
+
 app.include_router(model_manager_router)
 app.include_router(config_sync_router)
 app.include_router(agent_router)
diff --git a/backend/services/catalog_backfill_service.py b/backend/services/catalog_backfill_service.py
new file mode 100644
index 000000000..7ba19083b
--- /dev/null
+++ b/backend/services/catalog_backfill_service.py
@@ -0,0 +1,169 @@
+"""Automatic catalog-driven capacity backfill for bare model_record_t rows.
+
+Reads the approved capability_profiles.CATALOG and fills NULL capacity
+columns on matching LLM/VLM rows at backend startup. This makes the
+catalog the single source of truth — no manual SQL migration scripts
+needed when new models are added to the catalog.
+
+Idempotent: fills capacity columns only when NULL, then re-syncs max_tokens.
+Safe: keeps max_output < context_window when filling a missing value.
+Cross-tenant: backfills all tenants' bare rows in one pass.
+"""
+import logging
+
+from sqlalchemy import func, or_, select, update
+
+from consts.capability_profiles import CATALOG, CATALOG_REVISION
+from database.client import get_db_session
+from database.db_models import ModelRecord
+
+logger = logging.getLogger(__name__)
+
+LLM_VLM_TYPES = {"llm", "vlm", "vlm2", "vlm3"}
+
+DEFAULT_CONTEXT_WINDOW = 32_768
+DEFAULT_MAX_OUTPUT = 4_096
+DEFAULT_RESERVE = 4_096
+
+
+def backfill_capacity_from_catalog() -> dict:
+    """Backfill bare-capacity LLM/VLM rows from the approved catalog.
+
+    Phase 1 fills rows whose (model_factory, model_name) matches a CATALOG
+    entry (factory compared case-insensitively, name exactly). Phase 2 gives
+    remaining bare rows the DEFAULT_* values. Phase 3 copies max_output_tokens
+    into max_tokens on non-embedding rows where the two differ.
+
+    Returns a dict of catalog revision, catalog size and per-phase row counts.
+    """
+    catalog_updated = 0
+    default_updated = 0
+    reconcile_updated = 0
+
+    with get_db_session() as session:  # one session shared by all three phases
+        # Phase 1: live rows matching a catalog entry that still miss a capacity column
+        for (provider, model_name), profile in CATALOG.items():
+            stmt = (
+                select(ModelRecord)
+                .where(
+                    ModelRecord.delete_flag == "N",
+                    func.lower(ModelRecord.model_factory) == provider.lower(),
+                    ModelRecord.model_name == model_name,
+                    ModelRecord.model_type.in_(list(LLM_VLM_TYPES)),
+                    or_(  # "bare" = either capacity column is still NULL
+                        ModelRecord.context_window_tokens.is_(None),
+                        ModelRecord.max_output_tokens.is_(None),
+                    ),
+                )
+            )
+            records = session.scalars(stmt).all()
+
+            for record in records:
+                ctx = record.context_window_tokens
+                mout = record.max_output_tokens
+
+                new_ctx = ctx if ctx is not None else max(  # a stored value is never overwritten
+                    profile.context_window_tokens,
+                    (mout or 0) + 1,  # stay strictly above an already-stored output cap
+                )
+                new_mout = mout if mout is not None else min(
+                    profile.max_output_tokens,
+                    (ctx or profile.context_window_tokens) - 1,  # stay strictly below the window
+                )
+                new_reserve = (
+                    record.default_output_reserve_tokens
+                    if record.default_output_reserve_tokens is not None
+                    else profile.default_output_reserve_tokens
+                )
+
+                update_stmt = (
+                    update(ModelRecord)
+                    .where(ModelRecord.model_id == record.model_id)
+                    .values(
+                        context_window_tokens=new_ctx,
+                        max_output_tokens=new_mout,
+                        default_output_reserve_tokens=new_reserve,
+                        capacity_source=record.capacity_source or "profile",  # keep an existing tag
+                        capability_profile_version=(
+                            record.capability_profile_version
+                            or profile.capability_profile_version
+                        ),
+                        update_time=func.current_timestamp(),
+                    )
+                )
+                session.execute(update_stmt)
+                catalog_updated += 1  # one UPDATE statement per selected record
+
+        # Phase 2: LLM/VLM rows still bare after Phase 1 get the DEFAULT_* values
+        bare_stmt = (
+            select(ModelRecord)
+            .where(
+                ModelRecord.delete_flag == "N",
+                ModelRecord.model_type.in_(list(LLM_VLM_TYPES)),
+                or_(
+                    ModelRecord.context_window_tokens.is_(None),
+                    ModelRecord.max_output_tokens.is_(None),
+                ),
+            )
+        )
+        bare_records = session.scalars(bare_stmt).all()
+
+        for record in bare_records:
+            ctx = record.context_window_tokens
+            mout = record.max_output_tokens
+
+            new_ctx = ctx if ctx is not None else max(
+                DEFAULT_CONTEXT_WINDOW, (mout or 0) + 1,  # same ordering guard as Phase 1
+            )
+            new_mout = mout if mout is not None else min(
+                DEFAULT_MAX_OUTPUT, (ctx or DEFAULT_CONTEXT_WINDOW) - 1,
+            )
+            new_reserve = (
+                record.default_output_reserve_tokens
+                if record.default_output_reserve_tokens is not None
+                else DEFAULT_RESERVE
+            )
+
+            update_stmt = (
+                update(ModelRecord)
+                .where(ModelRecord.model_id == record.model_id)
+                .values(
+                    context_window_tokens=new_ctx,
+                    max_output_tokens=new_mout,
+                    default_output_reserve_tokens=new_reserve,
+                    capacity_source=record.capacity_source or "operator",  # tag used for default fills
+                    update_time=func.current_timestamp(),
+                )
+            )
+            session.execute(update_stmt)
+            default_updated += 1
+
+        # Phase 3: one bulk UPDATE that syncs legacy max_tokens to max_output_tokens
+        reconcile_stmt = (
+            update(ModelRecord)
+            .where(
+                ModelRecord.delete_flag == "N",
+                ModelRecord.max_output_tokens.isnot(None),
+                func.coalesce(ModelRecord.max_tokens, -1)  # a NULL max_tokens counts as different
+                != ModelRecord.max_output_tokens,
+                func.coalesce(ModelRecord.model_type, "").notin_(  # NULL model_type is not excluded
+                    ["embedding", "multi_embedding"]
+                ),
+            )
+            .values(
+                max_tokens=ModelRecord.max_output_tokens,
+                update_time=func.current_timestamp(),
+            )
+        )
+        result = session.execute(reconcile_stmt)
+        reconcile_updated = result.rowcount  # row count reported for the bulk UPDATE
+
+    summary = {
+        "catalog_revision": CATALOG_REVISION,
+        "catalog_entries": len(CATALOG),
+        "catalog_backfilled": catalog_updated,
+        "default_backfilled": default_updated,
+        "max_tokens_reconciled": reconcile_updated,
+    }
+    logger.info("Catalog capacity backfill complete: %s", summary)
+    return summary
diff --git a/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql b/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql
deleted file mode 100644
index fbdd6e62c..000000000
--- a/docker/sql/v2.2.2_0627_backfill_bare_capacity_defaults.sql
+++ /dev/null
@@ -1,81 +0,0 @@
--- Migration kind: RECOMMENDED_DATA_FIX
--- Required for: upgraded deployments where LLM/VLM rows still have NULL
---   capacity columns after the catalog backfill (v2.2.0_0617).
--- Safe to skip when: fresh deployment, or all LLM/VLM rows already have
---   context_window_tokens and max_output_tokens populated.
--- Reason: the catalog backfill only covers exact (model_factory, model_name)
---   matches. Rows added via the manual-add path (model_factory =
---   'OpenAI-API-Compatible' per CM-031) or any model not in the approved
---   catalog remain bare. This migration applies safe defaults so W2
---   output-token enforcement and W1 dispatch consistency checks activate.
---
--- Defaults match the save-time defaults in buildCapacityPayload:
---   context_window_tokens  = 32768
---   max_output_tokens      = 4096
---   default_output_reserve = 4096
---
--- Pre-run self-check:
---
---   SELECT model_id, model_name, model_factory, model_type,
---          context_window_tokens, max_output_tokens
---     FROM nexent.model_record_t
---    WHERE delete_flag = 'N'
---      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
---      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
---
--- If the result is empty, this migration is a no-op and safe to skip.
-
--- ============================================================
--- Backfill bare LLM/VLM rows with safe capacity defaults
--- ============================================================
--- Idempotent: only writes when the target column IS NULL.
--- Scoped to LLM/VLM rows (embedding/rerank/stt/tts excluded).
--- capacity_source = 'operator' because these are operator-level defaults,
--- not catalog profile matches.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-BEGIN
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = CASE
-               WHEN context_window_tokens IS NULL
-               THEN GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)
-               ELSE context_window_tokens
-           END,
-           max_output_tokens = CASE
-               WHEN max_output_tokens IS NULL
-               THEN LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)
-               ELSE max_output_tokens
-           END,
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
-           capacity_source = COALESCE(capacity_source, 'operator')
-     WHERE delete_flag = 'N'
-       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
-
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'Bare capacity defaults: % LLM/VLM row(s) backfilled', v_updated;
-END $$;
-
--- ============================================================
--- Reconcile the legacy max_tokens column with max_output_tokens
--- ============================================================
--- Same reconcile as v2.2.0_0617 but scoped to the rows this migration
--- just touched, plus any rows that gained max_output_tokens since the
--- last reconcile run.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-BEGIN
-    UPDATE nexent.model_record_t
-       SET max_tokens = max_output_tokens
-     WHERE delete_flag = 'N'
-       AND max_output_tokens IS NOT NULL
-       AND COALESCE(max_tokens, -1) <> max_output_tokens
-       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
-
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'max_tokens alias reconcile: % row(s) updated', v_updated;
-END $$;
diff --git a/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql b/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql
deleted file mode 100644
index be3533d60..000000000
--- a/docker/sql/v2.2.2_0627_backfill_expanded_catalog.sql
+++ /dev/null
@@ -1,646 +0,0 @@
--- Migration kind: RECOMMENDED_DATA_FIX
--- Required for: upgraded deployments with existing model_record_t rows
---   whose (model_factory, model_name) now match expanded catalog entries.
--- Safe to skip when: fresh deployment, or no matching rows exist.
--- Reason: the v2.2.0_0617 catalog backfill only covered 10 model entries.
---   This migration extends coverage to 54 additional SiliconFlow-hosted models.
---
--- Idempotent: only writes when context_window_tokens IS NULL.
--- Catalog source of truth: backend/consts/capability_profiles.py
---   CATALOG_REVISION 2026-06-27.1
-
--- Pre-run self-check:
---
---   SELECT model_id, model_name, model_factory,
---          context_window_tokens, max_output_tokens
---     FROM nexent.model_record_t
---    WHERE delete_flag = 'N'
---      AND context_window_tokens IS NULL
---      AND (
---        (LOWER(model_factory) = 'deepseek')
---        OR (LOWER(model_factory) = 'silicon')
---      );
---
--- If the result is empty, this migration is a no-op and safe to skip.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-    v_total   INTEGER := 0;
-BEGIN
-    -- deepseek models on SiliconFlow (11 entries)
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1048576,
-           max_output_tokens = 384000,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V4-Pro'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1048576,
-           max_output_tokens = 384000,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V4-Flash'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 163840,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-R1'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-R1-0528-Qwen3-8B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.2'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 163840,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-R1'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 164000,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- silicon models on SiliconFlow (43 entries)
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.6-35B-A3B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-397B-A17B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-122B-A10B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-35B-A3B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-27B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-9B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-4B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-32B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 32768,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-32B-Thinking'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-8B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 32768,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-8B-Thinking'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 32768,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Thinking'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Thinking'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Captioner'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 65536,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Coder-30B-A3B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-32B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-14B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-8B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-72B-Instruct-128K'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-72B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-32B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-14B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-7B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-4-32B-0414'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-Z1-9B-0414'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-4-9B-0414'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1048576,
-           max_output_tokens = 131072,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-5.2'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-4.5V'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-4.5-Air'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 202752,
-           max_output_tokens = 131072,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/zai-org/GLM-5.1'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 524288,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'ByteDance-Seed/Seed-OSS-36B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'inclusionAI/Ling-flash-2.0'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'inclusionAI/Ling-mini-2.0'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 204800,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 204800,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/MiniMaxAI/MiniMax-M2.5'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 32768,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'moonshotai/Kimi-K2.7-Code'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'nex-agi/Nex-N2-Pro'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'stepfun-ai/Step-3.5-Flash'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 32768,
-           max_output_tokens = 2048,
-           default_output_reserve_tokens = 1024
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'tencent/Hunyuan-MT-7B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 8192,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'tencent/Hunyuan-A13B-Instruct'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    RAISE NOTICE 'W11 expanded catalog backfill: % row(s) updated', v_total;
-END $$;
-
--- Reconcile the legacy max_tokens column with max_output_tokens
--- for rows touched by this migration.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-BEGIN
-    UPDATE nexent.model_record_t
-       SET max_tokens = max_output_tokens
-     WHERE delete_flag = 'N'
-       AND max_output_tokens IS NOT NULL
-       AND COALESCE(max_tokens, -1) <> max_output_tokens
-       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
-
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'max_tokens alias reconcile: % row(s) updated', v_updated;
-END $$;

From 2a9cbcbfaf1beed45ba2c9a1af0a4beea2f8adcb Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:02:07 +0800
Subject: [PATCH 14/31] fix(w11): plug 3 production bugs in V1.5
 capacity-suggestion accept-signal wiring

Audit of yesterday's W11 V1.5 commits (f0e82d32b..f65f859e4) surfaced
three live bugs in the operator-accept SLO data flow. The crash one
(#1) is what tripped the SiliconFlow batch_create report; the other
two are observability holes that drop production signal silently.

#1 -- /provider/batch_create + /manage/batch_create crash on insert
    Reported as "Failed to batch create models: Unconsumed column
    names: accepted_capability_profile_version,
    accepted_suggestion_match_kind". Root cause: f0e82d32b added
    the two audit-only fields
    to ModelRequest with the contract "app layer pops before service
    sees it", which holds for /create and /update -- but the batch
    path goes through prepare_model_dict, and that function rebuilds
    the dict via ModelRequest(...).model_dump(), which resurrects the
    two fields as None even if the app layer had popped them. The
    resurrected keys then fall through to create_model_record ->
    SQLAlchemy insert -> the table has no such columns -> raise.
    Worse, the /provider/batch_create app layer was not even popping
    in the first place.
    Fix:
    - prepare_model_dict: model_dump(exclude={...}) so the audit
      fields cannot resurface for any caller, present or future.
      Single defensive choke point.
    - /provider/batch_create + /manage/batch_create: per-model
      pop_capacity_accept_signal + emit
      _record_capacity_suggestion_accept(provider) on success, so the
      batch path now also contributes to
      model_capacity_suggestion_accept_total.

#2 -- /manage/create + /manage/update silently drop the accept signal
    The ManageTenantModelCreateRequest / ManageTenantModelUpdateRequest
    Pydantic schemas in f0e82d32b were not updated when ModelRequest
    gained the two accepted_* fields. With Pydantic's default
    extra="ignore", the frontend wire payload's accept_* fields were
    silently dropped at the schema boundary -- the service never saw
    them, the recorder never fired. accept_total under-reported every
    save coming from the SU / asset-owner surface (ModelEditDialog
    with tenantId, used by AssetOwnerResourcesComp and UserManageComp).
    In any deployment that leans on the centralized asset-owner model
    pool, this is the majority of accept events -- the SLO numerator
    was effectively half-blind.
    Fix:
    - Declare accepted_suggestion_match_kind +
      accepted_capability_profile_version on both manage schemas with
      the same audit-only contract.
    - Both /manage/create and /manage/update now pop the signal off
      model_data before calling the service (otherwise the new fields
      would crash update_model_record / create_model_record the same
      way #1 did), then emit the recorder with
      provider=request.model_factory after the persist call succeeds.

#3 -- ModelList badge silently hides on vlm2/vlm3 rows
    d6165cb4c added the bare-capacity TriangleAlert badge in
    ModelList.tsx with a redundant frontend type guard
    `record.type === 'llm' || record.type === 'vlm'`. Backend's
    CAPACITY_COVERAGE_MODEL_TYPES is {'llm','vlm','vlm2','vlm3'} --
    bareModelIds from /capacity-coverage already filters by that
    set, but the frontend guard re-stated a smaller version that
    drifted. Bare vlm2 (image-gen) and vlm3 (video-und) rows never
    showed the warning icon or the click-to-fix entry point even
    though the backend marked them bare.
    Fix: drop the frontend type guard entirely and trust the
    authoritative bareModelIds set. Eliminates the duplicated-truth
    that caused the drift, so future type additions (vlm4, etc.) do
    not silently re-create the same gap.

Regression tests:
- test_prepare_model_dict_excludes_w11_accept_signal_fields pins the
  exclude kwarg so a future "let's clean up the dump call" cannot
  re-open #1.
- test_provider_batch_create_strips_accept_signal_and_records covers
  the batch-app contract: per-model pop + recorder fires once per
  accepted row, labelled with provider.
- test_manage_create_model_records_accept_signal_when_present and
  test_manage_update_model_records_accept_signal_when_present cover
  #2: audit fields stripped from the service-layer payload, recorder
  fires with provider=model_factory.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 backend/apps/model_managment_app.py           |  39 +++++
 backend/consts/model.py                       |  14 ++
 backend/services/model_provider_service.py    |  11 +-
 .../components/resources/ModelList.tsx        |  10 +-
 test/backend/app/test_model_managment_app.py  | 148 ++++++++++++++++++
 .../services/test_model_provider_service.py   |  42 +++++
 6 files changed, 261 insertions(+), 3 deletions(-)

diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py
index 44d6bfba0..edc64ef14 100644
--- a/backend/apps/model_managment_app.py
+++ b/backend/apps/model_managment_app.py
@@ -252,7 +252,18 @@ async def batch_create_models(request: BatchCreateModelsRequest, authorization:
     try:
         user_id, tenant_id = get_current_user_id(authorization)
         batch_model_config = request.model_dump()
+        # Strip W11 accept-signal fields off every model entry before the
+        # batch reaches the service/DB layer. Same audit-only contract as
+        # the single-create path: pop now, emit the SLO counter on success.
+        accept_signals = [
+            signal
+            for model in batch_model_config.get("models", [])
+            if (signal := pop_capacity_accept_signal(model)) is not None
+        ]
         await batch_create_models_for_tenant(user_id, tenant_id, batch_model_config)
+        provider = batch_model_config.get("provider")
+        for signal in accept_signals:
+            _record_capacity_suggestion_accept(signal["match_kind"], provider)
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Batch create models successfully"
         })
@@ -545,7 +556,18 @@ async def manage_create_model(
             f"Start to create model for tenant, user_id: {user_id}, target_tenant_id: {request.tenant_id}")
 
         model_data = request.model_dump(exclude={'tenant_id'})
+        # Strip W11 accept-signal fields before the dict reaches the
+        # service (which calls create_model_record -> SQLAlchemy insert).
+        # Without the pop, the fields would fall through to .values() and
+        # raise "Unconsumed column names"; without the recorder call,
+        # operator-accepted suggestions saved by SU/asset-owner via
+        # /manage/* would silently miss the accept_total SLO numerator.
+        accept_signal = pop_capacity_accept_signal(model_data)
         await create_model_for_tenant(user_id, request.tenant_id, model_data)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model created successfully",
             "data": {"tenant_id": request.tenant_id}
@@ -582,9 +604,16 @@ async def manage_update_model(
             f"current_display_name: {request.current_display_name}")
 
         model_data = request.model_dump(exclude={'tenant_id', 'current_display_name'}, exclude_unset=True)
+        # Same audit-only contract as /manage/create above: pop before
+        # the dict reaches update_model_record, emit after persist.
+        accept_signal = pop_capacity_accept_signal(model_data)
         await update_single_model_for_tenant(
             user_id, request.tenant_id, request.current_display_name, model_data
         )
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model updated successfully",
             "data": {"tenant_id": request.tenant_id}
@@ -666,7 +695,17 @@ async def manage_batch_create_models(
             f"provider: {request.provider}, type: {request.type}, models count: {len(request.models)}")
 
         batch_model_config = request.model_dump()
+        # Mirror /provider/batch_create: pop W11 accept-signal fields per
+        # model before the dict reaches the service/DB layer; emit the SLO
+        # counter only after the batch persist call succeeds.
+        accept_signals = [
+            signal
+            for model in batch_model_config.get("models", [])
+            if (signal := pop_capacity_accept_signal(model)) is not None
+        ]
         await batch_create_models_for_tenant(user_id, request.tenant_id, batch_model_config)
+        for signal in accept_signals:
+            _record_capacity_suggestion_accept(signal["match_kind"], request.provider)
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Batch create models successfully",
             "data": {
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 4ed9e8981..02db6c3e6 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -1021,6 +1021,15 @@ class ManageTenantModelCreateRequest(BaseModel):
     access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)")
     timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds")
     concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model")
+    # W11 accept-signal fields. Same audit-only contract as ModelRequest:
+    # the app layer pops them off model_data before the dict reaches the
+    # service/DB layer and forwards them to
+    # model_capacity_suggestion_accept_total. Declared here so Pydantic's
+    # default extra="ignore" does not silently drop the wire signal --
+    # without these declarations the SLO numerator misses every accept
+    # that lands via the SU/asset-owner surface.
+    accepted_suggestion_match_kind: Optional[str] = Field(None, description="Audit-only: catalog match_kind the operator accepted")
+    accepted_capability_profile_version: Optional[str] = Field(None, description="Audit-only: capability profile version of the accepted suggestion")
 
 
 class ManageTenantModelUpdateRequest(BaseModel):
@@ -1043,6 +1052,11 @@ class ManageTenantModelUpdateRequest(BaseModel):
     access_token: Optional[str] = Field(None, description="Access token for STT models")
     timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds")
     concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model")
+    # W11 accept-signal fields. See ManageTenantModelCreateRequest for the
+    # contract. The app layer pops them before calling the service so
+    # update_model_record never sees them.
+    accepted_suggestion_match_kind: Optional[str] = Field(None, description="Audit-only: catalog match_kind the operator accepted")
+    accepted_capability_profile_version: Optional[str] = Field(None, description="Audit-only: capability profile version of the accepted suggestion")
 
 
 class ManageTenantModelDeleteRequest(BaseModel):
diff --git a/backend/services/model_provider_service.py b/backend/services/model_provider_service.py
index 31867bedc..32ca5a532 100644
--- a/backend/services/model_provider_service.py
+++ b/backend/services/model_provider_service.py
@@ -151,7 +151,16 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
         **capacity_kwargs,
     )
 
-    model_dict = model_obj.model_dump()
+    # W11 accept-signal fields live on ModelRequest for app-layer ingest but
+    # are audit-only and have no DB column. model_dump() would otherwise
+    # resurrect them as None and SQLAlchemy raises "Unconsumed column names"
+    # at insert time. Exclude before the dict reaches create_model_record.
+    model_dict = model_obj.model_dump(
+        exclude={
+            "accepted_suggestion_match_kind",
+            "accepted_capability_profile_version",
+        }
+    )
     model_dict["model_repo"] = model_repo or ""
 
     # Determine the correct base_url and, for embeddings, update the actual
diff --git a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
index fe6f3d984..bbe404d89 100644
--- a/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
+++ b/frontend/app/[locale]/resource-manage/components/resources/ModelList.tsx
@@ -226,8 +226,14 @@ export default function ModelList({ tenantId }: { tenantId: string | null }) {
       width: 180,
       ellipsis: true,
       render: (displayName: string, record: UnifiedModelRow) => {
-        const isBareCapacity = record.id && bareModelIds.has(record.id) && (record.type === 'llm' || record.type === 'vlm');
-        
+        // bareModelIds comes from /capacity-coverage which already filters
+        // by CAPACITY_COVERAGE_MODEL_TYPES on the backend (llm/vlm/vlm2/vlm3).
+        // The earlier `type === 'llm' || 'vlm'` guard was a duplicated
+        // type list that drifted -- it silently hid the badge on vlm2
+        // (image-gen) and vlm3 (video-und) rows even when backend marked
+        // them bare. Drop the guard and trust the authoritative set.
+        const isBareCapacity = Boolean(record.id && bareModelIds.has(record.id));
+
         return (
           <div className="flex items-center">
             <span className="truncate">{displayName}</span>
diff --git a/test/backend/app/test_model_managment_app.py b/test/backend/app/test_model_managment_app.py
index 38300e988..75fa41dd6 100644
--- a/test/backend/app/test_model_managment_app.py
+++ b/test/backend/app/test_model_managment_app.py
@@ -488,6 +488,55 @@ async def test_provider_batch_create_exception(client, auth_header, user_credent
     mock_batch.assert_called_once()
 
 
+@pytest.mark.asyncio
+async def test_provider_batch_create_strips_accept_signal_and_records(
+    client, auth_header, user_credentials, mocker
+):
+    """Same audit-only contract as /create: per-model accept-signal fields
+    must be popped before the batch reaches the service layer (otherwise
+    SQLAlchemy raises 'Unconsumed column names' on insert), and the SLO
+    counter fires once per accepted row after the persist call succeeds.
+    """
+    mocker.patch(
+        'backend.apps.model_managment_app.get_current_user_id',
+        return_value=user_credentials,
+    )
+
+    async def _batch(*args, **kwargs):
+        return None
+
+    mock_batch = mocker.patch(
+        'backend.apps.model_managment_app.batch_create_models_for_tenant',
+        side_effect=_batch,
+    )
+    mock_record = mocker.patch(
+        'backend.apps.model_managment_app._record_capacity_suggestion_accept'
+    )
+
+    payload = {
+        "models": [
+            {
+                "id": "prov/modelA",
+                "accepted_suggestion_match_kind": "catalog_exact",
+                "accepted_capability_profile_version": "prov/modelA@1",
+            },
+            {"id": "prov/modelB"},
+        ],
+        "provider": "prov",
+        "type": "llm",
+        "api_key": "k",
+    }
+    response = client.post(
+        "/model/provider/batch_create", json=payload, headers=auth_header)
+
+    assert response.status_code == HTTPStatus.OK
+    sent = mock_batch.await_args.args[2]
+    for model in sent["models"]:
+        assert "accepted_suggestion_match_kind" not in model
+        assert "accepted_capability_profile_version" not in model
+    mock_record.assert_called_once_with("catalog_exact", "prov")
+
+
 # Tests for /model/delete endpoint
 @pytest.mark.asyncio
 async def test_delete_model_success(client, auth_header, user_credentials, mocker):
@@ -1069,6 +1118,57 @@ async def _create(*args, **kwargs):
     )
 
 
+@pytest.mark.asyncio
+async def test_manage_create_model_records_accept_signal_when_present(
+    client, auth_header, user_credentials, mocker
+):
+    """Same SLO contract as /create: when SU/asset-owner saves a model
+    through /manage/create with the accept signal in the payload, the
+    audit fields must be stripped before reaching the service and the
+    accept_total recorder must fire with provider=model_factory. Before
+    the fix the manage Pydantic schema did not declare the two fields,
+    so Pydantic silently dropped them and the SLO numerator missed
+    every accept that landed via the SU surface.
+    """
+    mocker.patch(
+        'backend.apps.model_managment_app.get_current_user_id',
+        return_value=user_credentials,
+    )
+
+    async def _create(*args, **kwargs):
+        return None
+
+    mock_create = mocker.patch(
+        'backend.apps.model_managment_app.create_model_for_tenant',
+        side_effect=_create,
+    )
+    mock_record = mocker.patch(
+        'backend.apps.model_managment_app._record_capacity_suggestion_accept'
+    )
+
+    request_data = {
+        "tenant_id": "target_tenant",
+        "model_name": "gpt-4o",
+        "model_type": "llm",
+        "model_factory": "openai",
+        "base_url": "https://api.openai.com/v1",
+        "api_key": "k",
+        "context_window_tokens": 128000,
+        "max_output_tokens": 16384,
+        "capacity_source": "operator",
+        "accepted_suggestion_match_kind": "catalog_exact",
+        "accepted_capability_profile_version": "openai/gpt-4o@1",
+    }
+    response = client.post(
+        "/model/manage/create", json=request_data, headers=auth_header)
+
+    assert response.status_code == HTTPStatus.OK
+    sent = mock_create.await_args.args[2]
+    assert "accepted_suggestion_match_kind" not in sent
+    assert "accepted_capability_profile_version" not in sent
+    mock_record.assert_called_once_with("catalog_exact", "openai")
+
+
 @pytest.mark.asyncio
 async def test_manage_create_model_conflict(client, auth_header, user_credentials, mocker):
     """Test model creation with conflict error."""
@@ -1148,6 +1248,54 @@ async def _update(*args, **kwargs):
     )
 
 
+@pytest.mark.asyncio
+async def test_manage_update_model_records_accept_signal_when_present(
+    client, auth_header, user_credentials, mocker
+):
+    """Mirror of test_manage_create_model_records_accept_signal_when_present
+    for the /manage/update path. The SU surface routes the same accept
+    signal through updateManageTenantModel; the recorder must fire with
+    provider=model_factory and the audit fields must not leak into the
+    DB update.
+    """
+    mocker.patch(
+        'backend.apps.model_managment_app.get_current_user_id',
+        return_value=user_credentials,
+    )
+
+    async def _update(*args, **kwargs):
+        return None
+
+    mock_update = mocker.patch(
+        'backend.apps.model_managment_app.update_single_model_for_tenant',
+        side_effect=_update,
+    )
+    mock_record = mocker.patch(
+        'backend.apps.model_managment_app._record_capacity_suggestion_accept'
+    )
+
+    request_data = {
+        "tenant_id": "target_tenant",
+        "current_display_name": "GPT-4o",
+        "model_factory": "openai",
+        "base_url": "https://api.openai.com/v1",
+        "api_key": "k",
+        "context_window_tokens": 128000,
+        "max_output_tokens": 16384,
+        "capacity_source": "operator",
+        "accepted_suggestion_match_kind": "catalog_fuzzy",
+        "accepted_capability_profile_version": "openai/gpt-4o@1",
+    }
+    response = client.post(
+        "/model/manage/update", json=request_data, headers=auth_header)
+
+    assert response.status_code == HTTPStatus.OK
+    sent = mock_update.await_args.args[3]
+    assert "accepted_suggestion_match_kind" not in sent
+    assert "accepted_capability_profile_version" not in sent
+    mock_record.assert_called_once_with("catalog_fuzzy", "openai")
+
+
 @pytest.mark.asyncio
 async def test_manage_update_model_not_found(client, auth_header, user_credentials, mocker):
     """Test model update with not found error."""
diff --git a/test/backend/services/test_model_provider_service.py b/test/backend/services/test_model_provider_service.py
index 9828246c5..1df83a45d 100644
--- a/test/backend/services/test_model_provider_service.py
+++ b/test/backend/services/test_model_provider_service.py
@@ -463,6 +463,48 @@ async def test_prepare_model_dict_llm():
         assert result == expected
 
 
+@pytest.mark.asyncio
+async def test_prepare_model_dict_excludes_w11_accept_signal_fields():
+    """ModelRequest exposes accepted_suggestion_match_kind /
+    accepted_capability_profile_version for app-layer ingest but they are
+    audit-only and have no DB column. model_dump() must exclude them so
+    SQLAlchemy does not raise 'Unconsumed column names' on insert when the
+    batch_create path reuses prepare_model_dict.
+    """
+    with mock.patch(
+        "backend.services.model_provider_service.split_repo_name",
+        return_value=("openai", "gpt-4"),
+    ), mock.patch(
+        "backend.services.model_provider_service.add_repo_to_name",
+        return_value="openai/gpt-4",
+    ), mock.patch(
+        "backend.services.model_provider_service.ModelRequest"
+    ) as mock_model_request, mock.patch(
+        "backend.services.model_provider_service.embedding_dimension_check",
+        new_callable=mock.AsyncMock,
+    ):
+        mock_model_req_instance = mock.MagicMock()
+        mock_model_req_instance.model_dump.return_value = {
+            "model_factory": "openai",
+            "model_name": "gpt-4",
+            "model_type": "llm",
+        }
+        mock_model_request.return_value = mock_model_req_instance
+
+        await prepare_model_dict(
+            "openai",
+            {"id": "openai/gpt-4", "model_type": "llm"},
+            "https://api.openai.com/v1",
+            "test-key",
+        )
+
+        _, dump_kwargs = mock_model_req_instance.model_dump.call_args
+        assert dump_kwargs.get("exclude") == {
+            "accepted_suggestion_match_kind",
+            "accepted_capability_profile_version",
+        }
+
+
 @pytest.mark.asyncio
 async def test_prepare_model_dict_does_not_persist_provider_capacity_candidates():
     """Provider capacity candidates remain UI hints until an operator saves them.

From e8aacc215736546f76be7be169e4ca02cd522ebc Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:04:45 +0800
Subject: [PATCH 15/31] refactor(w11): replace startup backfill with SQL
 generator

Replace the automatic Python backfill on container startup with a
deterministic SQL generation approach. The capability_profiles.py
catalog remains the single source of truth.

New: scripts/generate_backfill_sql.py
- Reads CATALOG from capability_profiles.py
- Emits idempotent SQL with COALESCE protection
- Enforces max_output < context_window via GREATEST/LEAST
- Three phases: catalog match, safe defaults, max_tokens reconcile

Generated: docker/sql/v2.2.2_0627_backfill_from_catalog.sql
- 66 catalog entries + safe defaults + max_tokens reconcile
- Operator runs manually during deployment

Removed: backend/services/catalog_backfill_service.py
Removed: startup hook from config_app.py

Developer workflow:
1. Edit capability_profiles.py (add/update models)
2. Run: python scripts/generate_backfill_sql.py > docker/sql/...
3. Commit both files
4. Operator runs SQL during deployment
---
 backend/apps/config_app.py                    |   11 -
 backend/services/catalog_backfill_service.py  |  169 ---
 .../sql/v2.2.2_0627_backfill_from_catalog.sql | 1066 +++++++++++++++++
 scripts/generate_backfill_sql.py              |  173 +++
 4 files changed, 1239 insertions(+), 180 deletions(-)
 delete mode 100644 backend/services/catalog_backfill_service.py
 create mode 100644 docker/sql/v2.2.2_0627_backfill_from_catalog.sql
 create mode 100644 scripts/generate_backfill_sql.py

diff --git a/backend/apps/config_app.py b/backend/apps/config_app.py
index 26db9411c..9ffadfe5e 100644
--- a/backend/apps/config_app.py
+++ b/backend/apps/config_app.py
@@ -37,7 +37,6 @@
 from apps.cas_app import router as cas_router
 from consts.const import IS_SPEED_MODE
 from services.prompt_template_service import sync_system_default_prompt_template
-from services.catalog_backfill_service import backfill_capacity_from_catalog
 
 # Create logger instance
 logger = logging.getLogger("base_app")
@@ -55,16 +54,6 @@ async def sync_default_prompt_template_on_startup():
     except Exception as exc:
         logger.error(f"Failed to sync system default prompt template: {str(exc)}")
 
-
-@app.on_event("startup")
-async def backfill_capacity_on_startup():
-    """Backfill bare model capacity from the approved catalog on startup."""
-    try:
-        summary = backfill_capacity_from_catalog()
-        logger.info("Catalog capacity backfill complete: %s", summary)
-    except Exception as exc:
-        logger.error(f"Failed to backfill capacity from catalog: {str(exc)}")
-
 app.include_router(model_manager_router)
 app.include_router(config_sync_router)
 app.include_router(agent_router)
diff --git a/backend/services/catalog_backfill_service.py b/backend/services/catalog_backfill_service.py
deleted file mode 100644
index 7ba19083b..000000000
--- a/backend/services/catalog_backfill_service.py
+++ /dev/null
@@ -1,169 +0,0 @@
-"""Automatic catalog-driven capacity backfill for bare model_record_t rows.
-
-Reads the approved capability_profiles.CATALOG and fills NULL capacity
-columns on matching LLM/VLM rows at backend startup. This makes the
-catalog the single source of truth — no manual SQL migration scripts
-needed when new models are added to the catalog.
-
-Idempotent: only writes when the target column IS NULL.
-Safe: enforces max_output < context_window when filling defaults.
-Cross-tenant: backfills all tenants' bare rows in one pass.
-"""
-import logging
-
-from sqlalchemy import func, or_, select, update
-
-from consts.capability_profiles import CATALOG, CATALOG_REVISION
-from database.client import get_db_session
-from database.db_models import ModelRecord
-
-logger = logging.getLogger(__name__)
-
-LLM_VLM_TYPES = {"llm", "vlm", "vlm2", "vlm3"}
-
-DEFAULT_CONTEXT_WINDOW = 32_768
-DEFAULT_MAX_OUTPUT = 4_096
-DEFAULT_RESERVE = 4_096
-
-
-def backfill_capacity_from_catalog() -> dict:
-    """Backfill bare-capacity LLM/VLM rows from the approved catalog.
-
-    For each catalog entry, find model_record_t rows with matching
-    (model_factory, model_name) where capacity columns are NULL, and
-    fill them with catalog values. Rows not in the catalog get safe
-    defaults (32K context, 4K output).
-
-    Returns a summary dict with counts.
-    """
-    catalog_updated = 0
-    default_updated = 0
-    reconcile_updated = 0
-
-    with get_db_session() as session:
-        # Phase 1: backfill rows that match a catalog entry
-        for (provider, model_name), profile in CATALOG.items():
-            stmt = (
-                select(ModelRecord)
-                .where(
-                    ModelRecord.delete_flag == "N",
-                    func.lower(ModelRecord.model_factory) == provider.lower(),
-                    ModelRecord.model_name == model_name,
-                    ModelRecord.model_type.in_(list(LLM_VLM_TYPES)),
-                    or_(
-                        ModelRecord.context_window_tokens.is_(None),
-                        ModelRecord.max_output_tokens.is_(None),
-                    ),
-                )
-            )
-            records = session.scalars(stmt).all()
-
-            for record in records:
-                ctx = record.context_window_tokens
-                mout = record.max_output_tokens
-
-                new_ctx = ctx if ctx is not None else max(
-                    profile.context_window_tokens,
-                    (mout or 0) + 1,
-                )
-                new_mout = mout if mout is not None else min(
-                    profile.max_output_tokens,
-                    (ctx or profile.context_window_tokens) - 1,
-                )
-                new_reserve = (
-                    record.default_output_reserve_tokens
-                    if record.default_output_reserve_tokens is not None
-                    else profile.default_output_reserve_tokens
-                )
-
-                update_stmt = (
-                    update(ModelRecord)
-                    .where(ModelRecord.model_id == record.model_id)
-                    .values(
-                        context_window_tokens=new_ctx,
-                        max_output_tokens=new_mout,
-                        default_output_reserve_tokens=new_reserve,
-                        capacity_source=record.capacity_source or "profile",
-                        capability_profile_version=(
-                            record.capability_profile_version
-                            or profile.capability_profile_version
-                        ),
-                        update_time=func.current_timestamp(),
-                    )
-                )
-                session.execute(update_stmt)
-                catalog_updated += 1
-
-        # Phase 2: backfill remaining bare LLM/VLM rows with safe defaults
-        bare_stmt = (
-            select(ModelRecord)
-            .where(
-                ModelRecord.delete_flag == "N",
-                ModelRecord.model_type.in_(list(LLM_VLM_TYPES)),
-                or_(
-                    ModelRecord.context_window_tokens.is_(None),
-                    ModelRecord.max_output_tokens.is_(None),
-                ),
-            )
-        )
-        bare_records = session.scalars(bare_stmt).all()
-
-        for record in bare_records:
-            ctx = record.context_window_tokens
-            mout = record.max_output_tokens
-
-            new_ctx = ctx if ctx is not None else max(
-                DEFAULT_CONTEXT_WINDOW, (mout or 0) + 1,
-            )
-            new_mout = mout if mout is not None else min(
-                DEFAULT_MAX_OUTPUT, (ctx or DEFAULT_CONTEXT_WINDOW) - 1,
-            )
-            new_reserve = (
-                record.default_output_reserve_tokens
-                if record.default_output_reserve_tokens is not None
-                else DEFAULT_RESERVE
-            )
-
-            update_stmt = (
-                update(ModelRecord)
-                .where(ModelRecord.model_id == record.model_id)
-                .values(
-                    context_window_tokens=new_ctx,
-                    max_output_tokens=new_mout,
-                    default_output_reserve_tokens=new_reserve,
-                    capacity_source=record.capacity_source or "operator",
-                    update_time=func.current_timestamp(),
-                )
-            )
-            session.execute(update_stmt)
-            default_updated += 1
-
-        # Phase 3: reconcile legacy max_tokens with max_output_tokens
-        reconcile_stmt = (
-            update(ModelRecord)
-            .where(
-                ModelRecord.delete_flag == "N",
-                ModelRecord.max_output_tokens.isnot(None),
-                func.coalesce(ModelRecord.max_tokens, -1)
-                != ModelRecord.max_output_tokens,
-                func.coalesce(ModelRecord.model_type, "").notin_(
-                    ["embedding", "multi_embedding"]
-                ),
-            )
-            .values(
-                max_tokens=ModelRecord.max_output_tokens,
-                update_time=func.current_timestamp(),
-            )
-        )
-        result = session.execute(reconcile_stmt)
-        reconcile_updated = result.rowcount
-
-    summary = {
-        "catalog_revision": CATALOG_REVISION,
-        "catalog_entries": len(CATALOG),
-        "catalog_backfilled": catalog_updated,
-        "default_backfilled": default_updated,
-        "max_tokens_reconciled": reconcile_updated,
-    }
-    logger.info("Catalog capacity backfill complete: %s", summary)
-    return summary
diff --git a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
new file mode 100644
index 000000000..c40ef8513
--- /dev/null
+++ b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
@@ -0,0 +1,1066 @@
+-- Generated by scripts/generate_backfill_sql.py on 2026-06-27
+-- Catalog revision: 2026-06-27.1
+-- Catalog entries: 66
+--
+-- Migration kind: RECOMMENDED_DATA_FIX
+-- Idempotent: COALESCE protects existing non-NULL values.
+-- Safe: enforces max_output < context_window via GREATEST/LEAST.
+--
+-- Pre-run self-check:
+--
+--   SELECT model_id, model_name, model_factory,
+--          context_window_tokens, max_output_tokens
+--     FROM nexent.model_record_t
+--    WHERE delete_flag = 'N'
+--      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
+--      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+--
+-- If the result is empty, no bare LLM/VLM rows remain to backfill; the max_tokens reconcile phase may still update rows.
+
+-- ============================================================
+-- Phase 1: Backfill rows matching approved catalog entries
+-- ============================================================
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+    v_total   INTEGER := 0;
+BEGIN
+    -- dashscope (4 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-plus@1')
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND model_name = 'qwen-plus'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-turbo@1')
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND model_name = 'qwen-turbo'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(65536, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen3.7-max@1')
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND model_name = 'qwen3.7-max'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(200000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(131072, COALESCE(context_window_tokens, 200000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'dashscope/glm-5.1@1')
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND model_name = 'glm-5.1'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- deepseek (15 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-chat@2')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-chat'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-reasoner@2')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-reasoner'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-v4-flash'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-v4-pro'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro-sf@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V4-Pro'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash-sf@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V4-Flash'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(163840, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-0528-qwen3-8b@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'deepseek-ai/DeepSeek-R1-0528-Qwen3-8B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2-pro@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus-pro@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(163840, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-pro@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3-pro@1')
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- openai (2 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(128000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 128000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
+     WHERE LOWER(model_factory) = 'openai'
+       AND model_name = 'gpt-4o'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
+     WHERE LOWER(model_factory) = 'openai'
+       AND model_name = 'gpt-4.1'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- silicon (45 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.6-27B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(131072, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Pro/moonshotai/Kimi-K2.6'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-35b-a3b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.6-35B-A3B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-397b-a17b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-397B-A17B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-122b-a10b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-122B-A10B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-35b-a3b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-35B-A3B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-27b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-27B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-9b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-9B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-4b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3.5-4B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-32B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-thinking@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-32B-Thinking'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-8B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-thinking@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-8B-Thinking'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-thinking@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-thinking@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-captioner@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Captioner'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-coder-30b-a3b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-Coder-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-30b-a3b-instruct-2507@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-32b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-32B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-14b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-14B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-8b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen3-8B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct-128k@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-72B-Instruct-128K'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-72B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-32b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-32B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-14b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-14B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-7b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Qwen/Qwen2.5-7B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-32b-0414@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-4-32B-0414'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-z1-9b-0414@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-Z1-9B-0414'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-9b-0414@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'THUDM/GLM-4-9B-0414'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(131072, COALESCE(context_window_tokens, 1048576) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.2@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-5.2'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5v@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-4.5V'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5-air@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'zai-org/GLM-4.5-Air'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(202752, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(131072, COALESCE(context_window_tokens, 202752) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.1-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Pro/zai-org/GLM-5.1'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(524288, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 524288) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/seed-oss-36b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'ByteDance-Seed/Seed-OSS-36B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-flash-2.0@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'inclusionAI/Ling-flash-2.0'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-mini-2.0@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'inclusionAI/Ling-mini-2.0'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(204800, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(204800, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'Pro/MiniMaxAI/MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.7-code@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'moonshotai/Kimi-K2.7-Code'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/nex-n2-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'nex-agi/Nex-N2-Pro'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/step-3.5-flash@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'stepfun-ai/Step-3.5-Flash'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(2048, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 1024),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-mt-7b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'tencent/Hunyuan-MT-7B'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'profile'),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-a13b-instruct@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_name = 'tencent/Hunyuan-A13B-Instruct'
+       AND delete_flag = 'N'
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    RAISE NOTICE 'Catalog backfill: % row(s) updated', v_total;
+END $$;
+
+-- ============================================================
+-- Phase 2: Safe defaults for remaining bare LLM/VLM rows
+-- ============================================================
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;  -- row count of the single UPDATE below
+BEGIN
+    UPDATE nexent.model_record_t  -- fills only NULL columns: every assignment is COALESCE(existing, fallback)
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           capacity_source = COALESCE(capacity_source, 'operator')
+     WHERE delete_flag = 'N'
+       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);  -- skip rows that already have both values; no factory/name filter, unlike Phase 1
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;  -- capture how many rows the UPDATE changed
+    RAISE NOTICE 'Safe defaults: % LLM/VLM row(s) backfilled', v_updated;
+END $$;
+
+-- ============================================================
+-- Phase 3: Reconcile legacy max_tokens with max_output_tokens
+-- ============================================================
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;  -- row count of the single UPDATE below
+BEGIN
+    UPDATE nexent.model_record_t  -- copy max_output_tokens into the legacy max_tokens column
+       SET max_tokens = max_output_tokens
+     WHERE delete_flag = 'N'
+       AND max_output_tokens IS NOT NULL  -- never overwrite max_tokens with NULL
+       AND COALESCE(max_tokens, -1) <> max_output_tokens  -- a NULL max_tokens compares as -1, so it also counts as "different"
+       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');  -- embedding rows are left alone; NULL model_type is included
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;
+END $$;
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
new file mode 100644
index 000000000..a3451f16d
--- /dev/null
+++ b/scripts/generate_backfill_sql.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+"""Generate idempotent backfill SQL from capability_profiles.CATALOG.
+
+Usage:
+    python scripts/generate_backfill_sql.py > docker/sql/v2.2.x_MMDD_backfill_from_catalog.sql
+
+Run whenever capability_profiles.py changes, then commit the generated SQL.
+"""
+import sys
+import os
+import types
+from datetime import date
+from collections import namedtuple
+
+_project_root = os.path.join(os.path.dirname(__file__), "..")  # parent of the directory that holds this script
+sys.path.insert(0, os.path.join(_project_root, "backend"))  # searched first, so `consts.capability_profiles` (imported below) is presumably found under backend/
+
+# Stub SDK types to avoid pulling in the full nexent SDK dependency chain
+_nexent_stub = types.ModuleType("nexent")  # empty module objects, one per dotted package level
+_nexent_core = types.ModuleType("nexent.core")
+_nexent_models = types.ModuleType("nexent.core.models")
+_nexent_resolver = types.ModuleType("nexent.core.models.capacity_resolver")  # the only stub that receives attributes (see assignments below)
+
+ProfileKey = tuple  # stand-in type; main() unpacks CATALOG keys as (provider, model_name)
+
+class CapabilityProfile:
+    """Minimal stub that accepts any keyword arguments and stores each one as an instance attribute."""
+    def __init__(self, **kwargs: object) -> None:
+        for k, v in kwargs.items():
+            setattr(self, k, v)  # no validation; main() reads e.g. context_window_tokens back off these attributes
+
+_nexent_resolver.ProfileKey = ProfileKey  # expose the stand-ins under the names the stub module publishes
+_nexent_resolver.CapabilityProfile = CapabilityProfile
+sys.modules["nexent"] = _nexent_stub  # pre-register so imports of these dotted names hit the stubs instead of the real SDK
+sys.modules["nexent.core"] = _nexent_core
+sys.modules["nexent.core.models"] = _nexent_models
+sys.modules["nexent.core.models.capacity_resolver"] = _nexent_resolver
+
+from consts.capability_profiles import CATALOG, CATALOG_REVISION  # deliberately placed after the sys.path / sys.modules setup above
+
+DEFAULT_CONTEXT_WINDOW = 32_768  # Phase 2 fallback context window for rows without a catalog match
+DEFAULT_MAX_OUTPUT = 4_096  # Phase 2 fallback max_output_tokens
+DEFAULT_RESERVE = 4_096  # Phase 2 fallback default_output_reserve_tokens
+
+
+def _sql_int(value: int) -> str:  # render a number for direct interpolation into the generated SQL
+    return str(value)  # plain str(): no type check, so a non-int (e.g. None) would be emitted verbatim
+
+
+def _sql_str(value: str) -> str:  # escape text for use inside a single-quoted SQL literal; callers add the surrounding quotes
+    return value.replace("'", "''")  # doubles single quotes only; a value containing `$$` would still end the enclosing DO $$ body
+
+
+def main() -> None:  # build the whole migration as a list of text lines, then print it to stdout
+    today = date.today().strftime("%Y-%m-%d")  # embedded in the header, so output differs by day even for an unchanged catalog
+    lines: list[str] = []  # generated SQL, one element per output line
+
+    lines.append(f"-- Generated by scripts/generate_backfill_sql.py on {today}")
+    lines.append(f"-- Catalog revision: {CATALOG_REVISION}")
+    lines.append(f"-- Catalog entries: {len(CATALOG)}")
+    lines.append("--")
+    lines.append("-- Migration kind: RECOMMENDED_DATA_FIX")
+    lines.append("-- Idempotent: COALESCE protects existing non-NULL values.")
+    lines.append("-- Safe: enforces max_output < context_window via GREATEST/LEAST.")
+    lines.append("--")
+    lines.append("-- Pre-run self-check:")
+    lines.append("--")
+    lines.append("--   SELECT model_id, model_name, model_factory,")
+    lines.append("--          context_window_tokens, max_output_tokens")
+    lines.append("--     FROM nexent.model_record_t")
+    lines.append("--    WHERE delete_flag = 'N'")
+    lines.append("--      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
+    lines.append("--      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
+    lines.append("--")
+    lines.append("-- If the result is empty, this migration is a no-op.")  # NOTE(review): Phase 3 below has its own WHERE clause and can still update rows
+    lines.append("")
+
+    # Phase 1: catalog-driven backfill
+    lines.append("-- ============================================================")
+    lines.append("-- Phase 1: Backfill rows matching approved catalog entries")
+    lines.append("-- ============================================================")
+    lines.append("")
+    lines.append("DO $$")
+    lines.append("DECLARE")
+    lines.append("    v_updated INTEGER := 0;")
+    lines.append("    v_total   INTEGER := 0;")
+    lines.append("BEGIN")
+
+    from collections import defaultdict
+    by_provider: dict[str, list] = defaultdict(list)  # provider -> [(model_name, profile), ...]
+    for (provider, model_name), profile in CATALOG.items():  # CATALOG keys are (provider, model_name) pairs
+        by_provider[provider].append((model_name, profile))
+
+    for provider in sorted(by_provider.keys()):  # providers sorted; entries within a provider keep CATALOG iteration order
+        entries = by_provider[provider]
+        lines.append(f"    -- {provider} ({len(entries)} entries)")
+        for model_name, profile in entries:
+            ctx = profile.context_window_tokens
+            mout = profile.max_output_tokens
+            reserve = profile.default_output_reserve_tokens
+            version = _sql_str(profile.capability_profile_version)
+            escaped_model = _sql_str(model_name)
+
+            lines.append(f"    UPDATE nexent.model_record_t")  # one UPDATE statement is emitted per catalog entry
+            lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
+            lines.append(f"           GREATEST({_sql_int(ctx)}, COALESCE(max_output_tokens, 0) + 1)),")
+            lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
+            lines.append(f"           LEAST({_sql_int(mout)}, COALESCE(context_window_tokens, {_sql_int(ctx)}) - 1)),")
+            lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, {_sql_int(reserve)}),")
+            lines.append(f"           capacity_source = COALESCE(capacity_source, 'profile'),")
+            lines.append(f"           capability_profile_version = COALESCE(capability_profile_version, '{version}')")
+            lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
+            lines.append(f"       AND model_name = '{escaped_model}'")
+            lines.append(f"       AND delete_flag = 'N'")
+            lines.append(f"       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")  # rows that already have both values are skipped
+            lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
+            lines.append(f"    v_total := v_total + v_updated;")  # running total reported by the single NOTICE after the loop
+            lines.append("")
+
+    lines.append("    RAISE NOTICE 'Catalog backfill: % row(s) updated', v_total;")
+    lines.append("END $$;")
+    lines.append("")
+
+    # Phase 2: safe defaults for remaining bare rows
+    lines.append("-- ============================================================")
+    lines.append("-- Phase 2: Safe defaults for remaining bare LLM/VLM rows")
+    lines.append("-- ============================================================")
+    lines.append("")
+    lines.append("DO $$")
+    lines.append("DECLARE")
+    lines.append("    v_updated INTEGER := 0;")
+    lines.append("BEGIN")
+    lines.append("    UPDATE nexent.model_record_t")  # same COALESCE pattern as Phase 1, using the DEFAULT_* constants and no factory/name filter
+    lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
+    lines.append(f"           GREATEST({_sql_int(DEFAULT_CONTEXT_WINDOW)}, COALESCE(max_output_tokens, 0) + 1)),")
+    lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
+    lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
+    lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, {_sql_int(DEFAULT_RESERVE)}),")
+    lines.append(f"           capacity_source = COALESCE(capacity_source, 'operator')")
+    lines.append("     WHERE delete_flag = 'N'")
+    lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
+    lines.append("       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
+    lines.append("")
+    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
+    lines.append("    RAISE NOTICE 'Safe defaults: % LLM/VLM row(s) backfilled', v_updated;")
+    lines.append("END $$;")
+    lines.append("")
+
+    # Phase 3: reconcile max_tokens
+    lines.append("-- ============================================================")
+    lines.append("-- Phase 3: Reconcile legacy max_tokens with max_output_tokens")
+    lines.append("-- ============================================================")
+    lines.append("")
+    lines.append("DO $$")
+    lines.append("DECLARE")
+    lines.append("    v_updated INTEGER := 0;")
+    lines.append("BEGIN")
+    lines.append("    UPDATE nexent.model_record_t")
+    lines.append("       SET max_tokens = max_output_tokens")  # generated SQL copies max_output_tokens into the legacy column
+    lines.append("     WHERE delete_flag = 'N'")
+    lines.append("       AND max_output_tokens IS NOT NULL")
+    lines.append("       AND COALESCE(max_tokens, -1) <> max_output_tokens")
+    lines.append("       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');")
+    lines.append("")
+    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
+    lines.append("    RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;")
+    lines.append("END $$;")
+
+    print("\n".join(lines))
+
+
+if __name__ == "__main__":
+    main()

From a9550fa6ac7fd91b65ee7dbc7232797ee6bad8af Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:12:54 +0800
Subject: [PATCH 16/31] fix(w11): add reserve <= max_output safety guard to
 backfill SQL

Add Phase 4 to generated backfill SQL that clamps
default_output_reserve_tokens to max_output_tokens when reserve
exceeds max_output. This prevents RequestedOutputExceedsCap errors
at runtime that silently disable W2 capacity enforcement.

Also add LEAST guard to Phase 1 and Phase 2 so newly filled reserve
values never exceed the actual max_output_tokens.

Verified: Phase 4 fixed 1 existing row with reserve > max_output.
---
 .../sql/v2.2.2_0627_backfill_from_catalog.sql | 220 ++++++++++++------
 scripts/generate_backfill_sql.py              |  27 ++-
 2 files changed, 178 insertions(+), 69 deletions(-)

diff --git a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
index c40ef8513..22bd82870 100644
--- a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
+++ b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
@@ -32,7 +32,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-plus@1')
      WHERE LOWER(model_factory) = 'dashscope'
@@ -47,7 +48,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-turbo@1')
      WHERE LOWER(model_factory) = 'dashscope'
@@ -62,7 +64,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(65536, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 65536))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen3.7-max@1')
      WHERE LOWER(model_factory) = 'dashscope'
@@ -77,7 +80,8 @@ BEGIN
            GREATEST(200000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(131072, COALESCE(context_window_tokens, 200000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 131072))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/glm-5.1@1')
      WHERE LOWER(model_factory) = 'dashscope'
@@ -93,7 +97,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-chat@2')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -108,7 +113,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-reasoner@2')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -123,7 +129,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -138,7 +145,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -153,7 +161,8 @@ BEGIN
            GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -168,7 +177,8 @@ BEGIN
            GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -183,7 +193,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -198,7 +209,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -213,7 +225,8 @@ BEGIN
            GREATEST(163840, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -228,7 +241,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -243,7 +257,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-0528-qwen3-8b@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -258,7 +273,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -273,7 +289,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -288,7 +305,8 @@ BEGIN
            GREATEST(163840, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -303,7 +321,8 @@ BEGIN
            GREATEST(164000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
@@ -319,7 +338,8 @@ BEGIN
            GREATEST(128000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 128000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
      WHERE LOWER(model_factory) = 'openai'
@@ -334,7 +354,8 @@ BEGIN
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(32768, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
      WHERE LOWER(model_factory) = 'openai'
@@ -350,7 +371,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 65536))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -365,7 +387,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(131072, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 131072))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -380,7 +403,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -395,7 +419,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-397b-a17b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -410,7 +435,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-122b-a10b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -425,7 +451,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -440,7 +467,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -455,7 +483,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-9b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -470,7 +499,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-4b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -485,7 +515,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -500,7 +531,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -515,7 +547,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -530,7 +563,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -545,7 +579,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -560,7 +595,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -575,7 +611,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -590,7 +627,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -605,7 +643,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-captioner@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -620,7 +659,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 65536))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-coder-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -635,7 +675,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-30b-a3b-instruct-2507@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -650,7 +691,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-32b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -665,7 +707,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-14b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -680,7 +723,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-8b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -695,7 +739,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct-128k@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -710,7 +755,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -725,7 +771,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -740,7 +787,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-14b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -755,7 +803,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-7b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -770,7 +819,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-32b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -785,7 +835,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-z1-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -800,7 +851,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -815,7 +867,8 @@ BEGIN
            GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(131072, COALESCE(context_window_tokens, 1048576) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 131072))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.2@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -830,7 +883,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5v@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -845,7 +899,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5-air@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -860,7 +915,8 @@ BEGIN
            GREATEST(202752, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(131072, COALESCE(context_window_tokens, 202752) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 131072))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.1-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -875,7 +931,8 @@ BEGIN
            GREATEST(524288, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 524288) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/seed-oss-36b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -890,7 +947,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-flash-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -905,7 +963,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-mini-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -920,7 +979,8 @@ BEGIN
            GREATEST(204800, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -935,7 +995,8 @@ BEGIN
            GREATEST(204800, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -950,7 +1011,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 8192),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.7-code@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -965,7 +1027,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/nex-n2-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -980,7 +1043,8 @@ BEGIN
            GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/step-3.5-flash@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -995,7 +1059,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(2048, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 1024),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(1024, COALESCE(max_output_tokens, 2048))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-mt-7b@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -1010,7 +1075,8 @@ BEGIN
            GREATEST(131072, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-a13b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
@@ -1036,7 +1102,8 @@ BEGIN
            GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
            max_output_tokens = COALESCE(max_output_tokens,
            LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, 4096),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 4096))),
            capacity_source = COALESCE(capacity_source, 'operator')
      WHERE delete_flag = 'N'
        AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
@@ -1064,3 +1131,22 @@ BEGIN
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;
 END $$;
+
+-- ============================================================
+-- Phase 4: Clamp default_output_reserve_tokens to max_output_tokens
+-- ============================================================
+
+DO $$
+DECLARE
+    v_updated INTEGER := 0;
+BEGIN
+    UPDATE nexent.model_record_t
+       SET default_output_reserve_tokens = max_output_tokens
+     WHERE delete_flag = 'N'
+       AND default_output_reserve_tokens IS NOT NULL
+       AND max_output_tokens IS NOT NULL
+       AND default_output_reserve_tokens > max_output_tokens;
+
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    RAISE NOTICE 'reserve clamp: % row(s) updated', v_updated;
+END $$;
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index a3451f16d..2c81beaf3 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -106,7 +106,8 @@ def main() -> None:
             lines.append(f"           GREATEST({_sql_int(ctx)}, COALESCE(max_output_tokens, 0) + 1)),")
             lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
             lines.append(f"           LEAST({_sql_int(mout)}, COALESCE(context_window_tokens, {_sql_int(ctx)}) - 1)),")
-            lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, {_sql_int(reserve)}),")
+            lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
+            lines.append(f"           LEAST({_sql_int(reserve)}, COALESCE(max_output_tokens, {_sql_int(mout)}))),")
             lines.append(f"           capacity_source = COALESCE(capacity_source, 'profile'),")
             lines.append(f"           capability_profile_version = COALESCE(capability_profile_version, '{version}')")
             lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
@@ -135,7 +136,8 @@ def main() -> None:
     lines.append(f"           GREATEST({_sql_int(DEFAULT_CONTEXT_WINDOW)}, COALESCE(max_output_tokens, 0) + 1)),")
     lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
     lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
-    lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens, {_sql_int(DEFAULT_RESERVE)}),")
+    lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
+    lines.append(f"           LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
     lines.append(f"           capacity_source = COALESCE(capacity_source, 'operator')")
     lines.append("     WHERE delete_flag = 'N'")
     lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
@@ -165,6 +167,27 @@ def main() -> None:
     lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
     lines.append("    RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;")
     lines.append("END $$;")
+    lines.append("")
+
+    # Phase 4: clamp reserve to max_output
+    lines.append("-- ============================================================")
+    lines.append("-- Phase 4: Clamp default_output_reserve_tokens to max_output_tokens")
+    lines.append("-- ============================================================")
+    lines.append("")
+    lines.append("DO $$")
+    lines.append("DECLARE")
+    lines.append("    v_updated INTEGER := 0;")
+    lines.append("BEGIN")
+    lines.append("    UPDATE nexent.model_record_t")
+    lines.append("       SET default_output_reserve_tokens = max_output_tokens")
+    lines.append("     WHERE delete_flag = 'N'")
+    lines.append("       AND default_output_reserve_tokens IS NOT NULL")
+    lines.append("       AND max_output_tokens IS NOT NULL")
+    lines.append("       AND default_output_reserve_tokens > max_output_tokens;")
+    lines.append("")
+    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
+    lines.append("    RAISE NOTICE 'reserve clamp: % row(s) updated', v_updated;")
+    lines.append("END $$;")
 
     print("\n".join(lines))
 

From 9d1547e16d1c51e5ba2618892955be4ea8aa233b Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:15:43 +0800
Subject: [PATCH 17/31] fix(w11): use capacity_source='unknown' for
 safe-default backfill rows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 fills bare rows with system defaults (32K/4K), not
operator-confirmed values. Marking them as 'operator' was
semantically wrong — it caused downstream code to treat these
rows as operator-verified, skipping suggestion prompts and
inflating SLO accuracy metrics.

Changed to 'unknown', which accurately reflects that no one has
reviewed these capacity values.
---
 docker/sql/v2.2.2_0627_backfill_from_catalog.sql | 2 +-
 scripts/generate_backfill_sql.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
index 22bd82870..dc66f787c 100644
--- a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
+++ b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
@@ -1104,7 +1104,7 @@ BEGIN
            LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 4096))),
-           capacity_source = COALESCE(capacity_source, 'operator')
+           capacity_source = COALESCE(capacity_source, 'unknown')
      WHERE delete_flag = 'N'
        AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index 2c81beaf3..565a6ffc9 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -138,7 +138,7 @@ def main() -> None:
     lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
     lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
     lines.append(f"           LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
-    lines.append(f"           capacity_source = COALESCE(capacity_source, 'operator')")
+    lines.append(f"           capacity_source = COALESCE(capacity_source, 'unknown')")
     lines.append("     WHERE delete_flag = 'N'")
     lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
     lines.append("       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")

From 0b944075c32194d5fbeb39592e632dee97e16978 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:22:45 +0800
Subject: [PATCH 18/31] feat(w11): add capacity_source='default' for
 system-default backfill rows

Add 'default' as a legitimate capacity_source value to distinguish
rows filled by the backfill safe-defaults from truly unknown sources.

- SDK: CapacitySource Literal type, agent_model description,
  monitoring _dominant_capacity_source priority list
- Backend: create_agent_info priority list, db_models column doc
- Frontend: i18n keys for en/zh
- SQL generator: Phase 2 now uses 'default' instead of 'unknown'
---
 backend/agents/create_agent_info.py              | 2 +-
 backend/database/db_models.py                    | 2 +-
 docker/sql/v2.2.2_0627_backfill_from_catalog.sql | 2 +-
 frontend/public/locales/en/common.json           | 1 +
 frontend/public/locales/zh/common.json           | 1 +
 scripts/generate_backfill_sql.py                 | 2 +-
 sdk/nexent/core/agents/agent_model.py            | 2 +-
 sdk/nexent/core/models/capacity_resolver.py      | 2 +-
 sdk/nexent/monitor/monitoring.py                 | 2 +-
 9 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
index cf790654d..d6e81a5a5 100644
--- a/backend/agents/create_agent_info.py
+++ b/backend/agents/create_agent_info.py
@@ -138,7 +138,7 @@ def _dominant_capacity_source(field_sources: dict) -> Optional[str]:
     values = [value for value in field_sources.values() if value]
     if not values:
         return None
-    for preferred in ("operator", "profile", "provider_candidate", "legacy", "unknown"):
+    for preferred in ("operator", "profile", "provider_candidate", "legacy", "default", "unknown"):
         if preferred in values:
             return preferred
     return values[0]
diff --git a/backend/database/db_models.py b/backend/database/db_models.py
index dc10d3c62..1e67c6065 100644
--- a/backend/database/db_models.py
+++ b/backend/database/db_models.py
@@ -199,7 +199,7 @@ class ModelRecord(TableBase):
     tokenizer_family = Column(
         String(100), doc="Token-counting strategy or provider/model tokenizer identifier mapped via tokenizer_registry. Nullable.")
     capacity_source = Column(
-        String(100), doc="Source of the persisted capacity value. Optional values: operator, profile, provider_candidate, legacy, unknown.")
+        String(100), doc="Source of the persisted capacity value. Optional values: operator, profile, provider_candidate, legacy, default, unknown.")
     capability_profile_version = Column(
         String(100), doc="Version of the approved provider/model capability profile used by the request, e.g. openai/gpt-4o@1.")
 
diff --git a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
index dc66f787c..19299573a 100644
--- a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
+++ b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
@@ -1104,7 +1104,7 @@ BEGIN
            LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 4096))),
-           capacity_source = COALESCE(capacity_source, 'unknown')
+           capacity_source = COALESCE(capacity_source, 'default')
      WHERE delete_flag = 'N'
        AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index b5b46971f..2a2d43d49 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -858,6 +858,7 @@
   "model.dialog.capacity.source.provider_candidate": "Provider Candidate",
   "model.dialog.capacity.source.legacy": "Legacy",
   "model.dialog.capacity.source.unknown": "Unknown",
+  "model.dialog.capacity.source.default": "System Default",
   "model.dialog.capacity.suggestion.title": "Capacity suggestion",
   "model.dialog.capacity.suggestion.hintAdd": "Suggested from the approved catalog after connectivity passes.",
   "model.dialog.capacity.suggestion.check": "Check",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index 8402c31d2..20f023a2d 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -829,6 +829,7 @@
   "model.dialog.capacity.source.provider_candidate": "供应商候选",
   "model.dialog.capacity.source.legacy": "旧字段",
   "model.dialog.capacity.source.unknown": "未知",
+  "model.dialog.capacity.source.default": "系统默认",
   "model.dialog.capacity.suggestion.title": "容量建议",
   "model.dialog.capacity.suggestion.hintAdd": "连通性测试通过后，自动从已审核目录给出容量建议。",
   "model.dialog.capacity.suggestion.check": "检查",
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index 565a6ffc9..92d0354da 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -138,7 +138,7 @@ def main() -> None:
     lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
     lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
     lines.append(f"           LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
-    lines.append(f"           capacity_source = COALESCE(capacity_source, 'unknown')")
+    lines.append(f"           capacity_source = COALESCE(capacity_source, 'default')")
     lines.append("     WHERE delete_flag = 'N'")
     lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
     lines.append("       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
diff --git a/sdk/nexent/core/agents/agent_model.py b/sdk/nexent/core/agents/agent_model.py
index cad66256d..ec67605c6 100644
--- a/sdk/nexent/core/agents/agent_model.py
+++ b/sdk/nexent/core/agents/agent_model.py
@@ -80,7 +80,7 @@ class ModelConfig(BaseModel):
         default=None,
     )
     capacity_source: Optional[str] = Field(
-        description="Source of the persisted capacity value: operator | profile | provider_candidate | legacy | unknown.",
+        description="Source of the persisted capacity value: operator | profile | provider_candidate | legacy | default | unknown.",
         default=None,
     )
     capability_profile_version: Optional[str] = Field(
diff --git a/sdk/nexent/core/models/capacity_resolver.py b/sdk/nexent/core/models/capacity_resolver.py
index cb7af2e4d..da83e6a66 100644
--- a/sdk/nexent/core/models/capacity_resolver.py
+++ b/sdk/nexent/core/models/capacity_resolver.py
@@ -17,7 +17,7 @@
 CountingMode = Literal["exact", "estimated"]
 WindowShape = Literal["combined", "separate"]
 CapacitySource = Literal[
-    "operator", "profile", "provider_candidate", "legacy", "unknown"
+    "operator", "profile", "provider_candidate", "legacy", "default", "unknown"
 ]
 ReasoningWindowBehavior = Literal["none", "reserved", "unknown"]
 ProviderOverheadBehavior = Literal["negligible", "bounded", "unknown"]
diff --git a/sdk/nexent/monitor/monitoring.py b/sdk/nexent/monitor/monitoring.py
index b3bef9cd0..e9381665d 100644
--- a/sdk/nexent/monitor/monitoring.py
+++ b/sdk/nexent/monitor/monitoring.py
@@ -1945,7 +1945,7 @@ def _dominant_capacity_source(field_sources: Any) -> Optional[str]:
     values = [value for value in field_sources.values() if value]
     if not values:
         return None
-    for preferred in ("operator", "profile", "provider_candidate", "legacy", "unknown"):
+    for preferred in ("operator", "profile", "provider_candidate", "legacy", "default", "unknown"):
         if preferred in values:
             return preferred
     return str(values[0])

From 31fc5905b5c888ee05ab32aa57752bfb5f7b0671 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 11:55:39 +0800
Subject: [PATCH 19/31] refactor(w11): remove Phase 3 max_tokens reconcile from
 backfill SQL

The SDK ModelConfig validator already auto-syncs max_tokens and
max_output_tokens in memory. The DB-level reconcile was redundant
and could silently overwrite a legacy max_tokens value that an
operator had set deliberately (e.g. the operator sets
max_tokens=16384 for longer output, Phase 1a/2 fills
max_output_tokens from the catalog/default, and Phase 3 then
overwrites the operator's 16384 with that filled-in value).

Phases now:
  1a  Catalog match -> fill bare rows
  1b  Catalog match -> tag already-filled rows
  2   Safe defaults for remaining bare LLM/VLM rows
  3   Clamp reserve to <= max_output_tokens
---
 .../sql/v2.2.2_0627_backfill_from_catalog.sql | 1036 +++++++++++++++--
 scripts/generate_backfill_sql.py              |  147 ++-
 2 files changed, 1058 insertions(+), 125 deletions(-)

diff --git a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
index 19299573a..502a59c96 100644
--- a/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
+++ b/docker/sql/v2.2.2_0627_backfill_from_catalog.sql
@@ -3,22 +3,33 @@
 -- Catalog entries: 66
 --
 -- Migration kind: RECOMMENDED_DATA_FIX
--- Idempotent: COALESCE protects existing non-NULL values.
+-- Idempotent: COALESCE + IS NULL guards protect existing values.
 -- Safe: enforces max_output < context_window via GREATEST/LEAST.
 --
--- Pre-run self-check:
+-- Phases:
+--   1a  Bare LLM/VLM rows that match a catalog entry by
+--       (model_factory, model_repo, model_name) -> fill capacity
+--       fields + tag capacity_source='profile' + profile_version.
+--   1b  Already-filled rows that match a catalog entry AND whose
+--       context_window_tokens and max_output_tokens exactly equal
+--       the catalog values -> tag profile_version only. capacity_
+--       source stays whatever it was (typically 'operator'); we
+--       don't rewrite provenance, we just add the dispatch tag so
+--       dispatch_profile_hit_total can fire.
+--    2  Remaining bare LLM/VLM rows -> safe defaults.
+--    3  Clamp default_output_reserve_tokens to <= max_output_tokens.
 --
---   SELECT model_id, model_name, model_factory,
---          context_window_tokens, max_output_tokens
+-- Pre-run self-check (rows whose capability_profile_version is NULL):
+--
+--   SELECT model_id, model_repo, model_name, model_factory,
+--          context_window_tokens, max_output_tokens, capability_profile_version
 --     FROM nexent.model_record_t
 --    WHERE delete_flag = 'N'
 --      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
---      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
---
--- If the result is empty, this migration is a no-op.
+--      AND capability_profile_version IS NULL;
 
 -- ============================================================
--- Phase 1: Backfill rows matching approved catalog entries
+-- Phase 1a: Backfill bare rows that match approved catalog entries
 -- ============================================================
 
 DO $$
@@ -37,6 +48,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-plus@1')
      WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-plus'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -53,6 +65,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-turbo@1')
      WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-turbo'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -69,6 +82,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen3.7-max@1')
      WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen3.7-max'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -85,6 +99,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/glm-5.1@1')
      WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'glm-5.1'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -102,6 +117,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-chat@2')
      WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-chat'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -118,6 +134,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-reasoner@2')
      WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-reasoner'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -134,6 +151,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash@1')
      WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-flash'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -150,6 +168,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-pro'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -166,7 +185,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V4-Pro'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V4-Pro'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -182,7 +202,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V4-Flash'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V4-Flash'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -198,7 +219,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -214,7 +236,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -230,7 +253,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-R1'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -246,7 +270,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -262,7 +287,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-0528-qwen3-8b@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-ai/DeepSeek-R1-0528-Qwen3-8B'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -278,7 +304,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.2'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -294,7 +321,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -310,7 +338,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-R1'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-R1'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -326,7 +355,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'Pro/deepseek-ai/DeepSeek-V3'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -343,6 +373,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
      WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4o'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -359,6 +390,7 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
      WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4.1'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
@@ -376,7 +408,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.6-27B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-27B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -392,7 +425,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/moonshotai/Kimi-K2.6'
+       AND model_repo = 'Pro'
+       AND model_name = 'moonshotai/Kimi-K2.6'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -408,7 +442,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.6-35B-A3B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-35B-A3B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -424,7 +459,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-397b-a17b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-397B-A17B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-397B-A17B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -440,7 +476,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-122b-a10b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-122B-A10B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-122B-A10B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -456,7 +493,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-35B-A3B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-35B-A3B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -472,7 +510,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-27B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-27B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -488,7 +527,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-9b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-9B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-9B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -504,7 +544,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-4b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.5-4B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-4B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -520,7 +561,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-32B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-32B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -536,7 +578,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-32B-Thinking'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-32B-Thinking'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -552,7 +595,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-8B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-8B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -568,7 +612,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-8B-Thinking'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-8B-Thinking'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -584,7 +629,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -600,7 +646,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-VL-30B-A3B-Thinking'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-30B-A3B-Thinking'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -616,7 +663,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -632,7 +680,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Thinking'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Thinking'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -648,7 +697,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-captioner@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Omni-30B-A3B-Captioner'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Captioner'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -664,7 +714,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-coder-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-Coder-30B-A3B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Coder-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -680,7 +731,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-30b-a3b-instruct-2507@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-30B-A3B-Instruct-2507'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -696,7 +748,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-32b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-32B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-32B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -712,7 +765,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-14b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-14B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-14B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -728,7 +782,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-8b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3-8B'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-8B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -744,7 +799,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct-128k@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-72B-Instruct-128K'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-72B-Instruct-128K'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -760,7 +816,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-72B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-72B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -776,7 +833,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-32B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-32B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -792,7 +850,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-14b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-14B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-14B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -808,7 +867,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-7b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen2.5-7B-Instruct'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-7B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -824,7 +884,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-32b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-4-32B-0414'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-4-32B-0414'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -840,7 +901,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-z1-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-Z1-9B-0414'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-Z1-9B-0414'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -856,7 +918,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'THUDM/GLM-4-9B-0414'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-4-9B-0414'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -872,7 +935,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.2@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-5.2'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-5.2'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -888,7 +952,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5v@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-4.5V'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-4.5V'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -904,7 +969,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5-air@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'zai-org/GLM-4.5-Air'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-4.5-Air'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -920,7 +986,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.1-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/zai-org/GLM-5.1'
+       AND model_repo = 'Pro'
+       AND model_name = 'zai-org/GLM-5.1'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -936,7 +1003,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/seed-oss-36b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'ByteDance-Seed/Seed-OSS-36B-Instruct'
+       AND model_repo = 'ByteDance-Seed'
+       AND model_name = 'Seed-OSS-36B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -952,7 +1020,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-flash-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'inclusionAI/Ling-flash-2.0'
+       AND model_repo = 'inclusionAI'
+       AND model_name = 'Ling-flash-2.0'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -968,7 +1037,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-mini-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'inclusionAI/Ling-mini-2.0'
+       AND model_repo = 'inclusionAI'
+       AND model_name = 'Ling-mini-2.0'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -984,7 +1054,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND model_repo = 'MiniMaxAI'
+       AND model_name = 'MiniMax-M2.5'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1000,7 +1071,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/MiniMaxAI/MiniMax-M2.5'
+       AND model_repo = 'Pro'
+       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1016,7 +1088,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.7-code@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'moonshotai/Kimi-K2.7-Code'
+       AND model_repo = 'moonshotai'
+       AND model_name = 'Kimi-K2.7-Code'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1032,7 +1105,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/nex-n2-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'nex-agi/Nex-N2-Pro'
+       AND model_repo = 'nex-agi'
+       AND model_name = 'Nex-N2-Pro'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1048,7 +1122,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/step-3.5-flash@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'stepfun-ai/Step-3.5-Flash'
+       AND model_repo = 'stepfun-ai'
+       AND model_name = 'Step-3.5-Flash'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1064,7 +1139,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-mt-7b@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'tencent/Hunyuan-MT-7B'
+       AND model_repo = 'tencent'
+       AND model_name = 'Hunyuan-MT-7B'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1080,41 +1156,828 @@ BEGIN
            capacity_source = COALESCE(capacity_source, 'profile'),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-a13b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'tencent/Hunyuan-A13B-Instruct'
+       AND model_repo = 'tencent'
+       AND model_name = 'Hunyuan-A13B-Instruct'
        AND delete_flag = 'N'
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
-    RAISE NOTICE 'Catalog backfill: % row(s) updated', v_total;
+    RAISE NOTICE 'Phase 1a catalog backfill (bare): % row(s) updated', v_total;
 END $$;
 
 -- ============================================================
--- Phase 2: Safe defaults for remaining bare LLM/VLM rows
+-- Phase 1b: Tag untagged rows whose context_window_tokens / max_output_tokens
+--           already match the catalog exactly with capability_profile_version.
+--           capacity_source is not rewritten (operator intent preserved).
 -- ============================================================
 
 DO $$
 DECLARE
     v_updated INTEGER := 0;
+    v_total   INTEGER := 0;
 BEGIN
+    -- dashscope (4 entries)
     UPDATE nexent.model_record_t
-       SET context_window_tokens = COALESCE(context_window_tokens,
-           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
-           max_output_tokens = COALESCE(max_output_tokens,
-           LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
-           LEAST(4096, COALESCE(max_output_tokens, 4096))),
-           capacity_source = COALESCE(capacity_source, 'default')
-     WHERE delete_flag = 'N'
-       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+       SET capability_profile_version = 'dashscope/qwen-plus@1'
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'qwen-plus'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
 
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'dashscope/qwen-turbo@1'
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'qwen-turbo'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
     GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'Safe defaults: % LLM/VLM row(s) backfilled', v_updated;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'dashscope/qwen3.7-max@1'
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'qwen3.7-max'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 65536
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'dashscope/glm-5.1@1'
+     WHERE LOWER(model_factory) = 'dashscope'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'glm-5.1'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 200000
+       AND max_output_tokens = 131072
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- deepseek (15 entries)
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-chat@2'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'deepseek-chat'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-reasoner@2'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'deepseek-reasoner'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v4-flash@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'deepseek-v4-flash'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v4-pro@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'deepseek-v4-pro'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V4-Pro'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1048576
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V4-Flash'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1048576
+       AND max_output_tokens = 384000
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3.2@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-r1@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 163840
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'deepseek-ai'
+       AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3.2-pro@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-r1-pro@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 163840
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'deepseek/deepseek-v3-pro@1'
+     WHERE LOWER(model_factory) = 'deepseek'
+       AND model_repo = 'Pro'
+       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 164000
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- openai (2 entries)
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'openai/gpt-4o@1'
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4o'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 128000
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'openai/gpt-4.1@1'
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4.1'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 32768
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- silicon (45 entries)
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.6-27b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-27B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 65536
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/kimi-k2.6@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Pro'
+       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 131072
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.6-35b-a3b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-35B-A3B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-397b-a17b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-397B-A17B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-122b-a10b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-122B-A10B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-35b-a3b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-35B-A3B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-27b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-27B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-9b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-9B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.5-4b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.5-4B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-32B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-32B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 32768
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-8B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-8B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 32768
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-VL-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 32768
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Thinking'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Omni-30B-A3B-Captioner'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-Coder-30B-A3B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 65536
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-30B-A3B-Instruct-2507'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-32b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-32B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-14b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-14B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3-8b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3-8B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-72B-Instruct-128K'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-72B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen2.5-32b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-32B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen2.5-14b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-14B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen2.5-7b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen2.5-7B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-4-32b-0414@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-4-32B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-z1-9b-0414@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-Z1-9B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-4-9b-0414@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'THUDM'
+       AND model_name = 'GLM-4-9B-0414'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-5.2@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-5.2'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 1048576
+       AND max_output_tokens = 131072
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-4.5v@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-4.5V'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-4.5-air@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'zai-org'
+       AND model_name = 'GLM-4.5-Air'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/glm-5.1-pro@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Pro'
+       AND model_name = 'zai-org/GLM-5.1'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 202752
+       AND max_output_tokens = 131072
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/seed-oss-36b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'ByteDance-Seed'
+       AND model_name = 'Seed-OSS-36B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 524288
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/ling-flash-2.0@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'inclusionAI'
+       AND model_name = 'Ling-flash-2.0'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/ling-mini-2.0@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'inclusionAI'
+       AND model_name = 'Ling-mini-2.0'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/minimax-m2.5@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'MiniMaxAI'
+       AND model_name = 'MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 204800
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/minimax-m2.5-pro@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Pro'
+       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 204800
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/kimi-k2.7-code@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'moonshotai'
+       AND model_name = 'Kimi-K2.7-Code'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 32768
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/nex-n2-pro@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'nex-agi'
+       AND model_name = 'Nex-N2-Pro'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/step-3.5-flash@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'stepfun-ai'
+       AND model_name = 'Step-3.5-Flash'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 16384
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/hunyuan-mt-7b@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'tencent'
+       AND model_name = 'Hunyuan-MT-7B'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 32768
+       AND max_output_tokens = 2048
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/hunyuan-a13b-instruct@1'
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'tencent'
+       AND model_name = 'Hunyuan-A13B-Instruct'
+       AND delete_flag = 'N'
+       AND context_window_tokens = 131072
+       AND max_output_tokens = 8192
+       AND capability_profile_version IS NULL;
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    RAISE NOTICE 'Phase 1b catalog tag (matching filled): % row(s) updated', v_total;
 END $$;
 
 -- ============================================================
--- Phase 3: Reconcile legacy max_tokens with max_output_tokens
+-- Phase 2: Safe defaults for remaining bare LLM/VLM rows
 -- ============================================================
 
 DO $$
@@ -1122,18 +1985,23 @@ DECLARE
     v_updated INTEGER := 0;
 BEGIN
     UPDATE nexent.model_record_t
-       SET max_tokens = max_output_tokens
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(32768, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 4096))),
+           capacity_source = COALESCE(capacity_source, 'default')
      WHERE delete_flag = 'N'
-       AND max_output_tokens IS NOT NULL
-       AND COALESCE(max_tokens, -1) <> max_output_tokens
-       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
+       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
 
     GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;
+    RAISE NOTICE 'Safe defaults: % LLM/VLM row(s) backfilled', v_updated;
 END $$;
 
 -- ============================================================
--- Phase 4: Clamp default_output_reserve_tokens to max_output_tokens
+-- Phase 3: Clamp default_output_reserve_tokens to max_output_tokens
 -- ============================================================
 
 DO $$
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index 92d0354da..e325baaa1 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -51,6 +51,31 @@ def _sql_str(value: str) -> str:
     return value.replace("'", "''")
 
 
+def _split_repo_name(full_id: str) -> tuple[str, str]:
+    """Split a catalog's full model identifier into (model_repo, model_name).
+
+    The model_record_t table stores these as two columns. Catalog keys like
+    "Qwen/Qwen2.5-14B-Instruct" are split on the first '/' only, so any later
+    '/' stays in model_name; bare names like "gpt-4o" get model_repo == "".
+    """
+    if "/" in full_id:
+        repo, name = full_id.split("/", 1)  # maxsplit=1: cut at the first '/' only
+        return repo, name
+    return "", full_id  # no namespace: empty repo, whole id is the name
+
+
+def _sql_repo_match(repo: str) -> str:
+    """Build the WHERE fragment that matches the table's model_repo column.
+
+    Bare-name catalog entries (repo == "") can land in the table as either
+    model_repo='' or model_repo IS NULL depending on the create path, so
+    accept both. Namespaced entries match the exact string, via _sql_str.
+    """
+    if repo == "":
+        return "(model_repo IS NULL OR model_repo = '')"  # parenthesized: the caller AND-s it
+    return f"model_repo = '{_sql_str(repo)}'"
+
+
 def main() -> None:
     today = date.today().strftime("%Y-%m-%d")
     lines: list[str] = []
@@ -60,24 +85,43 @@ def main() -> None:
     lines.append(f"-- Catalog entries: {len(CATALOG)}")
     lines.append("--")
     lines.append("-- Migration kind: RECOMMENDED_DATA_FIX")
-    lines.append("-- Idempotent: COALESCE protects existing non-NULL values.")
+    lines.append("-- Idempotent: COALESCE + IS NULL guards protect existing values.")
     lines.append("-- Safe: enforces max_output < context_window via GREATEST/LEAST.")
     lines.append("--")
-    lines.append("-- Pre-run self-check:")
+    lines.append("-- Phases:")
+    lines.append("--   1a  Bare LLM/VLM rows that match a catalog entry by")
+    lines.append("--       (model_factory, model_repo, model_name) -> fill capacity")
+    lines.append("--       fields + tag capacity_source='profile' + profile_version.")
+    lines.append("--   1b  Already-filled rows that match a catalog entry AND whose")
+    lines.append("--       context_window_tokens and max_output_tokens exactly equal")
+    lines.append("--       the catalog values -> tag profile_version only. capacity_")
+    lines.append("--       source stays whatever it was (typically 'operator'); we")
+    lines.append("--       don't rewrite provenance, we just add the dispatch tag so")
+    lines.append("--       dispatch_profile_hit_total can fire.")
+    lines.append("--    2  Remaining bare LLM/VLM rows -> safe defaults.")
+    lines.append("--    3  Clamp default_output_reserve_tokens to <= max_output_tokens.")
+    lines.append("--")
+    lines.append("-- Pre-run self-check (rows whose capability_profile_version is NULL):")
     lines.append("--")
-    lines.append("--   SELECT model_id, model_name, model_factory,")
-    lines.append("--          context_window_tokens, max_output_tokens")
+    lines.append("--   SELECT model_id, model_repo, model_name, model_factory,")
+    lines.append("--          context_window_tokens, max_output_tokens, capability_profile_version")
     lines.append("--     FROM nexent.model_record_t")
     lines.append("--    WHERE delete_flag = 'N'")
     lines.append("--      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
-    lines.append("--      AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
-    lines.append("--")
-    lines.append("-- If the result is empty, this migration is a no-op.")
+    lines.append("--      AND capability_profile_version IS NULL;")
     lines.append("")
 
-    # Phase 1: catalog-driven backfill
+    # Group catalog by provider so the generated SQL has tidy section headers
+    from collections import defaultdict
+    by_provider: dict[str, list] = defaultdict(list)
+    for (provider, full_id), profile in CATALOG.items():
+        by_provider[provider].append((full_id, profile))
+
+    # --------------------------------------------------------------
+    # Phase 1a: catalog match + bare -> fill capacity + tag
+    # --------------------------------------------------------------
     lines.append("-- ============================================================")
-    lines.append("-- Phase 1: Backfill rows matching approved catalog entries")
+    lines.append("-- Phase 1a: Backfill bare rows that match approved catalog entries")
     lines.append("-- ============================================================")
     lines.append("")
     lines.append("DO $$")
@@ -86,20 +130,17 @@ def main() -> None:
     lines.append("    v_total   INTEGER := 0;")
     lines.append("BEGIN")
 
-    from collections import defaultdict
-    by_provider: dict[str, list] = defaultdict(list)
-    for (provider, model_name), profile in CATALOG.items():
-        by_provider[provider].append((model_name, profile))
-
     for provider in sorted(by_provider.keys()):
         entries = by_provider[provider]
         lines.append(f"    -- {provider} ({len(entries)} entries)")
-        for model_name, profile in entries:
+        for full_id, profile in entries:
             ctx = profile.context_window_tokens
             mout = profile.max_output_tokens
             reserve = profile.default_output_reserve_tokens
             version = _sql_str(profile.capability_profile_version)
-            escaped_model = _sql_str(model_name)
+            repo, name = _split_repo_name(full_id)
+            repo_match = _sql_repo_match(repo)
+            escaped_name = _sql_str(name)
 
             lines.append(f"    UPDATE nexent.model_record_t")
             lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
@@ -111,14 +152,59 @@ def main() -> None:
             lines.append(f"           capacity_source = COALESCE(capacity_source, 'profile'),")
             lines.append(f"           capability_profile_version = COALESCE(capability_profile_version, '{version}')")
             lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
-            lines.append(f"       AND model_name = '{escaped_model}'")
+            lines.append(f"       AND {repo_match}")
+            lines.append(f"       AND model_name = '{escaped_name}'")
             lines.append(f"       AND delete_flag = 'N'")
             lines.append(f"       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
             lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
             lines.append(f"    v_total := v_total + v_updated;")
             lines.append("")
 
-    lines.append("    RAISE NOTICE 'Catalog backfill: % row(s) updated', v_total;")
+    lines.append("    RAISE NOTICE 'Phase 1a catalog backfill (bare): % row(s) updated', v_total;")
+    lines.append("END $$;")
+    lines.append("")
+
+    # --------------------------------------------------------------
+    # Phase 1b: catalog match + already-filled values match catalog
+    #           -> tag profile_version only (don't touch capacity)
+    # --------------------------------------------------------------
+    lines.append("-- ============================================================")
+    lines.append("-- Phase 1b: Tag already-filled rows whose ctx/max_out exactly match")
+    lines.append("--           the catalog with capability_profile_version. Does not")
+    lines.append("--           rewrite capacity_source (operator intent preserved).")
+    lines.append("-- ============================================================")
+    lines.append("")
+    lines.append("DO $$")
+    lines.append("DECLARE")
+    lines.append("    v_updated INTEGER := 0;")
+    lines.append("    v_total   INTEGER := 0;")
+    lines.append("BEGIN")
+
+    for provider in sorted(by_provider.keys()):
+        entries = by_provider[provider]
+        lines.append(f"    -- {provider} ({len(entries)} entries)")
+        for full_id, profile in entries:
+            ctx = profile.context_window_tokens
+            mout = profile.max_output_tokens
+            version = _sql_str(profile.capability_profile_version)
+            repo, name = _split_repo_name(full_id)
+            repo_match = _sql_repo_match(repo)
+            escaped_name = _sql_str(name)
+
+            lines.append(f"    UPDATE nexent.model_record_t")
+            lines.append(f"       SET capability_profile_version = '{version}'")
+            lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
+            lines.append(f"       AND {repo_match}")
+            lines.append(f"       AND model_name = '{escaped_name}'")
+            lines.append(f"       AND delete_flag = 'N'")
+            lines.append(f"       AND context_window_tokens = {_sql_int(ctx)}")
+            lines.append(f"       AND max_output_tokens = {_sql_int(mout)}")
+            lines.append(f"       AND capability_profile_version IS NULL;")
+            lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
+            lines.append(f"    v_total := v_total + v_updated;")
+            lines.append("")
+
+    lines.append("    RAISE NOTICE 'Phase 1b catalog tag (matching filled): % row(s) updated', v_total;")
     lines.append("END $$;")
     lines.append("")
 
@@ -148,30 +234,9 @@ def main() -> None:
     lines.append("END $$;")
     lines.append("")
 
-    # Phase 3: reconcile max_tokens
-    lines.append("-- ============================================================")
-    lines.append("-- Phase 3: Reconcile legacy max_tokens with max_output_tokens")
-    lines.append("-- ============================================================")
-    lines.append("")
-    lines.append("DO $$")
-    lines.append("DECLARE")
-    lines.append("    v_updated INTEGER := 0;")
-    lines.append("BEGIN")
-    lines.append("    UPDATE nexent.model_record_t")
-    lines.append("       SET max_tokens = max_output_tokens")
-    lines.append("     WHERE delete_flag = 'N'")
-    lines.append("       AND max_output_tokens IS NOT NULL")
-    lines.append("       AND COALESCE(max_tokens, -1) <> max_output_tokens")
-    lines.append("       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');")
-    lines.append("")
-    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
-    lines.append("    RAISE NOTICE 'max_tokens reconcile: % row(s) updated', v_updated;")
-    lines.append("END $$;")
-    lines.append("")
-
-    # Phase 4: clamp reserve to max_output
+    # Phase 3: clamp reserve to max_output
     lines.append("-- ============================================================")
-    lines.append("-- Phase 4: Clamp default_output_reserve_tokens to max_output_tokens")
+    lines.append("-- Phase 3: Clamp default_output_reserve_tokens to max_output_tokens")
     lines.append("-- ============================================================")
     lines.append("")
     lines.append("DO $$")

From 4baf92b3f24203882d854312e7975e5a73de38f7 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 14:08:43 +0800
Subject: [PATCH 20/31] fix(sdk): remove reverse max_tokens backfill from
 ModelConfig validator

The validator was bidirectionally syncing max_tokens <-> max_output_tokens,
but max_tokens is a legacy deprecated field. Writing max_output_tokens back
into max_tokens on the Pydantic model risks propagating synthetic values
to serialized/persisted configs, making legacy fields appear operator-set.

Keep only the forward direction: max_tokens -> max_output_tokens (legacy
migration path). The forward backfill is additionally capped so that it
never exceeds context_window_tokens, and floored at 1. The reverse alias in
OpenAIModel.__init__ is safe because it is in-memory only and needed for the
OpenAI wire format (which uses max_tokens as the API field name).
---
 sdk/nexent/core/agents/agent_model.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sdk/nexent/core/agents/agent_model.py b/sdk/nexent/core/agents/agent_model.py
index ec67605c6..04baf9533 100644
--- a/sdk/nexent/core/agents/agent_model.py
+++ b/sdk/nexent/core/agents/agent_model.py
@@ -99,10 +99,13 @@ class ModelConfig(BaseModel):
     @model_validator(mode="after")
     def _backfill_max_output_from_legacy_max_tokens(self) -> "ModelConfig":
         if self.max_output_tokens is None and self.max_tokens is not None:
-            self.max_output_tokens = self.max_tokens
-        elif self.max_output_tokens is not None and self.max_tokens is None:
-            # Keep legacy attribute populated so callers reading it keep working.
-            self.max_tokens = self.max_output_tokens
+            fallback = self.max_tokens
+            if (
+                self.context_window_tokens is not None
+                and fallback > self.context_window_tokens
+            ):
+                fallback = self.context_window_tokens - 1
+            self.max_output_tokens = max(fallback, 1)
         return self
 
 

From b7d6b2f5a13d0d135337704ae565534aaf2fd076 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 14:30:33 +0800
Subject: [PATCH 21/31] chore(sql): remove superseded v2.2.0_0617 capacity data
 fix migration

v2.2.2_0627_backfill_from_catalog.sql supersedes this migration. Its
catalog backfill is a strict superset of the one removed here:
- 66 catalog entries vs 10
- COALESCE + GREATEST/LEAST safety guards
- Phase 1b profile tagging + Phase 2 safe defaults + Phase 3 reserve clamp

The one step it intentionally does not carry over is the dangerous
max_tokens reconcile, which silently overwrote operator-intentional
legacy values.
---
 ...7_context_management_capacity_data_fix.sql | 205 ------------------
 1 file changed, 205 deletions(-)
 delete mode 100644 deploy/sql/migrations/v2.2.0_0617_context_management_capacity_data_fix.sql

diff --git a/deploy/sql/migrations/v2.2.0_0617_context_management_capacity_data_fix.sql b/deploy/sql/migrations/v2.2.0_0617_context_management_capacity_data_fix.sql
deleted file mode 100644
index 371a2fed3..000000000
--- a/deploy/sql/migrations/v2.2.0_0617_context_management_capacity_data_fix.sql
+++ /dev/null
@@ -1,205 +0,0 @@
--- Migration kind: RECOMMENDED_DATA_FIX
--- Required for: upgraded deployments with existing model_record_t rows.
--- Safe to skip when: fresh deployment, or operators will manually fill capacity fields.
--- Reason: improves legacy model capacity completeness and reconciles the temporary max_tokens alias.
---
--- ------------------------------------------------------------
--- Pre-run self-check (recommended before applying)
--- ------------------------------------------------------------
--- The reconcile block at the bottom of this file rewrites `max_tokens` to
--- match the freshly backfilled `max_output_tokens`. If an operator
--- previously tightened `max_tokens` below the catalog value on a row this
--- migration touches (cost control, prompt-budget caps, etc.), that tighter
--- value will be overwritten with the catalog value.
---
--- Run this query first to surface any such rows:
---
---   SELECT model_id, model_name, model_factory, max_tokens, max_output_tokens
---     FROM nexent.model_record_t
---    WHERE delete_flag = 'N'
---      AND max_tokens IS NOT NULL
---      AND (
---        (LOWER(model_factory)='openai'    AND model_name IN ('gpt-4o','gpt-4.1'))
---        OR (LOWER(model_factory)='dashscope' AND model_name IN ('qwen-plus','qwen-turbo','qwen3.7-max','glm-5.1'))
---        OR (LOWER(model_factory)='silicon'  AND model_name IN ('Qwen/Qwen3.6-27B','Pro/moonshotai/Kimi-K2.6'))
---        OR (LOWER(model_factory)='deepseek' AND model_name IN ('deepseek-v4-flash','deepseek-v4-pro'))
---      );
---
--- If the result is empty: safe to apply the whole file.
--- If the result has rows the operator deliberately tightened: run only the
--- first `DO $$` block (catalog backfill) and skip the second (reconcile),
--- or back up the affected rows before applying.
-
--- ============================================================
--- Backfill capacity columns on legacy model_record_t rows
--- ============================================================
--- Matches (model_factory, model_name) against W1 day-one catalog entries.
--- Idempotent: only writes when context_window_tokens IS NULL, so re-running on
--- already-backfilled rows is a no-op.
---
--- Catalog source of truth: backend/consts/capability_profiles.py (W1 ADR
--- Decision 1). If the catalog is bumped, mirror the change here in a new
--- migration; do not edit this file in place after it has been released.
---
--- Coverage caveat: rows whose model_factory does not match a catalog provider
--- key (commonly the manual-add default 'OpenAI-API-Compatible' per CM-031)
--- will not be backfilled by this migration. Operators must either update
--- model_factory directly, re-save the model through the W1-aware UI, or wait
--- for W17. Startup logs surface the residual count.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-    v_total   INTEGER := 0;
-BEGIN
-    -- openai/gpt-4o
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 128000,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'openai'
-       AND model_name = 'gpt-4o'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- openai/gpt-4.1
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1000000,
-           max_output_tokens = 32768,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'openai'
-       AND model_name = 'gpt-4.1'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- dashscope/qwen-plus
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 131072,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'dashscope'
-       AND model_name = 'qwen-plus'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- dashscope/qwen-turbo
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1000000,
-           max_output_tokens = 16384,
-           default_output_reserve_tokens = 4096
-     WHERE LOWER(model_factory) = 'dashscope'
-       AND model_name = 'qwen-turbo'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- dashscope/qwen3.7-max
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1000000,
-           max_output_tokens = 65536,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'dashscope'
-       AND model_name = 'qwen3.7-max'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- dashscope/glm-5.1
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 200000,
-           max_output_tokens = 131072,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'dashscope'
-       AND model_name = 'glm-5.1'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- silicon/Qwen/Qwen3.6-27B
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 65536,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Qwen/Qwen3.6-27B'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- silicon/Pro/moonshotai/Kimi-K2.6
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 262144,
-           max_output_tokens = 131072,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_name = 'Pro/moonshotai/Kimi-K2.6'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- deepseek/deepseek-v4-flash
-    -- (deepseek-chat / deepseek-reasoner intentionally omitted: they alias to
-    -- v4-flash and are scheduled for deprecation at 2026-07-24, and pre-W1
-    -- deployments may have legacy max_tokens values for those names that
-    -- this backfill should not clobber.)
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1000000,
-           max_output_tokens = 384000,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-v4-flash'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- deepseek/deepseek-v4-pro
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = 1000000,
-           max_output_tokens = 384000,
-           default_output_reserve_tokens = 8192
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_name = 'deepseek-v4-pro'
-       AND delete_flag = 'N'
-       AND context_window_tokens IS NULL;
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    RAISE NOTICE 'W2 catalog backfill: % row(s) updated', v_total;
-END $$;
-
--- ============================================================
--- Reconcile the legacy max_tokens column with max_output_tokens
--- ============================================================
--- Runs after the catalog backfill above because the backfill writes
--- max_output_tokens. Scope and safety:
---   * Only touches rows where max_output_tokens IS NOT NULL.
---   * Skips embedding rows because they reuse max_tokens as the vector dimension.
---   * Only updates rows where the two columns actually disagree.
---   * delete_flag = 'N' so soft-deleted rows are left alone.
-
-DO $$
-DECLARE
-    v_updated INTEGER := 0;
-BEGIN
-    UPDATE nexent.model_record_t
-       SET max_tokens = max_output_tokens
-     WHERE delete_flag = 'N'
-       AND max_output_tokens IS NOT NULL
-       AND COALESCE(max_tokens, -1) <> max_output_tokens
-       AND COALESCE(model_type, '') NOT IN ('embedding', 'multi_embedding');
-
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    RAISE NOTICE 'max_tokens alias reconcile: % row(s) updated', v_updated;
-END $$;

From 5e08815c9f4dd808bd9ea08f85d5edefbe0ba6bc Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 14:57:57 +0800
Subject: [PATCH 22/31] fix(w11): Phase 1b now upgrades capacity_source
 'default' to 'profile'

Phase 1b previously only tagged rows with capability_profile_version
when capability_profile_version was NULL. Rows that already had the
correct capability_profile_version but a stale capacity_source='default'
were missed.

Updated condition to also match rows where:
- capability_profile_version already equals the catalog value
- capacity_source is still 'default'

This fixes the case where Phase 2 filled safe defaults (source='default'),
then a subsequent run or manual edit aligned the values with catalog,
but capacity_source was never upgraded to 'profile'.

Verified: 2 rows (Qwen2.5-32B, Qwen2.5-14B) correctly upgraded
from 'default' to 'profile'.
---
 .../v2.2.2_0627_backfill_from_catalog.sql     | 336 +++++++++++-------
 scripts/generate_backfill_sql.py              |  13 +-
 2 files changed, 210 insertions(+), 139 deletions(-)

diff --git a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
index 502a59c96..96373e075 100644
--- a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
+++ b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
@@ -1168,8 +1168,10 @@ END $$;
 
 -- ============================================================
 -- Phase 1b: Tag already-filled rows whose ctx/max_out exactly match
---           the catalog with capability_profile_version. Does not
---           rewrite capacity_source (operator intent preserved).
+--           the catalog with capability_profile_version. Upgrades
+--           capacity_source from 'default' to 'profile' (values now
+--           come from catalog, not system defaults). Preserves
+--           'operator' and other explicit sources.
 -- ============================================================
 
 DO $$
@@ -1179,797 +1181,863 @@ DECLARE
 BEGIN
     -- dashscope (4 entries)
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'dashscope/qwen-plus@1'
+       SET capability_profile_version = 'dashscope/qwen-plus@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-plus'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-plus@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'dashscope/qwen-turbo@1'
+       SET capability_profile_version = 'dashscope/qwen-turbo@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-turbo'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-turbo@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'dashscope/qwen3.7-max@1'
+       SET capability_profile_version = 'dashscope/qwen3.7-max@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen3.7-max'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 65536
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen3.7-max@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'dashscope/glm-5.1@1'
+       SET capability_profile_version = 'dashscope/glm-5.1@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'glm-5.1'
        AND delete_flag = 'N'
        AND context_window_tokens = 200000
        AND max_output_tokens = 131072
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/glm-5.1@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- deepseek (15 entries)
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-chat@2'
+       SET capability_profile_version = 'deepseek/deepseek-chat@2',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-chat'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-chat@2' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-reasoner@2'
+       SET capability_profile_version = 'deepseek/deepseek-reasoner@2',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-reasoner'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-reasoner@2' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-flash@1'
+       SET capability_profile_version = 'deepseek/deepseek-v4-flash@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-flash'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-pro@1'
+       SET capability_profile_version = 'deepseek/deepseek-v4-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-pro'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1'
+       SET capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Pro'
        AND delete_flag = 'N'
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1'
+       SET capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Flash'
        AND delete_flag = 'N'
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.2@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3.2@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1@1'
+       SET capability_profile_version = 'deepseek/deepseek-r1@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1'
        AND delete_flag = 'N'
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1'
+       SET capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.2-pro@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3.2-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.2'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1-pro@1'
+       SET capability_profile_version = 'deepseek/deepseek-r1-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-R1'
        AND delete_flag = 'N'
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3-pro@1'
+       SET capability_profile_version = 'deepseek/deepseek-v3-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3'
        AND delete_flag = 'N'
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- openai (2 entries)
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'openai/gpt-4o@1'
+       SET capability_profile_version = 'openai/gpt-4o@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4o'
        AND delete_flag = 'N'
        AND context_window_tokens = 128000
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4o@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'openai/gpt-4.1@1'
+       SET capability_profile_version = 'openai/gpt-4.1@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4.1'
        AND delete_flag = 'N'
        AND context_window_tokens = 1000000
        AND max_output_tokens = 32768
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4.1@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- silicon (45 entries)
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.6-27b@1'
+       SET capability_profile_version = 'silicon/qwen3.6-27b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-27B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 65536
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-27b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/kimi-k2.6@1'
+       SET capability_profile_version = 'silicon/kimi-k2.6@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'moonshotai/Kimi-K2.6'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 131072
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.6@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.6-35b-a3b@1'
+       SET capability_profile_version = 'silicon/qwen3.6-35b-a3b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-35B-A3B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-35b-a3b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-397b-a17b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-397b-a17b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-397B-A17B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-397b-a17b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-122b-a10b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-122b-a10b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-122B-A10B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-122b-a10b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-35b-a3b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-35b-a3b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-35B-A3B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-35b-a3b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-27b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-27b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-27B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-27b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-9b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-9b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-9B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-9b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.5-4b@1'
+       SET capability_profile_version = 'silicon/qwen3.5-4b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-4B'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-4b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Thinking'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Thinking'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1'
+       SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Thinking'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1'
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Thinking'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1'
+       SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Captioner'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Coder-30B-A3B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 65536
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1'
+       SET capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-30B-A3B-Instruct-2507'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-32b@1'
+       SET capability_profile_version = 'silicon/qwen3-32b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-32B'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-32b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-14b@1'
+       SET capability_profile_version = 'silicon/qwen3-14b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-14B'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-14b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3-8b@1'
+       SET capability_profile_version = 'silicon/qwen3-8b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-8B'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-8b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1'
+       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct-128K'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen2.5-72b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen2.5-32b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen2.5-32b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-32B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-32b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen2.5-14b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen2.5-14b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-14B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-14b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen2.5-7b-instruct@1'
+       SET capability_profile_version = 'silicon/qwen2.5-7b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-7B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-7b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-4-32b-0414@1'
+       SET capability_profile_version = 'silicon/glm-4-32b-0414@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-32B-0414'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-32b-0414@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-z1-9b-0414@1'
+       SET capability_profile_version = 'silicon/glm-z1-9b-0414@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-Z1-9B-0414'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-z1-9b-0414@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-4-9b-0414@1'
+       SET capability_profile_version = 'silicon/glm-4-9b-0414@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-9B-0414'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-9b-0414@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-5.2@1'
+       SET capability_profile_version = 'silicon/glm-5.2@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-5.2'
        AND delete_flag = 'N'
        AND context_window_tokens = 1048576
        AND max_output_tokens = 131072
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.2@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-4.5v@1'
+       SET capability_profile_version = 'silicon/glm-4.5v@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5V'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5v@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-4.5-air@1'
+       SET capability_profile_version = 'silicon/glm-4.5-air@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5-Air'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5-air@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/glm-5.1-pro@1'
+       SET capability_profile_version = 'silicon/glm-5.1-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'zai-org/GLM-5.1'
        AND delete_flag = 'N'
        AND context_window_tokens = 202752
        AND max_output_tokens = 131072
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.1-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/seed-oss-36b-instruct@1'
+       SET capability_profile_version = 'silicon/seed-oss-36b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'ByteDance-Seed'
        AND model_name = 'Seed-OSS-36B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 524288
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/seed-oss-36b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/ling-flash-2.0@1'
+       SET capability_profile_version = 'silicon/ling-flash-2.0@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-flash-2.0'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-flash-2.0@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/ling-mini-2.0@1'
+       SET capability_profile_version = 'silicon/ling-mini-2.0@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-mini-2.0'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-mini-2.0@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/minimax-m2.5@1'
+       SET capability_profile_version = 'silicon/minimax-m2.5@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'MiniMaxAI'
        AND model_name = 'MiniMax-M2.5'
        AND delete_flag = 'N'
        AND context_window_tokens = 204800
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/minimax-m2.5-pro@1'
+       SET capability_profile_version = 'silicon/minimax-m2.5-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'MiniMaxAI/MiniMax-M2.5'
        AND delete_flag = 'N'
        AND context_window_tokens = 204800
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/kimi-k2.7-code@1'
+       SET capability_profile_version = 'silicon/kimi-k2.7-code@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'moonshotai'
        AND model_name = 'Kimi-K2.7-Code'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.7-code@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/nex-n2-pro@1'
+       SET capability_profile_version = 'silicon/nex-n2-pro@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'nex-agi'
        AND model_name = 'Nex-N2-Pro'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/nex-n2-pro@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/step-3.5-flash@1'
+       SET capability_profile_version = 'silicon/step-3.5-flash@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'stepfun-ai'
        AND model_name = 'Step-3.5-Flash'
        AND delete_flag = 'N'
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/step-3.5-flash@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/hunyuan-mt-7b@1'
+       SET capability_profile_version = 'silicon/hunyuan-mt-7b@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-MT-7B'
        AND delete_flag = 'N'
        AND context_window_tokens = 32768
        AND max_output_tokens = 2048
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-mt-7b@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/hunyuan-a13b-instruct@1'
+       SET capability_profile_version = 'silicon/hunyuan-a13b-instruct@1',
+           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-A13B-Instruct'
        AND delete_flag = 'N'
        AND context_window_tokens = 131072
        AND max_output_tokens = 8192
-       AND capability_profile_version IS NULL;
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-a13b-instruct@1' AND capacity_source = 'default'));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index e325baaa1..d246852f4 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -166,12 +166,14 @@ def main() -> None:
 
     # --------------------------------------------------------------
     # Phase 1b: catalog match + already-filled values match catalog
-    #           -> tag profile_version only (don't touch capacity)
+    #           -> tag profile_version + upgrade capacity_source from 'default' to 'profile'
     # --------------------------------------------------------------
     lines.append("-- ============================================================")
     lines.append("-- Phase 1b: Tag already-filled rows whose ctx/max_out exactly match")
-    lines.append("--           the catalog with capability_profile_version. Does not")
-    lines.append("--           rewrite capacity_source (operator intent preserved).")
+    lines.append("--           the catalog with capability_profile_version. Upgrades")
+    lines.append("--           capacity_source from 'default' to 'profile' (values now")
+    lines.append("--           come from catalog, not system defaults). Preserves")
+    lines.append("--           'operator' and other explicit sources.")
     lines.append("-- ============================================================")
     lines.append("")
     lines.append("DO $$")
@@ -192,14 +194,15 @@ def main() -> None:
             escaped_name = _sql_str(name)
 
             lines.append(f"    UPDATE nexent.model_record_t")
-            lines.append(f"       SET capability_profile_version = '{version}'")
+            lines.append(f"       SET capability_profile_version = '{version}',")
+            lines.append(f"           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END")
             lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
             lines.append(f"       AND {repo_match}")
             lines.append(f"       AND model_name = '{escaped_name}'")
             lines.append(f"       AND delete_flag = 'N'")
             lines.append(f"       AND context_window_tokens = {_sql_int(ctx)}")
             lines.append(f"       AND max_output_tokens = {_sql_int(mout)}")
-            lines.append(f"       AND capability_profile_version IS NULL;")
+            lines.append(f"       AND (capability_profile_version IS NULL OR (capability_profile_version = '{version}' AND capacity_source = 'default'));")
             lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
             lines.append(f"    v_total := v_total + v_updated;")
             lines.append("")

From fafdcfd200fb1c3910941f31b2ec7ac7323a0b79 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 15:10:33 +0800
Subject: [PATCH 23/31] refactor(w11): use PL/pgSQL constants in generated
 backfill SQL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace repeated string literals with CONSTANT declarations in each
DO block to satisfy SonarQube rules R49/R50/R83:
- c_active_flag for 'N' (delete_flag)
- c_source_profile for 'profile' (capacity_source)
- c_source_default for 'default' (capacity_source)

Reduces literal duplication:
- 'N': 135 → 5 (only in comments + constant)
- 'profile': 134 → 4 (only in comments + constant)
- 'default': 132 → no longer among the top 20 duplicated literals (only in comments + constant)

Verified: SQL executes successfully with constants.
---
 .../v2.2.2_0627_backfill_from_catalog.sql     | 674 +++++++++---------
 scripts/generate_backfill_sql.py              |  24 +-
 2 files changed, 357 insertions(+), 341 deletions(-)

diff --git a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
index 96373e075..170416b6a 100644
--- a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
+++ b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
@@ -36,6 +36,8 @@ DO $$
 DECLARE
     v_updated INTEGER := 0;
     v_total   INTEGER := 0;
+    c_active_flag     CONSTANT TEXT := 'N';
+    c_source_profile  CONSTANT TEXT := 'profile';
 BEGIN
     -- dashscope (4 entries)
     UPDATE nexent.model_record_t
@@ -45,12 +47,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-plus@1')
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-plus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -62,12 +64,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen-turbo@1')
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-turbo'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -79,12 +81,12 @@ BEGIN
            LEAST(65536, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 65536))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/qwen3.7-max@1')
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen3.7-max'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -96,12 +98,12 @@ BEGIN
            LEAST(131072, COALESCE(context_window_tokens, 200000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 131072))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'dashscope/glm-5.1@1')
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'glm-5.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -114,12 +116,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-chat@2')
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-chat'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -131,12 +133,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-reasoner@2')
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-reasoner'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -148,12 +150,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -165,12 +167,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -182,12 +184,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -199,12 +201,12 @@ BEGIN
            LEAST(384000, COALESCE(context_window_tokens, 1048576) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash-sf@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -216,12 +218,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -233,12 +235,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -250,12 +252,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -267,12 +269,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -284,12 +286,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-0528-qwen3-8b@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -301,12 +303,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -318,12 +320,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -335,12 +337,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 163840) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-R1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -352,12 +354,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 164000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3-pro@1')
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -370,12 +372,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 128000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4o'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -387,12 +389,12 @@ BEGIN
            LEAST(32768, COALESCE(context_window_tokens, 1000000) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -405,12 +407,12 @@ BEGIN
            LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 65536))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-27B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -422,12 +424,12 @@ BEGIN
            LEAST(131072, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 131072))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'moonshotai/Kimi-K2.6'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -439,12 +441,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-35B-A3B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -456,12 +458,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-397b-a17b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-397B-A17B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -473,12 +475,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-122b-a10b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-122B-A10B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -490,12 +492,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-35b-a3b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-35B-A3B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -507,12 +509,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-27b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-27B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -524,12 +526,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-9b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-9B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -541,12 +543,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.5-4b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-4B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -558,12 +560,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -575,12 +577,12 @@ BEGIN
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-32b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -592,12 +594,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -609,12 +611,12 @@ BEGIN
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-8b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -626,12 +628,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -643,12 +645,12 @@ BEGIN
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-vl-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -660,12 +662,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -677,12 +679,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-thinking@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -694,12 +696,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-omni-30b-a3b-captioner@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Captioner'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -711,12 +713,12 @@ BEGIN
            LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 65536))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-coder-30b-a3b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Coder-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -728,12 +730,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-30b-a3b-instruct-2507@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-30B-A3B-Instruct-2507'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -745,12 +747,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-32b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-32B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -762,12 +764,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-14b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-14B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -779,12 +781,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3-8b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-8B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -796,12 +798,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct-128k@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct-128K'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -813,12 +815,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-72b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -830,12 +832,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-32b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-32B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -847,12 +849,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-14b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-14B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -864,12 +866,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen2.5-7b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-7B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -881,12 +883,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-32b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-32B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -898,12 +900,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-z1-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-Z1-9B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -915,12 +917,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4-9b-0414@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-9B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -932,12 +934,12 @@ BEGIN
            LEAST(131072, COALESCE(context_window_tokens, 1048576) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 131072))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.2@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-5.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -949,12 +951,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5v@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5V'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -966,12 +968,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-4.5-air@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5-Air'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -983,12 +985,12 @@ BEGIN
            LEAST(131072, COALESCE(context_window_tokens, 202752) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 131072))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.1-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'zai-org/GLM-5.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1000,12 +1002,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 524288) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/seed-oss-36b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'ByteDance-Seed'
        AND model_name = 'Seed-OSS-36B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1017,12 +1019,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-flash-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-flash-2.0'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1034,12 +1036,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/ling-mini-2.0@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-mini-2.0'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1051,12 +1053,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'MiniMaxAI'
        AND model_name = 'MiniMax-M2.5'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1068,12 +1070,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 204800) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'MiniMaxAI/MiniMax-M2.5'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1085,12 +1087,12 @@ BEGIN
            LEAST(32768, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.7-code@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'moonshotai'
        AND model_name = 'Kimi-K2.7-Code'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1102,12 +1104,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/nex-n2-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'nex-agi'
        AND model_name = 'Nex-N2-Pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1119,12 +1121,12 @@ BEGIN
            LEAST(16384, COALESCE(context_window_tokens, 262144) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/step-3.5-flash@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'stepfun-ai'
        AND model_name = 'Step-3.5-Flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1136,12 +1138,12 @@ BEGIN
            LEAST(2048, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(1024, COALESCE(max_output_tokens, 2048))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-mt-7b@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-MT-7B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1153,12 +1155,12 @@ BEGIN
            LEAST(8192, COALESCE(context_window_tokens, 131072) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
-           capacity_source = COALESCE(capacity_source, 'profile'),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/hunyuan-a13b-instruct@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-A13B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
@@ -1178,866 +1180,869 @@ DO $$
 DECLARE
     v_updated INTEGER := 0;
     v_total   INTEGER := 0;
+    c_active_flag     CONSTANT TEXT := 'N';
+    c_source_default  CONSTANT TEXT := 'default';
+    c_source_profile  CONSTANT TEXT := 'profile';
 BEGIN
     -- dashscope (4 entries)
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'dashscope/qwen-plus@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-plus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-plus@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-plus@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'dashscope/qwen-turbo@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen-turbo'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-turbo@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen-turbo@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'dashscope/qwen3.7-max@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'qwen3.7-max'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 65536
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen3.7-max@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/qwen3.7-max@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'dashscope/glm-5.1@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'dashscope'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'glm-5.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 200000
        AND max_output_tokens = 131072
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/glm-5.1@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'dashscope/glm-5.1@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- deepseek (15 entries)
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-chat@2',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-chat'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-chat@2' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-chat@2' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-reasoner@2',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-reasoner'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-reasoner@2' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-reasoner@2' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v4-flash@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v4-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'deepseek-v4-pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3.2@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-r1@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3.2-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-r1-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-R1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-v3-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'deepseek'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- openai (2 entries)
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'openai/gpt-4o@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4o'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 128000
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4o@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4o@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'openai/gpt-4.1@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'openai'
        AND (model_repo IS NULL OR model_repo = '')
        AND model_name = 'gpt-4.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1000000
        AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4.1@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4.1@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     -- silicon (45 entries)
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.6-27b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-27B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 65536
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-27b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-27b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/kimi-k2.6@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'moonshotai/Kimi-K2.6'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 131072
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.6@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.6@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.6-35b-a3b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.6-35B-A3B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-35b-a3b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-35b-a3b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-397b-a17b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-397B-A17B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-397b-a17b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-397b-a17b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-122b-a10b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-122B-A10B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-122b-a10b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-122b-a10b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-35b-a3b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-35B-A3B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-35b-a3b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-35b-a3b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-27b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-27B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-27b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-27b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-9b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-9B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-9b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-9b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3.5-4b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3.5-4B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-4b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.5-4b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-32B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-32b-thinking@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-8B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-8b-thinking@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-VL-30B-A3B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-vl-30b-a3b-thinking@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Thinking'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-thinking@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Omni-30B-A3B-Captioner'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-omni-30b-a3b-captioner@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-Coder-30B-A3B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 65536
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-coder-30b-a3b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-30B-A3B-Instruct-2507'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-30b-a3b-instruct-2507@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-32b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-32B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-32b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-32b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-14b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-14B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-14b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-14b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen3-8b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen3-8B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-8b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3-8b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct-128K'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct-128k@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen2.5-72b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-72B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-72b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen2.5-32b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-32B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-32b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-32b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen2.5-14b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-14B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-14b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-14b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/qwen2.5-7b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Qwen'
        AND model_name = 'Qwen2.5-7B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-7b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen2.5-7b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-4-32b-0414@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-32B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-32b-0414@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-32b-0414@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-z1-9b-0414@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-Z1-9B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-z1-9b-0414@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-z1-9b-0414@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-4-9b-0414@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'THUDM'
        AND model_name = 'GLM-4-9B-0414'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-9b-0414@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4-9b-0414@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-5.2@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-5.2'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 1048576
        AND max_output_tokens = 131072
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.2@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.2@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-4.5v@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5V'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5v@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5v@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-4.5-air@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'zai-org'
        AND model_name = 'GLM-4.5-Air'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5-air@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-4.5-air@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/glm-5.1-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'zai-org/GLM-5.1'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 202752
        AND max_output_tokens = 131072
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.1-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/glm-5.1-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/seed-oss-36b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'ByteDance-Seed'
        AND model_name = 'Seed-OSS-36B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 524288
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/seed-oss-36b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/seed-oss-36b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/ling-flash-2.0@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-flash-2.0'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-flash-2.0@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-flash-2.0@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/ling-mini-2.0@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'inclusionAI'
        AND model_name = 'Ling-mini-2.0'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-mini-2.0@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/ling-mini-2.0@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/minimax-m2.5@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'MiniMaxAI'
        AND model_name = 'MiniMax-M2.5'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 204800
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/minimax-m2.5-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'MiniMaxAI/MiniMax-M2.5'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 204800
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/minimax-m2.5-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/kimi-k2.7-code@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'moonshotai'
        AND model_name = 'Kimi-K2.7-Code'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.7-code@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.7-code@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/nex-n2-pro@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'nex-agi'
        AND model_name = 'Nex-N2-Pro'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/nex-n2-pro@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/nex-n2-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/step-3.5-flash@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'stepfun-ai'
        AND model_name = 'Step-3.5-Flash'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/step-3.5-flash@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/step-3.5-flash@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/hunyuan-mt-7b@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-MT-7B'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 32768
        AND max_output_tokens = 2048
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-mt-7b@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-mt-7b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'silicon/hunyuan-a13b-instruct@1',
-           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'tencent'
        AND model_name = 'Hunyuan-A13B-Instruct'
-       AND delete_flag = 'N'
+       AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-a13b-instruct@1' AND capacity_source = 'default'));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/hunyuan-a13b-instruct@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
@@ -2051,6 +2056,8 @@ END $$;
 DO $$
 DECLARE
     v_updated INTEGER := 0;
+    c_active_flag     CONSTANT TEXT := 'N';
+    c_source_default  CONSTANT TEXT := 'default';
 BEGIN
     UPDATE nexent.model_record_t
        SET context_window_tokens = COALESCE(context_window_tokens,
@@ -2059,8 +2066,8 @@ BEGIN
            LEAST(4096, COALESCE(context_window_tokens, 32768) - 1)),
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 4096))),
-           capacity_source = COALESCE(capacity_source, 'default')
-     WHERE delete_flag = 'N'
+           capacity_source = COALESCE(capacity_source, c_source_default)
+     WHERE delete_flag = c_active_flag
        AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
 
@@ -2075,10 +2082,11 @@ END $$;
 DO $$
 DECLARE
     v_updated INTEGER := 0;
+    c_active_flag     CONSTANT TEXT := 'N';
 BEGIN
     UPDATE nexent.model_record_t
        SET default_output_reserve_tokens = max_output_tokens
-     WHERE delete_flag = 'N'
+     WHERE delete_flag = c_active_flag
        AND default_output_reserve_tokens IS NOT NULL
        AND max_output_tokens IS NOT NULL
        AND default_output_reserve_tokens > max_output_tokens;
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index d246852f4..e68910488 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -128,6 +128,8 @@ def main() -> None:
     lines.append("DECLARE")
     lines.append("    v_updated INTEGER := 0;")
     lines.append("    v_total   INTEGER := 0;")
+    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
+    lines.append("    c_source_profile  CONSTANT TEXT := 'profile';")
     lines.append("BEGIN")
 
     for provider in sorted(by_provider.keys()):
@@ -149,12 +151,12 @@ def main() -> None:
             lines.append(f"           LEAST({_sql_int(mout)}, COALESCE(context_window_tokens, {_sql_int(ctx)}) - 1)),")
             lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
             lines.append(f"           LEAST({_sql_int(reserve)}, COALESCE(max_output_tokens, {_sql_int(mout)}))),")
-            lines.append(f"           capacity_source = COALESCE(capacity_source, 'profile'),")
+            lines.append(f"           capacity_source = COALESCE(capacity_source, c_source_profile),")
             lines.append(f"           capability_profile_version = COALESCE(capability_profile_version, '{version}')")
             lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
             lines.append(f"       AND {repo_match}")
             lines.append(f"       AND model_name = '{escaped_name}'")
-            lines.append(f"       AND delete_flag = 'N'")
+            lines.append(f"       AND delete_flag = c_active_flag")
             lines.append(f"       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
             lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
             lines.append(f"    v_total := v_total + v_updated;")
@@ -180,6 +182,9 @@ def main() -> None:
     lines.append("DECLARE")
     lines.append("    v_updated INTEGER := 0;")
     lines.append("    v_total   INTEGER := 0;")
+    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
+    lines.append("    c_source_default  CONSTANT TEXT := 'default';")
+    lines.append("    c_source_profile  CONSTANT TEXT := 'profile';")
     lines.append("BEGIN")
 
     for provider in sorted(by_provider.keys()):
@@ -195,14 +200,14 @@ def main() -> None:
 
             lines.append(f"    UPDATE nexent.model_record_t")
             lines.append(f"       SET capability_profile_version = '{version}',")
-            lines.append(f"           capacity_source = CASE WHEN capacity_source = 'default' THEN 'profile' ELSE capacity_source END")
+            lines.append(f"           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END")
             lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
             lines.append(f"       AND {repo_match}")
             lines.append(f"       AND model_name = '{escaped_name}'")
-            lines.append(f"       AND delete_flag = 'N'")
+            lines.append(f"       AND delete_flag = c_active_flag")
             lines.append(f"       AND context_window_tokens = {_sql_int(ctx)}")
             lines.append(f"       AND max_output_tokens = {_sql_int(mout)}")
-            lines.append(f"       AND (capability_profile_version IS NULL OR (capability_profile_version = '{version}' AND capacity_source = 'default'));")
+            lines.append(f"       AND (capability_profile_version IS NULL OR (capability_profile_version = '{version}' AND capacity_source = c_source_default));")
             lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
             lines.append(f"    v_total := v_total + v_updated;")
             lines.append("")
@@ -219,6 +224,8 @@ def main() -> None:
     lines.append("DO $$")
     lines.append("DECLARE")
     lines.append("    v_updated INTEGER := 0;")
+    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
+    lines.append("    c_source_default  CONSTANT TEXT := 'default';")
     lines.append("BEGIN")
     lines.append("    UPDATE nexent.model_record_t")
     lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
@@ -227,8 +234,8 @@ def main() -> None:
     lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
     lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
     lines.append(f"           LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
-    lines.append(f"           capacity_source = COALESCE(capacity_source, 'default')")
-    lines.append("     WHERE delete_flag = 'N'")
+    lines.append(f"           capacity_source = COALESCE(capacity_source, c_source_default)")
+    lines.append("     WHERE delete_flag = c_active_flag")
     lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
     lines.append("       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
     lines.append("")
@@ -245,10 +252,11 @@ def main() -> None:
     lines.append("DO $$")
     lines.append("DECLARE")
     lines.append("    v_updated INTEGER := 0;")
+    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
     lines.append("BEGIN")
     lines.append("    UPDATE nexent.model_record_t")
     lines.append("       SET default_output_reserve_tokens = max_output_tokens")
-    lines.append("     WHERE delete_flag = 'N'")
+    lines.append("     WHERE delete_flag = c_active_flag")
     lines.append("       AND default_output_reserve_tokens IS NOT NULL")
     lines.append("       AND max_output_tokens IS NOT NULL")
     lines.append("       AND default_output_reserve_tokens > max_output_tokens;")

From 9769a656d27910040737a19625c852a6bec4800c Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 15:13:48 +0800
Subject: [PATCH 24/31] fix(test): use model_factory instead of provider in
 accept signal test

The test was asserting against payload['provider'], which is not a
ModelRequest field. The app layer uses request.model_factory (default
'OpenAI-API-Compatible'), so the assertion failed.

Fix: explicitly set model_factory in the payload and assert against it.
---
 test/backend/app/test_model_managment_app.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/backend/app/test_model_managment_app.py b/test/backend/app/test_model_managment_app.py
index 75fa41dd6..8def4d3e2 100644
--- a/test/backend/app/test_model_managment_app.py
+++ b/test/backend/app/test_model_managment_app.py
@@ -307,6 +307,7 @@ async def _create(*args, **kwargs):
 
     payload = {
         **sample_model_data,
+        "model_factory": "huggingface",
         "context_window_tokens": 128000,
         "max_output_tokens": 16384,
         "capacity_source": "operator",
@@ -327,7 +328,7 @@ async def _create(*args, **kwargs):
     assert sent["max_output_tokens"] == 16384
 
     # Metric recorder called with the labels the SLO dashboard expects.
-    mock_record.assert_called_once_with("catalog_exact", payload["provider"])
+    mock_record.assert_called_once_with("catalog_exact", payload["model_factory"])
 
 
 @pytest.mark.asyncio

From 212a0dd9ae772f8ef796a03e30fb04468da40534 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 15:34:01 +0800
Subject: [PATCH 25/31] fix(catalog): use 'silicon' provider for
 SiliconFlow-hosted DeepSeek models

The 11 DeepSeek models hosted on SiliconFlow were incorrectly using
'deepseek' as the catalog key provider. When operators add these models
via the SiliconFlow provider browser, the DB stores
model_factory='silicon', so the migration SQL's
WHERE LOWER(model_factory)='deepseek' predicate never matched them.

Changed catalog key from ('deepseek', 'deepseek-ai/...') to
('silicon', 'deepseek-ai/...') for all 11 SiliconFlow-hosted entries.
Updated capability_profile_version prefix from 'deepseek/' to 'silicon/'.

Kept tokenizer_family='deepseek' (tokenizer identifier, not provider).

The 4 original entries for the official DeepSeek API (deepseek-chat,
deepseek-reasoner, deepseek-v4-flash, deepseek-v4-pro) remain unchanged
with provider='deepseek'.
---
 backend/consts/capability_profiles.py         |  67 ++--
 .../v2.2.2_0627_backfill_from_catalog.sql     | 362 +++++++++---------
 2 files changed, 215 insertions(+), 214 deletions(-)

diff --git a/backend/consts/capability_profiles.py b/backend/consts/capability_profiles.py
index 7c7d406d5..2791c7791 100644
--- a/backend/consts/capability_profiles.py
+++ b/backend/consts/capability_profiles.py
@@ -167,110 +167,111 @@
     # the standard tier; Pro/ prefixed names are the premium tier with the
     # same model weights but higher throughput. Specs from DeepSeek API docs
     # (api-docs.deepseek.com) and SiliconFlow pricing.
-    ("deepseek", "deepseek-ai/DeepSeek-V4-Pro"): CapabilityProfile(
-        provider="deepseek",
+    # Catalog key uses "silicon" (hosting provider / DB model_factory), not "deepseek" (model vendor).
+    ("silicon", "deepseek-ai/DeepSeek-V4-Pro"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-V4-Pro",
-        capability_profile_version="deepseek/deepseek-v4-pro-sf@1",
+        capability_profile_version="silicon/deepseek-v4-pro-sf@1",
         window_shape="combined",
         context_window_tokens=1_048_576,
         max_output_tokens=384_000,
         default_output_reserve_tokens=8_192,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-V4-Flash"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-V4-Flash"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-V4-Flash",
-        capability_profile_version="deepseek/deepseek-v4-flash-sf@1",
+        capability_profile_version="silicon/deepseek-v4-flash-sf@1",
         window_shape="combined",
         context_window_tokens=1_048_576,
         max_output_tokens=384_000,
         default_output_reserve_tokens=8_192,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-V3.2",
-        capability_profile_version="deepseek/deepseek-v3.2@1",
+        capability_profile_version="silicon/deepseek-v3.2@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-V3.1-Terminus",
-        capability_profile_version="deepseek/deepseek-v3.1-terminus@1",
+        capability_profile_version="silicon/deepseek-v3.1-terminus@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-R1"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-R1"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-R1",
-        capability_profile_version="deepseek/deepseek-r1@1",
+        capability_profile_version="silicon/deepseek-r1@1",
         window_shape="combined",
         context_window_tokens=163_840,
         max_output_tokens=16_384,
         default_output_reserve_tokens=8_192,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-V3"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-V3"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-V3",
-        capability_profile_version="deepseek/deepseek-v3@1",
+        capability_profile_version="silicon/deepseek-v3@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"): CapabilityProfile(
+        provider="silicon",
         model_name="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
-        capability_profile_version="deepseek/deepseek-r1-0528-qwen3-8b@1",
+        capability_profile_version="silicon/deepseek-r1-0528-qwen3-8b@1",
         window_shape="combined",
         context_window_tokens=131_072,
         max_output_tokens=16_384,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "Pro/deepseek-ai/DeepSeek-V3.2"): CapabilityProfile(
+        provider="silicon",
         model_name="Pro/deepseek-ai/DeepSeek-V3.2",
-        capability_profile_version="deepseek/deepseek-v3.2-pro@1",
+        capability_profile_version="silicon/deepseek-v3.2-pro@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "Pro/deepseek-ai/DeepSeek-V3.1-Terminus"): CapabilityProfile(
+        provider="silicon",
         model_name="Pro/deepseek-ai/DeepSeek-V3.1-Terminus",
-        capability_profile_version="deepseek/deepseek-v3.1-terminus-pro@1",
+        capability_profile_version="silicon/deepseek-v3.1-terminus-pro@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
         default_output_reserve_tokens=4_096,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "Pro/deepseek-ai/DeepSeek-R1"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "Pro/deepseek-ai/DeepSeek-R1"): CapabilityProfile(
+        provider="silicon",
         model_name="Pro/deepseek-ai/DeepSeek-R1",
-        capability_profile_version="deepseek/deepseek-r1-pro@1",
+        capability_profile_version="silicon/deepseek-r1-pro@1",
         window_shape="combined",
         context_window_tokens=163_840,
         max_output_tokens=16_384,
         default_output_reserve_tokens=8_192,
         tokenizer_family="deepseek",
     ),
-    ("deepseek", "Pro/deepseek-ai/DeepSeek-V3"): CapabilityProfile(
-        provider="deepseek",
+    ("silicon", "Pro/deepseek-ai/DeepSeek-V3"): CapabilityProfile(
+        provider="silicon",
         model_name="Pro/deepseek-ai/DeepSeek-V3",
-        capability_profile_version="deepseek/deepseek-v3-pro@1",
+        capability_profile_version="silicon/deepseek-v3-pro@1",
         window_shape="combined",
         context_window_tokens=164_000,
         max_output_tokens=8_192,
diff --git a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
index 170416b6a..7d3ab54b4 100644
--- a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
+++ b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
@@ -108,7 +108,7 @@ BEGIN
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
-    -- deepseek (15 entries)
+    -- deepseek (4 entries)
     UPDATE nexent.model_record_t
        SET context_window_tokens = COALESCE(context_window_tokens,
            GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
@@ -177,6 +177,76 @@ BEGIN
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
+    -- openai (2 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(128000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(16384, COALESCE(context_window_tokens, 128000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(4096, COALESCE(max_output_tokens, 16384))),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
+           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4o'
+       AND delete_flag = c_active_flag
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(32768, COALESCE(context_window_tokens, 1000000) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 32768))),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
+           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4.1'
+       AND delete_flag = c_active_flag
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- silicon (56 entries)
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 65536))),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-27B'
+       AND delete_flag = c_active_flag
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET context_window_tokens = COALESCE(context_window_tokens,
+           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
+           max_output_tokens = COALESCE(max_output_tokens,
+           LEAST(131072, COALESCE(context_window_tokens, 262144) - 1)),
+           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
+           LEAST(8192, COALESCE(max_output_tokens, 131072))),
+           capacity_source = COALESCE(capacity_source, c_source_profile),
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Pro'
+       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND delete_flag = c_active_flag
+       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
     UPDATE nexent.model_record_t
        SET context_window_tokens = COALESCE(context_window_tokens,
            GREATEST(1048576, COALESCE(max_output_tokens, 0) + 1)),
@@ -185,8 +255,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-pro-sf@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v4-pro-sf@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Pro'
        AND delete_flag = c_active_flag
@@ -202,8 +272,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 384000))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v4-flash-sf@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v4-flash-sf@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Flash'
        AND delete_flag = c_active_flag
@@ -219,8 +289,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.2@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = c_active_flag
@@ -236,8 +306,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.1-terminus@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
@@ -253,8 +323,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-r1@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1'
        AND delete_flag = c_active_flag
@@ -270,8 +340,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3'
        AND delete_flag = c_active_flag
@@ -287,8 +357,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-0528-qwen3-8b@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-r1-0528-qwen3-8b@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
        AND delete_flag = c_active_flag
@@ -304,8 +374,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.2-pro@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.2-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.2'
        AND delete_flag = c_active_flag
@@ -321,8 +391,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3.1-terminus-pro@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.1-terminus-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
@@ -338,8 +408,8 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(8192, COALESCE(max_output_tokens, 16384))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-r1-pro@1')
-     WHERE LOWER(model_factory) = 'deepseek'
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-r1-pro@1')
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-R1'
        AND delete_flag = c_active_flag
@@ -355,80 +425,10 @@ BEGIN
            default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
            LEAST(4096, COALESCE(max_output_tokens, 8192))),
            capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'deepseek/deepseek-v3-pro@1')
-     WHERE LOWER(model_factory) = 'deepseek'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3'
-       AND delete_flag = c_active_flag
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- openai (2 entries)
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = COALESCE(context_window_tokens,
-           GREATEST(128000, COALESCE(max_output_tokens, 0) + 1)),
-           max_output_tokens = COALESCE(max_output_tokens,
-           LEAST(16384, COALESCE(context_window_tokens, 128000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
-           LEAST(4096, COALESCE(max_output_tokens, 16384))),
-           capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4o@1')
-     WHERE LOWER(model_factory) = 'openai'
-       AND (model_repo IS NULL OR model_repo = '')
-       AND model_name = 'gpt-4o'
-       AND delete_flag = c_active_flag
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = COALESCE(context_window_tokens,
-           GREATEST(1000000, COALESCE(max_output_tokens, 0) + 1)),
-           max_output_tokens = COALESCE(max_output_tokens,
-           LEAST(32768, COALESCE(context_window_tokens, 1000000) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
-           LEAST(8192, COALESCE(max_output_tokens, 32768))),
-           capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'openai/gpt-4.1@1')
-     WHERE LOWER(model_factory) = 'openai'
-       AND (model_repo IS NULL OR model_repo = '')
-       AND model_name = 'gpt-4.1'
-       AND delete_flag = c_active_flag
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- silicon (45 entries)
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = COALESCE(context_window_tokens,
-           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
-           max_output_tokens = COALESCE(max_output_tokens,
-           LEAST(65536, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
-           LEAST(8192, COALESCE(max_output_tokens, 65536))),
-           capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'silicon/qwen3.6-27b@1')
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Qwen'
-       AND model_name = 'Qwen3.6-27B'
-       AND delete_flag = c_active_flag
-       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET context_window_tokens = COALESCE(context_window_tokens,
-           GREATEST(262144, COALESCE(max_output_tokens, 0) + 1)),
-           max_output_tokens = COALESCE(max_output_tokens,
-           LEAST(131072, COALESCE(context_window_tokens, 262144) - 1)),
-           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,
-           LEAST(8192, COALESCE(max_output_tokens, 131072))),
-           capacity_source = COALESCE(capacity_source, c_source_profile),
-           capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
+           capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
-       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND model_name = 'deepseek-ai/DeepSeek-V3'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1237,7 +1237,7 @@ BEGIN
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
-    -- deepseek (15 entries)
+    -- deepseek (4 entries)
     UPDATE nexent.model_record_t
        SET capability_profile_version = 'deepseek/deepseek-chat@2',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
@@ -1290,200 +1290,200 @@ BEGIN
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
+    -- openai (2 entries)
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'openai/gpt-4o@1',
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4o'
+       AND delete_flag = c_active_flag
+       AND context_window_tokens = 128000
+       AND max_output_tokens = 16384
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4o@1' AND capacity_source = c_source_default));
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1',
+       SET capability_profile_version = 'openai/gpt-4.1@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'openai'
+       AND (model_repo IS NULL OR model_repo = '')
+       AND model_name = 'gpt-4.1'
+       AND delete_flag = c_active_flag
+       AND context_window_tokens = 1000000
+       AND max_output_tokens = 32768
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4.1@1' AND capacity_source = c_source_default));
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    -- silicon (56 entries)
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/qwen3.6-27b@1',
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Qwen'
+       AND model_name = 'Qwen3.6-27B'
+       AND delete_flag = c_active_flag
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 65536
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-27b@1' AND capacity_source = c_source_default));
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/kimi-k2.6@1',
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
+     WHERE LOWER(model_factory) = 'silicon'
+       AND model_repo = 'Pro'
+       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND delete_flag = c_active_flag
+       AND context_window_tokens = 262144
+       AND max_output_tokens = 131072
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.6@1' AND capacity_source = c_source_default));
+    GET DIAGNOSTICS v_updated = ROW_COUNT;
+    v_total := v_total + v_updated;
+
+    UPDATE nexent.model_record_t
+       SET capability_profile_version = 'silicon/deepseek-v4-pro-sf@1',
+           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Pro'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-pro-sf@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v4-pro-sf@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1',
+       SET capability_profile_version = 'silicon/deepseek-v4-flash-sf@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V4-Flash'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 1048576
        AND max_output_tokens = 384000
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v4-flash-sf@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v4-flash-sf@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.2@1',
+       SET capability_profile_version = 'silicon/deepseek-v3.2@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3.2@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1',
+       SET capability_profile_version = 'silicon/deepseek-v3.1-terminus@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3.1-terminus@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1@1',
+       SET capability_profile_version = 'silicon/deepseek-r1@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-r1@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3@1',
+       SET capability_profile_version = 'silicon/deepseek-v3@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-V3'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1',
+       SET capability_profile_version = 'silicon/deepseek-r1-0528-qwen3-8b@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'deepseek-ai'
        AND model_name = 'DeepSeek-R1-0528-Qwen3-8B'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 131072
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-0528-qwen3-8b@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-r1-0528-qwen3-8b@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.2-pro@1',
+       SET capability_profile_version = 'silicon/deepseek-v3.2-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.2'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.2-pro@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3.2-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1',
+       SET capability_profile_version = 'silicon/deepseek-v3.1-terminus-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3.1-terminus-pro@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3.1-terminus-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-r1-pro@1',
+       SET capability_profile_version = 'silicon/deepseek-r1-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-R1'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-r1-pro@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-r1-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 
     UPDATE nexent.model_record_t
-       SET capability_profile_version = 'deepseek/deepseek-v3-pro@1',
+       SET capability_profile_version = 'silicon/deepseek-v3-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'deepseek'
+     WHERE LOWER(model_factory) = 'silicon'
        AND model_repo = 'Pro'
        AND model_name = 'deepseek-ai/DeepSeek-V3'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'deepseek/deepseek-v3-pro@1' AND capacity_source = c_source_default));
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- openai (2 entries)
-    UPDATE nexent.model_record_t
-       SET capability_profile_version = 'openai/gpt-4o@1',
-           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'openai'
-       AND (model_repo IS NULL OR model_repo = '')
-       AND model_name = 'gpt-4o'
-       AND delete_flag = c_active_flag
-       AND context_window_tokens = 128000
-       AND max_output_tokens = 16384
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4o@1' AND capacity_source = c_source_default));
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET capability_profile_version = 'openai/gpt-4.1@1',
-           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'openai'
-       AND (model_repo IS NULL OR model_repo = '')
-       AND model_name = 'gpt-4.1'
-       AND delete_flag = c_active_flag
-       AND context_window_tokens = 1000000
-       AND max_output_tokens = 32768
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'openai/gpt-4.1@1' AND capacity_source = c_source_default));
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    -- silicon (45 entries)
-    UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/qwen3.6-27b@1',
-           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Qwen'
-       AND model_name = 'Qwen3.6-27B'
-       AND delete_flag = c_active_flag
-       AND context_window_tokens = 262144
-       AND max_output_tokens = 65536
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/qwen3.6-27b@1' AND capacity_source = c_source_default));
-    GET DIAGNOSTICS v_updated = ROW_COUNT;
-    v_total := v_total + v_updated;
-
-    UPDATE nexent.model_record_t
-       SET capability_profile_version = 'silicon/kimi-k2.6@1',
-           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
-     WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'moonshotai/Kimi-K2.6'
-       AND delete_flag = c_active_flag
-       AND context_window_tokens = 262144
-       AND max_output_tokens = 131072
-       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/kimi-k2.6@1' AND capacity_source = c_source_default));
+       AND (capability_profile_version IS NULL OR (capability_profile_version = 'silicon/deepseek-v3-pro@1' AND capacity_source = c_source_default));
     GET DIAGNOSTICS v_updated = ROW_COUNT;
     v_total := v_total + v_updated;
 

From 1c3f21ac2aed4d48fa40bacf7ab65d1ea2928973 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 15:57:30 +0800
Subject: [PATCH 26/31] fix(w11): use rsplit in _split_repo_name to match
 backend split logic

_split_repo_name used split('/', 1) which splits on the FIRST slash.
The backend model_name_utils.split_repo_name splits on the LAST slash
(rsplit equivalent). For 3-segment IDs like 'Pro/deepseek-ai/DeepSeek-V3.2':

  Generator (broken): repo='Pro', name='deepseek-ai/DeepSeek-V3.2'
  Backend (correct):  repo='Pro/deepseek-ai', name='DeepSeek-V3.2'

This caused all 10 'Pro/'-prefixed catalog entries to never match in
Phase 1a/1b, so they fell through to the Phase 2 safe defaults instead
of receiving the correct catalog values.

Fix: split('/', 1) -> rsplit('/', 1)

Verified: DeepSeek-V3.2 (Pro/deepseek-ai) now correctly backfilled
with catalog values (164K ctx, 8K output, profile source).
---
 .../v2.2.2_0627_backfill_from_catalog.sql     | 56 +++++++++----------
 scripts/generate_backfill_sql.py              |  8 +--
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
index 7d3ab54b4..ea5d81ff3 100644
--- a/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
+++ b/deploy/sql/migrations/v2.2.2_0627_backfill_from_catalog.sql
@@ -240,8 +240,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/kimi-k2.6@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND model_repo = 'Pro/moonshotai'
+       AND model_name = 'Kimi-K2.6'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -376,8 +376,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.2-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -393,8 +393,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3.1-terminus-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -410,8 +410,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-r1-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-R1'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -427,8 +427,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/deepseek-v3-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -988,8 +988,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/glm-5.1-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'zai-org/GLM-5.1'
+       AND model_repo = 'Pro/zai-org'
+       AND model_name = 'GLM-5.1'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1073,8 +1073,8 @@ BEGIN
            capacity_source = COALESCE(capacity_source, c_source_profile),
            capability_profile_version = COALESCE(capability_profile_version, 'silicon/minimax-m2.5-pro@1')
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND model_repo = 'Pro/MiniMaxAI'
+       AND model_name = 'MiniMax-M2.5'
        AND delete_flag = c_active_flag
        AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);
     GET DIAGNOSTICS v_updated = ROW_COUNT;
@@ -1335,8 +1335,8 @@ BEGIN
        SET capability_profile_version = 'silicon/kimi-k2.6@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'moonshotai/Kimi-K2.6'
+       AND model_repo = 'Pro/moonshotai'
+       AND model_name = 'Kimi-K2.6'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 262144
        AND max_output_tokens = 131072
@@ -1439,8 +1439,8 @@ BEGIN
        SET capability_profile_version = 'silicon/deepseek-v3.2-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.2'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3.2'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
@@ -1452,8 +1452,8 @@ BEGIN
        SET capability_profile_version = 'silicon/deepseek-v3.1-terminus-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3.1-Terminus'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3.1-Terminus'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
@@ -1465,8 +1465,8 @@ BEGIN
        SET capability_profile_version = 'silicon/deepseek-r1-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-R1'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-R1'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 163840
        AND max_output_tokens = 16384
@@ -1478,8 +1478,8 @@ BEGIN
        SET capability_profile_version = 'silicon/deepseek-v3-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'deepseek-ai/DeepSeek-V3'
+       AND model_repo = 'Pro/deepseek-ai'
+       AND model_name = 'DeepSeek-V3'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 164000
        AND max_output_tokens = 8192
@@ -1907,8 +1907,8 @@ BEGIN
        SET capability_profile_version = 'silicon/glm-5.1-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'zai-org/GLM-5.1'
+       AND model_repo = 'Pro/zai-org'
+       AND model_name = 'GLM-5.1'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 202752
        AND max_output_tokens = 131072
@@ -1972,8 +1972,8 @@ BEGIN
        SET capability_profile_version = 'silicon/minimax-m2.5-pro@1',
            capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END
      WHERE LOWER(model_factory) = 'silicon'
-       AND model_repo = 'Pro'
-       AND model_name = 'MiniMaxAI/MiniMax-M2.5'
+       AND model_repo = 'Pro/MiniMaxAI'
+       AND model_name = 'MiniMax-M2.5'
        AND delete_flag = c_active_flag
        AND context_window_tokens = 204800
        AND max_output_tokens = 16384
diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
index e68910488..586c27e3d 100644
--- a/scripts/generate_backfill_sql.py
+++ b/scripts/generate_backfill_sql.py
@@ -54,12 +54,12 @@ def _sql_str(value: str) -> str:
 def _split_repo_name(full_id: str) -> tuple[str, str]:
     """Split a catalog's full model identifier into (model_repo, model_name).
 
-    The model_record_t table stores these as two columns. Catalog keys like
-    "Qwen/Qwen2.5-14B-Instruct" must be split on the first '/' to match;
-    bare names like "qwen-plus" or "gpt-4o" land with empty model_repo.
+    Must match backend/utils/model_name_utils.split_repo_name which splits
+    on the LAST '/' (rsplit). For 'Pro/deepseek-ai/DeepSeek-V3.2' this
+    yields repo='Pro/deepseek-ai', name='DeepSeek-V3.2'.
     """
     if "/" in full_id:
-        repo, name = full_id.split("/", 1)
+        repo, name = full_id.rsplit("/", 1)
         return repo, name
     return "", full_id
 

From 290506d9dba39905e8262ccdaa20949f3de6c125 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Sat, 27 Jun 2026 16:29:07 +0800
Subject: [PATCH 27/31] fix(sdk): prevent legacy max_tokens semantic drift in
 ModelConfig validator

Pre-W1 models used max_tokens to mean 'total context window' (input + output).
Post-W1, max_tokens was redefined to mean max_output_tokens (output only).

When the validator copied large legacy values (e.g., 32768) directly to
max_output_tokens, providers rejected requests with 'max_tokens exceeded
max_seq_len' because there was no room left for the input.

Added a heuristic: if max_tokens >= 32768, assume it carries the old 'total
context window' semantics and use a conservative default (4096) instead of
copying it. This prevents the semantic drift while still supporting
legitimate small output limits (< 32768).
---
 sdk/nexent/core/agents/agent_model.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/sdk/nexent/core/agents/agent_model.py b/sdk/nexent/core/agents/agent_model.py
index af97be41b..524334d12 100644
--- a/sdk/nexent/core/agents/agent_model.py
+++ b/sdk/nexent/core/agents/agent_model.py
@@ -106,13 +106,19 @@ class ModelConfig(BaseModel):
     @model_validator(mode="after")
     def _backfill_max_output_from_legacy_max_tokens(self) -> "ModelConfig":
         if self.max_output_tokens is None and self.max_tokens is not None:
-            fallback = self.max_tokens
-            if (
-                self.context_window_tokens is not None
-                and fallback > self.context_window_tokens
-            ):
-                fallback = self.context_window_tokens - 1
-            self.max_output_tokens = max(fallback, 1)
+            # Heuristic: if max_tokens >= 32768, it's likely the old
+            # "total context window" semantics (pre-W1), not an output limit.
+            # Don't copy it directly; use a conservative default instead.
+            if self.max_tokens >= 32768:
+                self.max_output_tokens = 4096
+            else:
+                fallback = self.max_tokens
+                if (
+                    self.context_window_tokens is not None
+                    and fallback > self.context_window_tokens
+                ):
+                    fallback = self.context_window_tokens - 1
+                self.max_output_tokens = max(fallback, 1)
         return self
 
 

From d854b13af88d461c6cb40ba561a2466f681baf7b Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Mon, 29 Jun 2026 17:21:07 +0800
Subject: [PATCH 28/31] fix(model): stop sending hidden form.provider as
 modelFactory on single-add
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Single-add mode has no provider dropdown — form.provider stays at its
hidden default 'modelengine', which was forwarded as modelFactory to
the backend, overriding the Pydantic default 'OpenAI-API-Compatible'.

Introduced in 4becd6992, which added modelFactory: form.provider to both
save paths but missed the isBatchImport guard already present on the
suggest-capacity path in the same commit.

STT/TTS paths are unaffected — they set modelFactory from
form.sttProvider/ttsProvider downstream. Embedding is unaffected —
backend _infer_model_factory overrides from base_url.
---
 .../app/[locale]/models/components/model/ModelAddDialog.tsx     | 2 --
 1 file changed, 2 deletions(-)

diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
index 094ed4391..1dc6baafd 100644
--- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
@@ -1171,7 +1171,6 @@ export const ModelAddDialog = ({
           apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey,
           maxTokens: maxTokensValue,
           displayName: form.displayName || form.name,
-          modelFactory: form.provider,
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
           ...acceptSignalKwargs,
         };
@@ -1214,7 +1213,6 @@ export const ModelAddDialog = ({
           apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey,
           maxTokens: maxTokensValue,
           displayName: form.displayName || form.name,
-          modelFactory: form.provider,
           ...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
           ...acceptSignalKwargs,
         };

From 8d22427ac320074d4283d6548e3ec45847346e98 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Mon, 29 Jun 2026 17:21:24 +0800
Subject: [PATCH 29/31] refactor: move generate_backfill_sql.py to
 deploy/sql/migrations/

The script's sole purpose is generating SQL migration files that live
in deploy/sql/migrations/. Keeping the generator alongside its output
is more natural than a standalone top-level scripts/ directory.

- Updated _project_root path (three levels up instead of one)
- Updated docstring usage path and generated SQL self-reference
- Removed empty scripts/ directory
---
 {scripts => deploy/sql/migrations}/generate_backfill_sql.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 rename {scripts => deploy/sql/migrations}/generate_backfill_sql.py (97%)

diff --git a/scripts/generate_backfill_sql.py b/deploy/sql/migrations/generate_backfill_sql.py
similarity index 97%
rename from scripts/generate_backfill_sql.py
rename to deploy/sql/migrations/generate_backfill_sql.py
index 586c27e3d..239a5c93a 100644
--- a/scripts/generate_backfill_sql.py
+++ b/deploy/sql/migrations/generate_backfill_sql.py
@@ -2,7 +2,7 @@
 """Generate idempotent backfill SQL from capability_profiles.CATALOG.
 
 Usage:
-    python scripts/generate_backfill_sql.py > docker/sql/v2.2.x_MMDD_backfill_from_catalog.sql
+    python deploy/sql/migrations/generate_backfill_sql.py > deploy/sql/migrations/v2.2.x_MMDD_backfill_from_catalog.sql
 
 Run whenever capability_profiles.py changes, then commit the generated SQL.
 """
@@ -12,7 +12,7 @@
 from datetime import date
 from collections import namedtuple
 
-_project_root = os.path.join(os.path.dirname(__file__), "..")
+_project_root = os.path.join(os.path.dirname(__file__), "..", "..", "..")
 sys.path.insert(0, os.path.join(_project_root, "backend"))
 
 # Stub SDK types to avoid pulling in the full nexent SDK dependency chain
@@ -80,7 +80,7 @@ def main() -> None:
     today = date.today().strftime("%Y-%m-%d")
     lines: list[str] = []
 
-    lines.append(f"-- Generated by scripts/generate_backfill_sql.py on {today}")
+    lines.append(f"-- Generated by deploy/sql/migrations/generate_backfill_sql.py on {today}")
     lines.append(f"-- Catalog revision: {CATALOG_REVISION}")
     lines.append(f"-- Catalog entries: {len(CATALOG)}")
     lines.append("--")

From bdf1c0efeb56a681bee84e3af7a2a028370cd8c6 Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Mon, 29 Jun 2026 17:22:11 +0800
Subject: [PATCH 30/31] chore: remove duplicate
 scripts/generate_backfill_sql.py from upstream merge

Upstream PR #3317 added the generator to scripts/, but we already
relocated it to deploy/sql/migrations/ in 8d22427ac. Keep the
relocated version with corrected paths.
---
 scripts/generate_backfill_sql.py | 272 -------------------------------
 1 file changed, 272 deletions(-)
 delete mode 100644 scripts/generate_backfill_sql.py

diff --git a/scripts/generate_backfill_sql.py b/scripts/generate_backfill_sql.py
deleted file mode 100644
index 586c27e3d..000000000
--- a/scripts/generate_backfill_sql.py
+++ /dev/null
@@ -1,272 +0,0 @@
-#!/usr/bin/env python3
-"""Generate idempotent backfill SQL from capability_profiles.CATALOG.
-
-Usage:
-    python scripts/generate_backfill_sql.py > docker/sql/v2.2.x_MMDD_backfill_from_catalog.sql
-
-Run whenever capability_profiles.py changes, then commit the generated SQL.
-"""
-import sys
-import os
-import types
-from datetime import date
-from collections import namedtuple
-
-_project_root = os.path.join(os.path.dirname(__file__), "..")
-sys.path.insert(0, os.path.join(_project_root, "backend"))
-
-# Stub SDK types to avoid pulling in the full nexent SDK dependency chain
-_nexent_stub = types.ModuleType("nexent")
-_nexent_core = types.ModuleType("nexent.core")
-_nexent_models = types.ModuleType("nexent.core.models")
-_nexent_resolver = types.ModuleType("nexent.core.models.capacity_resolver")
-
-ProfileKey = tuple
-
-class CapabilityProfile:
-    """Minimal stub that accepts any keyword arguments."""
-    def __init__(self, **kwargs):
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-
-_nexent_resolver.ProfileKey = ProfileKey
-_nexent_resolver.CapabilityProfile = CapabilityProfile
-sys.modules["nexent"] = _nexent_stub
-sys.modules["nexent.core"] = _nexent_core
-sys.modules["nexent.core.models"] = _nexent_models
-sys.modules["nexent.core.models.capacity_resolver"] = _nexent_resolver
-
-from consts.capability_profiles import CATALOG, CATALOG_REVISION
-
-DEFAULT_CONTEXT_WINDOW = 32_768
-DEFAULT_MAX_OUTPUT = 4_096
-DEFAULT_RESERVE = 4_096
-
-
-def _sql_int(value: int) -> str:
-    return str(value)
-
-
-def _sql_str(value: str) -> str:
-    return value.replace("'", "''")
-
-
-def _split_repo_name(full_id: str) -> tuple[str, str]:
-    """Split a catalog's full model identifier into (model_repo, model_name).
-
-    Must match backend/utils/model_name_utils.split_repo_name which splits
-    on the LAST '/' (rsplit). For 'Pro/deepseek-ai/DeepSeek-V3.2' this
-    yields repo='Pro/deepseek-ai', name='DeepSeek-V3.2'.
-    """
-    if "/" in full_id:
-        repo, name = full_id.rsplit("/", 1)
-        return repo, name
-    return "", full_id
-
-
-def _sql_repo_match(repo: str) -> str:
-    """Build the WHERE fragment that matches the table's model_repo column.
-
-    Bare-name catalog entries (no '/') can land in the table as either
-    model_repo='' or model_repo IS NULL depending on the create path, so
-    accept both. Namespaced entries match the exact string.
-    """
-    if repo == "":
-        return "(model_repo IS NULL OR model_repo = '')"
-    return f"model_repo = '{_sql_str(repo)}'"
-
-
-def main() -> None:
-    today = date.today().strftime("%Y-%m-%d")
-    lines: list[str] = []
-
-    lines.append(f"-- Generated by scripts/generate_backfill_sql.py on {today}")
-    lines.append(f"-- Catalog revision: {CATALOG_REVISION}")
-    lines.append(f"-- Catalog entries: {len(CATALOG)}")
-    lines.append("--")
-    lines.append("-- Migration kind: RECOMMENDED_DATA_FIX")
-    lines.append("-- Idempotent: COALESCE + IS NULL guards protect existing values.")
-    lines.append("-- Safe: enforces max_output < context_window via GREATEST/LEAST.")
-    lines.append("--")
-    lines.append("-- Phases:")
-    lines.append("--   1a  Bare LLM/VLM rows that match a catalog entry by")
-    lines.append("--       (model_factory, model_repo, model_name) -> fill capacity")
-    lines.append("--       fields + tag capacity_source='profile' + profile_version.")
-    lines.append("--   1b  Already-filled rows that match a catalog entry AND whose")
-    lines.append("--       context_window_tokens and max_output_tokens exactly equal")
-    lines.append("--       the catalog values -> tag profile_version only. capacity_")
-    lines.append("--       source stays whatever it was (typically 'operator'); we")
-    lines.append("--       don't rewrite provenance, we just add the dispatch tag so")
-    lines.append("--       dispatch_profile_hit_total can fire.")
-    lines.append("--    2  Remaining bare LLM/VLM rows -> safe defaults.")
-    lines.append("--    3  Clamp default_output_reserve_tokens to <= max_output_tokens.")
-    lines.append("--")
-    lines.append("-- Pre-run self-check (rows whose capability_profile_version is NULL):")
-    lines.append("--")
-    lines.append("--   SELECT model_id, model_repo, model_name, model_factory,")
-    lines.append("--          context_window_tokens, max_output_tokens, capability_profile_version")
-    lines.append("--     FROM nexent.model_record_t")
-    lines.append("--    WHERE delete_flag = 'N'")
-    lines.append("--      AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
-    lines.append("--      AND capability_profile_version IS NULL;")
-    lines.append("")
-
-    # Group catalog by provider so the generated SQL has tidy section headers
-    from collections import defaultdict
-    by_provider: dict[str, list] = defaultdict(list)
-    for (provider, full_id), profile in CATALOG.items():
-        by_provider[provider].append((full_id, profile))
-
-    # --------------------------------------------------------------
-    # Phase 1a: catalog match + bare -> fill capacity + tag
-    # --------------------------------------------------------------
-    lines.append("-- ============================================================")
-    lines.append("-- Phase 1a: Backfill bare rows that match approved catalog entries")
-    lines.append("-- ============================================================")
-    lines.append("")
-    lines.append("DO $$")
-    lines.append("DECLARE")
-    lines.append("    v_updated INTEGER := 0;")
-    lines.append("    v_total   INTEGER := 0;")
-    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
-    lines.append("    c_source_profile  CONSTANT TEXT := 'profile';")
-    lines.append("BEGIN")
-
-    for provider in sorted(by_provider.keys()):
-        entries = by_provider[provider]
-        lines.append(f"    -- {provider} ({len(entries)} entries)")
-        for full_id, profile in entries:
-            ctx = profile.context_window_tokens
-            mout = profile.max_output_tokens
-            reserve = profile.default_output_reserve_tokens
-            version = _sql_str(profile.capability_profile_version)
-            repo, name = _split_repo_name(full_id)
-            repo_match = _sql_repo_match(repo)
-            escaped_name = _sql_str(name)
-
-            lines.append(f"    UPDATE nexent.model_record_t")
-            lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
-            lines.append(f"           GREATEST({_sql_int(ctx)}, COALESCE(max_output_tokens, 0) + 1)),")
-            lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
-            lines.append(f"           LEAST({_sql_int(mout)}, COALESCE(context_window_tokens, {_sql_int(ctx)}) - 1)),")
-            lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
-            lines.append(f"           LEAST({_sql_int(reserve)}, COALESCE(max_output_tokens, {_sql_int(mout)}))),")
-            lines.append(f"           capacity_source = COALESCE(capacity_source, c_source_profile),")
-            lines.append(f"           capability_profile_version = COALESCE(capability_profile_version, '{version}')")
-            lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
-            lines.append(f"       AND {repo_match}")
-            lines.append(f"       AND model_name = '{escaped_name}'")
-            lines.append(f"       AND delete_flag = c_active_flag")
-            lines.append(f"       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
-            lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
-            lines.append(f"    v_total := v_total + v_updated;")
-            lines.append("")
-
-    lines.append("    RAISE NOTICE 'Phase 1a catalog backfill (bare): % row(s) updated', v_total;")
-    lines.append("END $$;")
-    lines.append("")
-
-    # --------------------------------------------------------------
-    # Phase 1b: catalog match + already-filled values match catalog
-    #           -> tag profile_version + upgrade capacity_source from 'default' to 'profile'
-    # --------------------------------------------------------------
-    lines.append("-- ============================================================")
-    lines.append("-- Phase 1b: Tag already-filled rows whose ctx/max_out exactly match")
-    lines.append("--           the catalog with capability_profile_version. Upgrades")
-    lines.append("--           capacity_source from 'default' to 'profile' (values now")
-    lines.append("--           come from catalog, not system defaults). Preserves")
-    lines.append("--           'operator' and other explicit sources.")
-    lines.append("-- ============================================================")
-    lines.append("")
-    lines.append("DO $$")
-    lines.append("DECLARE")
-    lines.append("    v_updated INTEGER := 0;")
-    lines.append("    v_total   INTEGER := 0;")
-    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
-    lines.append("    c_source_default  CONSTANT TEXT := 'default';")
-    lines.append("    c_source_profile  CONSTANT TEXT := 'profile';")
-    lines.append("BEGIN")
-
-    for provider in sorted(by_provider.keys()):
-        entries = by_provider[provider]
-        lines.append(f"    -- {provider} ({len(entries)} entries)")
-        for full_id, profile in entries:
-            ctx = profile.context_window_tokens
-            mout = profile.max_output_tokens
-            version = _sql_str(profile.capability_profile_version)
-            repo, name = _split_repo_name(full_id)
-            repo_match = _sql_repo_match(repo)
-            escaped_name = _sql_str(name)
-
-            lines.append(f"    UPDATE nexent.model_record_t")
-            lines.append(f"       SET capability_profile_version = '{version}',")
-            lines.append(f"           capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END")
-            lines.append(f"     WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
-            lines.append(f"       AND {repo_match}")
-            lines.append(f"       AND model_name = '{escaped_name}'")
-            lines.append(f"       AND delete_flag = c_active_flag")
-            lines.append(f"       AND context_window_tokens = {_sql_int(ctx)}")
-            lines.append(f"       AND max_output_tokens = {_sql_int(mout)}")
-            lines.append(f"       AND (capability_profile_version IS NULL OR (capability_profile_version = '{version}' AND capacity_source = c_source_default));")
-            lines.append(f"    GET DIAGNOSTICS v_updated = ROW_COUNT;")
-            lines.append(f"    v_total := v_total + v_updated;")
-            lines.append("")
-
-    lines.append("    RAISE NOTICE 'Phase 1b catalog tag (matching filled): % row(s) updated', v_total;")
-    lines.append("END $$;")
-    lines.append("")
-
-    # Phase 2: safe defaults for remaining bare rows
-    lines.append("-- ============================================================")
-    lines.append("-- Phase 2: Safe defaults for remaining bare LLM/VLM rows")
-    lines.append("-- ============================================================")
-    lines.append("")
-    lines.append("DO $$")
-    lines.append("DECLARE")
-    lines.append("    v_updated INTEGER := 0;")
-    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
-    lines.append("    c_source_default  CONSTANT TEXT := 'default';")
-    lines.append("BEGIN")
-    lines.append("    UPDATE nexent.model_record_t")
-    lines.append(f"       SET context_window_tokens = COALESCE(context_window_tokens,")
-    lines.append(f"           GREATEST({_sql_int(DEFAULT_CONTEXT_WINDOW)}, COALESCE(max_output_tokens, 0) + 1)),")
-    lines.append(f"           max_output_tokens = COALESCE(max_output_tokens,")
-    lines.append(f"           LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
-    lines.append(f"           default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")
-    lines.append(f"           LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
-    lines.append(f"           capacity_source = COALESCE(capacity_source, c_source_default)")
-    lines.append("     WHERE delete_flag = c_active_flag")
-    lines.append("       AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
-    lines.append("       AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
-    lines.append("")
-    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
-    lines.append("    RAISE NOTICE 'Safe defaults: % LLM/VLM row(s) backfilled', v_updated;")
-    lines.append("END $$;")
-    lines.append("")
-
-    # Phase 3: clamp reserve to max_output
-    lines.append("-- ============================================================")
-    lines.append("-- Phase 3: Clamp default_output_reserve_tokens to max_output_tokens")
-    lines.append("-- ============================================================")
-    lines.append("")
-    lines.append("DO $$")
-    lines.append("DECLARE")
-    lines.append("    v_updated INTEGER := 0;")
-    lines.append("    c_active_flag     CONSTANT TEXT := 'N';")
-    lines.append("BEGIN")
-    lines.append("    UPDATE nexent.model_record_t")
-    lines.append("       SET default_output_reserve_tokens = max_output_tokens")
-    lines.append("     WHERE delete_flag = c_active_flag")
-    lines.append("       AND default_output_reserve_tokens IS NOT NULL")
-    lines.append("       AND max_output_tokens IS NOT NULL")
-    lines.append("       AND default_output_reserve_tokens > max_output_tokens;")
-    lines.append("")
-    lines.append("    GET DIAGNOSTICS v_updated = ROW_COUNT;")
-    lines.append("    RAISE NOTICE 'reserve clamp: % row(s) updated', v_updated;")
-    lines.append("END $$;")
-
-    print("\n".join(lines))
-
-
-if __name__ == "__main__":
-    main()

From 935821218002ddc9f977a7409dd53dc33ca7caec Mon Sep 17 00:00:00 2001
From: wuyuanfr <18270469842@163.com>
Date: Mon, 29 Jun 2026 17:48:53 +0800
Subject: [PATCH 31/31] fix: preserve embedding legacy inference, extend VLM
 only

- Embedding/multi_embedding keeps old logic (only dashscope recognized)
- VLM uses extended inference so tokenpony URLs can trigger catalog healthcheck
- Prevents unintended behavior change for embedding models
---
 backend/services/model_health_service.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py
index 35fff2a23..5d472799d 100644
--- a/backend/services/model_health_service.py
+++ b/backend/services/model_health_service.py
@@ -38,8 +38,17 @@ def _normalize_embedding_url(base_url: str) -> str:
 def _infer_model_factory(model_type: str, base_url: str, current_factory: Optional[str] = None) -> Optional[str]:
     """Infer model_factory from base_url if not already set or is generic.
 
-    Uses the shared W11 host map so embedding and LLM/VLM inference do not drift.
+    For embedding/multi_embedding, uses legacy logic (only dashscope) to avoid
+    changing existing behavior. For other types (VLM), uses extended inference
+    so tokenpony URLs can be recognized for catalog healthcheck.
     """
+    # Embedding types: keep legacy behavior (only dashscope)
+    if model_type in EMBEDDING_TYPES:
+        if "dashscope" in base_url.lower():
+            return DASHSCOPE_MODEL_FACTORY
+        return current_factory
+
+    # Non-embedding types (VLM, etc): use extended inference
     try:
         from services.model_capacity_suggestion_service import pick_provider_from_base_url