Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
68498c6
fix(model_management): preserve connectivity success when capacity su…
wuyuanfr Jun 25, 2026
f555fda
refactor(w11): collapse Add/Edit capacity-suggestion controls
wuyuanfr Jun 25, 2026
f0e82d3
feat(w11): backend SLO instrumentation + cross-tenant capacity-covera…
wuyuanfr Jun 26, 2026
e442a55
feat(w11): V1.5 bare-capacity tag + preset selector + permission helper
wuyuanfr Jun 26, 2026
e0ef307
fix(w11): unify ModelEditDialog state-per-model via key remount + aut…
wuyuanfr Jun 26, 2026
00c8c62
feat(w11): V1.5 bare-capacity surfaces + dual legacy hint + accept-si…
wuyuanfr Jun 26, 2026
775b0c8
fix(w11): compact bare-capacity UI — icon+tooltip in model selector, …
wuyuanfr Jun 26, 2026
d6165cb
fix(w11): close remaining spec gaps — bare-capacity badge in model li…
wuyuanfr Jun 26, 2026
f65f859
fix(w11): remove obsolete deprecatedMaxTokens warning from ModelEditD…
wuyuanfr Jun 26, 2026
04b4bc0
fix(w11): backfill bare LLM/VLM rows with safe capacity defaults
wuyuanfr Jun 27, 2026
3d13339
fix(i18n): rename 'catalog suggestion' to 'capacity suggestion' in co…
wuyuanfr Jun 27, 2026
f785f82
feat(w11): expand capability catalog to 66 entries with SiliconFlow m…
wuyuanfr Jun 27, 2026
8b0497c
feat(w11): auto-backfill capacity from catalog on startup
wuyuanfr Jun 27, 2026
2a9cbcb
fix(w11): plug 3 production bugs in V1.5 capacity-suggestion accept-s…
wuyuanfr Jun 27, 2026
e8aacc2
refactor(w11): replace startup backfill with SQL generator
wuyuanfr Jun 27, 2026
a9550fa
fix(w11): add reserve <= max_output safety guard to backfill SQL
wuyuanfr Jun 27, 2026
9d1547e
fix(w11): use capacity_source='unknown' for safe-default backfill rows
wuyuanfr Jun 27, 2026
0b94407
feat(w11): add capacity_source='default' for system-default backfill …
wuyuanfr Jun 27, 2026
31fc590
refactor(w11): remove Phase 3 max_tokens reconcile from backfill SQL
wuyuanfr Jun 27, 2026
4baf92b
fix(sdk): remove reverse max_tokens backfill from ModelConfig validator
wuyuanfr Jun 27, 2026
46f59b7
Merge remote-tracking branch 'upstream/develop' into feature/w11-capa…
wuyuanfr Jun 27, 2026
b7d6b2f
chore(sql): remove superseded v2.2.0_0617 capacity data fix migration
wuyuanfr Jun 27, 2026
5e08815
fix(w11): Phase 1b now upgrades capacity_source 'default' to 'profile'
wuyuanfr Jun 27, 2026
fafdcfd
refactor(w11): use PL/pgSQL constants in generated backfill SQL
wuyuanfr Jun 27, 2026
9769a65
fix(test): use model_factory instead of provider in accept signal test
wuyuanfr Jun 27, 2026
212a0dd
fix(catalog): use 'silicon' provider for SiliconFlow-hosted DeepSeek …
wuyuanfr Jun 27, 2026
1c3f21a
fix(w11): use rsplit in _split_repo_name to match backend split logic
wuyuanfr Jun 27, 2026
290506d
fix(sdk): prevent legacy max_tokens semantic drift in ModelConfig val…
wuyuanfr Jun 27, 2026
d854b13
fix(model): stop sending hidden form.provider as modelFactory on sing…
wuyuanfr Jun 29, 2026
8d22427
refactor: move generate_backfill_sql.py to deploy/sql/migrations/
wuyuanfr Jun 29, 2026
c7b869d
Merge remote-tracking branch 'upstream/develop' into feature/w11-capa…
wuyuanfr Jun 29, 2026
bdf1c0e
chore: remove duplicate scripts/generate_backfill_sql.py from upstrea…
wuyuanfr Jun 29, 2026
9358212
fix: preserve embedding legacy inference, extend VLM only
wuyuanfr Jun 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion backend/services/model_health_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,17 @@ def _normalize_embedding_url(base_url: str) -> str:
def _infer_model_factory(model_type: str, base_url: str, current_factory: Optional[str] = None) -> Optional[str]:
"""Infer model_factory from base_url if not already set or is generic.

Uses the shared W11 host map so embedding and LLM/VLM inference do not drift.
For embedding/multi_embedding, uses legacy logic (only dashscope) to avoid
changing existing behavior. For other types (VLM), uses extended inference
so tokenpony URLs can be recognized for catalog healthcheck.
"""
# Embedding types: keep legacy behavior (only dashscope)
if model_type in EMBEDDING_TYPES:
if "dashscope" in base_url.lower():
return DASHSCOPE_MODEL_FACTORY
return current_factory

# Non-embedding types (VLM, etc): use extended inference
try:
from services.model_capacity_suggestion_service import pick_provider_from_base_url

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Generate idempotent backfill SQL from capability_profiles.CATALOG.

Usage:
python scripts/generate_backfill_sql.py > docker/sql/v2.2.x_MMDD_backfill_from_catalog.sql
python deploy/sql/migrations/generate_backfill_sql.py > deploy/sql/migrations/v2.2.x_MMDD_backfill_from_catalog.sql

Run whenever capability_profiles.py changes, then commit the generated SQL.
"""
Expand All @@ -12,7 +12,7 @@
from datetime import date
from collections import namedtuple

_project_root = os.path.join(os.path.dirname(__file__), "..")
_project_root = os.path.join(os.path.dirname(__file__), "..", "..", "..")
sys.path.insert(0, os.path.join(_project_root, "backend"))

# Stub SDK types to avoid pulling in the full nexent SDK dependency chain
Expand Down Expand Up @@ -80,7 +80,7 @@
today = date.today().strftime("%Y-%m-%d")
lines: list[str] = []

lines.append(f"-- Generated by scripts/generate_backfill_sql.py on {today}")
lines.append(f"-- Generated by deploy/sql/migrations/generate_backfill_sql.py on {today}")
lines.append(f"-- Catalog revision: {CATALOG_REVISION}")
lines.append(f"-- Catalog entries: {len(CATALOG)}")
lines.append("--")
Expand Down Expand Up @@ -120,15 +120,15 @@
# --------------------------------------------------------------
# Phase 1a: catalog match + bare -> fill capacity + tag
# --------------------------------------------------------------
lines.append("-- ============================================================")

Check failure on line 123 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal "-- ============================================================" 8 times.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA1&open=AZ8SvkBdLB8vlARaFgA1&pullRequest=3329
lines.append("-- Phase 1a: Backfill bare rows that match approved catalog entries")
lines.append("-- ============================================================")
lines.append("")
lines.append("DO $$")

Check failure on line 127 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal "DO $$" 4 times.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgAy&open=AZ8SvkBdLB8vlARaFgAy&pullRequest=3329
lines.append("DECLARE")
lines.append(" v_updated INTEGER := 0;")

Check failure on line 129 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal " v_updated INTEGER := 0;" 4 times.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgAz&open=AZ8SvkBdLB8vlARaFgAz&pullRequest=3329
lines.append(" v_total INTEGER := 0;")
lines.append(" c_active_flag CONSTANT TEXT := 'N';")

Check failure on line 131 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal " c_active_flag CONSTANT TEXT := 'N';" 4 times.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgAx&open=AZ8SvkBdLB8vlARaFgAx&pullRequest=3329
lines.append(" c_source_profile CONSTANT TEXT := 'profile';")
lines.append("BEGIN")

Expand All @@ -144,26 +144,26 @@
repo_match = _sql_repo_match(repo)
escaped_name = _sql_str(name)

lines.append(f" UPDATE nexent.model_record_t")

Check warning on line 147 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA2&open=AZ8SvkBdLB8vlARaFgA2&pullRequest=3329
lines.append(f" SET context_window_tokens = COALESCE(context_window_tokens,")

Check warning on line 148 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA3&open=AZ8SvkBdLB8vlARaFgA3&pullRequest=3329
lines.append(f" GREATEST({_sql_int(ctx)}, COALESCE(max_output_tokens, 0) + 1)),")
lines.append(f" max_output_tokens = COALESCE(max_output_tokens,")

Check warning on line 150 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA4&open=AZ8SvkBdLB8vlARaFgA4&pullRequest=3329
lines.append(f" LEAST({_sql_int(mout)}, COALESCE(context_window_tokens, {_sql_int(ctx)}) - 1)),")
lines.append(f" default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")

Check warning on line 152 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA5&open=AZ8SvkBdLB8vlARaFgA5&pullRequest=3329
lines.append(f" LEAST({_sql_int(reserve)}, COALESCE(max_output_tokens, {_sql_int(mout)}))),")
lines.append(f" capacity_source = COALESCE(capacity_source, c_source_profile),")

Check warning on line 154 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA6&open=AZ8SvkBdLB8vlARaFgA6&pullRequest=3329
lines.append(f" capability_profile_version = COALESCE(capability_profile_version, '{version}')")
lines.append(f" WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
lines.append(f" AND {repo_match}")
lines.append(f" AND model_name = '{escaped_name}'")
lines.append(f" AND delete_flag = c_active_flag")

Check warning on line 159 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA7&open=AZ8SvkBdLB8vlARaFgA7&pullRequest=3329
lines.append(f" AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")

Check warning on line 160 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA8&open=AZ8SvkBdLB8vlARaFgA8&pullRequest=3329
lines.append(f" GET DIAGNOSTICS v_updated = ROW_COUNT;")

Check warning on line 161 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA9&open=AZ8SvkBdLB8vlARaFgA9&pullRequest=3329
lines.append(f" v_total := v_total + v_updated;")

Check warning on line 162 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA-&open=AZ8SvkBdLB8vlARaFgA-&pullRequest=3329
lines.append("")

lines.append(" RAISE NOTICE 'Phase 1a catalog backfill (bare): % row(s) updated', v_total;")
lines.append("END $$;")

Check failure on line 166 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal "END $$;" 4 times.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA0&open=AZ8SvkBdLB8vlARaFgA0&pullRequest=3329
lines.append("")

# --------------------------------------------------------------
Expand Down Expand Up @@ -198,18 +198,18 @@
repo_match = _sql_repo_match(repo)
escaped_name = _sql_str(name)

lines.append(f" UPDATE nexent.model_record_t")

Check warning on line 201 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgA_&open=AZ8SvkBdLB8vlARaFgA_&pullRequest=3329
lines.append(f" SET capability_profile_version = '{version}',")
lines.append(f" capacity_source = CASE WHEN capacity_source = c_source_default THEN c_source_profile ELSE capacity_source END")

Check warning on line 203 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBA&open=AZ8SvkBdLB8vlARaFgBA&pullRequest=3329
lines.append(f" WHERE LOWER(model_factory) = '{_sql_str(provider.lower())}'")
lines.append(f" AND {repo_match}")
lines.append(f" AND model_name = '{escaped_name}'")
lines.append(f" AND delete_flag = c_active_flag")

Check warning on line 207 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBB&open=AZ8SvkBdLB8vlARaFgBB&pullRequest=3329
lines.append(f" AND context_window_tokens = {_sql_int(ctx)}")
lines.append(f" AND max_output_tokens = {_sql_int(mout)}")
lines.append(f" AND (capability_profile_version IS NULL OR (capability_profile_version = '{version}' AND capacity_source = c_source_default));")
lines.append(f" GET DIAGNOSTICS v_updated = ROW_COUNT;")

Check warning on line 211 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBC&open=AZ8SvkBdLB8vlARaFgBC&pullRequest=3329
lines.append(f" v_total := v_total + v_updated;")

Check warning on line 212 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBD&open=AZ8SvkBdLB8vlARaFgBD&pullRequest=3329
lines.append("")

lines.append(" RAISE NOTICE 'Phase 1b catalog tag (matching filled): % row(s) updated', v_total;")
Expand All @@ -228,13 +228,13 @@
lines.append(" c_source_default CONSTANT TEXT := 'default';")
lines.append("BEGIN")
lines.append(" UPDATE nexent.model_record_t")
lines.append(f" SET context_window_tokens = COALESCE(context_window_tokens,")

Check warning on line 231 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBE&open=AZ8SvkBdLB8vlARaFgBE&pullRequest=3329
lines.append(f" GREATEST({_sql_int(DEFAULT_CONTEXT_WINDOW)}, COALESCE(max_output_tokens, 0) + 1)),")
lines.append(f" max_output_tokens = COALESCE(max_output_tokens,")

Check warning on line 233 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBF&open=AZ8SvkBdLB8vlARaFgBF&pullRequest=3329
lines.append(f" LEAST({_sql_int(DEFAULT_MAX_OUTPUT)}, COALESCE(context_window_tokens, {_sql_int(DEFAULT_CONTEXT_WINDOW)}) - 1)),")
lines.append(f" default_output_reserve_tokens = COALESCE(default_output_reserve_tokens,")

Check warning on line 235 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBG&open=AZ8SvkBdLB8vlARaFgBG&pullRequest=3329
lines.append(f" LEAST({_sql_int(DEFAULT_RESERVE)}, COALESCE(max_output_tokens, {_sql_int(DEFAULT_MAX_OUTPUT)}))),")
lines.append(f" capacity_source = COALESCE(capacity_source, c_source_default)")

Check warning on line 237 in deploy/sql/migrations/generate_backfill_sql.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Add replacement fields or use a normal string instead of an f-string.

See more on https://sonarcloud.io/project/issues?id=ModelEngine-Group_nexent&issues=AZ8SvkBdLB8vlARaFgBH&open=AZ8SvkBdLB8vlARaFgBH&pullRequest=3329
lines.append(" WHERE delete_flag = c_active_flag")
lines.append(" AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
lines.append(" AND (context_window_tokens IS NULL OR max_output_tokens IS NULL);")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,6 @@ export const ModelAddDialog = ({
apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey,
maxTokens: maxTokensValue,
displayName: form.displayName || form.name,
modelFactory: form.provider,
...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
...acceptSignalKwargs,
};
Expand Down Expand Up @@ -1214,7 +1213,6 @@ export const ModelAddDialog = ({
apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey,
maxTokens: maxTokensValue,
displayName: form.displayName || form.name,
modelFactory: form.provider,
...(supportsCapacityFields ? buildCapacityPayload(form) : {}),
...acceptSignalKwargs,
};
Expand Down
Loading