diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py index 35fff2a23..5d472799d 100644 --- a/backend/services/model_health_service.py +++ b/backend/services/model_health_service.py @@ -38,8 +38,17 @@ def _normalize_embedding_url(base_url: str) -> str: def _infer_model_factory(model_type: str, base_url: str, current_factory: Optional[str] = None) -> Optional[str]: """Infer model_factory from base_url if not already set or is generic. - Uses the shared W11 host map so embedding and LLM/VLM inference do not drift. + For embedding/multi_embedding, uses legacy logic (only dashscope) to avoid + changing existing behavior. For other types (VLM), uses extended inference + so tokenpony URLs can be recognized for catalog healthcheck. """ + # Embedding types: keep legacy behavior (only dashscope) + if model_type in EMBEDDING_TYPES: + if "dashscope" in base_url.lower(): + return DASHSCOPE_MODEL_FACTORY + return current_factory + + # Non-embedding types (VLM, etc): use extended inference try: from services.model_capacity_suggestion_service import pick_provider_from_base_url diff --git a/scripts/generate_backfill_sql.py b/deploy/sql/migrations/generate_backfill_sql.py similarity index 97% rename from scripts/generate_backfill_sql.py rename to deploy/sql/migrations/generate_backfill_sql.py index 586c27e3d..239a5c93a 100644 --- a/scripts/generate_backfill_sql.py +++ b/deploy/sql/migrations/generate_backfill_sql.py @@ -2,7 +2,7 @@ """Generate idempotent backfill SQL from capability_profiles.CATALOG. Usage: - python scripts/generate_backfill_sql.py > docker/sql/v2.2.x_MMDD_backfill_from_catalog.sql + python deploy/sql/migrations/generate_backfill_sql.py > deploy/sql/migrations/v2.2.x_MMDD_backfill_from_catalog.sql Run whenever capability_profiles.py changes, then commit the generated SQL. """ @@ -12,7 +12,7 @@ from datetime import date from collections import namedtuple -_project_root = os.path.join(os.path.dirname(__file__), "..") +_project_root = os.path.join(os.path.dirname(__file__), "..", "..", "..") sys.path.insert(0, os.path.join(_project_root, "backend")) # Stub SDK types to avoid pulling in the full nexent SDK dependency chain @@ -80,7 +80,7 @@ def main() -> None: today = date.today().strftime("%Y-%m-%d") lines: list[str] = [] - lines.append(f"-- Generated by scripts/generate_backfill_sql.py on {today}") + lines.append(f"-- Generated by deploy/sql/migrations/generate_backfill_sql.py on {today}") lines.append(f"-- Catalog revision: {CATALOG_REVISION}") lines.append(f"-- Catalog entries: {len(CATALOG)}") lines.append("--") diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx index 094ed4391..1dc6baafd 100644 --- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx +++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx @@ -1171,7 +1171,6 @@ export const ModelAddDialog = ({ apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey, maxTokens: maxTokensValue, displayName: form.displayName || form.name, - modelFactory: form.provider, ...(supportsCapacityFields ? buildCapacityPayload(form) : {}), ...acceptSignalKwargs, }; @@ -1214,7 +1213,6 @@ export const ModelAddDialog = ({ apiKey: form.apiKey.trim() === "" ? "sk-no-api-key" : form.apiKey, maxTokens: maxTokensValue, displayName: form.displayName || form.name, - modelFactory: form.provider, ...(supportsCapacityFields ? buildCapacityPayload(form) : {}), ...acceptSignalKwargs, };