From f4cb4e962042d7fb1706f382e0ebc85df780c81d Mon Sep 17 00:00:00 2001 From: Brendan O'Leary Date: Mon, 9 Mar 2026 16:58:09 -0400 Subject: [PATCH 1/2] Fail fast on invalid OpenRouter model names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validates model ID against OpenRouter's /api/v1/models endpoint before running any tasks. This prevents wasting compute on a full benchmark run that will just score 0% because the model doesn't exist. Features: - Queries OpenRouter API to check if model exists - Suggests close matches if model name looks like a typo - Lists available models from the same provider as hints - Gracefully skips validation if API key not set or API errors - Exits with error code 1 immediately if model is invalid Example output for typo: ❌ Model 'anthropic/claude-sonet-4' not found on OpenRouter. Did you mean: anthropic/claude-sonnet-4? --- scripts/benchmark.py | 9 +++++ scripts/lib_agent.py | 91 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 36b5e05..32380e6 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -27,7 +27,9 @@ cleanup_agent_sessions, ensure_agent_exists, execute_openclaw_task, + ModelValidationError, slugify_model, + validate_openrouter_model, ) from lib_grading import GradeResult, grade_task from lib_tasks import Task, TaskLoader @@ -487,6 +489,13 @@ def main(): # Use a shared workspace for the agent - we'll copy fixtures per task agent_workspace = Path(f"/tmp/pinchbench/{run_id}/agent_workspace") + # Validate model exists before wasting time on tasks + try: + validate_openrouter_model(args.model) + except ModelValidationError as exc: + logger.error("❌ %s", exc) + sys.exit(1) + ensure_agent_exists(agent_id, args.model, agent_workspace) cleanup_agent_sessions(agent_id) diff --git a/scripts/lib_agent.py b/scripts/lib_agent.py index 24e456d..b563352 100644 --- a/scripts/lib_agent.py +++ b/scripts/lib_agent.py @@ -11,11 +11,19 @@ import time from pathlib import Path from typing import Any, Dict, List +from urllib import error, request from lib_tasks import Task logger = logging.getLogger(__name__) + + +class ModelValidationError(Exception): + """Raised when a model ID is invalid or inaccessible.""" + pass + + MAX_OPENCLAW_MESSAGE_CHARS = int(os.environ.get("PINCHBENCH_MAX_MSG_CHARS", "4000")) @@ -35,6 +43,89 @@ def normalize_model_id(model_id: str) -> str: return f"openrouter/{model_id}" +def validate_openrouter_model(model_id: str, timeout_seconds: float = 10.0) -> bool: + """ + Validate that a model ID exists on OpenRouter. + + Args: + model_id: Model ID (with or without openrouter/ prefix) + timeout_seconds: HTTP request timeout + + Returns: + True if model is valid and accessible + + Raises: + ModelValidationError: If model doesn't exist or validation fails + """ + # Strip openrouter/ prefix if present + bare_model_id = model_id + if bare_model_id.startswith("openrouter/"): + bare_model_id = bare_model_id[len("openrouter/"):] + + # Skip validation for non-OpenRouter models + if "/" not in bare_model_id: + logger.info("Skipping model validation for non-OpenRouter model: %s", model_id) + return True + + api_key = os.environ.get("OPENROUTER_API_KEY") + if not api_key: + logger.warning("OPENROUTER_API_KEY not set, skipping model validation") + return True + + logger.info("🔍 Validating model: %s", bare_model_id) + + # Query OpenRouter models API + endpoint = "https://openrouter.ai/api/v1/models" + headers = { + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://pinchbench.com", + "X-Title": "PinchBench", + } + + req = request.Request(endpoint, headers=headers, method="GET") + try: + with request.urlopen(req, timeout=timeout_seconds) as resp: + data = json.loads(resp.read().decode("utf-8")) + except error.HTTPError as exc: + logger.warning("OpenRouter API error during validation: %s", exc) + # Don't fail on API errors - maybe rate limited or temporary issue + return True + except error.URLError as exc: + logger.warning("Network error during model validation: %s", exc) + return True + except json.JSONDecodeError as exc: + logger.warning("Failed to parse OpenRouter response: %s", exc) + return True + + models = data.get("data", []) + model_ids = {m.get("id") for m in models if isinstance(m, dict)} + + if bare_model_id in model_ids: + logger.info("✅ Model validated: %s", bare_model_id) + return True + + # Check for close matches (typos) + close_matches = [] + bare_lower = bare_model_id.lower() + for mid in model_ids: + if bare_lower in mid.lower() or mid.lower() in bare_lower: + close_matches.append(mid) + + error_msg = f"Model '{bare_model_id}' not found on OpenRouter." + if close_matches: + close_matches_str = ", ".join(sorted(close_matches)[:5]) + error_msg += f" Did you mean: {close_matches_str}?" + else: + # Try to suggest based on provider + provider = bare_model_id.split("/")[0] if "/" in bare_model_id else None + if provider: + provider_models = [m for m in model_ids if m.startswith(f"{provider}/")] + if provider_models: + error_msg += f" Available {provider} models: {', '.join(sorted(provider_models)[:5])}" + + raise ModelValidationError(error_msg) + + def _get_agent_workspace(agent_id: str) -> Path | None: """Get the workspace path for an agent from OpenClaw config.""" try: From 31a714bbec411fe7f472b007c95b23f889032e0d Mon Sep 17 00:00:00 2001 From: Brendan O'Leary Date: Tue, 10 Mar 2026 15:25:50 -0400 Subject: [PATCH 2/2] Address review feedback: optimize model validation 1. Check specific model endpoint first (/api/v1/models/{id}) for fast path - Only fetches full catalog when model not found (for suggestions) - Reduces latency in happy path 2. Filter None values from model_ids set - Prevents None from leaking into 'Did you mean' suggestions --- scripts/lib_agent.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/scripts/lib_agent.py b/scripts/lib_agent.py index b563352..ac8fba5 100644 --- a/scripts/lib_agent.py +++ b/scripts/lib_agent.py @@ -74,35 +74,50 @@ def validate_openrouter_model(model_id: str, timeout_seconds: float = 10.0) -> b logger.info("🔍 Validating model: %s", bare_model_id) - # Query OpenRouter models API - endpoint = "https://openrouter.ai/api/v1/models" headers = { "Authorization": f"Bearer {api_key}", "HTTP-Referer": "https://pinchbench.com", "X-Title": "PinchBench", } - req = request.Request(endpoint, headers=headers, method="GET") + # First, try the specific model endpoint (fast path for valid models) + encoded_model_id = bare_model_id.replace("/", "%2F") + specific_endpoint = f"https://openrouter.ai/api/v1/models/{encoded_model_id}" + req = request.Request(specific_endpoint, headers=headers, method="GET") try: with request.urlopen(req, timeout=timeout_seconds) as resp: - data = json.loads(resp.read().decode("utf-8")) + # Model exists - validation passed + logger.info("✅ Model validated: %s", bare_model_id) + return True except error.HTTPError as exc: - logger.warning("OpenRouter API error during validation: %s", exc) - # Don't fail on API errors - maybe rate limited or temporary issue - return True + if exc.code == 404: + # Model not found - fall through to fetch full catalog for suggestions + pass + else: + logger.warning("OpenRouter API error during validation: %s", exc) + return True except error.URLError as exc: logger.warning("Network error during model validation: %s", exc) return True + + # Model not found - fetch full catalog for "did you mean" suggestions + catalog_endpoint = "https://openrouter.ai/api/v1/models" + req = request.Request(catalog_endpoint, headers=headers, method="GET") + try: + with request.urlopen(req, timeout=timeout_seconds) as resp: + data = json.loads(resp.read().decode("utf-8")) + except error.HTTPError as exc: + logger.warning("OpenRouter API error fetching model catalog: %s", exc) + raise ModelValidationError(f"Model '{bare_model_id}' not found on OpenRouter.") + except error.URLError as exc: + logger.warning("Network error fetching model catalog: %s", exc) + raise ModelValidationError(f"Model '{bare_model_id}' not found on OpenRouter.") except json.JSONDecodeError as exc: logger.warning("Failed to parse OpenRouter response: %s", exc) - return True + raise ModelValidationError(f"Model '{bare_model_id}' not found on OpenRouter.") models = data.get("data", []) - model_ids = {m.get("id") for m in models if isinstance(m, dict)} - - if bare_model_id in model_ids: - logger.info("✅ Model validated: %s", bare_model_id) - return True + model_ids = {m.get("id") for m in models if isinstance(m, dict) and m.get("id")} # Check for close matches (typos) close_matches = []