Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions scraper/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,16 @@ const MODEL_REASONING_PREFIXES = {
qwen3: true,
// Nvidia
"nvidia-nemotron-nano": true,
"nvidia-nemotron-3-super": true,
// Minimax AI
"minimax-m2": false,
// GLM
"glm-": true,
// Microsoft
"phi-4": false,
phi: false,
// Writer
"writer-palmyra": true,
} as const;

export function isReasoningModel(modelId: string): boolean {
Expand Down Expand Up @@ -320,6 +326,16 @@ export function isSelfHostableModel(modelId: string, provider: string): boolean
return true;
}

if (provider === "Microsoft") {
// Microsoft Phi models are open-source and self-hostable
return true;
}

if (provider === "Writer") {
// Writer Palmyra model is not self-hostable
return false;
}

throw new Error(
`Unknown self-hostable status for model ID: ${modelId} with provider: ${provider}. Please update isSelfHostableModel in scraper/constants.ts.`
);
Expand Down Expand Up @@ -368,6 +384,9 @@ const TRANSFORMERS_TOKENIZER_PATHS: Record<string, string> = {
gemma: "google/gemma-2-9b-it",
// IBM Granite
granite: "ibm-granite/granite-3.0-8b-instruct",
// Microsoft Phi
"phi-4": "microsoft/Phi-4",
phi: "microsoft/Phi-4",
};

export function getTokenizerForModel(modelId: string, provider: string): Tokenizers | undefined {
Expand Down
10 changes: 8 additions & 2 deletions scraper/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@ import scrapeDeepseekData from "./scrapers/deepseek";
import scrapeForexData from "./scrapers/forex";
import scrapeAwsImageData from "./scrapers/aws-image";
import scrapeOpenaiImageData from "./scrapers/openai-image";
import scrapeGcpImageData from "./scrapers/gcp-image";
import scrapeAzureData from "./scrapers/azure";
import { writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";

async function main() {
const selfPath = dirname(fileURLToPath(import.meta.url));
const dataJsonPath = join(selfPath, "..", "public", "data.json");

// Invoke all scrapers to build the data format
const fmt: DataFormat = {
scrapedAt: new Date().toISOString(),
vendors: {},
models: {},
imageModels: {},
Expand All @@ -31,14 +37,14 @@ async function main() {
scrapeOpenaiData(fmt),
scrapeDeepseekData(fmt),
scrapeForexData(),
scrapeAzureData(fmt),
// Image generation scrapers
scrapeAwsImageData(fmt),
scrapeOpenaiImageData(fmt),
scrapeGcpImageData(fmt),
]);

// Output the data as JSON
const selfPath = dirname(fileURLToPath(import.meta.url));
const dataJsonPath = join(selfPath, "..", "public", "data.json");
writeFileSync(dataJsonPath, JSON.stringify(fmt, null, 4), "utf-8");
console.log(`Wrote data to ${dataJsonPath}`);
// Note: tiktoken BPE files are fetched at build time via src/pages/tiktoken/[encoding].tiktoken.ts
Expand Down
2 changes: 1 addition & 1 deletion scraper/scrapers/aws-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export default async function scrapeAwsImageData(fmt: DataFormat) {
}

for (const [_modelId, model] of Object.entries(AWS_IMAGE_MODELS)) {
await addImageModelToFormat(fmt, "aws", "us-east-1", model);
await addImageModelToFormat(fmt, "aws", "us-east-1", model, "hardcoded", "2026-03-20");
}

console.log(
Expand Down
2 changes: 2 additions & 0 deletions scraper/scrapers/aws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const PROVIDERS = {
"Minimax AI": "CN",
"Moonshot AI": "CN",
"Z AI": "CN",
Writer: "US",
} as const;

function providerToCountryCode(provider: string): string {
Expand Down Expand Up @@ -91,6 +92,7 @@ async function processPriceDimension(
latencyMs: perfMetrics?.latencyMs ?? 0,
tokensPerSecond: perfMetrics?.tokensPerSecond ?? 0,
lowCapacity: false,
priceSource: "scraped",
};
modelEntry.vendors.push(vendor);
}
Expand Down
150 changes: 150 additions & 0 deletions scraper/scrapers/azure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import type { DataFormat } from "@/src/dataFormat";
import { addModelToFormat, type ModelDefinition } from "../shared";
import {
getModelsForProvider,
getCachedInputCost,
cleanModelName,
type LiteLLMModel,
} from "../litellm";

// Reuse the same display name overrides as the OpenAI scraper for GPT/o-series models.
// Maps an Azure base model ID (with the "azure/" namespace already stripped)
// to the display name used across the dataset.
// NOTE(review): getModelName() also prefix-matches these keys in insertion
// order, so a short key such as "gpt-4" can shadow longer keys like
// "gpt-4.1-mini" for dated/suffixed IDs — verify ordering when adding entries.
const OPENAI_MODEL_NAME_OVERRIDES: Record<string, string> = {
  "gpt-4o": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini",
  "gpt-4-turbo": "GPT-4 Turbo",
  "gpt-4": "GPT-4",
  "gpt-3.5-turbo": "GPT-3.5 Turbo",
  // o-series reasoning models are surfaced with a "GPT-" prefix for consistency.
  o1: "GPT-o1",
  "o1-mini": "GPT-o1 Mini",
  o3: "GPT-o3",
  "o3-mini": "GPT-o3 Mini",
  "o4-mini": "GPT-o4 Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "gpt-4.1-nano": "GPT-4.1 Nano",
  "gpt-5": "GPT-5",
};

// Display name overrides for Microsoft-provided (Phi) models hosted on Azure.
// Keyed by base model ID; both the bare and "-instruct" mini variants map to
// the same display name, so scrapeAzureData's name-based dedupe collapses
// them into a single model entry.
const MICROSOFT_MODEL_NAME_OVERRIDES: Record<string, string> = {
  "phi-4": "Phi-4",
  "phi-4-mini": "Phi-4 Mini",
  "phi-4-mini-instruct": "Phi-4 Mini",
};

// Prefixes of base model IDs to include from Azure's LiteLLM catalogue.
// Matching is done with String.prototype.startsWith (see shouldIncludeModel),
// so "gpt-4" already covers "gpt-4o", "gpt-4-turbo" and "gpt-4.1" — the
// redundant sub-prefixes have been removed to keep the allow-list minimal.
const INCLUDED_MODEL_PREFIXES = [
  "gpt-4",
  "gpt-5",
  "o1",
  "o3",
  "o4",
  "phi-4",
];

/**
 * Decide whether an Azure model (base ID, "azure/" prefix already stripped)
 * belongs in the dataset: text-chat models from allow-listed families only.
 */
function shouldIncludeModel(baseId: string): boolean {
  // Skip audio/realtime/embedding variants and preview releases.
  const excludedMarkers = ["audio", "realtime", "embedding", "preview"];
  if (excludedMarkers.some((marker) => baseId.includes(marker))) {
    return false;
  }
  return INCLUDED_MODEL_PREFIXES.some((prefix) => baseId.startsWith(prefix));
}

/**
 * Azure hosts both OpenAI (GPT/o-series) and Microsoft (Phi) models;
 * the base model ID is enough to tell the two providers apart.
 */
function getProvider(baseId: string): "OpenAI" | "Microsoft" {
  return baseId.startsWith("phi-") ? "Microsoft" : "OpenAI";
}

/**
 * Resolve a human-readable display name for an Azure base model ID.
 *
 * Tries an exact override lookup first, then a prefix match against the
 * override keys sorted LONGEST-FIRST, finally falling back to
 * cleanModelName. Longest-first matching fixes a bug in the original
 * insertion-order scan, where a short key like "gpt-4" shadowed more
 * specific keys ("gpt-4.1-mini", "o1" vs "o1-mini", "phi-4" vs
 * "phi-4-mini") for dated/suffixed model IDs.
 *
 * @param baseId model ID with the "azure/" namespace already stripped
 * @returns display name, or null if none could be derived
 */
function getModelName(baseId: string): string | null {
  const provider = getProvider(baseId);
  const overrides =
    provider === "Microsoft" ? MICROSOFT_MODEL_NAME_OVERRIDES : OPENAI_MODEL_NAME_OVERRIDES;

  // Exact match first (cheap, and makes intent explicit).
  const exact = overrides[baseId];
  if (exact) {
    return exact;
  }

  // Prefix match, longest key first, so the most specific override wins.
  const entries = Object.entries(overrides).sort((a, b) => b[0].length - a[0].length);
  for (const [key, name] of entries) {
    if (baseId.startsWith(key)) {
      return name;
    }
  }

  return cleanModelName(baseId, "azure");
}

/**
 * Convert a LiteLLM catalogue entry into our internal ModelDefinition.
 *
 * Returns null when the entry is unusable: missing (or zero) per-token
 * input/output pricing, or no display name could be derived.
 */
function litellmModelToDefinition(modelId: string, model: LiteLLMModel): ModelDefinition | null {
  const inputCost = model.input_cost_per_token;
  const outputCost = model.output_cost_per_token;

  // Truthiness check: entries priced at 0 are treated the same as unpriced.
  if (!inputCost || !outputCost) {
    return null;
  }

  // LiteLLM namespaces Azure entries as "azure/<model>"; strip the prefix.
  const baseId = modelId.replace(/^azure\//, "");

  const displayName = getModelName(baseId);
  if (!displayName) {
    return null;
  }

  return {
    name: displayName,
    provider: getProvider(baseId),
    pricing: {
      input: inputCost,
      output: outputCost,
      cachedInput: getCachedInputCost(model),
    },
    maxInputTokens: model.max_input_tokens,
    // Some entries only declare max_tokens; use it as the output cap fallback.
    maxOutputTokens: model.max_output_tokens ?? model.max_tokens,
  };
}

/**
 * Scrape Azure AI model pricing from the LiteLLM catalogue and register
 * vendor metadata for the Azure regions we track.
 *
 * Only allow-listed chat model families are included, deduplicated by
 * display name (the catalogue lists many dated snapshots of one model).
 */
export default async function scrapeAzureData(fmt: DataFormat) {
  const models = await getModelsForProvider("azure", "chat");
  const seenNames = new Set<string>();

  for (const [modelId, model] of models) {
    // Strip LiteLLM's "azure/" namespace to get the base model ID.
    const baseId = modelId.replace(/^azure\//, "");
    if (!shouldIncludeModel(baseId)) {
      continue;
    }

    const definition = litellmModelToDefinition(modelId, model);
    if (definition === null) {
      continue;
    }

    // Deduplicate by display name: dated snapshots share one entry.
    if (seenNames.has(definition.name)) {
      continue;
    }
    seenNames.add(definition.name);

    await addModelToFormat(fmt, "azure", "eastus", definition);
  }

  // Static vendor metadata; key order here flows into the serialized output.
  fmt.vendors["azure"] = {
    cleanName: "Azure AI",
    learnMoreUrl: "https://azure.microsoft.com/en-us/products/ai-services/openai-service",
    euOrUKRegions: ["westeurope", "northeurope", "uksouth", "swedencentral"],
    usaRegions: ["eastus", "eastus2", "westus", "westus3", "northcentralus", "southcentralus"],
    regionCleanNames: {
      "": {
        eastus: "East US (Virginia)",
        eastus2: "East US 2 (Virginia)",
        westus: "West US (California)",
        westus3: "West US 3 (Arizona)",
        northcentralus: "North Central US (Illinois)",
        southcentralus: "South Central US (Texas)",
        westeurope: "West Europe (Netherlands)",
        northeurope: "North Europe (Ireland)",
        uksouth: "UK South (London)",
        swedencentral: "Sweden Central",
        australiaeast: "Australia East (New South Wales)",
        japaneast: "Japan East (Tokyo)",
      },
    },
  };

  console.log(`Finished scraping Azure AI data (${seenNames.size} models from LiteLLM)`);
}
43 changes: 43 additions & 0 deletions scraper/scrapers/gcp-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import type { DataFormat } from "@/src/dataFormat";
import { addImageModelToFormat, type ImageModelDefinition } from "../shared";

// GCP Vertex AI image generation model pricing
// Prices are per image
// Source: https://cloud.google.com/vertex-ai/generative-ai/pricing
// NOTE(review): insertion order here determines the order models are added
// to the output data — keep it stable when editing.
const GCP_IMAGE_MODELS: Record<string, ImageModelDefinition> = {
  "imagen-3.0-generate": {
    name: "Imagen 3",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: false,
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.04 }], // $0.04 per image
  },
  "imagen-3.0-fast-generate": {
    name: "Imagen 3 Fast",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: false,
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.02 }], // $0.02 per image
  },
  // "imagegeneration@006" is the versioned Vertex AI model ID for Imagen 2.
  "imagegeneration@006": {
    name: "Imagen 2",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: true, // unlike the Imagen 3 entries above
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.02 }],
  },
};

/**
 * Register the hardcoded GCP Vertex AI image-generation models.
 * Prices were last verified on 2026-03-20 (passed through as
 * priceVerifiedAt alongside the "hardcoded" price source).
 */
export default async function scrapeGcpImageData(fmt: DataFormat) {
  // The image-model map may be absent on the format object; create it lazily.
  fmt.imageModels ??= {};

  const entries = Object.entries(GCP_IMAGE_MODELS);
  for (const [, model] of entries) {
    await addImageModelToFormat(fmt, "gcp", "us-central1", model, "hardcoded", "2026-03-20");
  }

  console.log(
    `Finished scraping GCP Vertex AI image generation data (${entries.length} models)`
  );
}
2 changes: 1 addition & 1 deletion scraper/scrapers/openai-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export default async function scrapeOpenaiImageData(fmt: DataFormat) {
}

for (const [_modelId, model] of Object.entries(OPENAI_IMAGE_MODELS)) {
await addImageModelToFormat(fmt, "openai", "global", model);
await addImageModelToFormat(fmt, "openai", "global", model, "hardcoded", "2026-03-20");
}

console.log(
Expand Down
12 changes: 10 additions & 2 deletions scraper/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export const PROVIDERS: Record<string, string> = {
Google: "US",
IBM: "US",
Alibaba: "CN",
Microsoft: "US",
};

export function slugify(name: string, provider: string): string {
Expand Down Expand Up @@ -66,7 +67,8 @@ export async function addModelToFormat(
fmt: DataFormat,
vendorRef: string,
regionCode: string,
model: ModelDefinition
model: ModelDefinition,
source: "scraped" | "hardcoded" = "scraped"
): Promise<void> {
const slugifiedModel = slugify(model.name, model.provider);
let modelEntry = fmt.models[slugifiedModel];
Expand Down Expand Up @@ -120,6 +122,7 @@ export async function addModelToFormat(
latencyMs: perfMetrics?.latencyMs ?? 0,
tokensPerSecond: perfMetrics?.tokensPerSecond ?? 0,
lowCapacity: false,
priceSource: source,
};
modelEntry.vendors.push(vendor);
}
Expand All @@ -142,6 +145,7 @@ export const IMAGE_PROVIDERS: Record<string, string> = {
"Stability AI": "GB",
Amazon: "US",
OpenAI: "US",
Google: "US",
};

export type ImageModelPricing = {
Expand Down Expand Up @@ -171,7 +175,9 @@ export async function addImageModelToFormat(
fmt: DataFormat,
vendorRef: string,
regionCode: string,
model: ImageModelDefinition
model: ImageModelDefinition,
source: "scraped" | "hardcoded" = "scraped",
priceVerifiedAt?: string // ISO date string, should be set when source === "hardcoded"
): Promise<void> {
const slugifiedModel = slugify(model.name, model.provider);
let modelEntry = fmt.imageModels[slugifiedModel];
Expand All @@ -196,6 +202,8 @@ export async function addImageModelToFormat(
regionPricing: {},
latencyMs: 0,
lowCapacity: false,
priceSource: source,
...(priceVerifiedAt ? { priceVerifiedAt } : {}),
};
modelEntry.vendors.push(vendor);
}
Expand Down
3 changes: 2 additions & 1 deletion src/components/ModelTypeTabs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ function usePath() {
window.addEventListener("popstate", onStoreChange);
return () => window.removeEventListener("popstate", onStoreChange);
},
() => window.location.pathname
() => window.location.pathname,
() => ""
);
}

Expand Down
Loading
Loading