Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions scraper/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,16 @@ const MODEL_REASONING_PREFIXES = {
qwen3: true,
// Nvidia
"nvidia-nemotron-nano": true,
"nvidia-nemotron-3-super": true,
// Minimax AI
"minimax-m2": false,
// GLM
"glm-": true,
// Microsoft
"phi-4": false,
phi: false,
// Writer
"writer-palmyra": true,
} as const;

export function isReasoningModel(modelId: string): boolean {
Expand Down Expand Up @@ -320,6 +326,16 @@ export function isSelfHostableModel(modelId: string, provider: string): boolean
return true;
}

if (provider === "Microsoft") {
// Microsoft Phi models are open-source and self-hostable
return true;
}

if (provider === "Writer") {
// Writer Palmyra model is not self-hostable
return false;
}

throw new Error(
`Unknown self-hostable status for model ID: ${modelId} with provider: ${provider}. Please update isSelfHostableModel in scraper/constants.ts.`
);
Expand Down Expand Up @@ -368,6 +384,9 @@ const TRANSFORMERS_TOKENIZER_PATHS: Record<string, string> = {
gemma: "google/gemma-2-9b-it",
// IBM Granite
granite: "ibm-granite/granite-3.0-8b-instruct",
// Microsoft Phi
"phi-4": "microsoft/Phi-4",
phi: "microsoft/Phi-4",
};

export function getTokenizerForModel(modelId: string, provider: string): Tokenizers | undefined {
Expand Down
10 changes: 8 additions & 2 deletions scraper/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@ import scrapeDeepseekData from "./scrapers/deepseek";
import scrapeForexData from "./scrapers/forex";
import scrapeAwsImageData from "./scrapers/aws-image";
import scrapeOpenaiImageData from "./scrapers/openai-image";
import scrapeGcpImageData from "./scrapers/gcp-image";
import scrapeAzureData from "./scrapers/azure";
import { writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";

async function main() {
const selfPath = dirname(fileURLToPath(import.meta.url));
const dataJsonPath = join(selfPath, "..", "public", "data.json");

// Invoke all scrapers to build the data format
const fmt: DataFormat = {
scrapedAt: new Date().toISOString(),
vendors: {},
models: {},
imageModels: {},
Expand All @@ -31,14 +37,14 @@ async function main() {
scrapeOpenaiData(fmt),
scrapeDeepseekData(fmt),
scrapeForexData(),
scrapeAzureData(fmt),
// Image generation scrapers
scrapeAwsImageData(fmt),
scrapeOpenaiImageData(fmt),
scrapeGcpImageData(fmt),
]);

// Output the data as JSON
const selfPath = dirname(fileURLToPath(import.meta.url));
const dataJsonPath = join(selfPath, "..", "public", "data.json");
writeFileSync(dataJsonPath, JSON.stringify(fmt, null, 4), "utf-8");
console.log(`Wrote data to ${dataJsonPath}`);
// Note: tiktoken BPE files are fetched at build time via src/pages/tiktoken/[encoding].tiktoken.ts
Expand Down
2 changes: 1 addition & 1 deletion scraper/scrapers/aws-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export default async function scrapeAwsImageData(fmt: DataFormat) {
}

for (const [_modelId, model] of Object.entries(AWS_IMAGE_MODELS)) {
await addImageModelToFormat(fmt, "aws", "us-east-1", model);
await addImageModelToFormat(fmt, "aws", "us-east-1", model, "hardcoded", "2026-03-20");
}

console.log(
Expand Down
2 changes: 2 additions & 0 deletions scraper/scrapers/aws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const PROVIDERS = {
"Minimax AI": "CN",
"Moonshot AI": "CN",
"Z AI": "CN",
Writer: "US",
} as const;

function providerToCountryCode(provider: string): string {
Expand Down Expand Up @@ -91,6 +92,7 @@ async function processPriceDimension(
latencyMs: perfMetrics?.latencyMs ?? 0,
tokensPerSecond: perfMetrics?.tokensPerSecond ?? 0,
lowCapacity: false,
priceSource: "scraped",
};
modelEntry.vendors.push(vendor);
}
Expand Down
150 changes: 150 additions & 0 deletions scraper/scrapers/azure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import type { DataFormat } from "@/src/dataFormat";
import { addModelToFormat, type ModelDefinition } from "../shared";
import {
getModelsForProvider,
getCachedInputCost,
cleanModelName,
type LiteLLMModel,
} from "../litellm";

// Reuse the same display name overrides as the OpenAI scraper for GPT/o-series models.
// Maps an Azure base model ID (with the "azure/" namespace already stripped)
// to the display name used across the dataset.
// NOTE(review): getModelName() also prefix-matches these keys in insertion
// order, so a short key such as "gpt-4" can shadow longer keys like
// "gpt-4.1-mini" for dated/suffixed IDs — verify ordering when adding entries.
const OPENAI_MODEL_NAME_OVERRIDES: Record<string, string> = {
  "gpt-4o": "GPT-4o",
  "gpt-4o-mini": "GPT-4o Mini",
  "gpt-4-turbo": "GPT-4 Turbo",
  "gpt-4": "GPT-4",
  "gpt-3.5-turbo": "GPT-3.5 Turbo",
  // o-series reasoning models are surfaced with a "GPT-" prefix for consistency.
  o1: "GPT-o1",
  "o1-mini": "GPT-o1 Mini",
  o3: "GPT-o3",
  "o3-mini": "GPT-o3 Mini",
  "o4-mini": "GPT-o4 Mini",
  "gpt-4.1": "GPT-4.1",
  "gpt-4.1-mini": "GPT-4.1 Mini",
  "gpt-4.1-nano": "GPT-4.1 Nano",
  "gpt-5": "GPT-5",
};

// Display name overrides for Microsoft-provided (Phi) models hosted on Azure.
// Keyed by base model ID; both the bare and "-instruct" mini variants map to
// the same display name, so scrapeAzureData's name-based dedupe collapses
// them into a single model entry.
const MICROSOFT_MODEL_NAME_OVERRIDES: Record<string, string> = {
  "phi-4": "Phi-4",
  "phi-4-mini": "Phi-4 Mini",
  "phi-4-mini-instruct": "Phi-4 Mini",
};

// Prefixes of base model IDs to include from Azure's LiteLLM catalogue.
// Matching is done with String.prototype.startsWith (see shouldIncludeModel),
// so "gpt-4" already covers "gpt-4o", "gpt-4-turbo" and "gpt-4.1" — the
// redundant sub-prefixes have been removed to keep the allow-list minimal.
const INCLUDED_MODEL_PREFIXES = [
  "gpt-4",
  "gpt-5",
  "o1",
  "o3",
  "o4",
  "phi-4",
];

/**
 * Decide whether an Azure model (base ID, "azure/" prefix already stripped)
 * belongs in the dataset: text-chat models from allow-listed families only.
 */
function shouldIncludeModel(baseId: string): boolean {
  // Skip audio/realtime/embedding variants and preview releases.
  const excludedMarkers = ["audio", "realtime", "embedding", "preview"];
  if (excludedMarkers.some((marker) => baseId.includes(marker))) {
    return false;
  }
  return INCLUDED_MODEL_PREFIXES.some((prefix) => baseId.startsWith(prefix));
}

/**
 * Azure hosts both OpenAI (GPT/o-series) and Microsoft (Phi) models;
 * the base model ID is enough to tell the two providers apart.
 */
function getProvider(baseId: string): "OpenAI" | "Microsoft" {
  return baseId.startsWith("phi-") ? "Microsoft" : "OpenAI";
}

/**
 * Resolve a human-readable display name for an Azure base model ID.
 *
 * Tries an exact override lookup first, then a prefix match against the
 * override keys sorted LONGEST-FIRST, finally falling back to
 * cleanModelName. Longest-first matching fixes a bug in the original
 * insertion-order scan, where a short key like "gpt-4" shadowed more
 * specific keys ("gpt-4.1-mini", "o1" vs "o1-mini", "phi-4" vs
 * "phi-4-mini") for dated/suffixed model IDs.
 *
 * @param baseId model ID with the "azure/" namespace already stripped
 * @returns display name, or null if none could be derived
 */
function getModelName(baseId: string): string | null {
  const provider = getProvider(baseId);
  const overrides =
    provider === "Microsoft" ? MICROSOFT_MODEL_NAME_OVERRIDES : OPENAI_MODEL_NAME_OVERRIDES;

  // Exact match first (cheap, and makes intent explicit).
  const exact = overrides[baseId];
  if (exact) {
    return exact;
  }

  // Prefix match, longest key first, so the most specific override wins.
  const entries = Object.entries(overrides).sort((a, b) => b[0].length - a[0].length);
  for (const [key, name] of entries) {
    if (baseId.startsWith(key)) {
      return name;
    }
  }

  return cleanModelName(baseId, "azure");
}

/**
 * Convert a LiteLLM catalogue entry into our internal ModelDefinition.
 *
 * Returns null when the entry is unusable: missing (or zero) per-token
 * input/output pricing, or no display name could be derived.
 */
function litellmModelToDefinition(modelId: string, model: LiteLLMModel): ModelDefinition | null {
  const inputCost = model.input_cost_per_token;
  const outputCost = model.output_cost_per_token;

  // Truthiness check: entries priced at 0 are treated the same as unpriced.
  if (!inputCost || !outputCost) {
    return null;
  }

  // LiteLLM namespaces Azure entries as "azure/<model>"; strip the prefix.
  const baseId = modelId.replace(/^azure\//, "");

  const displayName = getModelName(baseId);
  if (!displayName) {
    return null;
  }

  return {
    name: displayName,
    provider: getProvider(baseId),
    pricing: {
      input: inputCost,
      output: outputCost,
      cachedInput: getCachedInputCost(model),
    },
    maxInputTokens: model.max_input_tokens,
    // Some entries only declare max_tokens; use it as the output cap fallback.
    maxOutputTokens: model.max_output_tokens ?? model.max_tokens,
  };
}

/**
 * Scrape Azure AI model pricing from the LiteLLM catalogue and register
 * vendor metadata for the Azure regions we track.
 *
 * Only allow-listed chat model families are included, deduplicated by
 * display name (the catalogue lists many dated snapshots of one model).
 */
export default async function scrapeAzureData(fmt: DataFormat) {
  const models = await getModelsForProvider("azure", "chat");
  const seenNames = new Set<string>();

  for (const [modelId, model] of models) {
    // Strip LiteLLM's "azure/" namespace to get the base model ID.
    const baseId = modelId.replace(/^azure\//, "");
    if (!shouldIncludeModel(baseId)) {
      continue;
    }

    const definition = litellmModelToDefinition(modelId, model);
    if (definition === null) {
      continue;
    }

    // Deduplicate by display name: dated snapshots share one entry.
    if (seenNames.has(definition.name)) {
      continue;
    }
    seenNames.add(definition.name);

    await addModelToFormat(fmt, "azure", "eastus", definition);
  }

  // Static vendor metadata; key order here flows into the serialized output.
  fmt.vendors["azure"] = {
    cleanName: "Azure AI",
    learnMoreUrl: "https://azure.microsoft.com/en-us/products/ai-services/openai-service",
    euOrUKRegions: ["westeurope", "northeurope", "uksouth", "swedencentral"],
    usaRegions: ["eastus", "eastus2", "westus", "westus3", "northcentralus", "southcentralus"],
    regionCleanNames: {
      "": {
        eastus: "East US (Virginia)",
        eastus2: "East US 2 (Virginia)",
        westus: "West US (California)",
        westus3: "West US 3 (Arizona)",
        northcentralus: "North Central US (Illinois)",
        southcentralus: "South Central US (Texas)",
        westeurope: "West Europe (Netherlands)",
        northeurope: "North Europe (Ireland)",
        uksouth: "UK South (London)",
        swedencentral: "Sweden Central",
        australiaeast: "Australia East (New South Wales)",
        japaneast: "Japan East (Tokyo)",
      },
    },
  };

  console.log(`Finished scraping Azure AI data (${seenNames.size} models from LiteLLM)`);
}
43 changes: 43 additions & 0 deletions scraper/scrapers/gcp-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import type { DataFormat } from "@/src/dataFormat";
import { addImageModelToFormat, type ImageModelDefinition } from "../shared";

// GCP Vertex AI image generation model pricing
// Prices are per image
// Source: https://cloud.google.com/vertex-ai/generative-ai/pricing
// NOTE(review): insertion order here determines the order models are added
// to the output data — keep it stable when editing.
const GCP_IMAGE_MODELS: Record<string, ImageModelDefinition> = {
  "imagen-3.0-generate": {
    name: "Imagen 3",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: false,
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.04 }], // $0.04 per image
  },
  "imagen-3.0-fast-generate": {
    name: "Imagen 3 Fast",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: false,
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.02 }], // $0.02 per image
  },
  // "imagegeneration@006" is the versioned Vertex AI model ID for Imagen 2.
  "imagegeneration@006": {
    name: "Imagen 2",
    provider: "Google",
    supportedResolutions: ["1024x1024"],
    supportsNegativePrompts: true, // unlike the Imagen 3 entries above
    pricing: [{ resolution: "1024x1024", pricePerImage: 0.02 }],
  },
};

/**
 * Register the hardcoded GCP Vertex AI image-generation models.
 * Prices were last verified on 2026-03-20 (passed through as
 * priceVerifiedAt alongside the "hardcoded" price source).
 */
export default async function scrapeGcpImageData(fmt: DataFormat) {
  // The image-model map may be absent on the format object; create it lazily.
  fmt.imageModels ??= {};

  const entries = Object.entries(GCP_IMAGE_MODELS);
  for (const [, model] of entries) {
    await addImageModelToFormat(fmt, "gcp", "us-central1", model, "hardcoded", "2026-03-20");
  }

  console.log(
    `Finished scraping GCP Vertex AI image generation data (${entries.length} models)`
  );
}
2 changes: 1 addition & 1 deletion scraper/scrapers/openai-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export default async function scrapeOpenaiImageData(fmt: DataFormat) {
}

for (const [_modelId, model] of Object.entries(OPENAI_IMAGE_MODELS)) {
await addImageModelToFormat(fmt, "openai", "global", model);
await addImageModelToFormat(fmt, "openai", "global", model, "hardcoded", "2026-03-20");
}

console.log(
Expand Down
12 changes: 10 additions & 2 deletions scraper/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export const PROVIDERS: Record<string, string> = {
Google: "US",
IBM: "US",
Alibaba: "CN",
Microsoft: "US",
};

export function slugify(name: string, provider: string): string {
Expand Down Expand Up @@ -66,7 +67,8 @@ export async function addModelToFormat(
fmt: DataFormat,
vendorRef: string,
regionCode: string,
model: ModelDefinition
model: ModelDefinition,
source: "scraped" | "hardcoded" = "scraped"
): Promise<void> {
const slugifiedModel = slugify(model.name, model.provider);
let modelEntry = fmt.models[slugifiedModel];
Expand Down Expand Up @@ -120,6 +122,7 @@ export async function addModelToFormat(
latencyMs: perfMetrics?.latencyMs ?? 0,
tokensPerSecond: perfMetrics?.tokensPerSecond ?? 0,
lowCapacity: false,
priceSource: source,
};
modelEntry.vendors.push(vendor);
}
Expand All @@ -142,6 +145,7 @@ export const IMAGE_PROVIDERS: Record<string, string> = {
"Stability AI": "GB",
Amazon: "US",
OpenAI: "US",
Google: "US",
};

export type ImageModelPricing = {
Expand Down Expand Up @@ -171,7 +175,9 @@ export async function addImageModelToFormat(
fmt: DataFormat,
vendorRef: string,
regionCode: string,
model: ImageModelDefinition
model: ImageModelDefinition,
source: "scraped" | "hardcoded" = "scraped",
priceVerifiedAt?: string // ISO date string, should be set when source === "hardcoded"
): Promise<void> {
const slugifiedModel = slugify(model.name, model.provider);
let modelEntry = fmt.imageModels[slugifiedModel];
Expand All @@ -196,6 +202,8 @@ export async function addImageModelToFormat(
regionPricing: {},
latencyMs: 0,
lowCapacity: false,
priceSource: source,
...(priceVerifiedAt ? { priceVerifiedAt } : {}),
};
modelEntry.vendors.push(vendor);
}
Expand Down
3 changes: 2 additions & 1 deletion src/components/ModelTypeTabs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ function usePath() {
window.addEventListener("popstate", onStoreChange);
return () => window.removeEventListener("popstate", onStoreChange);
},
() => window.location.pathname
() => window.location.pathname,
() => ""
);
}

Expand Down
Loading
Loading