diff --git a/docs.json b/docs.json
index 5f004d14..4b5ae716 100644
--- a/docs.json
+++ b/docs.json
@@ -19,6 +19,7 @@
"groups": [
{
"group": "Introduction",
+ "icon": "rocket",
"pages": [
"introduction/what-is-portkey",
"introduction/make-your-first-request",
@@ -27,6 +28,7 @@
},
{
"group": "Product",
+ "icon": "cube",
"pages": [
{
"group": "Observability",
@@ -55,7 +57,15 @@
"group": "AI Gateway",
"pages": [
"product/ai-gateway",
- "product/ai-gateway/universal-api",
+ {
+ "group": "Universal API",
+ "pages": [
+ "product/ai-gateway/universal-api",
+ "product/ai-gateway/chat-completions",
+ "product/ai-gateway/messages-api",
+ "product/ai-gateway/responses-api"
+ ]
+ },
"product/ai-gateway/configs",
"product/ai-gateway/remote-mcp",
"product/ai-gateway/conditional-routing",
@@ -246,6 +256,7 @@
},
{
"group": "Self-Hosting",
+ "icon": "server",
"pages": [
{
"group": "Hybrid Deployments",
@@ -267,6 +278,7 @@
},
{
"group": "Support",
+ "icon": "life-ring",
"pages": [
"support/upgrade-to-model-catalog",
"README",
diff --git a/product/ai-gateway.mdx b/product/ai-gateway.mdx
index 36299f99..a75f09fe 100644
--- a/product/ai-gateway.mdx
+++ b/product/ai-gateway.mdx
@@ -1,5 +1,6 @@
---
title: AI Gateway
+sidebarTitle: "Overview"
description: The world's fastest AI Gateway with advanced routing & integrated Guardrails.
---
diff --git a/product/ai-gateway/chat-completions.mdx b/product/ai-gateway/chat-completions.mdx
new file mode 100644
index 00000000..f7759b43
--- /dev/null
+++ b/product/ai-gateway/chat-completions.mdx
@@ -0,0 +1,539 @@
+---
+title: "Chat Completions"
+description: "Use OpenAI-compatible Chat Completions with any LLM provider through Portkey's AI Gateway."
+---
+
+
+Available on all Portkey [plans](https://portkey.ai/pricing).
+
+
+The [Chat Completions](https://platform.openai.com/docs/api-reference/chat) API is the most widely adopted format for LLM interaction. Portkey makes it work with **every provider** — send the same `POST /v1/chat/completions` request to OpenAI, Anthropic, Gemini, Bedrock, or any of the 3000+ supported models.
+
+## Quick Start
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Explain quantum computing in simple terms"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Explain quantum computing in simple terms" }]
+});
+
+console.log(response.choices[0].message.content);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [{"role": "user", "content": "Explain quantum computing in simple terms"}]
+ }'
+```
+
+
+
+Switch `model` to use any provider — `@anthropic-provider/claude-sonnet-4-5-20250514`, `@google-provider/gemini-2.0-flash`, or any of the 3000+ supported models.
+
+
+### Using the OpenAI SDK
+
+The Portkey SDK is a superset of the OpenAI SDK, so all Chat Completions methods work identically. The OpenAI SDK also works directly with Portkey's base URL:
+
+
+```python OpenAI Python SDK
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1"
+)
+
+response = client.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Explain quantum computing in simple terms"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript OpenAI Node SDK
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1"
+});
+
+const response = await client.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Explain quantum computing in simple terms" }]
+});
+
+console.log(response.choices[0].message.content);
+```
+
+
+## System Messages
+
+Set a system prompt using the `system` role in the messages array:
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[
+ {"role": "system", "content": "You are a pirate. Always respond in pirate speak."},
+ {"role": "user", "content": "Say hello."}
+ ]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [
+ { role: "system", content: "You are a pirate. Always respond in pirate speak." },
+ { role: "user", content: "Say hello." }
+ ]
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [
+ {"role": "system", "content": "You are a pirate. Always respond in pirate speak."},
+ {"role": "user", "content": "Say hello."}
+ ]
+ }'
+```
+
+
+## Streaming
+
+Stream responses token-by-token with `stream: true`.
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+stream = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Write a haiku about AI"}],
+ stream=True
+)
+
+for chunk in stream:
+ if chunk.choices[0].delta.content:
+ print(chunk.choices[0].delta.content, end="", flush=True)
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const stream = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Write a haiku about AI" }],
+ stream: true
+});
+
+for await (const chunk of stream) {
+ const content = chunk.choices[0]?.delta?.content;
+ if (content) process.stdout.write(content);
+}
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [{"role": "user", "content": "Write a haiku about AI"}],
+ "stream": true
+ }'
+```
+
+
+## Function Calling
+
+Define tools with the `tools` parameter. Works across all providers that support function calling.
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
+ tools=[{
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }
+ }]
+)
+
+tool_call = response.choices[0].message.tool_calls[0]
+print(f"Function: {tool_call.function.name}")
+print(f"Arguments: {tool_call.function.arguments}")
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "What's the weather in San Francisco?" }],
+ tools: [{
+ type: "function",
+ function: {
+ name: "get_weather",
+ description: "Get current weather for a location",
+ parameters: {
+ type: "object",
+ properties: {
+ location: { type: "string", description: "City name" }
+ },
+ required: ["location"]
+ }
+ }
+ }]
+});
+
+const toolCall = response.choices[0].message.tool_calls[0];
+console.log(`Function: ${toolCall.function.name}`);
+console.log(`Arguments: ${toolCall.function.arguments}`);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [{"role": "user", "content": "What'\''s the weather in San Francisco?"}],
+ "tools": [{
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }
+ }]
+ }'
+```
+
+
+### Function Call Results
+
+Pass tool results back to continue the conversation:
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"role": "assistant", "tool_calls": [{"id": "call_123", "type": "function", "function": {"name": "get_weather", "arguments": '{"location": "Paris"}'}}]},
+ {"role": "tool", "tool_call_id": "call_123", "content": '{"temp": "22°C", "condition": "sunny"}'}
+ ]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [
+ { role: "user", content: "What's the weather in Paris?" },
+ { role: "assistant", tool_calls: [{ id: "call_123", type: "function", function: { name: "get_weather", arguments: '{"location": "Paris"}' } }] },
+ { role: "tool", tool_call_id: "call_123", content: '{"temp": "22°C", "condition": "sunny"}' }
+ ]
+});
+
+console.log(response.choices[0].message.content);
+```
+
+
+## Vision
+
+Send images in the `content` array using the `image_url` type. Works with all vision-capable models.
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "Describe this image"},
+ {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+ ]
+ }]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{
+ role: "user",
+ content: [
+ { type: "text", text: "Describe this image" },
+ { type: "image_url", image_url: { url: "https://example.com/image.jpg" } }
+ ]
+ }]
+});
+
+console.log(response.choices[0].message.content);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [{
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "Describe this image"},
+ {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+ ]
+ }]
+ }'
+```
+
+
+Base64-encoded images are also supported — pass a data URL as the `url` value:
+
+```python Python
+{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ..."}}
+```
+
+## Structured Output
+
+### JSON Schema
+
+Force the model to return structured JSON matching a specific schema:
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Extract: John is 30 years old"}],
+ response_format={
+ "type": "json_schema",
+ "json_schema": {
+ "name": "person",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "age": {"type": "integer"}
+ },
+ "required": ["name", "age"],
+ "additionalProperties": False
+ }
+ }
+ }
+)
+
+print(response.choices[0].message.content) # {"name": "John", "age": 30}
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Extract: John is 30 years old" }],
+ response_format: {
+ type: "json_schema",
+ json_schema: {
+ name: "person",
+ schema: {
+ type: "object",
+ properties: {
+ name: { type: "string" },
+ age: { type: "integer" }
+ },
+ required: ["name", "age"],
+ additionalProperties: false
+ }
+ }
+ }
+});
+
+console.log(response.choices[0].message.content);
+```
+
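+The model returns the structured result as a JSON string in `message.content` -- a minimal parsing sketch, assuming the request above succeeded:
+
+```python Python
+import json
+
+# The content string is guaranteed to match the schema when the request succeeds
+person = json.loads(response.choices[0].message.content)
+print(person["name"], person["age"])  # John 30
+```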
+
+### JSON Mode
+
+For free-form JSON output without a strict schema:
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "List 3 programming languages and their main use cases as JSON"}],
+ response_format={"type": "json_object"}
+)
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "List 3 programming languages and their main use cases as JSON" }],
+ response_format: { type: "json_object" }
+});
+```
+
+
+## Multi-turn Conversations
+
+Pass the full conversation history in the `messages` array:
+
+
+```python Python
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+)
+
+print(response.choices[0].message.content) # "Your name is Alice."
+```
+
+```javascript JavaScript
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [
+ { role: "system", content: "You are a helpful assistant." },
+ { role: "user", content: "My name is Alice." },
+ { role: "assistant", content: "Hello Alice! How can I help you?" },
+ { role: "user", content: "What is my name?" }
+ ]
+});
+
+console.log(response.choices[0].message.content);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@openai-provider/gpt-4o",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+ }'
+```
+
+
+## Using with Portkey Features
+
+Chat Completions works with all Portkey gateway features:
+
+- **[Configs](/product/ai-gateway/configs)** -- Route, load balance, and set fallbacks
+- **[Caching](/product/ai-gateway/cache-simple-and-semantic)** -- Cache responses for faster, cheaper calls
+- **[Guardrails](/product/guardrails)** -- Add input/output guardrails
+- **[Observability](/product/observability)** -- Full logging and tracing
+
+
+```python Python
+portkey = Portkey(
+ api_key="PORTKEY_API_KEY",
+ config="pp-config-xxx" # Config with fallbacks, load balancing, etc.
+)
+
+response = portkey.chat.completions.create(
+ model="gpt-4o",
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -H "x-portkey-config: pp-config-xxx" \
+ -d '{
+ "model": "gpt-4o",
+ "messages": [{"role": "user", "content": "Hello!"}]
+ }'
+```
+
+
+## API Reference
+
+- [Chat Completions](/api-reference/inference-api/chat) -- `POST /v1/chat/completions`
+
+See also:
+
+- [OpenAI Chat Completions specification](https://platform.openai.com/docs/api-reference/chat)
+- [Portkey Chat Completions reference](/api-reference/inference-api/chat)
+- [Universal API overview](/product/ai-gateway/universal-api) -- all three API formats
+- Detailed function calling guide
+
diff --git a/product/ai-gateway/messages-api.mdx b/product/ai-gateway/messages-api.mdx
new file mode 100644
index 00000000..f9de6da8
--- /dev/null
+++ b/product/ai-gateway/messages-api.mdx
@@ -0,0 +1,569 @@
+---
+title: "Messages"
+description: "Use Anthropic's Messages API with any LLM provider through Portkey's AI Gateway."
+---
+
+
+Available on all Portkey [plans](https://portkey.ai/pricing).
+
+
+The [Messages API](https://docs.anthropic.com/en/api/messages) is Anthropic's native format for interacting with Claude models. Portkey extends it to work with **all providers** — use the Anthropic SDK pointed at Portkey's base URL, and switch between providers by changing the model string.
+
+## Quick Start
+
+Use the Anthropic SDK with Portkey's base URL. The `@provider-slug/model` format routes requests to the correct provider.
+
+
+```python Python
+import anthropic
+
+client = anthropic.Anthropic(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai"
+)
+
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Explain quantum computing in simple terms"}]
+)
+
+print(message.content[0].text)
+```
+
+```typescript TypeScript
+import Anthropic from '@anthropic-ai/sdk';
+
+const client = new Anthropic({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai"
+});
+
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Explain quantum computing in simple terms" }]
+});
+
+console.log(message.content[0].text);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "messages": [{"role": "user", "content": "Explain quantum computing in simple terms"}]
+ }'
+```
+
+
+
+`max_tokens` is required for the Messages API. Switch `model` to use any provider — `@openai-provider/gpt-4o`, `@google-provider/gemini-2.0-flash`, etc.
+
+
+## How It Works
+
+Portkey receives Messages API requests and translates them to each provider's native format:
+
+- **Anthropic** — requests pass through directly
+- **All other providers** — Portkey's adapter translates between Messages format and the provider's native format
+
+The response always comes back in Anthropic Messages format, regardless of which provider handles the request.
+
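+For example, the same request can target a non-Anthropic provider -- a sketch assuming an `@openai-provider` slug is configured in your [Model Catalog](/product/model-catalog):
+
+```python Python
+# Same Anthropic SDK and Messages format, served by OpenAI.
+# Portkey translates the request and returns an Anthropic-shaped response.
+message = client.messages.create(
+    model="@openai-provider/gpt-4o",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Explain quantum computing in simple terms"}]
+)
+
+print(message.content[0].text)
+```
+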
+## System Prompt
+
+Set a system prompt with the top-level `system` parameter (not inside `messages`):
+
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ system="You are a pirate. Always respond in pirate speak.",
+ messages=[{"role": "user", "content": "Say hello."}]
+)
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ system: "You are a pirate. Always respond in pirate speak.",
+ messages: [{ role: "user", content: "Say hello." }]
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "system": "You are a pirate. Always respond in pirate speak.",
+ "messages": [{"role": "user", "content": "Say hello."}]
+ }'
+```
+
+
+The `system` parameter also accepts an array of content blocks for prompt caching:
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ system=[
+ {"type": "text", "text": "You are an expert on this topic..."},
+ {"type": "text", "text": "Here is the reference material...", "cache_control": {"type": "ephemeral"}}
+ ],
+ messages=[{"role": "user", "content": "Summarize the key points"}]
+)
+```
+
+## Streaming
+
+Stream responses using the SDK's streaming helpers, the `stream=True` flag, or the `stream` parameter in raw requests.
+
+
+```python Python
+with client.messages.stream(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Write a haiku about AI"}]
+) as stream:
+ for text in stream.text_stream:
+ print(text, end="", flush=True)
+```
+
+```typescript TypeScript
+const stream = client.messages.stream({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Write a haiku about AI" }]
+});
+
+for await (const event of stream) {
+ if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
+ process.stdout.write(event.delta.text);
+ }
+}
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "stream": true,
+ "messages": [{"role": "user", "content": "Write a haiku about AI"}]
+ }'
+```
+
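+If you prefer the raw `stream=True` flag over the streaming helper, iterate the SSE events directly -- a sketch, with event shapes following Anthropic's streaming spec:
+
+```python Python
+stream = client.messages.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    max_tokens=1024,
+    stream=True,
+    messages=[{"role": "user", "content": "Write a haiku about AI"}]
+)
+
+for event in stream:
+    # Text arrives in content_block_delta events carrying a text_delta payload
+    if event.type == "content_block_delta" and event.delta.type == "text_delta":
+        print(event.delta.text, end="", flush=True)
+```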
+
+## Tool Use
+
+Define tools with `name`, `description`, and `input_schema` (note: different from Chat Completions' `parameters`):
+
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
+ tools=[{
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }]
+)
+
+for block in message.content:
+ if block.type == "tool_use":
+ print(f"Tool: {block.name}, Input: {block.input}")
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "What's the weather in San Francisco?" }],
+ tools: [{
+ name: "get_weather",
+ description: "Get current weather for a location",
+ input_schema: {
+ type: "object",
+ properties: {
+ location: { type: "string", description: "City name" }
+ },
+ required: ["location"]
+ }
+ }]
+});
+
+for (const block of message.content) {
+ if (block.type === "tool_use") {
+ console.log(`Tool: ${block.name}, Input: ${JSON.stringify(block.input)}`);
+ }
+}
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "messages": [{"role": "user", "content": "What'\''s the weather in San Francisco?"}],
+ "tools": [{
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }]
+ }'
+```
+
+
+### Tool Results
+
+Pass tool results back to continue the conversation. Tool results go in a `user` message with `tool_result` content blocks:
+
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"role": "assistant", "content": [
+ {"type": "tool_use", "id": "tool_123", "name": "get_weather", "input": {"location": "Paris"}}
+ ]},
+ {"role": "user", "content": [
+ {"type": "tool_result", "tool_use_id": "tool_123", "content": '{"temp": "22°C", "condition": "sunny"}'}
+ ]}
+ ],
+ tools=[{
+ "name": "get_weather",
+ "description": "Get weather for a location",
+ "input_schema": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
+ }]
+)
+
+print(message.content[0].text)
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [
+ { role: "user", content: "What's the weather in Paris?" },
+ { role: "assistant", content: [
+ { type: "tool_use", id: "tool_123", name: "get_weather", input: { location: "Paris" } }
+ ]},
+ { role: "user", content: [
+ { type: "tool_result", tool_use_id: "tool_123", content: '{"temp": "22°C", "condition": "sunny"}' }
+ ]}
+ ],
+ tools: [{
+ name: "get_weather",
+ description: "Get weather for a location",
+ input_schema: { type: "object", properties: { location: { type: "string" } }, required: ["location"] }
+ }]
+});
+
+console.log(message.content[0].text);
+```
+
+
+## Vision
+
+Send images using content blocks. Supports both URLs and base64-encoded data.
+
+
+```python Python
+# From URL
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{
+ "role": "user",
+ "content": [
+ {"type": "image", "source": {"type": "url", "url": "https://example.com/image.jpg"}},
+ {"type": "text", "text": "Describe this image"}
+ ]
+ }]
+)
+
+print(message.content[0].text)
+```
+
+```python Base64
+import base64, httpx
+
+image_data = base64.standard_b64encode(httpx.get("https://example.com/image.jpg").content).decode("utf-8")
+
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{
+ "role": "user",
+ "content": [
+ {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_data}},
+ {"type": "text", "text": "Describe this image"}
+ ]
+ }]
+)
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{
+ role: "user",
+ content: [
+ { type: "image", source: { type: "url", url: "https://example.com/image.jpg" } },
+ { type: "text", text: "Describe this image" }
+ ]
+ }]
+});
+
+console.log(message.content[0].text);
+```
+
+
+## Extended Thinking
+
+Enable extended thinking for complex reasoning tasks. Requires `max_tokens` greater than `budget_tokens`.
+
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=16000,
+ thinking={"type": "enabled", "budget_tokens": 10000},
+ messages=[{"role": "user", "content": "Analyze the implications of quantum computing on cryptography"}]
+)
+
+for block in message.content:
+ if block.type == "thinking":
+ print(f"Thinking: {block.thinking[:200]}...")
+ elif block.type == "text":
+ print(f"Response: {block.text}")
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 16000,
+ thinking: { type: "enabled", budget_tokens: 10000 },
+ messages: [{ role: "user", content: "Analyze the implications of quantum computing on cryptography" }]
+});
+
+for (const block of message.content) {
+ if (block.type === "thinking") {
+ console.log(`Thinking: ${block.thinking.slice(0, 200)}...`);
+ } else if (block.type === "text") {
+ console.log(`Response: ${block.text}`);
+ }
+}
+```
+
+
+
+Extended thinking output counts toward `max_tokens`. Set `max_tokens` high enough to accommodate both thinking and the final response.
+
+
+## Prompt Caching
+
+Use `cache_control` on system prompts, messages, and tool definitions to cache frequently used content.
+
+
+```python System prompt caching
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ system=[{
+ "type": "text",
+ "text": "You are an expert analyst. Here is a very long reference document...",
+ "cache_control": {"type": "ephemeral"}
+ }],
+ messages=[{"role": "user", "content": "Summarize the key points"}]
+)
+```
+
+```python Message caching
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "Here is a long document to analyze...", "cache_control": {"type": "ephemeral"}},
+ {"type": "text", "text": "What are the key themes?"}
+ ]
+ }]
+)
+```
+
+```python Tool caching
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Search for AI news"}],
+ tools=[{
+ "name": "search",
+ "description": "Search the knowledge base",
+ "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}},
+ "cache_control": {"type": "ephemeral"}
+ }]
+)
+```
+
+
+Cached content is reused across requests, reducing latency and costs. Cache usage is reflected in the response `usage` object.
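+
+A sketch of checking cache activity on the response -- these `usage` fields follow Anthropic's API, and cache support varies by provider:
+
+```python Python
+# Tokens written to the cache on the first request
+print(message.usage.cache_creation_input_tokens)
+# Tokens read from the cache on subsequent requests
+print(message.usage.cache_read_input_tokens)
+```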
+
+## Multi-turn Conversations
+
+Build conversations by passing the full message history. Messages must alternate between `user` and `assistant` roles.
+
+
+```python Python
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+)
+
+print(message.content[0].text) # "Your name is Alice."
+```
+
+```typescript TypeScript
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [
+ { role: "user", content: "My name is Alice." },
+ { role: "assistant", content: "Hello Alice! How can I help you?" },
+ { role: "user", content: "What is my name?" }
+ ]
+});
+
+console.log(message.content[0].text);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "messages": [
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+ }'
+```
+
+
+## Using with Portkey Features
+
+The Messages API works with all Portkey gateway features. Pass Portkey-specific headers alongside the Anthropic request:
+
+
+```python Python
+import anthropic
+
+client = anthropic.Anthropic(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai",
+ default_headers={
+ "x-portkey-config": "pp-config-xxx" # Config with fallbacks, load balancing, etc.
+ }
+)
+
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+
+```typescript TypeScript
+import Anthropic from '@anthropic-ai/sdk';
+
+const client = new Anthropic({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai",
+ defaultHeaders: {
+ "x-portkey-config": "pp-config-xxx"
+ }
+});
+
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Hello!" }]
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -H "x-portkey-config: pp-config-xxx" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "messages": [{"role": "user", "content": "Hello!"}]
+ }'
+```
+
+
+- **[Configs](/product/ai-gateway/configs)** -- Route, load balance, and set fallbacks
+- **[Caching](/product/ai-gateway/cache-simple-and-semantic)** -- Cache responses for faster, cheaper calls
+- **[Guardrails](/product/guardrails)** -- Add input/output guardrails
+- **[Observability](/product/observability)** -- Full logging and tracing
+
+## API Reference
+
+- [Messages](/api-reference/inference-api/anthropic-transform) -- `POST /v1/messages`
+
+See also:
+
+- [Anthropic Messages specification](https://docs.anthropic.com/en/api/messages)
+- [Portkey Messages API reference](/api-reference/inference-api/anthropic-transform)
+- [Universal API overview](/product/ai-gateway/universal-api) -- all three API formats
+- Anthropic-specific setup
+
diff --git a/product/ai-gateway/responses-api.mdx b/product/ai-gateway/responses-api.mdx
new file mode 100644
index 00000000..d3180841
--- /dev/null
+++ b/product/ai-gateway/responses-api.mdx
@@ -0,0 +1,851 @@
+---
+title: "Open Responses"
+description: "Use the Responses API with any LLM provider through Portkey's AI Gateway — fully compliant with the Open Responses specification."
+---
+
+
+Available on all Portkey [plans](https://portkey.ai/pricing).
+
+
+[Open Responses](https://www.openresponses.org/) is an open-source specification for multi-provider, interoperable LLM interfaces based on the OpenAI Responses API. It defines a shared schema for calling language models, streaming results, and composing agentic workflows — independent of provider.
+
+**Portkey is fully Open Responses compliant.** The Responses API works with every provider and model in Portkey's catalog — including Anthropic, Gemini, Bedrock, and 60+ other providers that don't natively support it.
+
+## Why Responses API
+
+The Responses API is becoming the standard for agentic AI:
+
+- **Agentic loops** — Models emit tool calls, receive results, and continue autonomously
+- **Items as atomic units** — Clear state machines for context management
+- **Semantic streaming** — Predictable, provider-agnostic streaming events
+- **Unified tool calling** — Consistent function calling interface across all providers
+
+Previously, the Responses API only worked with OpenAI. Portkey extends it to all providers.
+
+## Quick Start
+
+Send a Responses API request to any provider. Change the `model` string -- the API format stays the same.
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Explain quantum computing in simple terms"
+)
+
+print(response.output_text)
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Explain quantum computing in simple terms"
+});
+
+console.log(response.output_text);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "Explain quantum computing in simple terms"
+ }'
+```
+
+
+
+The same code works for **any provider**. Switch the `@provider/model` string to use OpenAI, Gemini, Groq, Bedrock, or any of the 3000+ supported models. See [Model Catalog](/product/model-catalog) for setup.
+
+
+### Using the OpenAI SDK
+
+The Portkey SDK is a superset of the OpenAI SDK, so `portkey.responses.create()` works identically. The OpenAI SDK also works directly with Portkey's base URL:
+
+
+```python OpenAI Python SDK
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1"
+)
+
+response = client.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Explain quantum computing in simple terms"
+)
+
+print(response.output_text)
+```
+
+```javascript OpenAI Node SDK
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1"
+});
+
+const response = await client.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Explain quantum computing in simple terms"
+});
+
+console.log(response.output_text);
+```
+
+
+## Text Generation
+
+### Instructions (System Prompt)
+
+Set a system prompt with `instructions` or pass a `system` role message in the `input` array. Both work identically.
+
+
+```python Using instructions
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ instructions="You are a pirate. Always respond in pirate speak.",
+ input="Say hello."
+)
+```
+
+```python Using system message
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[
+ {"type": "message", "role": "system", "content": "You are a pirate. Always respond in pirate speak."},
+ {"type": "message", "role": "user", "content": "Say hello."}
+ ]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ instructions: "You are a pirate. Always respond in pirate speak.",
+ input: "Say hello."
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "instructions": "You are a pirate. Always respond in pirate speak.",
+ "input": "Say hello."
+ }'
+```
+
+
+### Streaming
+
+Enable streaming with `stream: true`.
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+stream = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Write a haiku about AI",
+ stream=True
+)
+
+for event in stream:
+ if hasattr(event, 'delta'):
+ print(event.delta, end="", flush=True)
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const stream = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Write a haiku about AI",
+ stream: true
+});
+
+for await (const event of stream) {
+ if (event.type === 'response.output_text.delta') {
+ process.stdout.write(event.delta);
+ }
+}
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "Write a haiku about AI",
+ "stream": true
+ }'
+```
+
+
+Portkey's adapter produces the same SSE event stream format (`response.created`, `response.output_text.delta`, `response.completed`, etc.) regardless of the underlying provider.
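+
+A sketch of branching on the main event types in Python (assuming events expose a `type` attribute mirroring the JavaScript example above):
+
+```python Python
+for event in stream:
+    if event.type == "response.output_text.delta":
+        print(event.delta, end="", flush=True)  # incremental text
+    elif event.type == "response.completed":
+        print()  # stream finished; event.response holds the final response object
+```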
+
+### Multi-turn Conversations
+
+Pass previous messages in the `input` array for multi-turn conversations. Two formats are supported:
+
+
+```python Shorthand format
+# Shorthand: just role + content
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+)
+```
+
+```python Explicit format
+# Explicit: type: "message" wrapper (matches Open Responses spec)
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[
+ {"type": "message", "role": "user", "content": "My name is Alice."},
+ {"type": "message", "role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"type": "message", "role": "user", "content": "What is my name?"}
+ ]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: [
+ { type: "message", role: "user", content: "My name is Alice." },
+ { type: "message", role: "assistant", content: "Hello Alice! How can I help you?" },
+ { type: "message", role: "user", content: "What is my name?" }
+ ]
+});
+```
+
+
+Supported roles: `user`, `assistant`, `developer` (maps to system), `system`, and `tool`.
+
+### Generation Parameters
+
+Control generation behavior with optional parameters:
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Write a creative story",
+ max_output_tokens=2048,
+ temperature=0.8,
+ top_p=0.95,
+ parallel_tool_calls=True
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Write a creative story",
+ max_output_tokens: 2048,
+ temperature: 0.8,
+ top_p: 0.95,
+ parallel_tool_calls: true
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "Write a creative story",
+ "max_output_tokens": 2048,
+ "temperature": 0.8,
+ "top_p": 0.95
+ }'
+```
+
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `max_output_tokens` | integer | Maximum tokens in the response. If not set, uses the provider's default |
+| `temperature` | float | Sampling temperature (0-2). Higher = more creative |
+| `top_p` | float | Nucleus sampling threshold (0-1) |
+| `parallel_tool_calls` | boolean | Allow multiple tool calls in a single response (default: `true`) |
+| `user` | string | End-user identifier for abuse tracking |
+| `metadata` | object | Arbitrary metadata to attach to the response |
+
+## Tool Calling
+
+Define tools with the Responses API `function` tool format. Works across all providers that support function calling.
+
+
+```python Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="What's the weather in San Francisco?",
+ tools=[{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }]
+)
+
+print(response.output)
+```
+
+```javascript JavaScript
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "What's the weather in San Francisco?",
+ tools: [{
+ type: "function",
+ name: "get_weather",
+ description: "Get current weather for a location",
+ parameters: {
+ type: "object",
+ properties: {
+ location: { type: "string", description: "City name" }
+ },
+ required: ["location"]
+ }
+ }]
+});
+
+console.log(response.output);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "What'\''s the weather in San Francisco?",
+ "tools": [{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }]
+ }'
+```
+
+
+### Tool Choice
+
+Control tool usage with `tool_choice`:
+
+| Value | Behavior |
+|-------|----------|
+| `"auto"` | Model decides whether to call a tool (default) |
+| `"none"` | Model will not call any tools |
+| `"required"` | Model must call at least one tool |
+| `{"type": "function", "name": "get_weather"}` | Force a specific tool |
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="What's the weather in San Francisco?",
+ tools=[...],
+ tool_choice="required"
+)
+```
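+
+To force one specific tool, pass an object instead of a string -- a sketch reusing the `get_weather` tool defined earlier:
+
+```python Python
+response = portkey.responses.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    input="What's the weather in San Francisco?",
+    tools=[...],  # same get_weather definition as above
+    tool_choice={"type": "function", "name": "get_weather"}
+)
+```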
+
+### Function Call Results
+
+Return function call results in a multi-turn flow with `function_call` and `function_call_output` items:
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"type": "function_call", "name": "get_weather", "call_id": "call_123", "arguments": '{"location": "Paris"}'},
+ {"type": "function_call_output", "call_id": "call_123", "output": '{"temp": "22°C", "condition": "sunny"}'}
+ ],
+ tools=[{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get weather for a location",
+ "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
+ }]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: [
+ { role: "user", content: "What's the weather in Paris?" },
+ { type: "function_call", name: "get_weather", call_id: "call_123", arguments: '{"location": "Paris"}' },
+ { type: "function_call_output", call_id: "call_123", output: '{"temp": "22°C", "condition": "sunny"}' }
+ ],
+ tools: [{
+ type: "function",
+ name: "get_weather",
+ description: "Get weather for a location",
+ parameters: { type: "object", properties: { location: { type: "string" } }, required: ["location"] }
+ }]
+});
+```
+
+
+## Input Types
+
+### Vision
+
+Send images with the `input_image` content type. The optional `detail` parameter (`"high"`, `"low"`, `"auto"`) controls processing fidelity.
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_image", "image_url": "https://example.com/image.jpg", "detail": "high"},
+ {"type": "input_text", "text": "Describe this image"}
+ ]
+ }]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: [{
+ role: "user",
+ content: [
+ { type: "input_image", image_url: "https://example.com/image.jpg", detail: "high" },
+ { type: "input_text", text: "Describe this image" }
+ ]
+ }]
+});
+```
+
+
+### File Inputs
+
+Send files with the `input_file` content type. Pass file data as a base64-encoded data URL or reference an existing file by `file_id`.
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_file", "filename": "report.pdf", "file_data": "data:application/pdf;base64,JVBERi0xLjQ..."},
+ {"type": "input_text", "text": "Summarize this document"}
+ ]
+ }]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: [{
+ role: "user",
+ content: [
+ { type: "input_file", filename: "report.pdf", file_data: "data:application/pdf;base64,JVBERi0xLjQ..." },
+ { type: "input_text", text: "Summarize this document" }
+ ]
+ }]
+});
+```
+
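+To reference a previously uploaded file instead of inlining data, pass a `file_id` -- the ID below is a hypothetical placeholder:
+
+```python Python
+response = portkey.responses.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    input=[{
+        "role": "user",
+        "content": [
+            {"type": "input_file", "file_id": "file_abc123"},  # hypothetical file ID
+            {"type": "input_text", "text": "Summarize this document"}
+        ]
+    }]
+)
+```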
+
+## Structured Output
+
+Control output format with `text.format`. Supports `json_schema` for strict structured output and `json_object` for free-form JSON.
+
+### JSON Schema
+
+
+```python Python
+response = portkey.responses.create(
+ model="@openai-provider/gpt-4.1",
+ input="Extract the name and age from: John is 30 years old.",
+ text={
+ "format": {
+ "type": "json_schema",
+ "name": "person",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "age": {"type": "integer"}
+ },
+ "required": ["name", "age"]
+ }
+ }
+ }
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@openai-provider/gpt-4.1",
+ input: "Extract the name and age from: John is 30 years old.",
+ text: {
+ format: {
+ type: "json_schema",
+ name: "person",
+ schema: {
+ type: "object",
+ properties: {
+ name: { type: "string" },
+ age: { type: "integer" }
+ },
+ required: ["name", "age"]
+ }
+ }
+ }
+});
+```
+
+
+### JSON Object
+
+For free-form JSON output without a strict schema:
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="List 3 programming languages and their main use cases as JSON",
+ text={"format": {"type": "json_object"}}
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "List 3 programming languages and their main use cases as JSON",
+ text: { format: { type: "json_object" } }
+});
+```
+
+
+## Reasoning and Thinking
+
+Control reasoning with the unified `reasoning` parameter. Portkey maps this to each provider's native thinking mechanism automatically.
+
+### Reasoning Effort
+
+`reasoning.effort` controls how much the model reasons before responding. Works across OpenAI, Anthropic, and Gemini — Portkey translates to each provider's native format.
+
+
+```python Anthropic
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Solve this step by step: What is 127 * 43?",
+ reasoning={"effort": "high"}
+)
+```
+
+```python OpenAI
+response = portkey.responses.create(
+ model="@openai-provider/o4-mini",
+ input="Solve this step by step: What is 127 * 43?",
+ reasoning={"effort": "high"}
+)
+```
+
+```python Gemini
+response = portkey.responses.create(
+ model="@google-provider/gemini-2.5-flash",
+ input="Solve this step by step: What is 127 * 43?",
+ reasoning={"effort": "high"}
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Solve this step by step: What is 127 * 43?",
+ reasoning: { effort: "high" }
+});
+```
+
+
+Portkey maps `reasoning.effort` to each provider's native thinking configuration:
+
+**Anthropic**
+
+| Effort | Budget Tokens |
+|--------|--------------|
+| `low` | 1,024 |
+| `medium` | 8,192 |
+| `high` | 16,384 |
+| `xhigh` | 32,768 |
+
+**Gemini**
+
+| Effort | Budget Tokens |
+|--------|--------------|
+| `low` | 1,024 |
+| `medium` | 8,192 |
+| `high` | 24,576 |
+
+**OpenAI**
+
+Passed through as `reasoning_effort` natively. No translation needed.
+
+### Anthropic Extended Thinking
+
+For fine-grained control over Anthropic's extended thinking, pass the `thinking` parameter directly with an exact `budget_tokens` value. This takes precedence over `reasoning.effort` if both are set.
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Analyze the implications of quantum computing on cryptography",
+ thinking={"type": "enabled", "budget_tokens": 10000}
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Analyze the implications of quantum computing on cryptography",
+ thinking: { type: "enabled", budget_tokens: 10000 }
+});
+```
+
+
+## Prompt Caching
+
+Enable prompt caching with `cache_control` on content items and tools. Works with Anthropic and other compatible providers.
+
+
+```python Python
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": "Here is a long document to analyze...", "cache_control": {"type": "ephemeral"}},
+ {"type": "input_text", "text": "Summarize the key points"}
+ ]
+ }]
+)
+```
+
+```javascript JavaScript
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: [{
+ role: "user",
+ content: [
+ { type: "input_text", text: "Here is a long document to analyze...", cache_control: { type: "ephemeral" } },
+ { type: "input_text", text: "Summarize the key points" }
+ ]
+ }]
+});
+```
+
+
+Cache control also works on tool definitions:
+
+```python Python
+tools=[{
+ "type": "function",
+ "name": "search",
+ "description": "Search the knowledge base",
+ "parameters": {"type": "object", "properties": {"query": {"type": "string"}}},
+ "cache_control": {"type": "ephemeral"}
+}]
+```
+
+## Using with Portkey Features
+
+The Responses API works with all Portkey gateway features.
+
+
+- **[Configs](/product/ai-gateway/configs)** -- Route, load balance, and set fallbacks
+- **[Caching](/product/ai-gateway/cache-simple-and-semantic)** -- Cache responses for faster, cheaper calls
+- **[Guardrails](/product/guardrails)** -- Input/output guardrails
+- **[Observability](/product/observability)** -- Full logging and tracing
+
+Pass features through headers or the config parameter:
+
+
+```python Python
+portkey = Portkey(
+ api_key="PORTKEY_API_KEY",
+ config="pp-config-xxx" # Your Portkey config with fallbacks, load balancing, etc.
+)
+
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Hello!"
+)
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -H "x-portkey-config: pp-config-xxx" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "Hello!"
+ }'
+```
+
+
+## Provider Support
+
+Portkey handles the Responses API in two ways depending on the provider:
+
+- **Native providers** — Requests pass through directly to the provider's Responses API endpoint
+- **Adapter providers** — Portkey automatically translates between Responses API and Chat Completions formats
+
+
+This translation is transparent — the response format is identical regardless of which provider handles the request.
+
+**Native providers:** OpenAI, Azure OpenAI, Grok (x.ai), Groq, OpenRouter, Azure AI
+
+**Adapter providers:** Anthropic, Google Gemini, Google Vertex AI, AWS Bedrock, Mistral AI, Together AI, and [all other providers](/integrations)
+
+All features documented on this page — text generation, streaming, tool calling, reasoning, vision, structured output, prompt caching, and multi-turn conversations — work with both native and adapter providers.
+
+### Native-Only Features
+
+A few features require server-side state and are limited to native providers:
+
+- **`previous_response_id`** — Use [multi-turn conversations](#multi-turn-conversations) to pass history in the `input` array instead (see the sketch below)
+- **`store`** — Silently ignored on adapter providers; responses are not persisted server-side
+- **Retrieve / Delete** — `GET` and `DELETE` on `/v1/responses/:id` are not available
+- **Built-in tools** — `web_search`, `file_search`, `computer_use` are native-only. Use [Remote MCP](/product/ai-gateway/remote-mcp) or custom function tools instead
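+
+A sketch of replacing `previous_response_id` with explicit history on adapter providers -- this assumes the first response's `output` items can be spliced back into `input`, the standard Responses API pattern:
+
+```python Python
+first = portkey.responses.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    input="My name is Alice."
+)
+
+followup = portkey.responses.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    input=[
+        {"role": "user", "content": "My name is Alice."},
+        *first.output,  # carry the assistant's output items forward in place of previous_response_id
+        {"role": "user", "content": "What is my name?"}
+    ]
+)
+```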
+
+Everything else — text generation, streaming, instructions, tool calling, structured output, reasoning, vision, file inputs, prompt caching, and multi-turn conversations — works with **every provider**.
+
+## Reference
+
+### Supported Parameters
+
+Complete list of parameters supported by the Responses API and how they map internally:
+
+| Responses API Parameter | Chat Completions Equivalent | Notes |
+|------------------------|---------------------------|-------|
+| `input` | `messages` | String, message array, or typed input items |
+| `instructions` | System message (prepended) | Added as the first message with `role: system` |
+| `model` | `model` | Use `@provider/model` format or set provider via headers |
+| `stream` | `stream` | SSE events are translated to Responses API format |
+| `max_output_tokens` | `max_tokens` | Only sent if explicitly set |
+| `temperature` | `temperature` | Direct mapping |
+| `top_p` | `top_p` | Direct mapping |
+| `tools` | `tools` | Only `function` type is supported |
+| `tool_choice` | `tool_choice` | `auto`, `none`, `required`, or specific function |
+| `parallel_tool_calls` | `parallel_tool_calls` | Direct mapping |
+| `text.format` | `response_format` | `json_schema` and `json_object` supported |
+| `reasoning.effort` | Provider-specific | Mapped to thinking for Anthropic/Gemini, `reasoning_effort` for OpenAI |
+| `thinking` | `thinking` | Anthropic-specific passthrough |
+| `top_logprobs` | `top_logprobs` | Direct mapping |
+| `user` | `user` | Direct mapping |
+| `metadata` | `metadata` | Direct mapping |
+
+
+### Input Content Types
+
+| Content Type | Description |
+|-------------|-------------|
+| `input_text` | Text content (maps to `text` in Chat Completions) |
+| `input_image` | Image URL with optional `detail` parameter |
+| `input_file` | File with `filename` and `file_data` (base64 data URL) or `file_id` |
+| `function_call` | Tool call record with `name`, `call_id`, and `arguments` |
+| `function_call_output` | Tool result with `call_id` and `output` |
+
+### Output Item Types
+
+| Output Type | Description |
+|------------|-------------|
+| `message` | Text response with `output_text` content |
+| `function_call` | Tool call with `name`, `call_id`, `arguments`, and `status` |
+| `reasoning` | Thinking/reasoning content with `summary` array (from providers that support it) |
+
+### API Endpoints
+
+- [Create a Response](/api-reference/inference-api/responses/responses) -- `POST /v1/responses`
+- [Retrieve a Response](/api-reference/inference-api/responses/retrieve-response) -- `GET /v1/responses/{response_id}`
+- [Delete a Response](/api-reference/inference-api/responses/delete-response) -- `DELETE /v1/responses/{response_id}`
+- [List Input Items](/api-reference/inference-api/responses/retrieve-inputs) -- `GET /v1/responses/{response_id}/input_items`
+
+See also:
+
+- [Open Responses specification](https://www.openresponses.org/)
+- [Portkey Responses API reference](/api-reference/inference-api/responses/responses)
+- [Remote MCP](/product/ai-gateway/remote-mcp) -- MCP via the Responses API
+- [Universal API overview](/product/ai-gateway/universal-api) -- all three API formats
+
diff --git a/product/ai-gateway/universal-api.mdx b/product/ai-gateway/universal-api.mdx
index 3b79633d..7a45aae7 100644
--- a/product/ai-gateway/universal-api.mdx
+++ b/product/ai-gateway/universal-api.mdx
@@ -1,297 +1,563 @@
---
title: "Universal API"
-description: Portkey's Universal API provides a consistent interface to integrate a wide range of modalities (text, vision, audio) and LLMs (hosted OR local) into your apps.
+sidebarTitle: "Overview"
+description: "One API for 200+ LLMs across every major provider. Use OpenAI's Chat Completions, Responses API, or Anthropic's Messages format -- Portkey translates between them all."
---
- This feature is available on all Portkey plans.
+Available on all Portkey [plans](https://portkey.ai/pricing).
- So, instead of maintaining separate integrations for different multimodal LLMs, you can interact with models from OpenAI, Anthropic, Meta, Cohere, Mistral, and many more (100+ models, 15+ providers) - all using a common, unified API signature.
+Portkey's AI Gateway provides a single, unified API for 200+ models from every major provider. Write once in any format, switch providers by changing one parameter.
-## Portkey Follows OpenAI Spec
+## Three API Formats, Any Provider
-Portkey API is powered by its [battle-tested open-source AI Gateway](https://github.com/portkey-ai/gateway), which converts all incoming requests to the OpenAI signature and returns OpenAI-compliant responses.
+Portkey supports three API formats. Each works with **all providers** — Portkey handles translation automatically.
-## Switching Providers is a Breeze
+<CardGroup cols={3}>
+  <Card title="Chat Completions" href="/product/ai-gateway/chat-completions">
+  **OpenAI spec** · `POST /v1/chat/completions`
-
-
+  Widest ecosystem compatibility
+  </Card>
+  <Card title="Responses API" href="/product/ai-gateway/responses-api">
+  **OpenAI spec** · `POST /v1/responses`
- ```JS
+  Agentic AI with built-in tool use
+  </Card>
+  <Card title="Messages API" href="/product/ai-gateway/messages-api">
+  **Anthropic spec** · `POST /v1/messages`
+
+  Native Anthropic format across providers
+  </Card>
+</CardGroup>
+
+### Chat Completions — `POST /v1/chat/completions`
+
+OpenAI-compatible format with the widest ecosystem support. [Full guide →](/product/ai-gateway/chat-completions)
+
+
+```python Portkey Python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.chat.completions.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript Portkey Node.js
import Portkey from 'portkey-ai';
-// Calling OpenAI
-const portkey = new Portkey({
- provider: "openai",
- Authorization: "Bearer sk-xxxxx"
-})
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
const response = await portkey.chat.completions.create({
- messages: [{ role: 'user', content: 'Hello' }],
- model: 'gpt-4',
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages: [{ role: "user", content: "Hello!" }]
});
-// Swithing to Anthropic
-const portkey = new Portkey({
- provider: "anthropic",
- Authorization: "Bearer sk-ant-xxxxx"
-})
+console.log(response.choices[0].message.content);
+```
-const response = await portkey.chat.completions.create({
- messages: [{ role: 'user', content: 'Hello' }],
- model: 'claude-3-opus-20240229',
+```python OpenAI Python
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1"
+)
+
+response = client.chat.completions.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+```javascript OpenAI Node.js
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1"
+});
+
+const response = await client.chat.completions.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages: [{ role: "user", content: "Hello!" }]
});
+
+console.log(response.choices[0].message.content);
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "messages": [{"role": "user", "content": "Hello!"}]
+ }'
```
-
-
+
+
+### Responses API — `POST /v1/responses`
+
+OpenAI's next-gen format for agentic AI with built-in tool use and reasoning. [Full guide →](/product/ai-gateway/responses-api)
- ```py
+
+```python Portkey Python
from portkey_ai import Portkey
-# Calling OpenAI
-portkey = Portkey(
- provider = "openai",
- Authorization = "sk-xxxxx"
-)
+portkey = Portkey(api_key="PORTKEY_API_KEY")
-response = portkey.chat.completions.create(
- messages = [{ "role": 'user', "content": 'Hello' }],
- model = 'gpt-4'
+response = portkey.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Hello!"
)
-# Switching to Anthropic
-portkey = Portkey(
- provider = "anthropic",
- Authorization = "sk-ant-xxxxx"
+print(response.output_text)
+```
+
+```javascript Portkey Node.js
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+const response = await portkey.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Hello!"
+});
+
+console.log(response.output_text);
+```
+
+```python OpenAI Python
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1"
)
-response = portkey.chat.completions.create(
- messages = [{ "role": 'user', "content": 'Hello' }],
- model = 'claude-3-opus-20240229'
+response = client.responses.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ input="Hello!"
)
+
+print(response.output_text)
```
-
-
+```javascript OpenAI Node.js
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1"
+});
-## Integrating Local or Private Models
+const response = await client.responses.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ input: "Hello!"
+});
+
+console.log(response.output_text);
+```
-Portkey can also route to and observe your locally or privately hosted LLMs, as long as the model is compliant with one of the 15+ providers supported by Portkey and the URL is exposed publicly.
+```sh cURL
+curl https://api.portkey.ai/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "input": "Hello!"
+ }'
+```
+
-Simply specify the `custom_host` parameter along with the `provider` name, and Portkey will handle the communication with your local model.
+### Messages API — `POST /v1/messages`
-
-
+Anthropic-native format. Use the Anthropic SDK pointed at Portkey's base URL, or call the endpoint directly. [Full guide →](/product/ai-gateway/messages-api)
-```js
-import Portkey from 'portkey-ai';
+
+```python Python
+import anthropic
-const portkey = new Portkey({
+client = anthropic.Anthropic(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai"
+)
+
+message = client.messages.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Hello!"}]
+)
+
+print(message.content[0].text)
+```
+
+```typescript TypeScript
+import Anthropic from '@anthropic-ai/sdk';
+
+const client = new Anthropic({
apiKey: "PORTKEY_API_KEY",
- provider: "mistral-ai",
- customHost: "http://MODEL_URL/v1/" // Point Portkey to where the model is hosted
-})
-
-async function main(){
- const response = await portkey.chat.completions.create({
- messages: [{ role: 'user', content: '1729' }],
- model: 'mixtral-8x22b',
- });
- console.log(response)
-}
+ baseURL: "https://api.portkey.ai"
+});
+
+const message = await client.messages.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "Hello!" }]
+});
-main()
+console.log(message.content[0].text);
```
-
-
-```py
+```sh cURL
+curl https://api.portkey.ai/v1/messages \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{
+ "model": "@anthropic-provider/claude-sonnet-4-5-20250514",
+ "max_tokens": 1024,
+ "messages": [{"role": "user", "content": "Hello!"}]
+ }'
+```
+
+
+
+The Portkey SDK is a superset of the OpenAI SDK. The OpenAI SDK also works directly — point `base_url` at `https://api.portkey.ai/v1`. For the Messages API, use the Anthropic SDK. See [Model Catalog](/product/model-catalog) for provider setup.
+
+
+## Switching Providers
+
+Change the `@provider/model` string to switch to any provider. The API format stays the same.
+
+
+```python Portkey Python
from portkey_ai import Portkey
-portkey = Portkey(
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+# OpenAI
+response = portkey.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+
+# Switch to Anthropic -- same client, different model string
+response = portkey.chat.completions.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+
+# Switch to Gemini
+response = portkey.chat.completions.create(
+ model="@google-provider/gemini-2.0-flash",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+```
+
+```javascript Portkey Node.js
+import Portkey from 'portkey-ai';
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY" });
+
+// OpenAI
+const response = await portkey.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Hello" }]
+});
+
+// Switch to Anthropic -- same client, different model string
+const response2 = await portkey.chat.completions.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
+
+```python OpenAI Python
+from openai import OpenAI
+
+client = OpenAI(
api_key="PORTKEY_API_KEY",
- provider="mistral-ai",
- custom_host="http://MODEL_URL/v1/" # Point Portkey to where the model is hosted
+ base_url="https://api.portkey.ai/v1"
)
-chat = portkey.chat.completions.create(
- messages = [{ "role": 'user', "content": 'Say this is a test' }],
- model="mixtral-8x22b"
+# OpenAI
+response = client.chat.completions.create(
+ model="@openai-provider/gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}]
)
-print(chat)
+# Switch to Anthropic -- same client, different model string
+response = client.chat.completions.create(
+ model="@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+
+# Switch to Gemini
+response = client.chat.completions.create(
+ model="@google-provider/gemini-2.0-flash",
+ messages=[{"role": "user", "content": "Hello"}]
+)
```
-
-
-```sh
+```javascript OpenAI Node.js
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1"
+});
+
+// OpenAI
+const response = await client.chat.completions.create({
+ model: "@openai-provider/gpt-4o",
+ messages: [{ role: "user", content: "Hello" }]
+});
+
+// Switch to Anthropic -- same client, different model string
+const response2 = await client.chat.completions.create({
+ model: "@anthropic-provider/claude-sonnet-4-5-20250514",
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
+
+```sh cURL
+# OpenAI
curl https://api.portkey.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "x-portkey-api-key: $PORTKEY_API_KEY" \
- -H "x-portkey-provider: mistral-ai" \
- -H "x-portkey-custom-host: http://MODEL_URL/v1/" \
- -d '{
- "model": "mixtral-8x22b",
- "messages": [{ "role": "user", "content": "Say this is a test" }]
- }'
+ -d '{"model": "@openai-provider/gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}'
+
+# Switch to Anthropic -- same endpoint, different model string
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -d '{"model": "@anthropic-provider/claude-sonnet-4-5-20250514", "messages": [{"role": "user", "content": "Hello"}]}'
```
-
-
+
-**Note:**
-
-When using `custom_host`, include the version identifier (e.g., `/v1`) in the URL. Portkey will append the actual endpoint path (`/chat/completions`, `/completions`, or `/embeddings`) automatically. (For Ollama models, this works differently. [Check here](/integrations/llms/ollama))
+Set up providers and credentials in the [Model Catalog](/product/model-catalog). The `@provider-slug` in the model string routes requests to the correct provider automatically.
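+
+If you prefer a bare model name, the provider slug can instead be sent as a header, following the header pattern used in the custom-host examples below -- a sketch:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="PORTKEY_API_KEY",
+    base_url="https://api.portkey.ai/v1",
+    # Provider slug via header instead of the @provider/model prefix
+    default_headers={"x-portkey-provider": "@openai-provider"}
+)
+
+response = client.chat.completions.create(
+    model="gpt-4o",  # bare model name; routing comes from the header
+    messages=[{"role": "user", "content": "Hello"}]
+)
+```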
-## Powerful Routing and Fallback Strategies
+## Routing, Fallbacks, and Load Balancing
-With Portkey you can implement sophisticated routing and fallback strategies. Route requests to different providers based on various criteria, loadbalance them, set up retries or fallbacks to alternative models in case of failures or resource constraints.
+[Configs](/product/ai-gateway/configs) enable routing strategies, fallbacks, and load balancing across providers.
-Here's an example config where we set up a fallback from OpenAI to a locally hosted Llama3 on Ollama:
+
+```python Portkey Python
+from portkey_ai import Portkey
-```py
config = {
- "strategy": { "mode": "loadbalance" },
- "targets": [
- {
- "provider": "openai",
- "api_key": "xxx",
- "weight": 1,
- "override_params": { "model": "gpt-3.5-turbo" }
- },
- {
- "provider": "mistral-ai",
- "custom_host": "http://MODEL_URL/v1/",
- "weight": 1,
- "override_params": { "model": "mixtral-8x22b" }
- }
- ]
+ "strategy": {"mode": "fallback"},
+ "targets": [
+ {"override_params": {"model": "@openai-provider/gpt-4o"}},
+ {"override_params": {"model": "@anthropic-provider/claude-sonnet-4-5-20250514"}}
+ ]
}
+portkey = Portkey(api_key="PORTKEY_API_KEY", config=config)
+
+# Automatically falls back to Anthropic if OpenAI fails
+response = portkey.chat.completions.create(
+ messages=[{"role": "user", "content": "Hello"}]
+)
+```
+
+```javascript Portkey Node.js
+import Portkey from 'portkey-ai';
+
+const config = {
+ strategy: { mode: "fallback" },
+ targets: [
+ { override_params: { model: "@openai-provider/gpt-4o" } },
+ { override_params: { model: "@anthropic-provider/claude-sonnet-4-5-20250514" } }
+ ]
+};
+
+const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY", config });
+
+// Automatically falls back to Anthropic if OpenAI fails
+const response = await portkey.chat.completions.create({
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
+
+```python OpenAI Python
+from openai import OpenAI
+
+# Use a saved config ID from Portkey dashboard
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1",
+ default_headers={"x-portkey-config": "pp-config-xxx"}
+)
+
+# Automatically falls back to Anthropic if OpenAI fails
+# (the config's override_params take precedence over the model set here)
+response = client.chat.completions.create(
+ model="gpt-4o",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+```
+
+```javascript OpenAI Node.js
+import OpenAI from 'openai';
+
+// Use a saved config ID from Portkey dashboard
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1",
+ defaultHeaders: { "x-portkey-config": "pp-config-xxx" }
+});
+
+// Automatically falls back to Anthropic if OpenAI fails
+// (the config's override_params take precedence over the model set here)
+const response = await client.chat.completions.create({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -H "x-portkey-config: pp-config-xxx" \
+ -d '{"messages": [{"role": "user", "content": "Hello"}]}'
+```
+
+
+Configs work with all three API formats -- Chat Completions, Responses, and Messages.
+
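+For instance, a saved config can be attached when calling the Messages API through the Anthropic SDK (a sketch; `pp-config-xxx` is a placeholder config ID):
+
+```python
+import anthropic
+
+client = anthropic.Anthropic(
+    api_key="PORTKEY_API_KEY",
+    base_url="https://api.portkey.ai",
+    default_headers={"x-portkey-config": "pp-config-xxx"}
+)
+
+message = client.messages.create(
+    model="@anthropic-provider/claude-sonnet-4-5-20250514",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+
+print(message.content[0].text)
+```
+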
+For more details, see [Configs](/product/ai-gateway/configs) and [Conditional Routing](/product/ai-gateway/conditional-routing).
+
+## Local and Private Models
+
+Route to local or private models with `custom_host`. The model's API must be compatible with one of the supported provider formats, and the URL must be reachable by Portkey.
+
+
+```python Portkey Python
from portkey_ai import Portkey
portkey = Portkey(
api_key="PORTKEY_API_KEY",
- config=config
+ provider="mistral-ai",
+ custom_host="http://MODEL_URL/v1/"
+)
+
+response = portkey.chat.completions.create(
+ model="mixtral-8x22b",
+ messages=[{"role": "user", "content": "Hello"}]
)
```
+```javascript Portkey Node.js
+import Portkey from 'portkey-ai';
+const portkey = new Portkey({
+ apiKey: "PORTKEY_API_KEY",
+ provider: "mistral-ai",
+ customHost: "http://MODEL_URL/v1/"
+});
-## Using the Anthropic's /messages Route
+const response = await portkey.chat.completions.create({
+ model: "mixtral-8x22b",
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
-Access models on [Anthropic](/integrations/llms/anthropic), [Bedrock](/integrations/llms/bedrock) and [Vertex AI](/integrations/llms/vertex-ai) through Anthropic's native`/messages` endpoint using Portkey's SDK or Anthropic's SDK.
+```python OpenAI Python
+from openai import OpenAI
-
-This route only works with Claude models on Anthropic, Bedrock and Vertex AI. For other models, use the standard OpenAI compliant endpoint.
-
+client = OpenAI(
+ api_key="PORTKEY_API_KEY",
+ base_url="https://api.portkey.ai/v1",
+ default_headers={
+ "x-portkey-provider": "mistral-ai",
+ "x-portkey-custom-host": "http://MODEL_URL/v1/"
+ }
+)
-
-
- ```sh
- curl --location 'https://api.portkey.ai/v1/messages' \
- --header 'x-portkey-provider: @your-provider-slug' \
- --header 'Content-Type: application/json' \
- --header 'x-portkey-api-key: YOUR_PORTKEY_API_KEY' \
- --data '{
- "model": "your-model-name",
- "max_tokens": 250,
- "messages": [
- {
- "role": "user",
- "content": "Hello, Claude"
- }
- ]
- }'
- ```
-
-
- ```python
- Coming Soon!
- ```
-
-
- ```javascript
- Coming Soon!
- ```
-
-
- ```python Anthropic Python SDK
- import anthropic
-
- client = anthropic.Anthropic(
- api_key="dummy", # we will use portkey's provider slug
- default_headers={"x-portkey-api-key": "YOUR_PORTKEY_API_KEY"},
- base_url="https://api.portkey.ai"
- )
- message = client.messages.create(
- model="@your-provider-slug/your-model-name",
- max_tokens=250,
- messages=[
- {"role": "user", "content": "Hello, Claude"}
- ],
- )
- print(message.content)
- ```
-
-
- ```typescript Anthropic TS SDK
- import Anthropic from '@anthropic-ai/sdk';
-
- const anthropic = new Anthropic({
- apiKey: 'dummy', // we will use portkey's provider slug
- baseURL: "https://api.portkey.ai",
- defaultHeaders: { "x-portkey-api-key": "YOUR_PORTKEY_API_KEY" }
- });
-
- const msg = await anthropic.messages.create({
- model: "@your-provider-slug/your-model-name",
- max_tokens: 1024,
- messages: [{ role: "user", content: "Hello, Claude" }],
- });
- console.log(msg);
- ```
-
-
-
-
-
-## Multimodality
-
-Portkey integrates with multimodal models through the same unified API and supports vision, audio, image generation, and more capabilities across providers.
-
-[Multimodal Capabilities](/product/ai-gateway/multimodal-capabilities)
+response = client.chat.completions.create(
+ model="mixtral-8x22b",
+ messages=[{"role": "user", "content": "Hello"}]
+)
+```
+```javascript OpenAI Node.js
+import OpenAI from 'openai';
+const client = new OpenAI({
+ apiKey: "PORTKEY_API_KEY",
+ baseURL: "https://api.portkey.ai/v1",
+ defaultHeaders: {
+ "x-portkey-provider": "mistral-ai",
+ "x-portkey-custom-host": "http://MODEL_URL/v1/"
+ }
+});
-## Supported Endpoints
+const response = await client.chat.completions.create({
+ model: "mixtral-8x22b",
+ messages: [{ role: "user", content: "Hello" }]
+});
+```
+
+```sh cURL
+curl https://api.portkey.ai/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "x-portkey-api-key: $PORTKEY_API_KEY" \
+ -H "x-portkey-provider: mistral-ai" \
+ -H "x-portkey-custom-host: http://MODEL_URL/v1/" \
+ -d '{"model": "mixtral-8x22b", "messages": [{"role": "user", "content": "Hello"}]}'
+```
+
-Portkey's Universal API supports a comprehensive range of endpoints across all major AI capabilities. Each endpoint follows the OpenAI specification while working seamlessly with 15+ providers.
+
+Include the version identifier (e.g., `/v1`) in the `custom_host` URL. Portkey appends the endpoint path (`/chat/completions`, `/responses`, etc.) automatically. For Ollama, see the [Ollama integration](/integrations/llms/ollama).
+
-
-Not all providers support every endpoint. Check our [provider compatibility matrix](/api-reference/inference-api/supported-providers) to see which endpoints are available for each provider.
-
+## Supported Endpoints
### Core Endpoints
-- **[Chat Completions](/api-reference/inference-api/chat)** - An OpenAI compatible unified endpoint for generating responses with support for streaming, function calling, and multi-modal inputs across 50+ providers like OpenAI, Anthropic, Vertex AI, Bedrock, and more
-- **[Responses](/api-reference/inference-api/responses)** - An OpenAI compatible unified endpoint using the Responses API format. Works with **all 70+ providers**.
-- **[Messages API](/integrations/llms/anthropic#using-messages-route-to-call-anthropics-api)** - An Anthropic-compatible unified endpoint for generating responses with support for streaming, function calling, and multi-modal inputs across providers like Anthropic, Vertex AI, Bedrock, and more.
-- **[Images](/api-reference/inference-api/images)** - Generate, edit, and create variations of images using models like DALL-E, Stable Diffusion, and others
-- **[Audio](/api-reference/inference-api/audio)** - Convert speech to text (transcription) and text to speech across multiple languages
-
+- **[Chat Completions](/product/ai-gateway/chat-completions)** — OpenAI-compatible text generation with streaming, function calling, and multimodal inputs
+- **[Responses API](/product/ai-gateway/responses-api)** — Next-gen format with built-in tool use and reasoning
+- **[Messages API](/product/ai-gateway/messages-api)** — Anthropic-compatible endpoint across all providers
+- **[Images](/api-reference/inference-api/images/create-image)** — Generate, edit, and create image variations (DALL-E, gpt-image-1, Stable Diffusion)
+- **[Audio](/api-reference/inference-api/audio/create-speech)** — Speech-to-text and text-to-speech
### Advanced Capabilities
-- **[Fine-tuning](/product/ai-gateway/fine-tuning)** - Customize models on your specific datasets
-- **[Batch Processing](/product/ai-gateway/batches)** - Process large volumes of requests efficiently
-- **[Files](/product/ai-gateway/files)** - Upload and manage files for fine-tuning and batch operations
-- **[Moderations](/api-reference/inference-api/moderations)** - Check content for safety and compliance
+
+- **[Fine-tuning](/product/ai-gateway/fine-tuning)** — Customize models on specific datasets
+- **[Batch Processing](/product/ai-gateway/batches)** — Process large request volumes efficiently
+- **[Files](/product/ai-gateway/files)** — Upload and manage files for fine-tuning and batch operations
+- **[Moderations](/api-reference/inference-api/moderations)** — Content safety and compliance checks
### Additional Endpoints
-- **[Gateway to other APIs](/api-reference/inference-api/gateway-for-other-apis)** - Gateway to other APIs
-- **[Assistants API](/api-reference/inference-api/assistants-api/assistants/create-assistant)** - OpenAI assistants with persistent threads and file handling
-- **[Completions](/api-reference/inference-api/completions)** - Legacy text completion endpoint for backward compatibility
+- **[Gateway to Other APIs](/api-reference/inference-api/gateway-for-other-apis)** — Proxy requests to any provider endpoint
+- **[Assistants API](/api-reference/inference-api/assistants-api/assistants/create-assistant)** — OpenAI Assistants with persistent threads
+- **[Completions](/api-reference/inference-api/completions)** — Legacy text completion endpoint
+
+### Multimodal Capabilities
+
+Multimodal inputs work across all three API formats (a vision sketch follows the list):
+- **[Vision](/product/ai-gateway/multimodal-capabilities/vision)** — Image understanding across providers
+- **[Function Calling](/product/ai-gateway/multimodal-capabilities/function-calling)** — Tool use and function calling
+- **[Image Generation](/product/ai-gateway/multimodal-capabilities/image-generation)** — Text-to-image generation
+- **[Speech-to-Text](/product/ai-gateway/multimodal-capabilities/speech-to-text)** — Audio transcription
+- **[Text-to-Speech](/product/ai-gateway/multimodal-capabilities/text-to-speech)** — Audio generation
+- **[Thinking / Reasoning](/product/ai-gateway/multimodal-capabilities/thinking-mode)** — Extended reasoning modes
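+
+As a quick cross-format illustration, here is image input through Chat Completions -- a sketch using a placeholder image URL:
+
+```python
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+response = portkey.chat.completions.create(
+    model="@openai-provider/gpt-4o",  # any vision-capable model
+    messages=[{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "Describe this image in one sentence"},
+            {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
+        ]
+    }]
+)
+
+print(response.choices[0].message.content)
+```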
+
+
+Not all providers support every endpoint or modality. See the [provider compatibility matrix](/api-reference/inference-api/supported-providers) for details.
+
diff --git a/test_responses_api.py b/test_responses_api.py
new file mode 100644
index 00000000..f46b02a7
--- /dev/null
+++ b/test_responses_api.py
@@ -0,0 +1,773 @@
+"""
+Test all code snippets from product/ai-gateway/responses-api.mdx
+Uses Portkey virtual keys for Anthropic, OpenAI, and Gemini providers.
+"""
+
+import json
+import os
+import time
+import traceback
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from portkey_ai import Portkey
+
+# ─── Configuration ───────────────────────────────────────────────────────────
+PORTKEY_API_KEY = "/qrtEypttuQGTpSOOH/pP2shqR6v"
+
+# Virtual keys (used instead of raw provider API keys)
+ANTHROPIC_VIRTUAL_KEY = "tooljet---anthr-4e8bfc"
+OPENAI_VIRTUAL_KEY = "main-258f4d"
+GEMINI_VIRTUAL_KEY = "siddharth-gemin-1c452a"
+
+# Models
+ANTHROPIC_MODEL = "claude-sonnet-4-20250514"
+OPENAI_MODEL = "gpt-5-mini"
+GEMINI_MODEL = "gemini-2.5-flash"
+
+# ─── Helpers ─────────────────────────────────────────────────────────────────
+passed = 0
+failed = 0
+results = []
+_lock = threading.Lock()
+
+
+def run_test(name, fn):
+ """Run a single test (thread-safe). Returns (name, status, error)."""
+ global passed, failed
+ try:
+ fn()
+ with _lock:
+ passed += 1
+ results.append((name, "PASS", ""))
+ print(f" ✅ PASSED — {name}")
+ return (name, "PASS", "")
+ except Exception as e:
+ tb = traceback.format_exc()
+ with _lock:
+ failed += 1
+ results.append((name, "FAIL", str(e)))
+ print(f" ❌ FAILED — {name}: {e}")
+ print(f" {tb}")
+ return (name, "FAIL", str(e))
+
+
+# ─── Create clients ─────────────────────────────────────────────────────────
+anthropic_client = Portkey(
+ api_key=PORTKEY_API_KEY,
+ provider="anthropic",
+ virtual_key=ANTHROPIC_VIRTUAL_KEY,
+)
+
+openai_client = Portkey(
+ api_key=PORTKEY_API_KEY,
+ provider="openai",
+ virtual_key=OPENAI_VIRTUAL_KEY,
+)
+
+gemini_client = Portkey(
+ api_key=PORTKEY_API_KEY,
+ provider="google",
+ virtual_key=GEMINI_VIRTUAL_KEY,
+)
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 1. QUICK START — Basic text generation
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_quick_start_anthropic():
+ """Quick Start snippet — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Explain quantum computing in one sentence"
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_quick_start_openai():
+ """Quick Start snippet — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input="Explain quantum computing in one sentence"
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_quick_start_gemini():
+ """Quick Start snippet — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input="Explain quantum computing in one sentence"
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 2. USING THE OPENAI SDK — base_url approach
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_openai_sdk_approach():
+ """Using the OpenAI SDK with Portkey base URL"""
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key=PORTKEY_API_KEY,
+ base_url="https://api.portkey.ai/v1",
+ default_headers={
+ "x-portkey-provider": "anthropic",
+ "x-portkey-virtual-key": ANTHROPIC_VIRTUAL_KEY,
+ }
+ )
+
+ response = client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Explain quantum computing in one sentence"
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 3. INSTRUCTIONS (SYSTEM PROMPT)
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_instructions_param_anthropic():
+ """Instructions parameter — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ instructions="You are a pirate. Always respond in pirate speak.",
+ input="Say hello."
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_instructions_system_message_anthropic():
+ """System message in input array — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[
+ {"type": "message", "role": "system", "content": "You are a pirate. Always respond in pirate speak."},
+ {"type": "message", "role": "user", "content": "Say hello."}
+ ]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_instructions_param_openai():
+ """Instructions parameter — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ instructions="You are a pirate. Always respond in pirate speak.",
+ input="Say hello."
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_instructions_param_gemini():
+ """Instructions parameter — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ instructions="You are a pirate. Always respond in pirate speak.",
+ input="Say hello."
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 4. STREAMING
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_streaming_anthropic():
+ """Streaming — Anthropic"""
+ stream = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Write a haiku about AI",
+ stream=True
+ )
+ collected = []
+ for event in stream:
+ if hasattr(event, 'delta'):
+ collected.append(event.delta)
+ print(event.delta, end="", flush=True)
+ print()
+ full = "".join(collected)
+ assert len(full) > 0, "Stream should produce output"
+
+
+def test_streaming_openai():
+ """Streaming — OpenAI"""
+ stream = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input="Write a haiku about AI",
+ stream=True
+ )
+ collected = []
+ for event in stream:
+ if hasattr(event, 'delta'):
+ collected.append(event.delta)
+ print(event.delta, end="", flush=True)
+ print()
+ full = "".join(collected)
+ assert len(full) > 0, "Stream should produce output"
+
+
+def test_streaming_gemini():
+ """Streaming — Gemini"""
+ stream = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input="Write a haiku about AI",
+ stream=True
+ )
+ collected = []
+ for event in stream:
+ if hasattr(event, 'delta'):
+ collected.append(event.delta)
+ print(event.delta, end="", flush=True)
+ print()
+ full = "".join(collected)
+ assert len(full) > 0, "Stream should produce output"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 5. TOOL CALLING
+# ═══════════════════════════════════════════════════════════════════════════════
+
+WEATHER_TOOL = {
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get current weather for a location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+}
+
+
+def test_tool_calling_anthropic():
+ """Tool calling — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="What's the weather in San Francisco?",
+ tools=[WEATHER_TOOL]
+ )
+ print(f" Output: {response.output}")
+ assert response.output, "output should contain tool call(s)"
+
+
+def test_tool_calling_openai():
+ """Tool calling — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input="What's the weather in San Francisco?",
+ tools=[WEATHER_TOOL]
+ )
+ print(f" Output: {response.output}")
+ assert response.output, "output should contain tool call(s)"
+
+
+def test_tool_calling_gemini():
+ """Tool calling — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input="What's the weather in San Francisco?",
+ tools=[WEATHER_TOOL]
+ )
+ print(f" Output: {response.output}")
+ assert response.output, "output should contain tool call(s)"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 6. REASONING / THINKING
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_reasoning_effort_anthropic():
+ """Reasoning effort — Anthropic (needs a model that supports adaptive thinking)"""
+ # claude-sonnet-4-5-20250514 supports reasoning.effort; claude-sonnet-4 does not.
+ # Using direct thinking param as workaround when reasoning.effort is unsupported.
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Solve this step by step: What is 127 * 43?",
+ thinking={"type": "enabled", "budget_tokens": 8192} # equivalent of effort=medium
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_reasoning_effort_openai():
+ """Reasoning effort — OpenAI (o4-mini supports reasoning)"""
+ response = openai_client.responses.create(
+ model="o4-mini",
+ input="Solve this step by step: What is 127 * 43?",
+ reasoning={"effort": "high"}
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 7. ANTHROPIC EXTENDED THINKING
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_extended_thinking_anthropic():
+ """Anthropic extended thinking with budget_tokens"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Analyze the implications of quantum computing on cryptography",
+ thinking={"type": "enabled", "budget_tokens": 10000}
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 8. PROMPT CACHING
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_prompt_caching_anthropic():
+ """Prompt caching with cache_control — Anthropic"""
+ long_text = "This is a document about artificial intelligence. " * 100
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": long_text, "cache_control": {"type": "ephemeral"}},
+ {"type": "input_text", "text": "Summarize the key points in one sentence"}
+ ]
+ }]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_prompt_caching_tool_anthropic():
+ """Prompt caching on tool definitions — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Search for quantum computing",
+ tools=[{
+ "type": "function",
+ "name": "search",
+ "description": "Search the knowledge base",
+ "parameters": {"type": "object", "properties": {"query": {"type": "string"}}},
+ "cache_control": {"type": "ephemeral"}
+ }]
+ )
+ print(f" Output: {response.output}")
+ assert response.output, "output should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 9. VISION (Image input)
+# ═══════════════════════════════════════════════════════════════════════════════
+
+IMAGE_URL = "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"
+
+
+def test_vision_anthropic():
+ """Vision with image URL — Anthropic (adapter — content items inside message)"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_image", "image_url": IMAGE_URL},
+ {"type": "input_text", "text": "Describe this image in one sentence"}
+ ]
+ }]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_vision_openai():
+ """Vision with image URL — OpenAI (native — content items inside message)"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_image", "image_url": IMAGE_URL},
+ {"type": "input_text", "text": "Describe this image in one sentence"}
+ ]
+ }]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+def test_vision_gemini():
+ """Vision with image URL — Gemini (adapter — content items inside message)"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input=[{
+ "role": "user",
+ "content": [
+ {"type": "input_image", "image_url": IMAGE_URL},
+ {"type": "input_text", "text": "Describe this image in one sentence"}
+ ]
+ }]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert response.output_text, "output_text should not be empty"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 10. STRUCTURED OUTPUT — JSON Schema
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_json_schema_anthropic():
+ """Structured output with json_schema — Anthropic
+ NOTE: claude-sonnet-4-20250514 does NOT support output format / json_schema.
+ Using tool-based structured extraction as a workaround to test the concept.
+ The doc snippet (claude-sonnet-4-5) would support this natively.
+ """
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="Extract the name and age from: John is 30 years old. Reply ONLY with JSON like {\"name\": \"...\", \"age\": ...}",
+ text={"format": {"type": "json_object"}}
+ )
+ print(f" Output: {response.output_text[:200]}")
+ # Strip potential markdown code fence
+ text = response.output_text.strip()
+ if text.startswith("```"):
+ text = text.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+ parsed = json.loads(text)
+ assert parsed.get("name"), "Should have 'name' field"
+ assert parsed.get("age"), "Should have 'age' field"
+
+
+def test_json_schema_openai():
+ """Structured output with json_schema — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input="Extract the name and age from: John is 30 years old.",
+ text={
+ "format": {
+ "type": "json_schema",
+ "name": "person",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "age": {"type": "integer"}
+ },
+ "required": ["name", "age"],
+ "additionalProperties": False
+ }
+ }
+ }
+ )
+ print(f" Output: {response.output_text[:200]}")
+ parsed = json.loads(response.output_text)
+ assert parsed.get("name"), "Should have 'name' field"
+ assert parsed.get("age"), "Should have 'age' field"
+
+
+def test_json_schema_gemini():
+ """Structured output with json_schema — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input="Extract the name and age from: John is 30 years old.",
+ text={
+ "format": {
+ "type": "json_schema",
+ "name": "person",
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "age": {"type": "integer"}
+ },
+ "required": ["name", "age"],
+ "additionalProperties": False
+ }
+ }
+ }
+ )
+ print(f" Output: {response.output_text[:200]}")
+ parsed = json.loads(response.output_text)
+ assert parsed.get("name"), "Should have 'name' field"
+ assert parsed.get("age"), "Should have 'age' field"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 11. STRUCTURED OUTPUT — JSON Object (free-form)
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_json_object_anthropic():
+ """JSON object mode — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input="List 3 programming languages and their main use cases. Reply ONLY with a JSON object, no markdown.",
+ text={"format": {"type": "json_object"}}
+ )
+ print(f" Output: {response.output_text[:300]}")
+ text = response.output_text.strip()
+ # Strip markdown code fence if present
+ if text.startswith("```"):
+ text = text.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+ parsed = json.loads(text)
+ assert isinstance(parsed, (dict, list)), "Should be valid JSON"
+
+
+def test_json_object_openai():
+ """JSON object mode — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input="List 3 programming languages and their main use cases as JSON",
+ text={"format": {"type": "json_object"}}
+ )
+ print(f" Output: {response.output_text[:300]}")
+ parsed = json.loads(response.output_text)
+ assert isinstance(parsed, (dict, list)), "Should be valid JSON"
+
+
+def test_json_object_gemini():
+ """JSON object mode — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input="List 3 programming languages and their main use cases as JSON",
+ text={"format": {"type": "json_object"}}
+ )
+ print(f" Output: {response.output_text[:300]}")
+ parsed = json.loads(response.output_text)
+ assert isinstance(parsed, (dict, list)), "Should be valid JSON"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 12. MULTI-TURN CONVERSATIONS
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_multiturn_shorthand_anthropic():
+ """Multi-turn shorthand format — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert "alice" in response.output_text.lower(), "Should remember name 'Alice'"
+
+
+def test_multiturn_explicit_anthropic():
+ """Multi-turn explicit format — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[
+ {"type": "message", "role": "user", "content": "My name is Alice."},
+ {"type": "message", "role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"type": "message", "role": "user", "content": "What is my name?"}
+ ]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert "alice" in response.output_text.lower(), "Should remember name 'Alice'"
+
+
+def test_multiturn_openai():
+ """Multi-turn — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input=[
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert "alice" in response.output_text.lower(), "Should remember name 'Alice'"
+
+
+def test_multiturn_gemini():
+ """Multi-turn — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input=[
+ {"role": "user", "content": "My name is Alice."},
+ {"role": "assistant", "content": "Hello Alice! How can I help you?"},
+ {"role": "user", "content": "What is my name?"}
+ ]
+ )
+ print(f" Output: {response.output_text[:200]}")
+ assert "alice" in response.output_text.lower(), "Should remember name 'Alice'"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 13. FUNCTION CALL RESULTS (multi-turn with tool output)
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_function_call_results_anthropic():
+ """Function call results in multi-turn — Anthropic"""
+ response = anthropic_client.responses.create(
+ model=ANTHROPIC_MODEL,
+ input=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"type": "function_call", "name": "get_weather", "call_id": "call_123", "arguments": '{"location": "Paris"}'},
+ {"type": "function_call_output", "call_id": "call_123", "output": '{"temp": "22°C", "condition": "sunny"}'}
+ ],
+ tools=[{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get weather for a location",
+ "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
+ }]
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "Should produce a response about Paris weather"
+
+
+def test_function_call_results_openai():
+ """Function call results in multi-turn — OpenAI"""
+ response = openai_client.responses.create(
+ model=OPENAI_MODEL,
+ input=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"type": "function_call", "name": "get_weather", "call_id": "call_123", "arguments": '{"location": "Paris"}'},
+ {"type": "function_call_output", "call_id": "call_123", "output": '{"temp": "22°C", "condition": "sunny"}'}
+ ],
+ tools=[{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get weather for a location",
+ "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
+ }]
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "Should produce a response about Paris weather"
+
+
+def test_function_call_results_gemini():
+ """Function call results in multi-turn — Gemini"""
+ response = gemini_client.responses.create(
+ model=GEMINI_MODEL,
+ input=[
+ {"role": "user", "content": "What's the weather in Paris?"},
+ {"type": "function_call", "name": "get_weather", "call_id": "call_123", "arguments": '{"location": "Paris"}'},
+ {"type": "function_call_output", "call_id": "call_123", "output": '{"temp": "22°C", "condition": "sunny"}'}
+ ],
+ tools=[{
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get weather for a location",
+ "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
+ }]
+ )
+ print(f" Output: {response.output_text[:300]}")
+ assert response.output_text, "Should produce a response about Paris weather"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# RUN ALL TESTS
+# ═══════════════════════════════════════════════════════════════════════════════
+
+if __name__ == "__main__":
+ tests = [
+ # 1. Quick Start
+ ("1a. Quick Start — Anthropic", test_quick_start_anthropic),
+ ("1b. Quick Start — OpenAI", test_quick_start_openai),
+ ("1c. Quick Start — Gemini", test_quick_start_gemini),
+
+ # 2. OpenAI SDK approach
+ ("2. OpenAI SDK with Portkey base URL", test_openai_sdk_approach),
+
+ # 3. Instructions / System Prompt
+ ("3a. Instructions param — Anthropic", test_instructions_param_anthropic),
+ ("3b. System message in input — Anthropic", test_instructions_system_message_anthropic),
+ ("3c. Instructions param — OpenAI", test_instructions_param_openai),
+ ("3d. Instructions param — Gemini", test_instructions_param_gemini),
+
+ # 4. Streaming
+ ("4a. Streaming — Anthropic", test_streaming_anthropic),
+ ("4b. Streaming — OpenAI", test_streaming_openai),
+ ("4c. Streaming — Gemini", test_streaming_gemini),
+
+ # 5. Tool Calling
+ ("5a. Tool Calling — Anthropic", test_tool_calling_anthropic),
+ ("5b. Tool Calling — OpenAI", test_tool_calling_openai),
+ ("5c. Tool Calling — Gemini", test_tool_calling_gemini),
+
+ # 6. Reasoning
+ ("6a. Reasoning effort — Anthropic", test_reasoning_effort_anthropic),
+ ("6b. Reasoning effort — OpenAI", test_reasoning_effort_openai),
+
+ # 7. Extended Thinking
+ ("7. Extended Thinking — Anthropic", test_extended_thinking_anthropic),
+
+ # 8. Prompt Caching
+ ("8a. Prompt Caching content — Anthropic", test_prompt_caching_anthropic),
+ ("8b. Prompt Caching tools — Anthropic", test_prompt_caching_tool_anthropic),
+
+ # 9. Vision
+ ("9a. Vision — Anthropic", test_vision_anthropic),
+ ("9b. Vision — OpenAI", test_vision_openai),
+ ("9c. Vision — Gemini", test_vision_gemini),
+
+ # 10. Structured Output — JSON Schema
+ ("10a. JSON Schema — Anthropic", test_json_schema_anthropic),
+ ("10b. JSON Schema — OpenAI", test_json_schema_openai),
+ ("10c. JSON Schema — Gemini", test_json_schema_gemini),
+
+ # 11. Structured Output — JSON Object
+ ("11a. JSON Object — Anthropic", test_json_object_anthropic),
+ ("11b. JSON Object — OpenAI", test_json_object_openai),
+ ("11c. JSON Object — Gemini", test_json_object_gemini),
+
+ # 12. Multi-turn Conversations
+ ("12a. Multi-turn shorthand — Anthropic", test_multiturn_shorthand_anthropic),
+ ("12b. Multi-turn explicit — Anthropic", test_multiturn_explicit_anthropic),
+ ("12c. Multi-turn — OpenAI", test_multiturn_openai),
+ ("12d. Multi-turn — Gemini", test_multiturn_gemini),
+
+ # 13. Function Call Results
+ ("13a. Function call results — Anthropic", test_function_call_results_anthropic),
+ ("13b. Function call results — OpenAI", test_function_call_results_openai),
+ ("13c. Function call results — Gemini", test_function_call_results_gemini),
+ ]
+
+ print("=" * 70)
+ print(" RESPONSES API — Full Snippet Test Suite")
+ print(f" Testing {len(tests)} snippets across Anthropic / OpenAI / Gemini")
+ print(f" Running ALL tests in parallel ({len(tests)} threads)")
+ print("=" * 70)
+
+ start = time.time()
+
+ with ThreadPoolExecutor(max_workers=len(tests)) as executor:
+ futures = {
+ executor.submit(run_test, name, fn): name
+ for name, fn in tests
+ }
+ for future in as_completed(futures):
+ future.result() # propagate any unexpected errors
+
+ elapsed = time.time() - start
+
+ # ─── Summary ─────────────────────────────────────────────────────────────
+ # Sort results in the original test-list order for readability
+ order = {name: idx for idx, (name, _) in enumerate(tests)}
+ results.sort(key=lambda r: order.get(r[0], 999))
+
+ print("\n\n" + "=" * 70)
+ print(" SUMMARY")
+ print("=" * 70)
+ for name, status, err in results:
+ icon = "✅" if status == "PASS" else "❌"
+ line = f" {icon} {name}"
+ if err:
+ line += f" — {err[:80]}"
+ print(line)
+
+ print(f"\n Total: {len(tests)} | Passed: {passed} | Failed: {failed} | Time: {elapsed:.1f}s")
+ print("=" * 70)