Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion architecture/gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ Cluster inference routes store only `provider_name`, `model_id`, and optional
timeout. The gateway resolves endpoint URLs, protocols, credentials, auth
style, and route-shaping metadata from the provider record when supervisors call
`GetInferenceBundle`. Supported provider types for cluster inference are
`openai`, `anthropic`, `nvidia`, and `google-vertex-ai`.
`openai`, `anthropic`, `nvidia`, `deepinfra`, and `google-vertex-ai`.

The bundle carries enough information for sandbox-local routers to construct
upstream URLs without re-deriving provider-specific routing logic. Each resolved
Expand Down
27 changes: 26 additions & 1 deletion crates/openshell-core/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,17 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
passthrough_headers: &["x-model-id"],
};

/// Built-in inference profile for the `deepinfra` provider type.
///
/// The profile declares the OpenAI-compatible protocol set
/// (`OPENAI_PROTOCOLS`), a default base URL of
/// `https://api.deepinfra.com/v1/openai`, `AuthHeader::Bearer` as the auth
/// style, no default headers, and `x-model-id` as the only passthrough header.
static DEEPINFRA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
provider_type: "deepinfra",
// Note: the default base path is `/v1/openai`, not a bare `/v1`.
default_base_url: "https://api.deepinfra.com/v1/openai",
protocols: OPENAI_PROTOCOLS,
// Credential key name associated with this provider.
credential_key_names: &["DEEPINFRA_API_KEY"],
// NOTE(review): presumably this config key overrides `default_base_url`;
// confirm against the code that consumes `base_url_config_keys`.
base_url_config_keys: &["DEEPINFRA_BASE_URL"],
auth: AuthHeader::Bearer,
default_headers: &[],
passthrough_headers: &["x-model-id"],
};

/// Canonicalize an inference provider type string to a well-known identifier.
///
/// Returns `Some(canonical_name)` for recognized inference providers,
Expand All @@ -167,6 +178,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
"openai" => Some("openai"),
"anthropic" => Some("anthropic"),
"nvidia" => Some("nvidia"),
"deepinfra" => Some("deepinfra"),
"google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
Some("google-vertex-ai")
}
Expand All @@ -183,6 +195,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
"openai" => Some(&OPENAI_PROFILE),
"anthropic" => Some(&ANTHROPIC_PROFILE),
"nvidia" => Some(&NVIDIA_PROFILE),
"deepinfra" => Some(&DEEPINFRA_PROFILE),
"google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
_ => None,
}
Expand Down Expand Up @@ -303,12 +316,24 @@ mod tests {
assert!(profile_for("openai").is_some());
assert!(profile_for("anthropic").is_some());
assert!(profile_for("nvidia").is_some());
assert!(profile_for("deepinfra").is_some());
assert!(profile_for("OpenAI").is_some()); // case insensitive
}

#[test]
fn profile_for_deepinfra() {
    // Looking up the canonical name must yield the DeepInfra profile.
    let deepinfra = profile_for("deepinfra").expect("deepinfra profile should exist");

    // The profile must carry DeepInfra's identity, its OpenAI-compatible base
    // URL, and bearer authentication.
    let expected_base_url = "https://api.deepinfra.com/v1/openai";
    assert_eq!(deepinfra.provider_type, "deepinfra");
    assert_eq!(deepinfra.default_base_url, expected_base_url);
    assert_eq!(deepinfra.auth, AuthHeader::Bearer);
}

#[test]
fn openai_compatible_profiles_include_embeddings() {
for provider_type in ["openai", "nvidia"] {
for provider_type in ["openai", "nvidia", "deepinfra"] {
let profile = profile_for(provider_type).expect("provider profile should exist");
assert!(
profile.protocols.contains(&"openai_embeddings"),
Expand Down
3 changes: 3 additions & 0 deletions crates/openshell-core/src/telemetry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ pub enum ProviderProfile {
Claude,
Codex,
Copilot,
Deepinfra,
Github,
Gitlab,
Nvidia,
Expand All @@ -222,6 +223,7 @@ impl ProviderProfile {
Self::Claude => "claude",
Self::Codex => "codex",
Self::Copilot => "copilot",
Self::Deepinfra => "deepinfra",
Self::Github => "github",
Self::Gitlab => "gitlab",
Self::Nvidia => "nvidia",
Expand All @@ -239,6 +241,7 @@ impl ProviderProfile {
"claude" | "claude-code" => Self::Claude,
"codex" => Self::Codex,
"copilot" => Self::Copilot,
"deepinfra" => Self::Deepinfra,
"github" | "gh" => Self::Github,
"gitlab" | "glab" => Self::Gitlab,
"nvidia" => Self::Nvidia,
Expand Down
1 change: 1 addition & 0 deletions crates/openshell-providers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ impl ProviderRegistry {
registry.register(providers::openai::SPEC);
registry.register(providers::anthropic::SPEC);
registry.register(providers::nvidia::SPEC);
registry.register(providers::deepinfra::SPEC);
registry.register(providers::gitlab::SPEC);
registry.register(providers::github::SPEC);
registry.register(providers::outlook::OutlookProvider);
Expand Down
1 change: 1 addition & 0 deletions crates/openshell-providers/src/profiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
include_str!("../../../providers/codex.yaml"),
include_str!("../../../providers/copilot.yaml"),
include_str!("../../../providers/cursor.yaml"),
include_str!("../../../providers/deepinfra.yaml"),
include_str!("../../../providers/github.yaml"),
include_str!("../../../providers/google-vertex-ai.yaml"),
include_str!("../../../providers/nvidia.yaml"),
Expand Down
15 changes: 15 additions & 0 deletions crates/openshell-providers/src/providers/deepinfra.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::ProviderDiscoverySpec;

/// Discovery spec for the `deepinfra` provider.
///
/// Identifies the provider by the id `deepinfra` and names
/// `DEEPINFRA_API_KEY` as the only credential environment variable.
pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
id: "deepinfra",
credential_env_vars: &["DEEPINFRA_API_KEY"],
};

// Invokes the `test_discovers_env_credential!` macro (defined outside this
// file) with a test name, the `DEEPINFRA_API_KEY` variable name, and a dummy
// key value.
// NOTE(review): presumably this expands to a test asserting that discovery
// picks the credential up from the environment; confirm against the macro.
test_discovers_env_credential!(
discovers_deepinfra_env_credentials,
"DEEPINFRA_API_KEY",
"di-test-123"
);
1 change: 1 addition & 0 deletions crates/openshell-providers/src/providers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub mod anthropic;
pub mod claude;
pub mod codex;
pub mod copilot;
pub mod deepinfra;
pub mod generic;
pub mod github;
pub mod gitlab;
Expand Down
54 changes: 53 additions & 1 deletion crates/openshell-router/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,21 @@ fn build_provider_url(

fn build_backend_url(endpoint: &str, path: &str) -> String {
let base = endpoint.trim_end_matches('/');
if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) {
// Strip the /v1 prefix from the request path when the base URL's path
// component has /v1 as its first segment (e.g. openai/nvidia: "/v1",
// deepinfra: "/v1/openai") or its final segment (e.g. groq:
// "/openai/v1"). This covers all known provider shapes while preserving
// the full path for proxy endpoints where /v1 is buried in the middle
// (e.g. "https://proxy.example/api/v1/openai" → path "/api/v1/openai",
// neither first nor last segment).
let base_path_has_v1_edge_segment = base
.find("://")
.and_then(|i| base[i + 3..].find('/').map(|j| i + 3 + j))
.is_some_and(|path_start| {
let base_path = &base[path_start..];
base_path.starts_with("/v1/") || base_path.ends_with("/v1")
});
if base_path_has_v1_edge_segment && (path == "/v1" || path.starts_with("/v1/")) {
return format!("{base}{}", &path[3..]);
}

Expand Down Expand Up @@ -831,6 +845,44 @@ mod tests {
);
}

#[test]
fn build_backend_url_dedupes_v1_for_base_with_v1_subpath() {
    // DeepInfra's base path begins with `/v1` (`/v1/openai`). The request
    // path's own `/v1` prefix must be dropped, otherwise the upstream URL
    // would contain the `/v1` segment twice.
    let endpoint = "https://api.deepinfra.com/v1/openai";
    let request_path = "/v1/chat/completions";

    let url = build_backend_url(endpoint, request_path);

    assert_eq!(url, "https://api.deepinfra.com/v1/openai/chat/completions");
}

#[test]
fn build_backend_url_dedupes_v1_for_base_ending_with_v1() {
    // Groq-style base URLs end in `/v1` beneath a non-root prefix
    // (`/openai/v1`). The request path's leading `/v1` must be removed so the
    // segment appears only once in the final URL.
    let endpoint = "https://api.groq.com/openai/v1";
    let request_path = "/v1/chat/completions";

    let url = build_backend_url(endpoint, request_path);

    assert_eq!(url, "https://api.groq.com/openai/v1/chat/completions");
}

#[test]
fn build_backend_url_preserves_v1_for_nested_proxy_path() {
    // When `/v1` sits in the middle of the proxy's base path (neither the
    // first nor the last segment), nothing may be stripped: the request path
    // is appended in full so the upstream sees the complete prefix.
    let endpoint = "https://proxy.example/api/v1/openai";
    let request_path = "/v1/chat/completions";

    let url = build_backend_url(endpoint, request_path);

    assert_eq!(
        url,
        "https://proxy.example/api/v1/openai/v1/chat/completions"
    );
}

fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
ResolvedRoute {
name: "inference.local".to_string(),
Expand Down
2 changes: 2 additions & 0 deletions crates/openshell-server/src/grpc/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2148,6 +2148,7 @@ fn telemetry_provider_profile(provider_type: &str) -> TelemetryProviderProfile {
Some("claude" | "claude-code") => TelemetryProviderProfile::Claude,
Some("codex") => TelemetryProviderProfile::Codex,
Some("copilot") => TelemetryProviderProfile::Copilot,
Some("deepinfra") => TelemetryProviderProfile::Deepinfra,
Some("github") => TelemetryProviderProfile::Github,
Some("gitlab") => TelemetryProviderProfile::Gitlab,
Some("nvidia") => TelemetryProviderProfile::Nvidia,
Expand Down Expand Up @@ -2646,6 +2647,7 @@ mod tests {
"codex",
"copilot",
"cursor",
"deepinfra",
"github",
"google-vertex-ai",
"nvidia",
Expand Down
2 changes: 1 addition & 1 deletion crates/openshell-server/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ fn resolve_provider_route(
let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
Status::invalid_argument(format!(
"provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \
(supported: openai, anthropic, nvidia, google-vertex-ai)",
(supported: openai, anthropic, nvidia, deepinfra, google-vertex-ai)",
name = provider.object_name()
))
})?;
Expand Down
3 changes: 2 additions & 1 deletion docs/sandboxes/manage-providers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ The following provider types are supported.
| `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API |
| `codex` | `OPENAI_API_KEY` | OpenAI Codex |
| `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | GitHub Copilot CLI |
| `deepinfra` | `DEEPINFRA_API_KEY` | DeepInfra inference API |
| `generic` | User-defined | Any service with custom credentials |
| `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). |
| `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI |
Expand Down Expand Up @@ -278,7 +279,7 @@ The following providers have been tested with `inference.local`. Any provider th
| Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` |
| Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` |
| Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
| Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
| DeepInfra | `deepinfra` | `deepinfra` | `https://api.deepinfra.com/v1/openai` | `DEEPINFRA_API_KEY` |
| Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` |
| Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` |
| LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` |
Expand Down
1 change: 1 addition & 0 deletions docs/sandboxes/providers-v2.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Built-in Providers v2 profiles currently include:
| `codex` | `agent` | `CODEX_AUTH_ACCESS_TOKEN`, `CODEX_AUTH_REFRESH_TOKEN`, `CODEX_AUTH_ACCOUNT_ID`, `CODEX_AUTH_ID_TOKEN` |
| `copilot` | `agent` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` |
| `cursor` | `agent` | None |
| `deepinfra` | `inference` | `DEEPINFRA_API_KEY` |
| `github` | `source_control` | `GITHUB_TOKEN`, `GH_TOKEN` |
| `google-vertex-ai` | `inference` | `GOOGLE_SERVICE_ACCOUNT_KEY`, `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `GOOGLE_VERTEX_AI_TOKEN`, `VERTEX_AI_TOKEN` |
| `nvidia` | `inference` | `NVIDIA_API_KEY` |
Expand Down
24 changes: 24 additions & 0 deletions providers/deepinfra.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

id: deepinfra
display_name: DeepInfra
description: DeepInfra inference endpoints
category: inference
inference_capable: true
credentials:
- name: api_key
description: DeepInfra API key
env_vars: [DEEPINFRA_API_KEY]
required: true
auth_style: bearer
header_name: authorization
discovery:
credentials: [api_key]
endpoints:
- host: api.deepinfra.com
port: 443
protocol: rest
access: read-write
enforcement: enforce
binaries: [/usr/bin/curl, /usr/local/bin/curl]
Loading