From b9a714f4685e3d062f3b8d31acb45e7b22cffb00 Mon Sep 17 00:00:00 2001
From: Milos Milutinovic <codemastermilos@gmail.com>
Date: Sun, 14 Jun 2026 18:46:40 +0200
Subject: [PATCH 1/5] feat(providers): add DeepInfra as a built-in inference
 provider (v2 only)

- Adds `deepinfra` as a built-in Providers v2 profile (`providers/deepinfra.yaml`)
  with inference category, Bearer auth, and `DEEPINFRA_API_KEY` discovery
- Adds `DEEPINFRA_PROFILE` to inference routing so `inference.local` works
  with the `deepinfra` provider type
- Fixes `build_backend_url` to strip `/v1` from request paths when the base
  URL contains `/v1/` as an internal segment (e.g. `api.deepinfra.com/v1/openai`),
  preventing double-versioned paths like `.../v1/openai/v1/chat/completions`
- Updates `docs/sandboxes/providers-v2.mdx` and `docs/sandboxes/manage-providers.mdx`
  with DeepInfra entries; replaces the old v1 workaround row in the tested-providers
  table (type `openai` with `OPENAI_API_KEY`) with a native `deepinfra` row

Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
---
 crates/openshell-core/src/inference.rs     | 25 ++++++++++++++++++++++
 crates/openshell-providers/src/profiles.rs |  1 +
 crates/openshell-router/src/backend.rs     | 19 +++++++++++++++-
 docs/sandboxes/manage-providers.mdx        |  3 ++-
 docs/sandboxes/providers-v2.mdx            |  1 +
 providers/deepinfra.yaml                   | 24 +++++++++++++++++++++
 6 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 providers/deepinfra.yaml

diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs
index 3071d53cd..fc48ef142 100644
--- a/crates/openshell-core/src/inference.rs
+++ b/crates/openshell-core/src/inference.rs
@@ -155,6 +155,17 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
     passthrough_headers: &["x-model-id"],
 };
 
+static DEEPINFRA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
+    provider_type: "deepinfra",
+    default_base_url: "https://api.deepinfra.com/v1/openai",
+    protocols: OPENAI_PROTOCOLS,
+    credential_key_names: &["DEEPINFRA_API_KEY"],
+    base_url_config_keys: &["DEEPINFRA_BASE_URL"],
+    auth: AuthHeader::Bearer,
+    default_headers: &[],
+    passthrough_headers: &["x-model-id"],
+};
+
 /// Canonicalize an inference provider type string to a well-known identifier.
 ///
 /// Returns `Some(canonical_name)` for recognized inference providers,
@@ -167,6 +178,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
         "openai" => Some("openai"),
         "anthropic" => Some("anthropic"),
         "nvidia" => Some("nvidia"),
+        "deepinfra" => Some("deepinfra"),
         "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
             Some("google-vertex-ai")
         }
@@ -183,6 +195,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
         "openai" => Some(&OPENAI_PROFILE),
         "anthropic" => Some(&ANTHROPIC_PROFILE),
         "nvidia" => Some(&NVIDIA_PROFILE),
+        "deepinfra" => Some(&DEEPINFRA_PROFILE),
         "google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
         _ => None,
     }
@@ -303,9 +316,21 @@ mod tests {
         assert!(profile_for("openai").is_some());
         assert!(profile_for("anthropic").is_some());
         assert!(profile_for("nvidia").is_some());
+        assert!(profile_for("deepinfra").is_some());
         assert!(profile_for("OpenAI").is_some()); // case insensitive
     }
 
+    #[test]
+    fn profile_for_deepinfra() {
+        let profile = profile_for("deepinfra").expect("deepinfra profile should exist");
+        assert_eq!(profile.provider_type, "deepinfra");
+        assert_eq!(
+            profile.default_base_url,
+            "https://api.deepinfra.com/v1/openai"
+        );
+        assert_eq!(profile.auth, AuthHeader::Bearer);
+    }
+
     #[test]
     fn openai_compatible_profiles_include_embeddings() {
         for provider_type in ["openai", "nvidia"] {
diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs
index d2a35ca80..d31085b64 100644
--- a/crates/openshell-providers/src/profiles.rs
+++ b/crates/openshell-providers/src/profiles.rs
@@ -24,6 +24,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
     include_str!("../../../providers/codex.yaml"),
     include_str!("../../../providers/copilot.yaml"),
     include_str!("../../../providers/cursor.yaml"),
+    include_str!("../../../providers/deepinfra.yaml"),
     include_str!("../../../providers/github.yaml"),
     include_str!("../../../providers/google-vertex-ai.yaml"),
     include_str!("../../../providers/nvidia.yaml"),
diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index 9eb63c88b..8a1fbf570 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -768,7 +768,11 @@ fn build_provider_url(
 
 fn build_backend_url(endpoint: &str, path: &str) -> String {
     let base = endpoint.trim_end_matches('/');
-    if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) {
+    // Strip the /v1 prefix from the request path when the base URL already
+    // contains a /v1 segment — either ending with it (e.g. openai, nvidia)
+    // or containing it internally (e.g. deepinfra: /v1/openai).
+    let base_has_v1 = base.ends_with("/v1") || base.contains("/v1/");
+    if base_has_v1 && (path == "/v1" || path.starts_with("/v1/")) {
         return format!("{base}{}", &path[3..]);
     }
 
@@ -831,6 +835,19 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_backend_url_dedupes_v1_for_base_with_v1_subpath() {
+        // DeepInfra's base URL has /v1 inside its path (/v1/openai), so the
+        // leading /v1 of the request path must be stripped, not repeated.
+        assert_eq!(
+            build_backend_url(
+                "https://api.deepinfra.com/v1/openai",
+                "/v1/chat/completions"
+            ),
+            "https://api.deepinfra.com/v1/openai/chat/completions"
+        );
+    }
+
     fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
         ResolvedRoute {
             name: "inference.local".to_string(),
diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx
index a6b9654d0..beeb3c1d9 100644
--- a/docs/sandboxes/manage-providers.mdx
+++ b/docs/sandboxes/manage-providers.mdx
@@ -250,6 +250,7 @@ The following provider types are supported.
 | `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API |
 | `codex` | `OPENAI_API_KEY` | OpenAI Codex |
 | `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | GitHub Copilot CLI |
+| `deepinfra` | `DEEPINFRA_API_KEY` | DeepInfra inference API |
 | `generic` | User-defined | Any service with custom credentials |
 | `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). |
 | `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI |
@@ -278,7 +279,7 @@ The following providers have been tested with `inference.local`. Any provider th
 | Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` |
 | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` |
 | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
-| Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
+| DeepInfra | `deepinfra` | `deepinfra` | `https://api.deepinfra.com/v1/openai` | `DEEPINFRA_API_KEY` |
 | Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` |
 | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` |
 | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` |
diff --git a/docs/sandboxes/providers-v2.mdx b/docs/sandboxes/providers-v2.mdx
index 1456c5cfa..f78d9e060 100644
--- a/docs/sandboxes/providers-v2.mdx
+++ b/docs/sandboxes/providers-v2.mdx
@@ -97,6 +97,7 @@ Built-in Providers v2 profiles currently include:
 | `codex` | `agent` | `CODEX_AUTH_ACCESS_TOKEN`, `CODEX_AUTH_REFRESH_TOKEN`, `CODEX_AUTH_ACCOUNT_ID`, `CODEX_AUTH_ID_TOKEN` |
 | `copilot` | `agent` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` |
 | `cursor` | `agent` | None |
+| `deepinfra` | `inference` | `DEEPINFRA_API_KEY` |
 | `github` | `source_control` | `GITHUB_TOKEN`, `GH_TOKEN` |
 | `google-vertex-ai` | `inference` | `GOOGLE_SERVICE_ACCOUNT_KEY`, `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `GOOGLE_VERTEX_AI_TOKEN`, `VERTEX_AI_TOKEN` |
 | `nvidia` | `inference` | `NVIDIA_API_KEY` |
diff --git a/providers/deepinfra.yaml b/providers/deepinfra.yaml
new file mode 100644
index 000000000..ffc2e3e41
--- /dev/null
+++ b/providers/deepinfra.yaml
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+id: deepinfra
+display_name: DeepInfra
+description: DeepInfra inference endpoints
+category: inference
+inference_capable: true
+credentials:
+  - name: api_key
+    description: DeepInfra API key
+    env_vars: [DEEPINFRA_API_KEY]
+    required: true
+    auth_style: bearer
+    header_name: authorization
+discovery:
+  credentials: [api_key]
+endpoints:
+  - host: api.deepinfra.com
+    port: 443
+    protocol: rest
+    access: read-write
+    enforcement: enforce
+binaries: [/usr/bin/curl, /usr/local/bin/curl]

From 1c3fc5bd47f91f22038eddafb49026ae675f9295 Mon Sep 17 00:00:00 2001
From: Milos Milutinovic <codemastermilos@gmail.com>
Date: Mon, 15 Jun 2026 20:46:56 +0200
Subject: [PATCH 2/5] fix(providers): address gator review findings for
 DeepInfra provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Narrow build_backend_url /v1 dedupe to URLs whose path component is
  exactly /v1 or starts with /v1/ — prevents regression on proxy
  endpoints where /v1 is buried deeper (e.g. /api/v1/openai); add
  regression test for the nested proxy path case
- Add deepinfra provider plugin with DEEPINFRA_API_KEY discovery,
  registered in ProviderRegistry so known_types() and TUI include it
- Add deepinfra to unsupported-inference-provider error message in
  openshell-server for accurate user-facing debugging guidance
- Add deepinfra to openai_compatible_profiles_include_embeddings test
  to lock in the OpenAI-compatible protocol contract

Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
---
 crates/openshell-core/src/inference.rs        |  2 +-
 crates/openshell-providers/src/lib.rs         |  1 +
 .../src/providers/deepinfra.rs                | 15 +++++++++
 .../openshell-providers/src/providers/mod.rs  |  1 +
 crates/openshell-router/src/backend.rs        | 33 ++++++++++++++++---
 crates/openshell-server/src/inference.rs      |  2 +-
 6 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 crates/openshell-providers/src/providers/deepinfra.rs

diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs
index fc48ef142..141423f8e 100644
--- a/crates/openshell-core/src/inference.rs
+++ b/crates/openshell-core/src/inference.rs
@@ -333,7 +333,7 @@ mod tests {
 
     #[test]
     fn openai_compatible_profiles_include_embeddings() {
-        for provider_type in ["openai", "nvidia"] {
+        for provider_type in ["openai", "nvidia", "deepinfra"] {
             let profile = profile_for(provider_type).expect("provider profile should exist");
             assert!(
                 profile.protocols.contains(&"openai_embeddings"),
diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs
index 1d0d5a192..1f0497327 100644
--- a/crates/openshell-providers/src/lib.rs
+++ b/crates/openshell-providers/src/lib.rs
@@ -116,6 +116,7 @@ impl ProviderRegistry {
         registry.register(providers::openai::SPEC);
         registry.register(providers::anthropic::SPEC);
         registry.register(providers::nvidia::SPEC);
+        registry.register(providers::deepinfra::SPEC);
         registry.register(providers::gitlab::SPEC);
         registry.register(providers::github::SPEC);
         registry.register(providers::outlook::OutlookProvider);
diff --git a/crates/openshell-providers/src/providers/deepinfra.rs b/crates/openshell-providers/src/providers/deepinfra.rs
new file mode 100644
index 000000000..6d1f3f5ba
--- /dev/null
+++ b/crates/openshell-providers/src/providers/deepinfra.rs
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::ProviderDiscoverySpec;
+
+pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
+    id: "deepinfra",
+    credential_env_vars: &["DEEPINFRA_API_KEY"],
+};
+
+test_discovers_env_credential!(
+    discovers_deepinfra_env_credentials,
+    "DEEPINFRA_API_KEY",
+    "di-test-123"
+);
diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs
index dfe5935a1..ddb8270cf 100644
--- a/crates/openshell-providers/src/providers/mod.rs
+++ b/crates/openshell-providers/src/providers/mod.rs
@@ -34,6 +34,7 @@ pub mod anthropic;
 pub mod claude;
 pub mod codex;
 pub mod copilot;
+pub mod deepinfra;
 pub mod generic;
 pub mod github;
 pub mod gitlab;
diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index 8a1fbf570..ac2bf1d4f 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -768,11 +768,20 @@ fn build_provider_url(
 
 fn build_backend_url(endpoint: &str, path: &str) -> String {
     let base = endpoint.trim_end_matches('/');
-    // Strip the /v1 prefix from the request path when the base URL already
-    // contains a /v1 segment — either ending with it (e.g. openai, nvidia)
-    // or containing it internally (e.g. deepinfra: /v1/openai).
-    let base_has_v1 = base.ends_with("/v1") || base.contains("/v1/");
-    if base_has_v1 && (path == "/v1" || path.starts_with("/v1/")) {
+    // Strip the /v1 prefix from the request path only when the base URL's
+    // own path component is exactly /v1 (e.g. openai, nvidia: ".../v1") or
+    // starts with /v1/ (e.g. deepinfra: ".../v1/openai"). This avoids
+    // doubling /v1 for those providers while preserving the full path for
+    // proxy endpoints where /v1 appears deeper in the path
+    // (e.g. "https://proxy.example/api/v1/openai").
+    let base_path_is_v1_rooted = base
+        .find("://")
+        .and_then(|i| base[i + 3..].find('/').map(|j| i + 3 + j))
+        .is_some_and(|path_start| {
+            let base_path = &base[path_start..];
+            base_path == "/v1" || base_path.starts_with("/v1/")
+        });
+    if base_path_is_v1_rooted && (path == "/v1" || path.starts_with("/v1/")) {
         return format!("{base}{}", &path[3..]);
     }
 
@@ -848,6 +857,20 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_backend_url_preserves_v1_for_nested_proxy_path() {
+        // A proxy whose base path has /v1 buried deeper (not at the root of
+        // the path) must NOT have /v1 stripped — the full request path must
+        // be appended so the upstream receives the correct API version prefix.
+        assert_eq!(
+            build_backend_url(
+                "https://proxy.example/api/v1/openai",
+                "/v1/chat/completions"
+            ),
+            "https://proxy.example/api/v1/openai/v1/chat/completions"
+        );
+    }
+
     fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
         ResolvedRoute {
             name: "inference.local".to_string(),
diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs
index 13496cd99..46ee01e68 100644
--- a/crates/openshell-server/src/inference.rs
+++ b/crates/openshell-server/src/inference.rs
@@ -620,7 +620,7 @@ fn resolve_provider_route(
     let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
         Status::invalid_argument(format!(
             "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \
-                 (supported: openai, anthropic, nvidia, google-vertex-ai)",
+                 (supported: openai, anthropic, nvidia, deepinfra, google-vertex-ai)",
             name = provider.object_name()
         ))
     })?;

From ce0ccda6aa14b8ee33e1fa0c9709e7695bbe4df1 Mon Sep 17 00:00:00 2001
From: Milos Milutinovic <codemastermilos@gmail.com>
Date: Mon, 15 Jun 2026 20:59:15 +0200
Subject: [PATCH 3/5] fix(router): handle /v1 as final path segment in
 build_backend_url dedup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends the /v1 deduplication logic to also strip /v1 from request paths
when the base URL's path ends with /v1 (e.g. https://api.groq.com/openai/v1).
The previous fix only matched base paths that were exactly /v1 or started
with /v1/, which regressed providers like Groq whose base path has /v1 as
the last segment rather than the first. The nested-proxy exclusion (e.g.
/api/v1/openai) is preserved since /v1 appears in the middle — neither the
first nor the last segment. Adds a regression test for the Groq-style base
URL.

Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
---
 crates/openshell-router/src/backend.rs | 39 ++++++++++++++++++--------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index ac2bf1d4f..59302d67e 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -768,20 +768,21 @@ fn build_provider_url(
 
 fn build_backend_url(endpoint: &str, path: &str) -> String {
     let base = endpoint.trim_end_matches('/');
-    // Strip the /v1 prefix from the request path only when the base URL's
-    // own path component is exactly /v1 (e.g. openai, nvidia: ".../v1") or
-    // starts with /v1/ (e.g. deepinfra: ".../v1/openai"). This avoids
-    // doubling /v1 for those providers while preserving the full path for
-    // proxy endpoints where /v1 appears deeper in the path
-    // (e.g. "https://proxy.example/api/v1/openai").
-    let base_path_is_v1_rooted = base
+    // Strip the /v1 prefix from the request path when the base URL's path
+    // component has /v1 as its first segment (e.g. openai/nvidia: "/v1",
+    // deepinfra: "/v1/openai") or its final segment (e.g. groq:
+    // "/openai/v1"). This covers all known provider shapes while preserving
+    // the full path for proxy endpoints where /v1 is buried in the middle
+    // (e.g. "https://proxy.example/api/v1/openai" → path "/api/v1/openai",
+    // neither first nor last segment).
+    let base_path_has_v1_edge_segment = base
         .find("://")
         .and_then(|i| base[i + 3..].find('/').map(|j| i + 3 + j))
         .is_some_and(|path_start| {
             let base_path = &base[path_start..];
-            base_path == "/v1" || base_path.starts_with("/v1/")
+            base_path.starts_with("/v1/") || base_path.ends_with("/v1")
         });
-    if base_path_is_v1_rooted && (path == "/v1" || path.starts_with("/v1/")) {
+    if base_path_has_v1_edge_segment && (path == "/v1" || path.starts_with("/v1/")) {
         return format!("{base}{}", &path[3..]);
     }
 
@@ -857,11 +858,25 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_backend_url_dedupes_v1_for_base_ending_with_v1() {
+        // Providers like Groq use a base URL where /v1 is the final segment
+        // below a non-root prefix (e.g. /openai/v1). The /v1 in the request
+        // path must still be stripped so it is not doubled.
+        assert_eq!(
+            build_backend_url(
+                "https://api.groq.com/openai/v1",
+                "/v1/chat/completions"
+            ),
+            "https://api.groq.com/openai/v1/chat/completions"
+        );
+    }
+
     #[test]
     fn build_backend_url_preserves_v1_for_nested_proxy_path() {
-        // A proxy whose base path has /v1 buried deeper (not at the root of
-        // the path) must NOT have /v1 stripped — the full request path must
-        // be appended so the upstream receives the correct API version prefix.
+        // A proxy whose base path has /v1 buried in the middle (neither first
+        // nor last segment) must NOT have /v1 stripped — the full request
+        // path must be appended so the upstream receives the correct prefix.
         assert_eq!(
             build_backend_url(
                 "https://proxy.example/api/v1/openai",

From d4e5e980c697a45ae1023af98c7bf058fc717464 Mon Sep 17 00:00:00 2001
From: Milos Milutinovic <codemastermilos@gmail.com>
Date: Mon, 15 Jun 2026 21:32:55 +0200
Subject: [PATCH 4/5] fix(providers): add deepinfra telemetry bucket and update
 profile list test

- Add a `Deepinfra` variant to the ProviderProfile telemetry enum, together
  with its "deepinfra" string label and from_raw() mapping, so deepinfra
  providers are tracked in their own bucket rather than falling through to
  Custom
- Map deepinfra in telemetry_provider_profile() in openshell-server
- Add deepinfra to list_provider_profiles_returns_built_in_profile_categories
  test (sorted between cursor and github)
- Update architecture/gateway.md inference provider list to include deepinfra

Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
---
 architecture/gateway.md                      | 2 +-
 crates/openshell-core/src/telemetry.rs       | 3 +++
 crates/openshell-server/src/grpc/provider.rs | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/architecture/gateway.md b/architecture/gateway.md
index 7afec0767..72d6daf63 100644
--- a/architecture/gateway.md
+++ b/architecture/gateway.md
@@ -265,7 +265,7 @@ Cluster inference routes store only `provider_name`, `model_id`, and optional
 timeout. The gateway resolves endpoint URLs, protocols, credentials, auth
 style, and route-shaping metadata from the provider record when supervisors call
 `GetInferenceBundle`. Supported provider types for cluster inference are
-`openai`, `anthropic`, `nvidia`, and `google-vertex-ai`.
+`openai`, `anthropic`, `nvidia`, `deepinfra`, and `google-vertex-ai`.
 
 The bundle carries enough information for sandbox-local routers to construct
 upstream URLs without re-deriving provider-specific routing logic. Each resolved
diff --git a/crates/openshell-core/src/telemetry.rs b/crates/openshell-core/src/telemetry.rs
index 96f68d35c..49ce620f4 100644
--- a/crates/openshell-core/src/telemetry.rs
+++ b/crates/openshell-core/src/telemetry.rs
@@ -205,6 +205,7 @@ pub enum ProviderProfile {
     Claude,
     Codex,
     Copilot,
+    Deepinfra,
     Github,
     Gitlab,
     Nvidia,
@@ -222,6 +223,7 @@ impl ProviderProfile {
             Self::Claude => "claude",
             Self::Codex => "codex",
             Self::Copilot => "copilot",
+            Self::Deepinfra => "deepinfra",
             Self::Github => "github",
             Self::Gitlab => "gitlab",
             Self::Nvidia => "nvidia",
@@ -239,6 +241,7 @@ impl ProviderProfile {
             "claude" | "claude-code" => Self::Claude,
             "codex" => Self::Codex,
             "copilot" => Self::Copilot,
+            "deepinfra" => Self::Deepinfra,
             "github" | "gh" => Self::Github,
             "gitlab" | "glab" => Self::Gitlab,
             "nvidia" => Self::Nvidia,
diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs
index 641118206..3f760e834 100644
--- a/crates/openshell-server/src/grpc/provider.rs
+++ b/crates/openshell-server/src/grpc/provider.rs
@@ -2148,6 +2148,7 @@ fn telemetry_provider_profile(provider_type: &str) -> TelemetryProviderProfile {
         Some("claude" | "claude-code") => TelemetryProviderProfile::Claude,
         Some("codex") => TelemetryProviderProfile::Codex,
         Some("copilot") => TelemetryProviderProfile::Copilot,
+        Some("deepinfra") => TelemetryProviderProfile::Deepinfra,
         Some("github") => TelemetryProviderProfile::Github,
         Some("gitlab") => TelemetryProviderProfile::Gitlab,
         Some("nvidia") => TelemetryProviderProfile::Nvidia,
@@ -2646,6 +2647,7 @@ mod tests {
                 "codex",
                 "copilot",
                 "cursor",
+                "deepinfra",
                 "github",
                 "google-vertex-ai",
                 "nvidia",

From 3fd8231eb5c996a0b739fc5309fe7c2dd3882dd4 Mon Sep 17 00:00:00 2001
From: Milos Milutinovic <codemastermilos@gmail.com>
Date: Tue, 16 Jun 2026 00:33:43 +0200
Subject: [PATCH 5/5] style(router): apply cargo fmt to backend.rs

Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
---
 crates/openshell-router/src/backend.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index 59302d67e..da99cd783 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -864,10 +864,7 @@ mod tests {
         // below a non-root prefix (e.g. /openai/v1). The /v1 in the request
         // path must still be stripped so it is not doubled.
         assert_eq!(
-            build_backend_url(
-                "https://api.groq.com/openai/v1",
-                "/v1/chat/completions"
-            ),
+            build_backend_url("https://api.groq.com/openai/v1", "/v1/chat/completions"),
             "https://api.groq.com/openai/v1/chat/completions"
         );
     }