4 changes: 2 additions & 2 deletions architecture/README.md
@@ -34,7 +34,7 @@ flowchart TB
subgraph EXT["External Services"]
HOSTS["Allowed Hosts (github.com, api.anthropic.com, ...)"]
CREDS["Provider APIs (Claude, GitHub, GitLab, ...)"]
BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, local)"]
BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, Groq, local)"]
end

CLI -- "gRPC / HTTPS" --> SERVER
@@ -155,7 +155,7 @@ AI agents typically need credentials to access external services -- an API key f

The provider system handles:

- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, GitHub, GitLab, and others.
- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, Groq, GitHub, GitLab, and others.
- **Secure storage**: Credentials are stored on the gateway, separate from sandbox definitions. They never appear in Kubernetes pod specifications.
- **Runtime injection**: When a sandbox starts, the supervisor process fetches the credentials from the gateway via gRPC and injects them as environment variables into every process it spawns (both the initial agent process and any SSH sessions).
- **CLI management**: Users can create, update, list, and delete providers through standard CLI commands.
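The runtime-injection step above can be sketched in a few lines. This is an illustrative standalone sketch, not the supervisor's real code: `fetch_credentials` stands in for the gRPC call to the gateway, and the credential values are placeholders.

```rust
use std::collections::HashMap;
use std::process::Command;

/// Stand-in for the supervisor's gRPC fetch of provider credentials
/// from the gateway; here it just returns a static map.
fn fetch_credentials() -> HashMap<String, String> {
    HashMap::from([
        ("GROQ_API_KEY".to_string(), "gsk-example".to_string()),
        ("OPENAI_API_KEY".to_string(), "sk-example".to_string()),
    ])
}

/// Build a child-process command with every fetched credential
/// injected as an environment variable, mirroring how the supervisor
/// treats both the initial agent process and SSH sessions.
fn spawn_with_credentials(program: &str) -> Command {
    let mut cmd = Command::new(program);
    for (key, value) in fetch_credentials() {
        cmd.env(key, value); // injected into the spawned process only
    }
    cmd
}

fn main() {
    let cmd = spawn_with_credentials("sh");
    // Inspect the configured environment without actually spawning.
    let keys: Vec<_> = cmd
        .get_envs()
        .map(|(k, _)| k.to_string_lossy().into_owned())
        .collect();
    println!("{keys:?}");
}
```

Because the credentials travel through the process environment rather than the pod spec, they never need to appear in any Kubernetes object.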
8 changes: 5 additions & 3 deletions architecture/inference-routing.md
@@ -41,13 +41,14 @@ File: `crates/openshell-core/src/inference.rs`

`InferenceProviderProfile` is the single source of truth for provider-specific inference knowledge: default endpoint, supported protocols, credential key lookup order, auth header style, and default headers.

Three profiles are defined:
Four profiles are defined:

| Provider | Default Base URL | Protocols | Auth | Default Headers |
|----------|-----------------|-----------|------|-----------------|
| `openai` | `https://api.openai.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) |
| `anthropic` | `https://api.anthropic.com/v1` | `anthropic_messages`, `model_discovery` | `x-api-key` | `anthropic-version: 2023-06-01` |
| `nvidia` | `https://integrate.api.nvidia.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) |
| `groq` | `https://api.groq.com/openai/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) |

Each profile also defines `credential_key_names` (e.g. `["OPENAI_API_KEY"]`) and `base_url_config_keys` (e.g. `["OPENAI_BASE_URL"]`) used by the gateway to resolve credentials and endpoint overrides from provider records.
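The ordered-key lookup that `credential_key_names` implies can be sketched as a first-match scan over a provider record. The function and record below are illustrative, not the crate's real API:

```rust
use std::collections::HashMap;

/// Resolve a credential from a provider record's key-value pairs by
/// trying a profile's key names in order; the first hit wins.
fn resolve_credential<'a>(
    key_names: &[&str],
    record: &'a HashMap<String, String>,
) -> Option<&'a str> {
    key_names
        .iter()
        .find_map(|key| record.get(*key).map(String::as_str))
}

fn main() {
    let record =
        HashMap::from([("GROQ_API_KEY".to_string(), "gsk-demo".to_string())]);
    // Profile order decides precedence when several keys are present.
    let key = resolve_credential(&["GROQ_API_KEY"], &record);
    println!("{key:?}"); // Some("gsk-demo")
}
```

The same first-match pattern would apply to `base_url_config_keys` when resolving an endpoint override.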

@@ -302,7 +303,7 @@ Cluster inference commands:
- `openshell inference get` -- displays both user and system inference configuration
- `openshell inference get --system` -- displays only the system inference configuration

The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`).
The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, `nvidia`, or `groq`).

Inference writes are verified against the endpoint by default; `--no-verify` is the explicit opt-out for endpoints that are not up yet.

@@ -314,10 +315,11 @@ Files:
- `crates/openshell-providers/src/providers/openai.rs` -- `OpenaiProvider`
- `crates/openshell-providers/src/providers/anthropic.rs` -- `AnthropicProvider`
- `crates/openshell-providers/src/providers/nvidia.rs` -- `NvidiaProvider`
- `crates/openshell-providers/src/providers/groq.rs` -- `GroqProvider`

Provider discovery and inference routing are separate concerns:

- `ProviderPlugin` (in `openshell-providers`) handles credential *discovery* -- scanning environment variables to find API keys.
- `InferenceProviderProfile` (in `openshell-core`) handles how to *use* discovered credentials to make inference API calls.

The `openai`, `anthropic`, and `nvidia` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time.
The `openai`, `anthropic`, `nvidia`, and `groq` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`, `GROQ_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time.
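The env-scan side of discovery can be sketched with an injectable environment lookup, which is also what makes it testable without touching the real process environment (the crate uses a discovery-context trait for the same purpose). The function below is an illustrative sketch, not the real `discover_with_spec`:

```rust
use std::collections::HashMap;

/// Scan a list of environment variable names and collect any that are
/// set. Returns `None` when nothing was found, meaning the provider is
/// not configured on this machine.
fn discover_credentials(
    env_vars: &[&str],
    env: &dyn Fn(&str) -> Option<String>,
) -> Option<HashMap<String, String>> {
    let found: HashMap<String, String> = env_vars
        .iter()
        .filter_map(|name| env(name).map(|value| (name.to_string(), value)))
        .collect();
    if found.is_empty() { None } else { Some(found) }
}

fn main() {
    // Fake environment: only GROQ_API_KEY is set.
    let fake_env =
        |name: &str| (name == "GROQ_API_KEY").then(|| "gsk-test".to_string());
    let creds = discover_credentials(&["GROQ_API_KEY"], &fake_env);
    println!("{creds:?}");
}
```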
20 changes: 20 additions & 0 deletions crates/openshell-core/src/inference.rs
@@ -56,6 +56,13 @@ const OPENAI_PROTOCOLS: &[&str] = &[

const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"];

const GROQ_PROTOCOLS: &[&str] = &[
"openai_chat_completions",
"openai_completions",
"openai_responses",
"model_discovery",
];

static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
provider_type: "openai",
default_base_url: "https://api.openai.com/v1",
@@ -86,6 +93,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
default_headers: &[],
};

static GROQ_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
provider_type: "groq",
default_base_url: "https://api.groq.com/openai/v1",
protocols: GROQ_PROTOCOLS,
credential_key_names: &["GROQ_API_KEY"],
base_url_config_keys: &["GROQ_BASE_URL"],
auth: AuthHeader::Bearer,
default_headers: &[],
};

/// Look up the inference provider profile for a given provider type.
///
/// Returns `None` for provider types that don't support inference routing
@@ -95,6 +112,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
"openai" => Some(&OPENAI_PROFILE),
"anthropic" => Some(&ANTHROPIC_PROFILE),
"nvidia" => Some(&NVIDIA_PROFILE),
"groq" => Some(&GROQ_PROFILE),
_ => None,
}
}
@@ -176,7 +194,9 @@ mod tests {
assert!(profile_for("openai").is_some());
assert!(profile_for("anthropic").is_some());
assert!(profile_for("nvidia").is_some());
assert!(profile_for("groq").is_some());
assert!(profile_for("OpenAI").is_some()); // case insensitive
assert!(profile_for("GROQ").is_some()); // case insensitive
}

#[test]
4 changes: 4 additions & 0 deletions crates/openshell-providers/src/lib.rs
@@ -85,6 +85,7 @@ impl ProviderRegistry {
registry.register(providers::gitlab::GitlabProvider);
registry.register(providers::github::GithubProvider);
registry.register(providers::outlook::OutlookProvider);
registry.register(providers::groq::GroqProvider);
registry
}

@@ -133,6 +134,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> {
"openai" => Some("openai"),
"anthropic" => Some("anthropic"),
"nvidia" => Some("nvidia"),
"groq" => Some("groq"),
"gitlab" | "glab" => Some("gitlab"),
"github" | "gh" => Some("github"),
"outlook" => Some("outlook"),
Expand Down Expand Up @@ -164,6 +166,8 @@ mod tests {
assert_eq!(normalize_provider_type("openai"), Some("openai"));
assert_eq!(normalize_provider_type("anthropic"), Some("anthropic"));
assert_eq!(normalize_provider_type("nvidia"), Some("nvidia"));
assert_eq!(normalize_provider_type("groq"), Some("groq"));
assert_eq!(normalize_provider_type("GROQ"), Some("groq"));
assert_eq!(normalize_provider_type("unknown"), None);
}

46 changes: 46 additions & 0 deletions crates/openshell-providers/src/providers/groq.rs
@@ -0,0 +1,46 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::{
ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec,
};

pub struct GroqProvider;

pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
id: "groq",
credential_env_vars: &["GROQ_API_KEY"],
};

impl ProviderPlugin for GroqProvider {
fn id(&self) -> &'static str {
SPEC.id
}

fn discover_existing(&self) -> Result<Option<crate::DiscoveredProvider>, ProviderError> {
discover_with_spec(&SPEC, &RealDiscoveryContext)
}

fn credential_env_vars(&self) -> &'static [&'static str] {
SPEC.credential_env_vars
}
}

#[cfg(test)]
mod tests {
use super::SPEC;
use crate::discover_with_spec;
use crate::test_helpers::MockDiscoveryContext;

#[test]
fn discovers_groq_env_credentials() {
let ctx = MockDiscoveryContext::new().with_env("GROQ_API_KEY", "gsk-test-key");
let discovered = discover_with_spec(&SPEC, &ctx)
.expect("discovery")
.expect("provider");
assert_eq!(
discovered.credentials.get("GROQ_API_KEY"),
Some(&"gsk-test-key".to_string())
);
}
}
1 change: 1 addition & 0 deletions crates/openshell-providers/src/providers/mod.rs
@@ -7,6 +7,7 @@ pub mod codex;
pub mod generic;
pub mod github;
pub mod gitlab;
pub mod groq;
pub mod nvidia;
pub mod openai;
pub mod opencode;
2 changes: 1 addition & 1 deletion crates/openshell-server/src/inference.rs
@@ -228,7 +228,7 @@ fn resolve_provider_route(provider: &Provider) -> Result<ResolvedProviderRoute,
let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
Status::invalid_argument(format!(
"provider '{name}' has unsupported type '{provider_type}' for cluster inference \
(supported: openai, anthropic, nvidia)",
(supported: openai, anthropic, nvidia, groq)",
name = provider.name
))
})?;
2 changes: 1 addition & 1 deletion docs/inference/index.md
@@ -44,7 +44,7 @@ If code calls an external inference host directly, that traffic is evaluated onl
|---|---|
| Credentials | No sandbox API keys needed. Credentials come from the configured provider record. |
| Configuration | One provider and one model define sandbox inference for the active gateway. Every sandbox on that gateway sees the same `inference.local` backend. |
| Provider support | NVIDIA, any OpenAI-compatible provider, and Anthropic all work through the same endpoint. |
| Provider support | NVIDIA, any OpenAI-compatible provider (including Groq), and Anthropic all work through the same endpoint. |
| Hot-refresh | OpenShell picks up provider credential changes and inference updates without recreating sandboxes. Changes propagate within about 5 seconds by default. |

## Supported API Patterns
4 changes: 2 additions & 2 deletions mise.toml
@@ -13,9 +13,9 @@ experimental = true

[tools]
python = "3.13.12"
rust = "stable"
rust = "1.94.0"
kubectl = "1.35.1"
uv = "0.10.2"
uv = "0.10.12"
protoc = "29.6"
helm = "4.1.1"
"ubi:mozilla/sccache" = { version = "0.14.0", matching = "sccache-v" }