From 41692dad0a7786f81ab2c775c5e5b628ac75a1be Mon Sep 17 00:00:00 2001 From: Khai Le Date: Fri, 20 Mar 2026 20:14:10 +0700 Subject: [PATCH] feat(provider): Add Groq as a supported inference provider --- architecture/README.md | 4 +- architecture/inference-routing.md | 8 ++-- crates/openshell-core/src/inference.rs | 20 ++++++++ crates/openshell-providers/src/lib.rs | 4 ++ .../openshell-providers/src/providers/groq.rs | 46 +++++++++++++++++++ .../openshell-providers/src/providers/mod.rs | 1 + crates/openshell-server/src/inference.rs | 2 +- docs/inference/index.md | 2 +- mise.toml | 4 +- 9 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 crates/openshell-providers/src/providers/groq.rs diff --git a/architecture/README.md b/architecture/README.md index d65b9b23..0036bb3b 100644 --- a/architecture/README.md +++ b/architecture/README.md @@ -34,7 +34,7 @@ flowchart TB subgraph EXT["External Services"] HOSTS["Allowed Hosts (github.com, api.anthropic.com, ...)"] CREDS["Provider APIs (Claude, GitHub, GitLab, ...)"] - BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, local)"] + BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, Groq, local)"] end CLI -- "gRPC / HTTPS" --> SERVER @@ -155,7 +155,7 @@ AI agents typically need credentials to access external services -- an API key f The provider system handles: -- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, GitHub, GitLab, and others. +- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, Groq, GitHub, GitLab, and others. 
- **Secure storage**: Credentials are stored on the gateway, separate from sandbox definitions. They never appear in Kubernetes pod specifications. - **Runtime injection**: When a sandbox starts, the supervisor process fetches the credentials from the gateway via gRPC and injects them as environment variables into every process it spawns (both the initial agent process and any SSH sessions). - **CLI management**: Users can create, update, list, and delete providers through standard CLI commands. diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md index 0d3a95af..a018925b 100644 --- a/architecture/inference-routing.md +++ b/architecture/inference-routing.md @@ -41,13 +41,14 @@ File: `crates/openshell-core/src/inference.rs` `InferenceProviderProfile` is the single source of truth for provider-specific inference knowledge: default endpoint, supported protocols, credential key lookup order, auth header style, and default headers. -Three profiles are defined: +Four profiles are defined: | Provider | Default Base URL | Protocols | Auth | Default Headers | |----------|-----------------|-----------|------|-----------------| | `openai` | `https://api.openai.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | | `anthropic` | `https://api.anthropic.com/v1` | `anthropic_messages`, `model_discovery` | `x-api-key` | `anthropic-version: 2023-06-01` | | `nvidia` | `https://integrate.api.nvidia.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | +| `groq` | `https://api.groq.com/openai/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | Each profile also defines `credential_key_names` (e.g. `["OPENAI_API_KEY"]`) and `base_url_config_keys` (e.g. 
`["OPENAI_BASE_URL"]`) used by the gateway to resolve credentials and endpoint overrides from provider records. @@ -302,7 +303,7 @@ Cluster inference commands: - `openshell inference get` -- displays both user and system inference configuration - `openshell inference get --system` -- displays only the system inference configuration -The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`). +The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, `nvidia`, or `groq`). Inference writes verify by default. `--no-verify` is the explicit opt-out for endpoints that are not up yet. @@ -314,10 +315,11 @@ Files: - `crates/openshell-providers/src/providers/openai.rs` -- `OpenaiProvider` - `crates/openshell-providers/src/providers/anthropic.rs` -- `AnthropicProvider` - `crates/openshell-providers/src/providers/nvidia.rs` -- `NvidiaProvider` +- `crates/openshell-providers/src/providers/groq.rs` -- `GroqProvider` Provider discovery and inference routing are separate concerns: - `ProviderPlugin` (in `openshell-providers`) handles credential *discovery* -- scanning environment variables to find API keys. - `InferenceProviderProfile` (in `openshell-core`) handles how to *use* discovered credentials to make inference API calls. -The `openai`, `anthropic`, and `nvidia` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time. 
+The `openai`, `anthropic`, `nvidia`, and `groq` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`, `GROQ_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time. diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index a06c427f..41d1d6dd 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -56,6 +56,13 @@ const OPENAI_PROTOCOLS: &[&str] = &[ const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; +const GROQ_PROTOCOLS: &[&str] = &[ + "openai_chat_completions", + "openai_completions", + "openai_responses", + "model_discovery", +]; + static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "openai", default_base_url: "https://api.openai.com/v1", @@ -86,6 +93,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { default_headers: &[], }; +static GROQ_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "groq", + default_base_url: "https://api.groq.com/openai/v1", + protocols: GROQ_PROTOCOLS, + credential_key_names: &["GROQ_API_KEY"], + base_url_config_keys: &["GROQ_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], +}; + /// Look up the inference provider profile for a given provider type. 
/// /// Returns `None` for provider types that don't support inference routing @@ -95,6 +112,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "groq" => Some(&GROQ_PROFILE), _ => None, } } @@ -176,7 +194,9 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("groq").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive + assert!(profile_for("GROQ").is_some()); // case insensitive } #[test] diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 143466d2..ff03f195 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -85,6 +85,7 @@ impl ProviderRegistry { registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::groq::GroqProvider); registry } @@ -133,6 +134,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "openai" => Some("openai"), "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), + "groq" => Some("groq"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), @@ -164,6 +166,8 @@ mod tests { assert_eq!(normalize_provider_type("openai"), Some("openai")); assert_eq!(normalize_provider_type("anthropic"), Some("anthropic")); assert_eq!(normalize_provider_type("nvidia"), Some("nvidia")); + assert_eq!(normalize_provider_type("groq"), Some("groq")); + assert_eq!(normalize_provider_type("GROQ"), Some("groq")); assert_eq!(normalize_provider_type("unknown"), None); } diff --git a/crates/openshell-providers/src/providers/groq.rs b/crates/openshell-providers/src/providers/groq.rs new file mode 
100644 index 00000000..079e11b5 --- /dev/null +++ b/crates/openshell-providers/src/providers/groq.rs @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct GroqProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "groq", + credential_env_vars: &["GROQ_API_KEY"], +}; + +impl ProviderPlugin for GroqProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_groq_env_credentials() { + let ctx = MockDiscoveryContext::new().with_env("GROQ_API_KEY", "gsk-test-key"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("GROQ_API_KEY"), + Some(&"gsk-test-key".to_string()) + ); + } +} diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index 8ab52ed9..1468501f 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -7,6 +7,7 @@ pub mod codex; pub mod generic; pub mod github; pub mod gitlab; +pub mod groq; pub mod nvidia; pub mod openai; pub mod opencode; diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 78b95944..ea0eb1ec 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -228,7 +228,7 @@ fn resolve_provider_route(provider: &Provider) -> 
Result