From 41692dad0a7786f81ab2c775c5e5b628ac75a1be Mon Sep 17 00:00:00 2001 From: Khai Le Date: Fri, 20 Mar 2026 20:14:10 +0700 Subject: [PATCH] feat(provider): Add Groq as a supported inference provider --- architecture/README.md | 4 +- architecture/inference-routing.md | 8 ++-- crates/openshell-core/src/inference.rs | 20 ++++++++ crates/openshell-providers/src/lib.rs | 4 ++ .../openshell-providers/src/providers/groq.rs | 46 +++++++++++++++++++ .../openshell-providers/src/providers/mod.rs | 1 + crates/openshell-server/src/inference.rs | 2 +- docs/inference/index.md | 2 +- mise.toml | 4 +- 9 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 crates/openshell-providers/src/providers/groq.rs diff --git a/architecture/README.md b/architecture/README.md index d65b9b23..0036bb3b 100644 --- a/architecture/README.md +++ b/architecture/README.md @@ -34,7 +34,7 @@ flowchart TB subgraph EXT["External Services"] HOSTS["Allowed Hosts (github.com, api.anthropic.com, ...)"] CREDS["Provider APIs (Claude, GitHub, GitLab, ...)"] - BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, local)"] + BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, Groq, local)"] end CLI -- "gRPC / HTTPS" --> SERVER @@ -155,7 +155,7 @@ AI agents typically need credentials to access external services -- an API key f The provider system handles: -- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, GitHub, GitLab, and others. +- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, Groq, GitHub, GitLab, and others. 
- **Secure storage**: Credentials are stored on the gateway, separate from sandbox definitions. They never appear in Kubernetes pod specifications. - **Runtime injection**: When a sandbox starts, the supervisor process fetches the credentials from the gateway via gRPC and injects them as environment variables into every process it spawns (both the initial agent process and any SSH sessions). - **CLI management**: Users can create, update, list, and delete providers through standard CLI commands. diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md index 0d3a95af..a018925b 100644 --- a/architecture/inference-routing.md +++ b/architecture/inference-routing.md @@ -41,13 +41,14 @@ File: `crates/openshell-core/src/inference.rs` `InferenceProviderProfile` is the single source of truth for provider-specific inference knowledge: default endpoint, supported protocols, credential key lookup order, auth header style, and default headers. -Three profiles are defined: +Four profiles are defined: | Provider | Default Base URL | Protocols | Auth | Default Headers | |----------|-----------------|-----------|------|-----------------| | `openai` | `https://api.openai.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | | `anthropic` | `https://api.anthropic.com/v1` | `anthropic_messages`, `model_discovery` | `x-api-key` | `anthropic-version: 2023-06-01` | | `nvidia` | `https://integrate.api.nvidia.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | +| `groq` | `https://api.groq.com/openai/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | Each profile also defines `credential_key_names` (e.g. `["OPENAI_API_KEY"]`) and `base_url_config_keys` (e.g. 
`["OPENAI_BASE_URL"]`) used by the gateway to resolve credentials and endpoint overrides from provider records. @@ -302,7 +303,7 @@ Cluster inference commands: - `openshell inference get` -- displays both user and system inference configuration - `openshell inference get --system` -- displays only the system inference configuration -The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`). +The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, `nvidia`, or `groq`). Inference writes verify by default. `--no-verify` is the explicit opt-out for endpoints that are not up yet. @@ -314,10 +315,11 @@ Files: - `crates/openshell-providers/src/providers/openai.rs` -- `OpenaiProvider` - `crates/openshell-providers/src/providers/anthropic.rs` -- `AnthropicProvider` - `crates/openshell-providers/src/providers/nvidia.rs` -- `NvidiaProvider` +- `crates/openshell-providers/src/providers/groq.rs` -- `GroqProvider` Provider discovery and inference routing are separate concerns: - `ProviderPlugin` (in `openshell-providers`) handles credential *discovery* -- scanning environment variables to find API keys. - `InferenceProviderProfile` (in `openshell-core`) handles how to *use* discovered credentials to make inference API calls. -The `openai`, `anthropic`, and `nvidia` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time. 
+The `openai`, `anthropic`, `nvidia`, and `groq` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`, `GROQ_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time. diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index a06c427f..41d1d6dd 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -56,6 +56,13 @@ const OPENAI_PROTOCOLS: &[&str] = &[ const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; +const GROQ_PROTOCOLS: &[&str] = &[ + "openai_chat_completions", + "openai_completions", + "openai_responses", + "model_discovery", +]; + static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "openai", default_base_url: "https://api.openai.com/v1", @@ -86,6 +93,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { default_headers: &[], }; +static GROQ_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "groq", + default_base_url: "https://api.groq.com/openai/v1", + protocols: GROQ_PROTOCOLS, + credential_key_names: &["GROQ_API_KEY"], + base_url_config_keys: &["GROQ_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], +}; + /// Look up the inference provider profile for a given provider type. 
/// /// Returns `None` for provider types that don't support inference routing @@ -95,6 +112,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "groq" => Some(&GROQ_PROFILE), _ => None, } } @@ -176,7 +194,9 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("groq").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive + assert!(profile_for("GROQ").is_some()); // case insensitive } #[test] diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 143466d2..ff03f195 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -85,6 +85,7 @@ impl ProviderRegistry { registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::groq::GroqProvider); registry } @@ -133,6 +134,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "openai" => Some("openai"), "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), + "groq" => Some("groq"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), @@ -164,6 +166,8 @@ mod tests { assert_eq!(normalize_provider_type("openai"), Some("openai")); assert_eq!(normalize_provider_type("anthropic"), Some("anthropic")); assert_eq!(normalize_provider_type("nvidia"), Some("nvidia")); + assert_eq!(normalize_provider_type("groq"), Some("groq")); + assert_eq!(normalize_provider_type("GROQ"), Some("groq")); assert_eq!(normalize_provider_type("unknown"), None); } diff --git a/crates/openshell-providers/src/providers/groq.rs b/crates/openshell-providers/src/providers/groq.rs new file mode 
100644 index 00000000..079e11b5 --- /dev/null +++ b/crates/openshell-providers/src/providers/groq.rs @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct GroqProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "groq", + credential_env_vars: &["GROQ_API_KEY"], +}; + +impl ProviderPlugin for GroqProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_groq_env_credentials() { + let ctx = MockDiscoveryContext::new().with_env("GROQ_API_KEY", "gsk-test-key"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("GROQ_API_KEY"), + Some(&"gsk-test-key".to_string()) + ); + } +} diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index 8ab52ed9..1468501f 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -7,6 +7,7 @@ pub mod codex; pub mod generic; pub mod github; pub mod gitlab; +pub mod groq; pub mod nvidia; pub mod openai; pub mod opencode; diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 78b95944..ea0eb1ec 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -228,7 +228,7 @@ fn resolve_provider_route(provider: &Provider) -> 
Result